|
14 | 14 | from dstack._internal.core.models.files import FilePathMapping |
15 | 15 | from dstack._internal.core.models.fleets import FleetConfiguration |
16 | 16 | from dstack._internal.core.models.gateways import GatewayConfiguration |
17 | | -from dstack._internal.core.models.profiles import ProfileParams, parse_off_duration |
| 17 | +from dstack._internal.core.models.profiles import ProfileParams, parse_duration, parse_off_duration |
18 | 18 | from dstack._internal.core.models.resources import Range, ResourcesSpec |
19 | 19 | from dstack._internal.core.models.services import AnyModel, OpenAIChatModel |
20 | 20 | from dstack._internal.core.models.unix import UnixUser |
|
# --- Run settings ---
# NOTE(review): "PRIOTIRY" looks like a misspelling of "PRIORITY"; the name is
# kept as-is because code outside this view may reference it.
RUN_PRIOTIRY_MAX = 100
RUN_PRIORITY_DEFAULT = 0
DEFAULT_REPO_DIR = "/workflow"

# --- Probe limits (enforced by the ProbeConfig validators) ---
# Lower bounds for the parsed `timeout` / `interval` values; the validator
# error messages report them with an `s` (seconds) suffix.
MIN_PROBE_TIMEOUT = 1
MIN_PROBE_INTERVAL = 1
# Upper bound on the number of characters in a probe URL.
MAX_PROBE_URL_LEN = 2048

# --- Probe defaults (quoted in the ProbeConfig field descriptions) ---
DEFAULT_PROBE_URL = "/"
DEFAULT_PROBE_TIMEOUT = 10
DEFAULT_PROBE_INTERVAL = 15
DEFAULT_PROBE_READY_AFTER = 1
35 | 42 |
|
36 | 43 |
|
37 | 44 | class RunConfigurationType(str, Enum): |
@@ -162,6 +169,74 @@ class RateLimit(CoreModel): |
162 | 169 | ] = 0 |
163 | 170 |
|
164 | 171 |
|
class ProbeConfig(CoreModel):
    # Settings for a single probe. Every tunable is optional and is stored as
    # `None` when omitted; the defaults quoted in the field descriptions are
    # presumably applied by whatever consumes this config (not visible here).

    # Only HTTP probes exist today; further kinds (e.g. `exec`) are anticipated.
    type: Literal["http"]
    url: Annotated[
        Optional[str],
        Field(description=f"The URL to request. Defaults to `{DEFAULT_PROBE_URL}`"),
    ] = None
    timeout: Annotated[
        Optional[Union[int, str]],
        Field(
            description=(
                "Maximum amount of time the HTTP request is allowed to take."
                f" Defaults to `{DEFAULT_PROBE_TIMEOUT}s`"
            )
        ),
    ] = None
    interval: Annotated[
        Optional[Union[int, str]],
        Field(
            description=(
                "Minimum amount of time between the end of one probe"
                " execution and the start of the next."
                f" Defaults to `{DEFAULT_PROBE_INTERVAL}s`"
            )
        ),
    ] = None
    ready_after: Annotated[
        Optional[int],
        Field(
            description=(
                "The number of consecutive successful probe executions required"
                " for the replica to be considered ready."
                " Used during rolling deployments."
                f" Defaults to `{DEFAULT_PROBE_READY_AFTER}`"
            ),
            ge=1,
        ),
    ] = None

    class Config:
        # Frozen models are immutable and hashable; hashability is what lets
        # lists of probes be de-duplicated with `set(...)`.
        frozen = True

    @validator("url")
    def validate_url(cls, v: Optional[str]) -> Optional[str]:
        """Check a probe URL; `None` is passed through unchanged.

        A provided value must begin with `/`, be at most `MAX_PROBE_URL_LEN`
        characters long and consist of printable characters only. The checks
        run in that order and the first failure is reported.

        Raises:
            ValueError: if any of the checks fails.
        """
        if v is None:
            return None
        problem: Optional[str] = None
        if not v.startswith("/"):
            problem = "Must start with `/`"
        elif len(v) > MAX_PROBE_URL_LEN:
            problem = f"Cannot be longer than {MAX_PROBE_URL_LEN} characters"
        elif not v.isprintable():
            problem = "Cannot contain non-printable characters"
        if problem is not None:
            raise ValueError(problem)
        return v

    @validator("timeout")
    def parse_timeout(cls, v: Optional[Union[int, str]]) -> Optional[int]:
        """Normalise `timeout` through `parse_duration`; `None` stays `None`.

        Raises:
            ValueError: if the parsed value is below `MIN_PROBE_TIMEOUT`.
        """
        if v is None:
            return None
        # presumably a number of seconds, judging by the `s` suffix in the error
        duration = parse_duration(v)
        too_short = duration < MIN_PROBE_TIMEOUT
        if too_short:
            raise ValueError(f"Probe timeout cannot be shorter than {MIN_PROBE_TIMEOUT}s")
        return duration

    @validator("interval")
    def parse_interval(cls, v: Optional[Union[int, str]]) -> Optional[int]:
        """Normalise `interval` through `parse_duration`; `None` stays `None`.

        Raises:
            ValueError: if the parsed value is below `MIN_PROBE_INTERVAL`.
        """
        if v is None:
            return None
        # presumably a number of seconds, judging by the `s` suffix in the error
        duration = parse_duration(v)
        too_short = duration < MIN_PROBE_INTERVAL
        if too_short:
            raise ValueError(f"Probe interval cannot be shorter than {MIN_PROBE_INTERVAL}s")
        return duration
| 239 | + |
165 | 240 | class BaseRunConfiguration(CoreModel): |
166 | 241 | type: Literal["none"] |
167 | 242 | name: Annotated[ |
@@ -448,6 +523,10 @@ class ServiceConfigurationParams(CoreModel): |
448 | 523 | Field(description="The auto-scaling rules. Required if `replicas` is set to a range"), |
449 | 524 | ] = None |
450 | 525 | rate_limits: Annotated[list[RateLimit], Field(description="Rate limiting rules")] = [] |
| 526 | + probes: Annotated[ |
| 527 | + list[ProbeConfig], |
| 528 | + Field(description="List of probes used to determine job health"), |
| 529 | + ] = [] |
451 | 530 |
|
452 | 531 | @validator("port") |
453 | 532 | def convert_port(cls, v) -> PortMapping: |
@@ -511,6 +590,16 @@ def validate_rate_limits(cls, v: list[RateLimit]) -> list[RateLimit]: |
511 | 590 | ) |
512 | 591 | return v |
513 | 592 |
|
@validator("probes")
def validate_probes(cls, v: list[ProbeConfig]) -> list[ProbeConfig]:
    """Reject probe lists that contain the same probe more than once.

    Raises:
        ValueError: if at least two entries of `v` compare equal.
    """
    # Deliberately hand-written rather than Field(unique_items=True): that
    # option runs into https://github.com/pydantic/pydantic/issues/3765, and
    # with it gen_schema_reference.py can no longer work out the type of
    # ServiceConfiguration.probes to insert the correct hyperlink.
    distinct_probes = set(v)
    has_duplicates = len(distinct_probes) != len(v)
    if has_duplicates:
        raise ValueError("Probes must be unique")
    return v
| 602 | + |
514 | 603 |
|
515 | 604 | class ServiceConfiguration( |
516 | 605 | ProfileParams, BaseRunConfigurationWithCommands, ServiceConfigurationParams |
|
0 commit comments