Skip to content

Commit 9d41682

Browse files
authored
Fix utilization_policy in profiles (#2385)
Fixes: #2383
1 parent 2a5fb82 commit 9d41682

File tree

6 files changed: 20 additions (+20) and 6 deletions (-6)

src/dstack/_internal/cli/services/configurators/run.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ def apply_configuration(
9595
reservation=profile.reservation,
9696
spot_policy=profile.spot_policy,
9797
retry_policy=profile.retry_policy,
98+
utilization_policy=profile.utilization_policy,
9899
max_duration=profile.max_duration,
99100
stop_duration=profile.stop_duration,
100101
max_price=profile.max_price,

src/dstack/_internal/core/models/runs.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
ProfileRetryPolicy,
2424
RetryEvent,
2525
SpotPolicy,
26+
UtilizationPolicy,
2627
)
2728
from dstack._internal.core.models.repos import AnyRunRepoData
2829
from dstack._internal.core.models.resources import Memory, ResourcesSpec
@@ -192,6 +193,7 @@ class JobSpec(CoreModel):
192193
single_branch: Optional[bool] = None
193194
max_duration: Optional[int]
194195
stop_duration: Optional[int] = None
196+
utilization_policy: Optional[UtilizationPolicy] = None
195197
registry_auth: Optional[RegistryAuth]
196198
requirements: Requirements
197199
retry: Optional[Retry]

src/dstack/_internal/server/background/tasks/process_running_jobs.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -348,7 +348,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
348348
job_model.termination_reason = JobTerminationReason.GATEWAY_ERROR
349349

350350
if job_model.status == JobStatus.RUNNING:
351-
await _check_gpu_utilization(session, run_model, job_model)
351+
await _check_gpu_utilization(session, job_model, job)
352352

353353
job_model.last_processed_at = common_utils.get_current_datetime()
354354
await session.commit()
@@ -679,10 +679,8 @@ def _terminate_if_inactivity_duration_exceeded(
679679
)
680680

681681

682-
async def _check_gpu_utilization(
683-
session: AsyncSession, run_model: RunModel, job_model: JobModel
684-
) -> None:
685-
policy = RunSpec.__response__.parse_raw(run_model.run_spec).configuration.utilization_policy
682+
async def _check_gpu_utilization(session: AsyncSession, job_model: JobModel, job: Job) -> None:
683+
policy = job.job_spec.utilization_policy
686684
if policy is None:
687685
return
688686
after = common_utils.get_current_datetime() - timedelta(seconds=policy.time_window)

src/dstack/_internal/server/services/jobs/configurators/base.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,11 @@
1313
PythonVersion,
1414
RunConfigurationType,
1515
)
16-
from dstack._internal.core.models.profiles import DEFAULT_STOP_DURATION, SpotPolicy
16+
from dstack._internal.core.models.profiles import (
17+
DEFAULT_STOP_DURATION,
18+
SpotPolicy,
19+
UtilizationPolicy,
20+
)
1721
from dstack._internal.core.models.runs import (
1822
AppSpec,
1923
JobSpec,
@@ -113,6 +117,7 @@ async def _get_job_spec(
113117
single_branch=self._single_branch(),
114118
max_duration=self._max_duration(),
115119
stop_duration=self._stop_duration(),
120+
utilization_policy=self._utilization_policy(),
116121
registry_auth=self._registry_auth(),
117122
requirements=self._requirements(),
118123
retry=self._retry(),
@@ -201,6 +206,9 @@ def _stop_duration(self) -> Optional[int]:
201206
# pydantic validator ensures this is int
202207
return self.run_spec.merged_profile.stop_duration
203208

209+
def _utilization_policy(self) -> Optional[UtilizationPolicy]:
210+
return self.run_spec.merged_profile.utilization_policy
211+
204212
def _registry_auth(self) -> Optional[RegistryAuth]:
205213
return self.run_spec.configuration.registry_auth
206214

src/dstack/api/_public/runs.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
ProfileRetryPolicy,
2626
SpotPolicy,
2727
TerminationPolicy,
28+
UtilizationPolicy,
2829
)
2930
from dstack._internal.core.models.repos.base import Repo
3031
from dstack._internal.core.models.resources import ResourcesSpec
@@ -485,6 +486,7 @@ def get_plan(
485486
resources: Optional[ResourcesSpec] = None,
486487
spot_policy: Optional[SpotPolicy] = None,
487488
retry_policy: Optional[ProfileRetryPolicy] = None,
489+
utilization_policy: Optional[UtilizationPolicy] = None,
488490
max_duration: Optional[Union[int, str]] = None,
489491
max_price: Optional[float] = None,
490492
working_dir: Optional[str] = None,
@@ -535,6 +537,7 @@ def get_plan(
535537
spot_policy=spot_policy,
536538
retry=None,
537539
retry_policy=retry_policy,
540+
utilization_policy=utilization_policy,
538541
max_duration=max_duration,
539542
stop_duration=stop_duration,
540543
max_price=max_price,

src/tests/_internal/server/routers/test_runs.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,7 @@ def get_dev_env_run_plan_dict(
198198
"single_branch": False,
199199
"max_duration": None,
200200
"stop_duration": 300,
201+
"utilization_policy": None,
201202
"registry_auth": None,
202203
"requirements": {
203204
"resources": {
@@ -357,6 +358,7 @@ def get_dev_env_run_dict(
357358
"single_branch": False,
358359
"max_duration": None,
359360
"stop_duration": 300,
361+
"utilization_policy": None,
360362
"registry_auth": None,
361363
"requirements": {
362364
"resources": {

0 commit comments

Comments
 (0)