Skip to content

Commit e570e8d

Browse files
authored
Introduce effective_spec for runs and fleets (#2579)
* Introduce effective_spec for runs and fleets * Fix spec copy
1 parent 03a26e2 commit e570e8d

File tree

8 files changed

+137
-117
lines changed

8 files changed

+137
-117
lines changed

src/dstack/_internal/cli/services/configurators/fleet.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -246,32 +246,34 @@ def _print_plan_header(plan: FleetPlan):
246246
def th(s: str) -> str:
247247
return f"[bold]{s}[/bold]"
248248

249+
spec = plan.get_effective_spec()
250+
249251
configuration_table = Table(box=None, show_header=False)
250252
configuration_table.add_column(no_wrap=True) # key
251253
configuration_table.add_column() # value
252254

253255
configuration_table.add_row(th("Project"), plan.project_name)
254256
configuration_table.add_row(th("User"), plan.user)
255-
configuration_table.add_row(th("Configuration"), plan.spec.configuration_path or "?")
256-
configuration_table.add_row(th("Type"), plan.spec.configuration.type)
257+
configuration_table.add_row(th("Configuration"), spec.configuration_path or "?")
258+
configuration_table.add_row(th("Type"), spec.configuration.type)
257259

258260
fleet_type = "cloud"
259-
nodes = plan.spec.configuration.nodes or "-"
260-
placement = plan.spec.configuration.placement or InstanceGroupPlacement.ANY
261-
reservation = plan.spec.configuration.reservation
261+
nodes = spec.configuration.nodes or "-"
262+
placement = spec.configuration.placement or InstanceGroupPlacement.ANY
263+
reservation = spec.configuration.reservation
262264
backends = None
263-
if plan.spec.configuration.backends is not None:
264-
backends = ", ".join(b.value for b in plan.spec.configuration.backends)
265+
if spec.configuration.backends is not None:
266+
backends = ", ".join(b.value for b in spec.configuration.backends)
265267
regions = None
266-
if plan.spec.configuration.regions is not None:
267-
regions = ", ".join(plan.spec.configuration.regions)
268+
if spec.configuration.regions is not None:
269+
regions = ", ".join(spec.configuration.regions)
268270
resources = None
269-
if plan.spec.configuration.resources is not None:
270-
resources = plan.spec.configuration.resources.pretty_format()
271-
spot_policy = plan.spec.merged_profile.spot_policy
272-
if plan.spec.configuration.ssh_config is not None:
271+
if spec.configuration.resources is not None:
272+
resources = spec.configuration.resources.pretty_format()
273+
spot_policy = spec.merged_profile.spot_policy
274+
if spec.configuration.ssh_config is not None:
273275
fleet_type = "ssh"
274-
nodes = len(plan.spec.configuration.ssh_config.hosts)
276+
nodes = len(spec.configuration.ssh_config.hosts)
275277
resources = None
276278
spot_policy = None
277279

src/dstack/_internal/cli/utils/run.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
def print_run_plan(
2828
run_plan: RunPlan, max_offers: Optional[int] = None, include_run_properties: bool = True
2929
):
30+
run_spec = run_plan.get_effective_run_spec()
3031
job_plan = run_plan.job_plans[0]
3132

3233
props = Table(box=None, show_header=False)
@@ -43,18 +44,18 @@ def print_run_plan(
4344
)
4445
if include_run_properties:
4546
inactivity_duration = None
46-
if isinstance(run_plan.run_spec.configuration, DevEnvironmentConfiguration):
47+
if isinstance(run_spec.configuration, DevEnvironmentConfiguration):
4748
inactivity_duration = "-"
48-
if isinstance(run_plan.run_spec.configuration.inactivity_duration, int):
49+
if isinstance(run_spec.configuration.inactivity_duration, int):
4950
inactivity_duration = format_pretty_duration(
50-
run_plan.run_spec.configuration.inactivity_duration
51+
run_spec.configuration.inactivity_duration
5152
)
5253
if job_plan.job_spec.retry is None:
5354
retry = "-"
5455
else:
5556
retry = escape(job_plan.job_spec.retry.pretty_format())
5657

57-
profile = run_plan.run_spec.merged_profile
58+
profile = run_spec.merged_profile
5859
creation_policy = profile.creation_policy
5960
# FIXME: This assumes the default idle_duration is the same for client and server.
6061
# If the server changes idle_duration, old clients will see incorrect value.
@@ -79,8 +80,8 @@ def th(s: str) -> str:
7980
props.add_row(th("Project"), run_plan.project_name)
8081
props.add_row(th("User"), run_plan.user)
8182
if include_run_properties:
82-
props.add_row(th("Configuration"), run_plan.run_spec.configuration_path)
83-
props.add_row(th("Type"), run_plan.run_spec.configuration.type)
83+
props.add_row(th("Configuration"), run_spec.configuration_path)
84+
props.add_row(th("Type"), run_spec.configuration.type)
8485
props.add_row(th("Resources"), pretty_req)
8586
props.add_row(th("Spot policy"), spot_policy)
8687
props.add_row(th("Max price"), max_price)
@@ -91,7 +92,7 @@ def th(s: str) -> str:
9192
props.add_row(th("Max duration"), max_duration)
9293
if inactivity_duration is not None: # None means n/a
9394
props.add_row(th("Inactivity duration"), inactivity_duration)
94-
props.add_row(th("Reservation"), run_plan.run_spec.configuration.reservation or "-")
95+
props.add_row(th("Reservation"), run_spec.configuration.reservation or "-")
9596

9697
offers = Table(box=None)
9798
offers.add_column("#")

src/dstack/_internal/core/models/fleets.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,11 +312,17 @@ class FleetPlan(CoreModel):
312312
project_name: str
313313
user: str
314314
spec: FleetSpec
315+
effective_spec: Optional[FleetSpec] = None
315316
current_resource: Optional[Fleet] = None
316317
offers: List[InstanceOfferWithAvailability]
317318
total_offers: int
318319
max_offer_price: Optional[float] = None
319320

321+
def get_effective_spec(self) -> FleetSpec:
322+
if self.effective_spec is not None:
323+
return self.effective_spec
324+
return self.spec
325+
320326

321327
class ApplyFleetPlanInput(CoreModel):
322328
spec: FleetSpec

src/dstack/_internal/core/models/runs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,10 +455,16 @@ class RunPlan(CoreModel):
455455
project_name: str
456456
user: str
457457
run_spec: RunSpec
458+
effective_run_spec: Optional[RunSpec] = None
458459
job_plans: List[JobPlan]
459460
current_resource: Optional[Run] = None
460461
action: ApplyAction
461462

463+
def get_effective_run_spec(self) -> RunSpec:
464+
if self.effective_run_spec is not None:
465+
return self.effective_run_spec
466+
return self.run_spec
467+
462468

463469
class ApplyRunPlanInput(CoreModel):
464470
run_spec: RunSpec

src/dstack/_internal/server/services/fleets.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -234,32 +234,34 @@ async def get_plan(
234234
user: UserModel,
235235
spec: FleetSpec,
236236
) -> FleetPlan:
237+
effective_spec = FleetSpec.parse_obj(spec.dict())
237238
current_fleet: Optional[Fleet] = None
238239
current_fleet_id: Optional[uuid.UUID] = None
239-
if spec.configuration.name is not None:
240+
if effective_spec.configuration.name is not None:
240241
current_fleet_model = await get_project_fleet_model_by_name(
241-
session=session, project=project, name=spec.configuration.name
242+
session=session, project=project, name=effective_spec.configuration.name
242243
)
243244
if current_fleet_model is not None:
244245
current_fleet = fleet_model_to_fleet(current_fleet_model)
245246
current_fleet_id = current_fleet_model.id
246-
await _check_ssh_hosts_not_yet_added(session, spec, current_fleet_id)
247+
await _check_ssh_hosts_not_yet_added(session, effective_spec, current_fleet_id)
247248

248249
offers = []
249-
if spec.configuration.ssh_config is None:
250+
if effective_spec.configuration.ssh_config is None:
250251
offers_with_backends = await get_create_instance_offers(
251252
project=project,
252-
profile=spec.merged_profile,
253-
requirements=_get_fleet_requirements(spec),
254-
fleet_spec=spec,
255-
blocks=spec.configuration.blocks,
253+
profile=effective_spec.merged_profile,
254+
requirements=_get_fleet_requirements(effective_spec),
255+
fleet_spec=effective_spec,
256+
blocks=effective_spec.configuration.blocks,
256257
)
257258
offers = [offer for _, offer in offers_with_backends]
258-
_remove_fleet_spec_sensitive_info(spec)
259+
_remove_fleet_spec_sensitive_info(effective_spec)
259260
plan = FleetPlan(
260261
project_name=project.name,
261262
user=user.name,
262263
spec=spec,
264+
effective_spec=effective_spec,
263265
current_resource=current_fleet,
264266
offers=offers[:50],
265267
total_offers=len(offers),
@@ -335,7 +337,7 @@ async def create_fleet(
335337

336338
lock_namespace = f"fleet_names_{project.name}"
337339
if get_db().dialect_name == "sqlite":
338-
# Start new transaction to see commited changes after lock
340+
# Start new transaction to see committed changes after lock
339341
await session.commit()
340342
elif get_db().dialect_name == "postgresql":
341343
await session.execute(

src/dstack/_internal/server/services/runs.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -279,45 +279,44 @@ async def get_plan(
279279
run_spec: RunSpec,
280280
max_offers: Optional[int],
281281
) -> RunPlan:
282-
_validate_run_spec_and_set_defaults(run_spec)
282+
effective_run_spec = RunSpec.parse_obj(run_spec.dict())
283+
_validate_run_spec_and_set_defaults(effective_run_spec)
283284

284-
profile = run_spec.merged_profile
285+
profile = effective_run_spec.merged_profile
285286
creation_policy = profile.creation_policy
286287

287288
current_resource = None
288289
action = ApplyAction.CREATE
289-
if run_spec.run_name is not None:
290+
if effective_run_spec.run_name is not None:
290291
current_resource = await get_run_by_name(
291292
session=session,
292293
project=project,
293-
run_name=run_spec.run_name,
294+
run_name=effective_run_spec.run_name,
294295
)
295296
if (
296297
current_resource is not None
297298
and not current_resource.status.is_finished()
298-
and _can_update_run_spec(current_resource.run_spec, run_spec)
299+
and _can_update_run_spec(current_resource.run_spec, effective_run_spec)
299300
):
300301
action = ApplyAction.UPDATE
301302

302-
# TODO(egor-s): do we need to generate all replicas here?
303-
jobs = await get_jobs_from_run_spec(run_spec, replica_num=0)
303+
jobs = await get_jobs_from_run_spec(effective_run_spec, replica_num=0)
304304

305305
volumes = await get_job_configured_volumes(
306306
session=session,
307307
project=project,
308-
run_spec=run_spec,
308+
run_spec=effective_run_spec,
309309
job_num=0,
310310
)
311311

312312
pool_offers = await _get_pool_offers(
313313
session=session,
314314
project=project,
315-
run_spec=run_spec,
315+
run_spec=effective_run_spec,
316316
job=jobs[0],
317317
volumes=volumes,
318318
)
319-
run_name = run_spec.run_name # preserve run_name
320-
run_spec.run_name = "dry-run" # will regenerate jobs on submission
319+
effective_run_spec.run_name = "dry-run" # will regenerate jobs on submission
321320

322321
# Get offers once for all jobs
323322
offers = []
@@ -330,7 +329,7 @@ async def get_plan(
330329
multinode=jobs[0].job_spec.jobs_per_replica > 1,
331330
volumes=volumes,
332331
privileged=jobs[0].job_spec.privileged,
333-
instance_mounts=check_run_spec_requires_instance_mounts(run_spec),
332+
instance_mounts=check_run_spec_requires_instance_mounts(effective_run_spec),
334333
)
335334

336335
job_plans = []
@@ -351,11 +350,12 @@ async def get_plan(
351350
)
352351
job_plans.append(job_plan)
353352

354-
run_spec.run_name = run_name # restore run_name
353+
effective_run_spec.run_name = run_spec.run_name # restore run_name
355354
run_plan = RunPlan(
356355
project_name=project.name,
357356
user=user.name,
358357
run_spec=run_spec,
358+
effective_run_spec=effective_run_spec,
359359
job_plans=job_plans,
360360
current_resource=current_resource,
361361
action=action,

src/tests/_internal/server/routers/test_fleets.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,7 @@ async def test_returns_plan(self, test_db, session: AsyncSession, client: AsyncC
851851
"project_name": project.name,
852852
"user": user.name,
853853
"spec": spec.dict(),
854+
"effective_spec": spec.dict(),
854855
"current_resource": None,
855856
"offers": [json.loads(o.json()) for o in offers],
856857
"total_offers": len(offers),

0 commit comments

Comments
 (0)