Skip to content

Commit d3ba8b4

Browse files
[CLI] Improve the output of dstack ps (#3253)
* [CLI] Add color to the STATUS column * [CLI] Show `RESOURCES` only in verbose mode, otherwise only show `GPU`. Also move `(spot)` to `PRICE`. * [CLI] Fix tests * [CLI] Show `replica=...` and `job=...` only when needed. * [CLI] Show `replica=...` and `job=...` only when needed. (bugfix) * [CLI] Show `replica=...` and `job=...` only when needed. (cleanup+bugfix)
1 parent e9b9812 commit d3ba8b4

File tree

4 files changed

+710
-42
lines changed

4 files changed

+710
-42
lines changed

src/dstack/_internal/cli/utils/run.py

Lines changed: 179 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,24 @@
66

77
from dstack._internal.cli.utils.common import NO_OFFERS_WARNING, add_row_from_dict, console
88
from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
9-
from dstack._internal.core.models.instances import InstanceAvailability
9+
from dstack._internal.core.models.instances import InstanceAvailability, Resources
1010
from dstack._internal.core.models.profiles import (
1111
DEFAULT_RUN_TERMINATION_IDLE_TIME,
1212
TerminationPolicy,
1313
)
1414
from dstack._internal.core.models.runs import (
15+
Job,
16+
JobProvisioningData,
17+
JobRuntimeData,
1518
JobStatus,
19+
JobSubmission,
1620
Probe,
1721
ProbeSpec,
1822
RunPlan,
23+
RunStatus,
24+
)
25+
from dstack._internal.core.models.runs import (
26+
Run as CoreRun,
1927
)
2028
from dstack._internal.core.services.profiles import get_termination
2129
from dstack._internal.utils.common import (
@@ -182,15 +190,151 @@ def th(s: str) -> str:
182190
console.print(NO_OFFERS_WARNING)
183191

184192

193+
def _format_run_status(run) -> str:
194+
status_text = (
195+
run.latest_job_submission.status_message
196+
if run.status.is_finished() and run.latest_job_submission
197+
else run.status_message
198+
)
199+
# Inline of _get_run_status_style
200+
color_map = {
201+
RunStatus.PENDING: "white",
202+
RunStatus.SUBMITTED: "grey",
203+
RunStatus.PROVISIONING: "deep_sky_blue1",
204+
RunStatus.RUNNING: "sea_green3",
205+
RunStatus.TERMINATING: "deep_sky_blue1",
206+
RunStatus.TERMINATED: "grey",
207+
RunStatus.FAILED: "indian_red1",
208+
RunStatus.DONE: "grey",
209+
}
210+
if status_text == "no offers" or status_text == "interrupted":
211+
color = "gold1"
212+
elif status_text == "pulling":
213+
color = "sea_green3"
214+
else:
215+
color = color_map.get(run.status, "white")
216+
status_style = f"bold {color}" if not run.status.is_finished() else color
217+
return f"[{status_style}]{status_text}[/]"
218+
219+
220+
def _format_job_submission_status(job_submission: JobSubmission, verbose: bool) -> str:
221+
status_message = job_submission.status_message
222+
job_status = job_submission.status
223+
if status_message in ("no offers", "interrupted"):
224+
color = "gold1"
225+
elif status_message == "stopped":
226+
color = "grey"
227+
else:
228+
color_map = {
229+
JobStatus.SUBMITTED: "grey",
230+
JobStatus.PROVISIONING: "deep_sky_blue1",
231+
JobStatus.PULLING: "sea_green3",
232+
JobStatus.RUNNING: "sea_green3",
233+
JobStatus.TERMINATING: "deep_sky_blue1",
234+
JobStatus.TERMINATED: "grey",
235+
JobStatus.ABORTED: "gold1",
236+
JobStatus.FAILED: "indian_red1",
237+
JobStatus.DONE: "grey",
238+
}
239+
color = color_map.get(job_status, "white")
240+
status_style = f"bold {color}" if not job_status.is_finished() else color
241+
formatted_status_message = f"[{status_style}]{status_message}[/]"
242+
if verbose and job_submission.inactivity_secs:
243+
inactive_for = format_duration_multiunit(job_submission.inactivity_secs)
244+
formatted_status_message += f" (inactive for {inactive_for})"
245+
return formatted_status_message
246+
247+
248+
def _get_show_deployment_replica_job(run: CoreRun, verbose: bool) -> tuple[bool, bool, bool]:
249+
show_deployment_num = (
250+
verbose and run.run_spec.configuration.type == "service"
251+
) or run.is_deployment_in_progress()
252+
253+
replica_nums = {job.job_spec.replica_num for job in run.jobs}
254+
show_replica = len(replica_nums) > 1
255+
256+
jobs_by_replica: Dict[int, List[Any]] = {}
257+
for job in run.jobs:
258+
replica_num = job.job_spec.replica_num
259+
if replica_num not in jobs_by_replica:
260+
jobs_by_replica[replica_num] = []
261+
jobs_by_replica[replica_num].append(job)
262+
263+
show_job = any(
264+
len({j.job_spec.job_num for j in jobs}) > 1 for jobs in jobs_by_replica.values()
265+
)
266+
267+
return show_deployment_num, show_replica, show_job
268+
269+
270+
def _format_job_name(
271+
job: Job,
272+
latest_job_submission: JobSubmission,
273+
show_deployment_num: bool,
274+
show_replica: bool,
275+
show_job: bool,
276+
) -> str:
277+
name_parts = []
278+
if show_replica:
279+
name_parts.append(f"replica={job.job_spec.replica_num}")
280+
if show_job:
281+
name_parts.append(f"job={job.job_spec.job_num}")
282+
name_suffix = (
283+
f" deployment={latest_job_submission.deployment_num}" if show_deployment_num else ""
284+
)
285+
name_value = " " + (" ".join(name_parts) if name_parts else "")
286+
name_value += name_suffix
287+
return name_value
288+
289+
290+
def _format_price(price: float, is_spot: bool) -> str:
291+
price_str = f"${price:.4f}".rstrip("0").rstrip(".")
292+
if is_spot:
293+
price_str += " (spot)"
294+
return price_str
295+
296+
297+
def _format_backend(backend: Any, region: str) -> str:
298+
backend_str = getattr(backend, "value", backend)
299+
backend_str = backend_str.replace("remote", "ssh")
300+
return f"{backend_str} ({region})"
301+
302+
303+
def _format_instance_type(jpd: JobProvisioningData, jrd: Optional[JobRuntimeData]) -> str:
304+
instance_type = jpd.instance_type.name
305+
if jrd is not None and getattr(jrd, "offer", None) is not None:
306+
if jrd.offer.total_blocks > 1:
307+
instance_type += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})"
308+
if jpd.reservation:
309+
instance_type += f" ({jpd.reservation})"
310+
return instance_type
311+
312+
313+
def _get_resources(jpd: JobProvisioningData, jrd: Optional[JobRuntimeData]) -> Resources:
314+
resources: Resources = jpd.instance_type.resources
315+
if jrd is not None and getattr(jrd, "offer", None) is not None:
316+
resources = jrd.offer.instance.resources
317+
return resources
318+
319+
320+
def _format_run_name(run: CoreRun, show_deployment_num: bool) -> str:
321+
parts: List[str] = [run.run_spec.run_name]
322+
if show_deployment_num:
323+
parts.append(f" [secondary]deployment={run.deployment_num}[/]")
324+
return "".join(parts)
325+
326+
185327
def get_runs_table(
186328
runs: List[Run], verbose: bool = False, format_date: DateFormatter = pretty_date
187329
) -> Table:
188330
table = Table(box=None, expand=shutil.get_terminal_size(fallback=(120, 40)).columns <= 110)
189331
table.add_column("NAME", style="bold", no_wrap=True, ratio=2)
190332
table.add_column("BACKEND", style="grey58", ratio=2)
191-
table.add_column("RESOURCES", ratio=3 if not verbose else 2)
192333
if verbose:
193-
table.add_column("INSTANCE TYPE", no_wrap=True, ratio=1)
334+
table.add_column("RESOURCES", style="grey58", ratio=3)
335+
table.add_column("INSTANCE TYPE", style="grey58", no_wrap=True, ratio=1)
336+
else:
337+
table.add_column("GPU", ratio=2)
194338
table.add_column("PRICE", style="grey58", ratio=1)
195339
table.add_column("STATUS", no_wrap=True, ratio=1)
196340
if verbose or any(
@@ -205,22 +349,18 @@ def get_runs_table(
205349

206350
for run in runs:
207351
run = run._run # TODO(egor-s): make public attribute
208-
show_deployment_num = (
209-
verbose
210-
and run.run_spec.configuration.type == "service"
211-
or run.is_deployment_in_progress()
352+
show_deployment_num, show_replica, show_job = _get_show_deployment_replica_job(
353+
run, verbose
212354
)
213355
merge_job_rows = len(run.jobs) == 1 and not show_deployment_num
214356

215357
run_row: Dict[Union[str, int], Any] = {
216-
"NAME": run.run_spec.run_name
217-
+ (f" [secondary]deployment={run.deployment_num}[/]" if show_deployment_num else ""),
358+
"NAME": _format_run_name(run, show_deployment_num),
218359
"SUBMITTED": format_date(run.submitted_at),
219-
"STATUS": (
220-
run.latest_job_submission.status_message
221-
if run.status.is_finished() and run.latest_job_submission
222-
else run.status_message
223-
),
360+
"STATUS": _format_run_status(run),
361+
"RESOURCES": "-",
362+
"GPU": "-",
363+
"PRICE": "-",
224364
}
225365
if run.error:
226366
run_row["ERROR"] = run.error
@@ -229,46 +369,44 @@ def get_runs_table(
229369

230370
for job in run.jobs:
231371
latest_job_submission = job.job_submissions[-1]
232-
status = latest_job_submission.status.value
233-
if verbose and latest_job_submission.inactivity_secs:
234-
inactive_for = format_duration_multiunit(latest_job_submission.inactivity_secs)
235-
status += f" (inactive for {inactive_for})"
372+
status_formatted = _format_job_submission_status(latest_job_submission, verbose)
373+
236374
job_row: Dict[Union[str, int], Any] = {
237-
"NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}"
238-
+ (
239-
f" deployment={latest_job_submission.deployment_num}"
240-
if show_deployment_num
241-
else ""
375+
"NAME": _format_job_name(
376+
job, latest_job_submission, show_deployment_num, show_replica, show_job
242377
),
243-
"STATUS": latest_job_submission.status_message,
378+
"STATUS": status_formatted,
244379
"PROBES": _format_job_probes(
245380
job.job_spec.probes, latest_job_submission.probes, latest_job_submission.status
246381
),
247382
"SUBMITTED": format_date(latest_job_submission.submitted_at),
248383
"ERROR": latest_job_submission.error,
384+
"RESOURCES": "-",
385+
"GPU": "-",
386+
"PRICE": "-",
249387
}
250388
jpd = latest_job_submission.job_provisioning_data
251389
if jpd is not None:
252-
resources = jpd.instance_type.resources
253-
instance_type = jpd.instance_type.name
254390
jrd = latest_job_submission.job_runtime_data
255-
if jrd is not None and jrd.offer is not None:
256-
resources = jrd.offer.instance.resources
257-
if jrd.offer.total_blocks > 1:
258-
instance_type += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})"
259-
if jpd.reservation:
260-
instance_type += f" ({jpd.reservation})"
261-
job_row.update(
262-
{
263-
"BACKEND": f"{jpd.backend.value.replace('remote', 'ssh')} ({jpd.region})",
264-
"RESOURCES": resources.pretty_format(include_spot=True),
265-
"INSTANCE TYPE": instance_type,
266-
"PRICE": f"${jpd.price:.4f}".rstrip("0").rstrip("."),
267-
}
268-
)
391+
resources = _get_resources(jpd, jrd)
392+
update_dict: Dict[Union[str, int], Any] = {
393+
"BACKEND": _format_backend(jpd.backend, jpd.region),
394+
"RESOURCES": resources.pretty_format(include_spot=False),
395+
"GPU": resources.pretty_format(gpu_only=True, include_spot=False),
396+
"INSTANCE TYPE": _format_instance_type(jpd, jrd),
397+
"PRICE": _format_price(jpd.price, resources.spot),
398+
}
399+
job_row.update(update_dict)
269400
if merge_job_rows:
270-
# merge rows
401+
_status = job_row["STATUS"]
402+
_resources = job_row["RESOURCES"]
403+
_gpu = job_row["GPU"]
404+
_price = job_row["PRICE"]
271405
job_row.update(run_row)
406+
job_row["RESOURCES"] = _resources
407+
job_row["GPU"] = _gpu
408+
job_row["PRICE"] = _price
409+
job_row["STATUS"] = _status
272410
add_row_from_dict(table, job_row, style="secondary" if len(run.jobs) != 1 else None)
273411

274412
return table

src/dstack/_internal/core/models/instances.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,23 @@ def _pretty_format(
8383
gpus: List[Gpu],
8484
spot: bool,
8585
include_spot: bool = False,
86+
gpu_only: bool = False,
8687
) -> str:
88+
if gpu_only:
89+
if not gpus:
90+
return "-"
91+
gpu = gpus[0]
92+
gpu_resources = {
93+
"gpu_name": gpu.name,
94+
"gpu_count": len(gpus),
95+
}
96+
if gpu.memory_mib > 0:
97+
gpu_resources["gpu_memory"] = f"{gpu.memory_mib / 1024:.0f}GB"
98+
output = pretty_resources(**gpu_resources)
99+
if include_spot and spot:
100+
output += " (spot)"
101+
return output
102+
87103
resources = {}
88104
if cpus > 0:
89105
resources["cpus"] = cpus
@@ -103,7 +119,7 @@ def _pretty_format(
103119
output += " (spot)"
104120
return output
105121

106-
def pretty_format(self, include_spot: bool = False) -> str:
122+
def pretty_format(self, include_spot: bool = False, gpu_only: bool = False) -> str:
107123
return Resources._pretty_format(
108124
self.cpus,
109125
self.cpu_arch,
@@ -112,6 +128,7 @@ def pretty_format(self, include_spot: bool = False) -> str:
112128
self.gpus,
113129
self.spot,
114130
include_spot,
131+
gpu_only,
115132
)
116133

117134

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from unittest.mock import Mock
2+
3+
import pytest
4+
5+
from dstack._internal.server.services.docker import ImageConfig, ImageConfigObject
6+
7+
8+
@pytest.fixture
def image_config_mock(monkeypatch: pytest.MonkeyPatch) -> ImageConfig:
    """Replace the image-config lookups with mocks returning a fixed config.

    Builds an ``ImageConfig`` with no user, no entrypoint and ``/bin/bash`` as
    the command, then patches ``_get_image_config`` (in the job configurators
    base module) to return it and ``get_image_config`` (in the docker service
    module) to return it wrapped in an ``ImageConfigObject``. The config is
    returned so tests can inspect it.
    """
    config = ImageConfig.parse_obj({"User": None, "Entrypoint": None, "Cmd": ["/bin/bash"]})
    # Patch targets paired with the value each mocked function should return.
    replacements = (
        (
            "dstack._internal.server.services.jobs.configurators.base._get_image_config",
            config,
        ),
        (
            "dstack._internal.server.services.docker.get_image_config",
            ImageConfigObject(config=config),
        ),
    )
    for target, return_value in replacements:
        monkeypatch.setattr(target, Mock(return_value=return_value))
    return config

0 commit comments

Comments
 (0)