Skip to content

Commit e23783a

Browse files
authored
[CLI] Divide CPU util by the number of vCPUs in dstack metrics (#2466)
Previously, the max value was (100 * N)%, where N is the number of vCPUs, e.g., 3200% for a 32-core CPU. This change normalizes the CPU utilization range to 0..100%.
1 parent 67ab90f commit e23783a

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

src/dstack/_internal/cli/commands/metrics.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
console,
1515
)
1616
from dstack._internal.core.errors import CLIError
17+
from dstack._internal.core.models.instances import Resources
1718
from dstack._internal.core.models.metrics import JobMetrics
1819
from dstack.api._public import Client
1920
from dstack.api._public.runs import Run
@@ -86,14 +87,23 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
8687
add_row_from_dict(table, run_row)
8788

8889
for job, job_metrics in zip(run._run.jobs, metrics):
90+
jrd = job.job_submissions[-1].job_runtime_data
91+
jpd = job.job_submissions[-1].job_provisioning_data
92+
resources: Optional[Resources] = None
93+
if jrd is not None and jrd.offer is not None:
94+
resources = jrd.offer.instance.resources
95+
elif jpd is not None:
96+
resources = jpd.instance_type.resources
8997
cpu_usage = _get_metric_value(job_metrics, "cpu_usage_percent")
9098
if cpu_usage is not None:
91-
cpu_usage = f"{cpu_usage}%"
99+
if resources is not None:
100+
cpu_usage = cpu_usage / resources.cpus
101+
cpu_usage = f"{cpu_usage:.0f}%"
92102
memory_usage = _get_metric_value(job_metrics, "memory_working_set_bytes")
93103
if memory_usage is not None:
94104
memory_usage = f"{round(memory_usage / 1024 / 1024)}MB"
95-
if job.job_submissions[-1].job_provisioning_data is not None:
96-
memory_usage += f"/{job.job_submissions[-1].job_provisioning_data.instance_type.resources.memory_mib}MB"
105+
if resources is not None:
106+
memory_usage += f"/{resources.memory_mib}MB"
97107
gpu_metrics = ""
98108
gpus_detected_num = _get_metric_value(job_metrics, "gpus_detected_num")
99109
if gpus_detected_num is not None:
@@ -104,8 +114,8 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
104114
if i != 0:
105115
gpu_metrics += "\n"
106116
gpu_metrics += f"#{i} {round(gpu_memory_usage / 1024 / 1024)}MB"
107-
if job.job_submissions[-1].job_provisioning_data is not None:
108-
gpu_metrics += f"/{job.job_submissions[-1].job_provisioning_data.instance_type.resources.gpus[i].memory_mib}MB"
117+
if resources is not None:
118+
gpu_metrics += f"/{resources.gpus[i].memory_mib}MB"
109119
gpu_metrics += f" {gpu_util_percent}% Util"
110120

111121
job_row: Dict[Union[str, int], Any] = {

0 commit comments

Comments
 (0)