Skip to content

Commit 548d797

Browse files
ko3n1g and claude committed
fix: read actualPhase from type-specific workload endpoints
The /workloads/distributed/{id} and /workloads/trainings/{id} endpoints return actualPhase, not phase (which was the field on the generic /workloads/{id} endpoint). This caused a KeyError crash immediately after the 403 fix landed. The status lookup now reads actualPhase first, falls back to phase for compatibility, and returns None (PENDING) if neither field is present.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: oliver könig <okoenig@nvidia.com>
1 parent b5ce8fe commit 548d797

2 files changed

Lines changed: 7 additions & 3 deletions

File tree

nemo_run/core/execution/dgxcloud.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,11 @@ def status(self, job_id: str) -> Optional[DGXCloudState]:
377377
return None
378378

379379
r_json = response.json()
380-
return DGXCloudState(r_json["phase"])
380+
phase = r_json.get("actualPhase") or r_json.get("phase")
381+
if not phase:
382+
logger.warning(f"No phase field in status response for job {job_id}: {r_json}")
383+
return None
384+
return DGXCloudState(phase)
381385

382386
def fetch_logs(
383387
self,

test/core/execution/test_dgxcloud.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ def test_nproc_per_node_default(self):
877877
def test_status(self, mock_get):
878878
mock_response = MagicMock()
879879
mock_response.status_code = 200
880-
mock_response.json.return_value = {"phase": "Running"}
880+
mock_response.json.return_value = {"actualPhase": "Running"}
881881
mock_get.return_value = mock_response
882882

883883
with patch.object(DGXCloudExecutor, "get_auth_token", return_value="test_token"):
@@ -903,7 +903,7 @@ def test_status(self, mock_get):
903903
def test_status_distributed(self, mock_get):
904904
mock_response = MagicMock()
905905
mock_response.status_code = 200
906-
mock_response.json.return_value = {"phase": "Running"}
906+
mock_response.json.return_value = {"actualPhase": "Running"}
907907
mock_get.return_value = mock_response
908908

909909
with patch.object(DGXCloudExecutor, "get_auth_token", return_value="test_token"):

0 commit comments

Comments
 (0)