Skip to content

Commit b68ab80

Browse files
committed
fix: address PR comment + fix obtaining ray cluster
1 parent 1380f1a commit b68ab80

2 files changed

Lines changed: 9 additions & 10 deletions

File tree

demo-notebooks/guided-demos/6_rayjob_checkpointing_example.ipynb

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -211,21 +211,20 @@
211211
"outputs": [],
212212
"source": [
213213
"print(job.status())\n",
214-
"\n",
215-
"# Resolve RayCluster created by the RayJob (retry until it exists after Kueue admission).\n",
214+
"# KubeRay assigns a generated name — not job.cluster_name (the template).\n",
216215
"cluster = None\n",
217216
"for _ in range(36):\n",
218-
" try:\n",
219-
" cluster = get_cluster(job.cluster_name, namespace=NAMESPACE, verify_tls=False)\n",
220-
" break\n",
221-
" except Exception:\n",
222-
" time.sleep(5)\n",
223-
"\n",
217+
" status_data = job._api.get_job_status(name=job.name, k8s_namespace=NAMESPACE)\n",
218+
" ray_cluster_name = (status_data or {}).get(\"rayClusterName\")\n",
219+
" if ray_cluster_name:\n",
220+
" cluster = get_cluster(ray_cluster_name, namespace=NAMESPACE, verify_tls=False)\n",
221+
" if cluster is not None:\n",
222+
" break\n",
223+
" time.sleep(5)\n",
224224
"if cluster is None:\n",
225225
" raise RuntimeError(\n",
226226
" \"RayCluster not ready — check RayJob / Workload admission and operator logs.\"\n",
227227
" )\n",
228-
"\n",
229228
"print(f\"Ray Dashboard (open in browser): {cluster.cluster_dashboard_uri()}\")\n",
230229
"print(\"In the dashboard, open Jobs and stream logs for the training driver.\")\n",
231230
"print(\n",

demo-notebooks/guided-demos/train_with_checkpoints.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def train_func(config):
137137
"lr": 0.001,
138138
},
139139
scaling_config=ScalingConfig(
140-
num_workers=1,
140+
num_workers=2,
141141
use_gpu=False,
142142
),
143143
run_config=RunConfig(

0 commit comments

Comments (0)