Skip to content

Commit 1380f1a

Browse files
committed
RHOAIENG-57679: remove rbac step
1 parent 50795b8 commit 1380f1a

1 file changed

Lines changed: 33 additions & 48 deletions

File tree

demo-notebooks/guided-demos/6_rayjob_checkpointing_example.ipynb

Lines changed: 33 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,14 @@
2525
},
2626
{
2727
"cell_type": "code",
28+
"execution_count": null,
2829
"metadata": {},
30+
"outputs": [],
2931
"source": [
3032
"from codeflare_sdk import RayJob, ManagedClusterConfig, set_api_client, get_cluster\n",
3133
"from kube_authkit import AuthConfig, get_k8s_client\n",
3234
"import time"
33-
],
34-
"execution_count": null,
35-
"outputs": []
35+
]
3636
},
3737
{
3838
"cell_type": "markdown",
@@ -43,7 +43,9 @@
4343
},
4444
{
4545
"cell_type": "code",
46+
"execution_count": null,
4647
"metadata": {},
48+
"outputs": [],
4749
"source": [
4850
"import urllib3\n",
4951
"\n",
@@ -83,9 +85,7 @@
8385
"JOB_NAME = \"checkpointing-job\"\n",
8486
"# Must match metadata.name of a LocalQueue in NAMESPACE (create per OpenShift Kueue docs).\n",
8587
"LOCAL_QUEUE = \"default\""
86-
],
87-
"execution_count": null,
88-
"outputs": []
88+
]
8989
},
9090
{
9191
"cell_type": "markdown",
@@ -105,17 +105,17 @@
105105
},
106106
{
107107
"cell_type": "code",
108+
"execution_count": null,
108109
"metadata": {},
110+
"outputs": [],
109111
"source": [
110112
"# Optional: verify Kueue objects exist (cluster admin / user with read access)\n",
111113
"# !oc get resourceflavor.kueue.x-k8s.io\n",
112114
"# !oc get clusterqueue.kueue.x-k8s.io\n",
113115
"# !oc get localqueue.kueue.x-k8s.io -n $NAMESPACE\n",
114116
"\n",
115117
"print(f\"Namespace: {NAMESPACE!r}, RayJob name: {JOB_NAME!r}, LocalQueue: {LOCAL_QUEUE!r}\")"
116-
],
117-
"execution_count": null,
118-
"outputs": []
118+
]
119119
},
120120
{
121121
"cell_type": "markdown",
@@ -126,7 +126,9 @@
126126
},
127127
{
128128
"cell_type": "code",
129+
"execution_count": null,
129130
"metadata": {},
131+
"outputs": [],
130132
"source": [
131133
"# Set your AWS credentials\n",
132134
"# WARNING: Do not commit credentials to version control. For production,\n",
@@ -142,23 +144,6 @@
142144
"# AWS_CREDENTIALS[\"AWS_SESSION_TOKEN\"] = \"your-session-token\"\n",
143145
"\n",
144146
"print(f\"Using bucket: {AWS_CREDENTIALS['AWS_S3_BUCKET']}\")"
145-
],
146-
"execution_count": null,
147-
"outputs": []
148-
},
149-
{
150-
"cell_type": "markdown",
151-
"metadata": {},
152-
"source": [
153-
"## Grant workbench permissions (one-time, admin)\n",
154-
"\n",
155-
"To create RayJob resources from a workbench, the workbench service account typically needs elevated RBAC in the project. Run **once** with a user that has `cluster-admin` or sufficient privileges (replace `your-namespace`):\n",
156-
"\n",
157-
"```bash\n",
158-
"oc adm policy add-role-to-user admin \\\n",
159-
" system:serviceaccount:your-namespace:your-namespace-wb \\\n",
160-
" -n your-namespace\n",
161-
"```"
162147
]
163148
},
164149
{
@@ -170,7 +155,9 @@
170155
},
171156
{
172157
"cell_type": "code",
158+
"execution_count": null,
173159
"metadata": {},
160+
"outputs": [],
174161
"source": [
175162
"managed = ManagedClusterConfig(\n",
176163
" num_workers=2,\n",
@@ -206,9 +193,7 @@
206193
")\n",
207194
"print(f\"RayCluster name (when assigned): {job.cluster_name}\")\n",
208195
"print(\"Watch logs for: NO CHECKPOINT FOUND - Starting fresh\")"
209-
],
210-
"execution_count": null,
211-
"outputs": []
196+
]
212197
},
213198
{
214199
"cell_type": "markdown",
@@ -221,7 +206,9 @@
221206
},
222207
{
223208
"cell_type": "code",
209+
"execution_count": null,
224210
"metadata": {},
211+
"outputs": [],
225212
"source": [
226213
"print(job.status())\n",
227214
"\n",
@@ -244,9 +231,7 @@
244231
"print(\n",
245232
" \"Wait for at least one full epoch and a checkpoint to S3 before running the suspend cell.\"\n",
246233
")"
247-
],
248-
"execution_count": null,
249-
"outputs": []
234+
]
250235
},
251236
{
252237
"cell_type": "markdown",
@@ -261,7 +246,9 @@
261246
},
262247
{
263248
"cell_type": "code",
249+
"execution_count": null,
264250
"metadata": {},
251+
"outputs": [],
265252
"source": [
266253
"print(\"=\" * 60)\n",
267254
"print(\"SUSPENDING RayJob (checkpoint demo — not deleting the RayJob CR)\")\n",
@@ -270,9 +257,7 @@
270257
"\n",
271258
"job.stop()\n",
272259
"print(\"Stop requested; poll job.status() until the RayJob reports suspended / non-running.\")"
273-
],
274-
"execution_count": null,
275-
"outputs": []
260+
]
276261
},
277262
{
278263
"cell_type": "markdown",
@@ -285,7 +270,9 @@
285270
},
286271
{
287272
"cell_type": "code",
273+
"execution_count": null,
288274
"metadata": {},
275+
"outputs": [],
289276
"source": [
290277
"print(\"=\" * 60)\n",
291278
"print(\"RESUMING RayJob after suspend\")\n",
@@ -295,9 +282,7 @@
295282
"job.resubmit()\n",
296283
"time.sleep(10)\n",
297284
"print(job.status())"
298-
],
299-
"execution_count": null,
300-
"outputs": []
285+
]
301286
},
302287
{
303288
"cell_type": "markdown",
@@ -317,7 +302,9 @@
317302
},
318303
{
319304
"cell_type": "code",
305+
"execution_count": null,
320306
"metadata": {},
307+
"outputs": [],
321308
"source": [
322309
"print(job.status())\n",
323310
"try:\n",
@@ -326,9 +313,7 @@
326313
"except Exception as e:\n",
327314
" print(f\"Could not resolve cluster yet: {e}\")\n",
328315
"print(\"Check Jobs tab for: RESUMING FROM CHECKPOINT - Starting at epoch N\")"
329-
],
330-
"execution_count": null,
331-
"outputs": []
316+
]
332317
},
333318
{
334319
"cell_type": "markdown",
@@ -341,7 +326,9 @@
341326
},
342327
{
343328
"cell_type": "code",
329+
"execution_count": null,
344330
"metadata": {},
331+
"outputs": [],
345332
"source": [
346333
"print(\"Cleaning up...\")\n",
347334
"cluster_name = job.cluster_name\n",
@@ -357,18 +344,16 @@
357344
" pass\n",
358345
"\n",
359346
"print(\"Cleanup attempted (RayJob delete; cluster.down if RayCluster still exists).\")"
360-
],
361-
"execution_count": null,
362-
"outputs": []
347+
]
363348
},
364349
{
365350
"cell_type": "code",
351+
"execution_count": null,
366352
"metadata": {},
353+
"outputs": [],
367354
"source": [
368355
"# No explicit logout needed - authentication is managed automatically by kube-authkit"
369-
],
370-
"execution_count": null,
371-
"outputs": []
356+
]
372357
}
373358
],
374359
"metadata": {
@@ -392,4 +377,4 @@
392377
},
393378
"nbformat": 4,
394379
"nbformat_minor": 2
395-
}
380+
}

0 commit comments

Comments
 (0)