@@ -16,277 +16,48 @@ env:
1616jobs :
# Job verify-local_interactive, shown as a rendered diff: the leading digits on each
# line appear to be the old/new line numbers fused together by the diff view
# (TODO confirm); "-" marks a line removed by this change and "+" a line added.
# The job is gated on the triggering label being 'test-additional-notebooks'.
1717 verify-local_interactive :
1818 if : ${{ github.event.label.name == 'test-additional-notebooks' }}
# The runner label changes from ubuntu-latest-4core to ubuntu-latest.
19- runs-on : ubuntu-latest-4core
19+ runs-on : ubuntu-latest
2020
2121 steps :
# Removed steps (all "-" lines below): repository checkouts, Python 3.11 setup,
# KinD cluster start, Kueue/KubeRay deployment, papermill run of
# local_interactive.ipynb, and operator/pod log collection and upload.
22- - name : Checkout code
23- uses : actions/checkout@v4
24- with :
25- submodules : recursive
26-
27- - name : Checkout common repo code
28- uses : actions/checkout@v4
29- with :
30- repository : ' project-codeflare/codeflare-common'
31- ref : ' main'
32- path : ' common'
33-
34- - name : Set up specific Python version
35- uses : actions/setup-python@v5
36- with :
37- python-version : ' 3.11'
38- cache : ' pip' # caching pip dependencies
39-
40- - name : Setup and start KinD cluster
41- uses : ./common/github-actions/kind
42-
43- - name : Deploy Kueue and KubeRay
44- id : deploy
45- run : |
46- # Install Kueue
47- echo "Installing Kueue ${KUEUE_VERSION}..."
48- kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
49- kubectl wait --timeout=120s --for=condition=Available=true deployment -n kueue-system kueue-controller-manager
50-
51- # Install KubeRay from opendatahub-io fork (has RHOAI features)
52- echo "Installing KubeRay ${KUBERAY_VERSION} from opendatahub-io..."
53- kubectl create -k "github.com/opendatahub-io/kuberay/ray-operator/config/default?ref=${KUBERAY_VERSION}"
54- kubectl wait --timeout=120s --for=condition=Available=true deployment kuberay-operator
55-
56- # Create default Kueue resources for the tests
57- echo "Creating Kueue resources..."
58- kubectl apply -f - <<EOF
59- apiVersion: kueue.x-k8s.io/v1beta1
60- kind: ResourceFlavor
61- metadata:
62- name: default-flavor
63- ---
64- apiVersion: kueue.x-k8s.io/v1beta1
65- kind: ClusterQueue
66- metadata:
67- name: cluster-queue
68- spec:
69- namespaceSelector: {}
70- resourceGroups:
71- - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
72- flavors:
73- - name: default-flavor
74- resources:
75- - name: cpu
76- nominalQuota: 100
77- - name: memory
78- nominalQuota: 100Gi
79- - name: nvidia.com/gpu
80- nominalQuota: 10
81- ---
82- apiVersion: kueue.x-k8s.io/v1beta1
83- kind: LocalQueue
84- metadata:
85- name: local-queue
86- namespace: default
87- annotations:
88- kueue.x-k8s.io/default-queue: "true"
89- spec:
90- clusterQueue: cluster-queue
91- EOF
92-
93- - name : Setup Additional demo notebooks execution
94- run : |
95- echo "Installing papermill and dependencies..."
96- pip install poetry papermill ipython ipykernel
97- # Disable virtualenv due to problems using packaged in virtualenv in papermill
98- poetry config virtualenvs.create false
99-
100- echo "Installing SDK..."
101- poetry install --with test,docs
102-
103- - name : Run local_interactive.ipynb
104- run : |
105- set -euo pipefail
106-
107- # Remove login/logout cells, as KinD doesn't support authentication using token
108- jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
109- jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
110- # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
111- sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
112- # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
113- JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
114- jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
115- # Set explicit namespace as SDK need it (currently) to resolve local queues
116- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" local_interactive.ipynb
117- # Disable dashboard check as KinD doesn't have HTTPRoute/Route configured
118- sed -i "s/cluster.wait_ready()/cluster.wait_ready(dashboard_check=False)/" local_interactive.ipynb
119- # Run notebook
120- poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
121- env :
122- GRPC_DNS_RESOLVER : " native"
123- working-directory : demo-notebooks/additional-demos
124-
125- - name : Print Kueue operator logs
126- if : always() && steps.deploy.outcome == 'success'
127- run : |
128- echo "Printing Kueue operator logs"
129- KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
130- kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
131-
132- - name : Print KubeRay operator logs
133- if : always() && steps.deploy.outcome == 'success'
134- run : |
135- echo "Printing KubeRay operator logs"
136- kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
137-
138- - name : Export all KinD pod logs
139- uses : ./common/github-actions/kind-export-logs
140- if : always() && steps.deploy.outcome == 'success'
141- with :
142- output-directory : ${TEMP_DIR}
143-
144- - name : Upload logs
145- uses : actions/upload-artifact@v4
146- if : always() && steps.deploy.outcome == 'success'
147- with :
148- name : logs-local_interactive
149- retention-days : 10
150- path : |
151- ${{ env.TEMP_DIR }}/**/*.log
# Added replacement step: it only echoes a "::notice::" workflow command and a
# banner explaining why the notebook test is disabled; no notebook is executed.
# NOTE(review): with only echo commands the job will presumably finish green even
# though nothing was tested - confirm a passing status for a skipped test is intended.
22+ - name : Skip notification
23+ run : |
24+ echo "::notice::SKIPPED: verify-local_interactive test is currently disabled."
25+ echo ""
26+ echo "=============================================================================="
27+ echo " TEST SKIPPED: local_interactive.ipynb"
28+ echo "=============================================================================="
29+ echo ""
30+ echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
31+ echo " interactive Ray connections via ray.init(address=cluster.local_client_url())."
32+ echo ""
33+ echo " The mTLS CA secret was previously created by codeflare-operator, which has"
34+ echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
35+ echo " some mTLS features, they require OpenShift-specific components that are not"
36+ echo " available in KinD."
37+ echo ""
38+ echo " This test should be run on a full OpenShift cluster with RHOAI installed."
39+ echo "=============================================================================="
15240
# Job verify-ray_job_client, shown as a rendered diff: the leading digits on each
# line appear to be the old/new line numbers fused together by the diff view
# (TODO confirm); "-" marks a line removed by this change and "+" a line added.
# The job is gated on the triggering label being 'test-additional-notebooks'.
15341 verify-ray_job_client :
15442 if : ${{ github.event.label.name == 'test-additional-notebooks' }}
# The runner label changes from ubuntu-latest-4core to ubuntu-latest.
155- runs-on : ubuntu-latest-4core
43+ runs-on : ubuntu-latest
15644
15745 steps :
# Removed steps (all "-" lines below): repository checkouts, Python 3.11 setup,
# KinD cluster start, Kueue/KubeRay deployment, papermill run of
# ray_job_client.ipynb, and operator/pod log collection and upload.
158- - name : Checkout code
159- uses : actions/checkout@v4
160- with :
161- submodules : recursive
162-
163- - name : Checkout common repo code
164- uses : actions/checkout@v4
165- with :
166- repository : ' project-codeflare/codeflare-common'
167- ref : ' main'
168- path : ' common'
169-
170- - name : Set up specific Python version
171- uses : actions/setup-python@v5
172- with :
173- python-version : ' 3.11'
174- cache : ' pip' # caching pip dependencies
175-
176- - name : Setup and start KinD cluster
177- uses : ./common/github-actions/kind
178-
179- - name : Deploy Kueue and KubeRay
180- id : deploy
181- run : |
182- # Install Kueue
183- echo "Installing Kueue ${KUEUE_VERSION}..."
184- kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
185- kubectl wait --timeout=120s --for=condition=Available=true deployment -n kueue-system kueue-controller-manager
186-
187- # Install KubeRay from opendatahub-io fork (has RHOAI features)
188- echo "Installing KubeRay ${KUBERAY_VERSION} from opendatahub-io..."
189- kubectl create -k "github.com/opendatahub-io/kuberay/ray-operator/config/default?ref=${KUBERAY_VERSION}"
190- kubectl wait --timeout=120s --for=condition=Available=true deployment kuberay-operator
191-
192- # Create default Kueue resources for the tests
193- echo "Creating Kueue resources..."
194- kubectl apply -f - <<EOF
195- apiVersion: kueue.x-k8s.io/v1beta1
196- kind: ResourceFlavor
197- metadata:
198- name: default-flavor
199- ---
200- apiVersion: kueue.x-k8s.io/v1beta1
201- kind: ClusterQueue
202- metadata:
203- name: cluster-queue
204- spec:
205- namespaceSelector: {}
206- resourceGroups:
207- - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
208- flavors:
209- - name: default-flavor
210- resources:
211- - name: cpu
212- nominalQuota: 100
213- - name: memory
214- nominalQuota: 100Gi
215- - name: nvidia.com/gpu
216- nominalQuota: 10
217- ---
218- apiVersion: kueue.x-k8s.io/v1beta1
219- kind: LocalQueue
220- metadata:
221- name: local-queue
222- namespace: default
223- annotations:
224- kueue.x-k8s.io/default-queue: "true"
225- spec:
226- clusterQueue: cluster-queue
227- EOF
228-
229- - name : Setup Additional demo notebooks execution
230- run : |
231- echo "Installing papermill and dependencies..."
232- pip install poetry papermill ipython ipykernel
233- # Disable virtualenv due to problems using packaged in virtualenv in papermill
234- poetry config virtualenvs.create false
235-
236- echo "Installing SDK..."
237- poetry install --with test,docs
238-
# NOTE(review): the removed papermill command in this step reads ray_job_client.ipynb
# but writes its output to hf_interactive_out.ipynb - looks like a copy-paste
# leftover; worth correcting if this step is ever restored.
239- - name : Run ray_job_client.ipynb
240- run : |
241- set -euo pipefail
242-
243- # Remove login/logout cells, as KinD doesn't support authentication using token
244- jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
245- jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
246- # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
247- sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
248- # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
249- JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
250- jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
251- # Set explicit namespace as SDK need it (currently) to resolve local queues
252- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" ray_job_client.ipynb
253- sed -i "s/worker_memory_requests=4,/worker_memory_requests=1,/" ray_job_client.ipynb
254- sed -i "s/worker_memory_limits=4,/worker_memory_limits=1,/" ray_job_client.ipynb
255- sed -i "s/'Authorization': .*/'Authorization': None\",/" ray_job_client.ipynb
256- sed -i "s/num_workers=2/num_workers=1/" ray_job_client.ipynb
257- sed -i "s/RayJobClient(address=ray_dashboard, headers=header, verify=True)/RayJobClient(address=ray_dashboard, verify=False)/" ray_job_client.ipynb
258- # Disable dashboard check as KinD doesn't have HTTPRoute/Route configured
259- sed -i "s/cluster.wait_ready()/cluster.wait_ready(dashboard_check=False)/" ray_job_client.ipynb
260- # Run notebook
261- poetry run papermill ray_job_client.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
262- env :
263- GRPC_DNS_RESOLVER : " native"
264- working-directory : demo-notebooks/additional-demos
265-
266- - name : Print Kueue operator logs
267- if : always() && steps.deploy.outcome == 'success'
268- run : |
269- echo "Printing Kueue operator logs"
270- KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
271- kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
272-
273- - name : Print KubeRay operator logs
274- if : always() && steps.deploy.outcome == 'success'
275- run : |
276- echo "Printing KubeRay operator logs"
277- kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
278-
279- - name : Export all KinD pod logs
280- uses : ./common/github-actions/kind-export-logs
281- if : always() && steps.deploy.outcome == 'success'
282- with :
283- output-directory : ${TEMP_DIR}
284-
285- - name : Upload logs
286- uses : actions/upload-artifact@v4
287- if : always() && steps.deploy.outcome == 'success'
288- with :
289- name : logs-ray_job_client
290- retention-days : 10
291- path : |
292- ${{ env.TEMP_DIR }}/**/*.log
# Added replacement step: it only echoes a "::notice::" workflow command and a
# banner explaining why the notebook test is disabled; no notebook is executed.
# NOTE(review): with only echo commands the job will presumably finish green even
# though nothing was tested - confirm a passing status for a skipped test is intended.
46+ - name : Skip notification
47+ run : |
48+ echo "::notice::SKIPPED: verify-ray_job_client test is currently disabled."
49+ echo ""
50+ echo "=============================================================================="
51+ echo " TEST SKIPPED: ray_job_client.ipynb"
52+ echo "=============================================================================="
53+ echo ""
54+ echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
55+ echo " interactive Ray connections via the Ray Job Client."
56+ echo ""
57+ echo " The mTLS CA secret was previously created by codeflare-operator, which has"
58+ echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
59+ echo " some mTLS features, they require OpenShift-specific components that are not"
60+ echo " available in KinD."
61+ echo ""
62+ echo " This test should be run on a full OpenShift cluster with RHOAI installed."
63+ echo "=============================================================================="
0 commit comments