@@ -2,254 +2,63 @@ name: Additional demo notebooks tests
22
33on :
44 pull_request :
5- types : [ labeled ]
5+ branches : [ main ]
6+ types : [ labeled, synchronize ]
67 workflow_dispatch :
78
89concurrency :
910 group : ${{ github.head_ref }}-${{ github.workflow }}
1011 cancel-in-progress : true
1112
1213env :
13- CODEFLARE_OPERATOR_IMG : " quay.io/project-codeflare/codeflare-operator:dev"
14+ KUEUE_VERSION : v0.13.4
15+ KUBERAY_VERSION : v1.4.2
1416
1517jobs :
1618 verify-local_interactive :
17- if : ${{ github.event.label. name == 'test-additional-notebooks' }}
18- runs-on : ubuntu-latest-4core
19+ if : ${{ contains( github.event.pull_request.labels.*. name, 'test-additional-notebooks') }}
20+ runs-on : ubuntu-latest
1921
2022 steps :
21- - name : Checkout code
22- uses : actions/checkout@v4
23- with :
24- submodules : recursive
25-
26- - name : Checkout common repo code
27- uses : actions/checkout@v4
28- with :
29- repository : ' project-codeflare/codeflare-common'
30- ref : ' main'
31- path : ' common'
32-
33- - name : Checkout CodeFlare operator repository
34- uses : actions/checkout@v4
35- with :
36- repository : project-codeflare/codeflare-operator
37- path : codeflare-operator
38-
39- - name : Set Go
40- uses : actions/setup-go@v5
41- with :
42- go-version-file : ' ./codeflare-operator/go.mod'
43- cache-dependency-path : " ./codeflare-operator/go.sum"
44-
45- - name : Set up gotestfmt
46- uses : gotesttools/gotestfmt-action@v2
47- with :
48- token : ${{ secrets.GITHUB_TOKEN }}
49-
50- - name : Set up specific Python version
51- uses : actions/setup-python@v5
52- with :
53- python-version : ' 3.11'
54- cache : ' pip' # caching pip dependencies
55-
56- - name : Setup and start KinD cluster
57- uses : ./common/github-actions/kind
58-
59- - name : Deploy CodeFlare stack
60- id : deploy
61- run : |
62- cd codeflare-operator
63- echo Setting up CodeFlare stack
64- make setup-e2e
65- echo Deploying CodeFlare operator
66- make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
67- kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
68- cd ..
69-
70- - name : Setup Additional demo notebooks execution
23+ - name : Skip notification
7124 run : |
72- echo "Installing papermill and dependencies..."
73- pip install poetry papermill ipython ipykernel
74- # Disable virtualenv due to problems using packaged in virtualenv in papermill
75- poetry config virtualenvs.create false
76-
77- echo "Installing SDK..."
78- poetry install --with test,docs
79-
80- - name : Run local_interactive.ipynb
81- run : |
82- set -euo pipefail
83-
84- # Remove login/logout cells, as KinD doesn't support authentication using token
85- jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
86- jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
87- # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
88- sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
89- # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
90- JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
91- jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
92- # Set explicit namespace as SDK need it (currently) to resolve local queues
93- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" local_interactive.ipynb
94- # Run notebook
95- poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
96- env :
97- GRPC_DNS_RESOLVER : " native"
98- working-directory : demo-notebooks/additional-demos
99-
100- - name : Print CodeFlare operator logs
101- if : always() && steps.deploy.outcome == 'success'
102- run : |
103- echo "Printing CodeFlare operator logs"
104- kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
105-
106- - name : Print Kueue operator logs
107- if : always() && steps.deploy.outcome == 'success'
108- run : |
109- echo "Printing Kueue operator logs"
110- KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
111- kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
112-
113- - name : Print KubeRay operator logs
114- if : always() && steps.deploy.outcome == 'success'
115- run : |
116- echo "Printing KubeRay operator logs"
117- kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
118-
119- - name : Export all KinD pod logs
120- uses : ./common/github-actions/kind-export-logs
121- if : always() && steps.deploy.outcome == 'success'
122- with :
123- output-directory : ${TEMP_DIR}
124-
125- - name : Upload logs
126- uses : actions/upload-artifact@v4
127- if : always() && steps.deploy.outcome == 'success'
128- with :
129- name : logs-local_interactive
130- retention-days : 10
131- path : |
132- ${{ env.TEMP_DIR }}/**/*.log
25+ echo "::notice::SKIPPED: verify-local_interactive test is currently disabled."
26+ echo ""
27+ echo "=============================================================================="
28+ echo " TEST SKIPPED: local_interactive.ipynb"
29+ echo "=============================================================================="
30+ echo ""
31+ echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
32+ echo " interactive Ray connections via ray.init(address=cluster.local_client_url())."
33+ echo ""
34+ echo " The mTLS CA secret was previously created by codeflare-operator, which has"
35+ echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
36+ echo " some mTLS features, they require OpenShift-specific components that are not"
37+ echo " available in KinD."
38+ echo ""
39+ echo " This test should be run on a full OpenShift cluster with RHOAI installed."
40+ echo "=============================================================================="
13341
13442 verify-ray_job_client :
135- if : ${{ github.event.label. name == 'test-additional-notebooks' }}
136- runs-on : ubuntu-latest-4core
43+ if : ${{ contains( github.event.pull_request.labels.*. name, 'test-additional-notebooks') }}
44+ runs-on : ubuntu-latest
13745
13846 steps :
139- - name : Checkout code
140- uses : actions/checkout@v4
141- with :
142- submodules : recursive
143-
144- - name : Checkout common repo code
145- uses : actions/checkout@v4
146- with :
147- repository : ' project-codeflare/codeflare-common'
148- ref : ' main'
149- path : ' common'
150-
151- - name : Checkout CodeFlare operator repository
152- uses : actions/checkout@v4
153- with :
154- repository : project-codeflare/codeflare-operator
155- path : codeflare-operator
156-
157- - name : Set Go
158- uses : actions/setup-go@v5
159- with :
160- go-version-file : ' ./codeflare-operator/go.mod'
161- cache-dependency-path : " ./codeflare-operator/go.sum"
162-
163- - name : Set up gotestfmt
164- uses : gotesttools/gotestfmt-action@v2
165- with :
166- token : ${{ secrets.GITHUB_TOKEN }}
167-
168- - name : Set up specific Python version
169- uses : actions/setup-python@v5
170- with :
171- python-version : ' 3.11'
172- cache : ' pip' # caching pip dependencies
173-
174- - name : Setup and start KinD cluster
175- uses : ./common/github-actions/kind
176-
177- - name : Deploy CodeFlare stack
178- id : deploy
179- run : |
180- cd codeflare-operator
181- echo Setting up CodeFlare stack
182- make setup-e2e
183- echo Deploying CodeFlare operator
184- make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
185- kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
186- cd ..
187-
188- - name : Setup Additional demo notebooks execution
47+ - name : Skip notification
18948 run : |
190- echo "Installing papermill and dependencies..."
191- pip install poetry papermill ipython ipykernel
192- # Disable virtualenv due to problems using packaged in virtualenv in papermill
193- poetry config virtualenvs.create false
194-
195- echo "Installing SDK..."
196- poetry install --with test,docs
197-
198- - name : Run ray_job_client.ipynb
199- run : |
200- set -euo pipefail
201-
202- # Remove login/logout cells, as KinD doesn't support authentication using token
203- jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
204- jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
205- # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
206- sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
207- # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
208- JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
209- jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
210- # Set explicit namespace as SDK need it (currently) to resolve local queues
211- sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" ray_job_client.ipynb
212- sed -i "s/worker_memory_requests=4,/worker_memory_requests=1,/" ray_job_client.ipynb
213- sed -i "s/worker_memory_limits=4,/worker_memory_limits=1,/" ray_job_client.ipynb
214- sed -i "s/'Authorization': .*/'Authorization': None\",/" ray_job_client.ipynb
215- sed -i "s/num_workers=2/num_workers=1/" ray_job_client.ipynb
216- sed -i "s/RayJobClient(address=ray_dashboard, headers=header, verify=True)/RayJobClient(address=ray_dashboard, verify=False)/" ray_job_client.ipynb
217- # Run notebook
218- poetry run papermill ray_job_client.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
219- env :
220- GRPC_DNS_RESOLVER : " native"
221- working-directory : demo-notebooks/additional-demos
222-
223- - name : Print CodeFlare operator logs
224- if : always() && steps.deploy.outcome == 'success'
225- run : |
226- echo "Printing CodeFlare operator logs"
227- kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
228-
229- - name : Print Kueue operator logs
230- if : always() && steps.deploy.outcome == 'success'
231- run : |
232- echo "Printing Kueue operator logs"
233- KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
234- kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
235-
236- - name : Print KubeRay operator logs
237- if : always() && steps.deploy.outcome == 'success'
238- run : |
239- echo "Printing KubeRay operator logs"
240- kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
241-
242- - name : Export all KinD pod logs
243- uses : ./common/github-actions/kind-export-logs
244- if : always() && steps.deploy.outcome == 'success'
245- with :
246- output-directory : ${TEMP_DIR}
247-
248- - name : Upload logs
249- uses : actions/upload-artifact@v4
250- if : always() && steps.deploy.outcome == 'success'
251- with :
252- name : logs-ray_job_client
253- retention-days : 10
254- path : |
255- ${{ env.TEMP_DIR }}/**/*.log
49+ echo "::notice::SKIPPED: verify-ray_job_client test is currently disabled."
50+ echo ""
51+ echo "=============================================================================="
52+ echo " TEST SKIPPED: ray_job_client.ipynb"
53+ echo "=============================================================================="
54+ echo ""
55+ echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
56+ echo " interactive Ray connections via the Ray Job Client."
57+ echo ""
58+ echo " The mTLS CA secret was previously created by codeflare-operator, which has"
59+ echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
60+ echo " some mTLS features, they require OpenShift-specific components that are not"
61+ echo " available in KinD."
62+ echo ""
63+ echo " This test should be run on a full OpenShift cluster with RHOAI installed."
64+ echo "=============================================================================="
0 commit comments