Skip to content

Commit e8f6a7b

Browse files
test: fix/disable non-working notebook tests and bump Python version
1 parent 17485ee commit e8f6a7b

11 files changed

Lines changed: 314 additions & 533 deletions

.github/workflows/additional_demo_notebook_tests.yaml

Lines changed: 42 additions & 233 deletions
Original file line numberDiff line numberDiff line change
@@ -2,254 +2,63 @@ name: Additional demo notebooks tests
22

33
on:
44
pull_request:
5-
types: [ labeled ]
5+
branches: [ main ]
6+
types: [ labeled, synchronize ]
67
workflow_dispatch:
78

89
concurrency:
910
group: ${{ github.head_ref }}-${{ github.workflow }}
1011
cancel-in-progress: true
1112

1213
env:
13-
CODEFLARE_OPERATOR_IMG: "quay.io/project-codeflare/codeflare-operator:dev"
14+
KUEUE_VERSION: v0.13.4
15+
KUBERAY_VERSION: v1.4.2
1416

1517
jobs:
1618
verify-local_interactive:
17-
if: ${{ github.event.label.name == 'test-additional-notebooks' }}
18-
runs-on: ubuntu-latest-4core
19+
if: ${{ contains(github.event.pull_request.labels.*.name, 'test-additional-notebooks') }}
20+
runs-on: ubuntu-latest
1921

2022
steps:
21-
- name: Checkout code
22-
uses: actions/checkout@v4
23-
with:
24-
submodules: recursive
25-
26-
- name: Checkout common repo code
27-
uses: actions/checkout@v4
28-
with:
29-
repository: 'project-codeflare/codeflare-common'
30-
ref: 'main'
31-
path: 'common'
32-
33-
- name: Checkout CodeFlare operator repository
34-
uses: actions/checkout@v4
35-
with:
36-
repository: project-codeflare/codeflare-operator
37-
path: codeflare-operator
38-
39-
- name: Set Go
40-
uses: actions/setup-go@v5
41-
with:
42-
go-version-file: './codeflare-operator/go.mod'
43-
cache-dependency-path: "./codeflare-operator/go.sum"
44-
45-
- name: Set up gotestfmt
46-
uses: gotesttools/gotestfmt-action@v2
47-
with:
48-
token: ${{ secrets.GITHUB_TOKEN }}
49-
50-
- name: Set up specific Python version
51-
uses: actions/setup-python@v5
52-
with:
53-
python-version: '3.11'
54-
cache: 'pip' # caching pip dependencies
55-
56-
- name: Setup and start KinD cluster
57-
uses: ./common/github-actions/kind
58-
59-
- name: Deploy CodeFlare stack
60-
id: deploy
61-
run: |
62-
cd codeflare-operator
63-
echo Setting up CodeFlare stack
64-
make setup-e2e
65-
echo Deploying CodeFlare operator
66-
make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
67-
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
68-
cd ..
69-
70-
- name: Setup Additional demo notebooks execution
23+
- name: Skip notification
7124
run: |
72-
echo "Installing papermill and dependencies..."
73-
pip install poetry papermill ipython ipykernel
74-
# Disable virtualenv due to problems using packaged in virtualenv in papermill
75-
poetry config virtualenvs.create false
76-
77-
echo "Installing SDK..."
78-
poetry install --with test,docs
79-
80-
- name: Run local_interactive.ipynb
81-
run: |
82-
set -euo pipefail
83-
84-
# Remove login/logout cells, as KinD doesn't support authentication using token
85-
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
86-
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
87-
# Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
88-
sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
89-
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
90-
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
91-
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
92-
# Set explicit namespace as SDK need it (currently) to resolve local queues
93-
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" local_interactive.ipynb
94-
# Run notebook
95-
poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
96-
env:
97-
GRPC_DNS_RESOLVER: "native"
98-
working-directory: demo-notebooks/additional-demos
99-
100-
- name: Print CodeFlare operator logs
101-
if: always() && steps.deploy.outcome == 'success'
102-
run: |
103-
echo "Printing CodeFlare operator logs"
104-
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
105-
106-
- name: Print Kueue operator logs
107-
if: always() && steps.deploy.outcome == 'success'
108-
run: |
109-
echo "Printing Kueue operator logs"
110-
KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
111-
kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
112-
113-
- name: Print KubeRay operator logs
114-
if: always() && steps.deploy.outcome == 'success'
115-
run: |
116-
echo "Printing KubeRay operator logs"
117-
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
118-
119-
- name: Export all KinD pod logs
120-
uses: ./common/github-actions/kind-export-logs
121-
if: always() && steps.deploy.outcome == 'success'
122-
with:
123-
output-directory: ${TEMP_DIR}
124-
125-
- name: Upload logs
126-
uses: actions/upload-artifact@v4
127-
if: always() && steps.deploy.outcome == 'success'
128-
with:
129-
name: logs-local_interactive
130-
retention-days: 10
131-
path: |
132-
${{ env.TEMP_DIR }}/**/*.log
25+
echo "::notice::SKIPPED: verify-local_interactive test is currently disabled."
26+
echo ""
27+
echo "=============================================================================="
28+
echo " TEST SKIPPED: local_interactive.ipynb"
29+
echo "=============================================================================="
30+
echo ""
31+
echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
32+
echo " interactive Ray connections via ray.init(address=cluster.local_client_url())."
33+
echo ""
34+
echo " The mTLS CA secret was previously created by codeflare-operator, which has"
35+
echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
36+
echo " some mTLS features, they require OpenShift-specific components that are not"
37+
echo " available in KinD."
38+
echo ""
39+
echo " This test should be run on a full OpenShift cluster with RHOAI installed."
40+
echo "=============================================================================="
13341
13442
verify-ray_job_client:
135-
if: ${{ github.event.label.name == 'test-additional-notebooks' }}
136-
runs-on: ubuntu-latest-4core
43+
if: ${{ contains(github.event.pull_request.labels.*.name, 'test-additional-notebooks') }}
44+
runs-on: ubuntu-latest
13745

13846
steps:
139-
- name: Checkout code
140-
uses: actions/checkout@v4
141-
with:
142-
submodules: recursive
143-
144-
- name: Checkout common repo code
145-
uses: actions/checkout@v4
146-
with:
147-
repository: 'project-codeflare/codeflare-common'
148-
ref: 'main'
149-
path: 'common'
150-
151-
- name: Checkout CodeFlare operator repository
152-
uses: actions/checkout@v4
153-
with:
154-
repository: project-codeflare/codeflare-operator
155-
path: codeflare-operator
156-
157-
- name: Set Go
158-
uses: actions/setup-go@v5
159-
with:
160-
go-version-file: './codeflare-operator/go.mod'
161-
cache-dependency-path: "./codeflare-operator/go.sum"
162-
163-
- name: Set up gotestfmt
164-
uses: gotesttools/gotestfmt-action@v2
165-
with:
166-
token: ${{ secrets.GITHUB_TOKEN }}
167-
168-
- name: Set up specific Python version
169-
uses: actions/setup-python@v5
170-
with:
171-
python-version: '3.11'
172-
cache: 'pip' # caching pip dependencies
173-
174-
- name: Setup and start KinD cluster
175-
uses: ./common/github-actions/kind
176-
177-
- name: Deploy CodeFlare stack
178-
id: deploy
179-
run: |
180-
cd codeflare-operator
181-
echo Setting up CodeFlare stack
182-
make setup-e2e
183-
echo Deploying CodeFlare operator
184-
make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
185-
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
186-
cd ..
187-
188-
- name: Setup Additional demo notebooks execution
47+
- name: Skip notification
18948
run: |
190-
echo "Installing papermill and dependencies..."
191-
pip install poetry papermill ipython ipykernel
192-
# Disable virtualenv due to problems using packaged in virtualenv in papermill
193-
poetry config virtualenvs.create false
194-
195-
echo "Installing SDK..."
196-
poetry install --with test,docs
197-
198-
- name: Run ray_job_client.ipynb
199-
run: |
200-
set -euo pipefail
201-
202-
# Remove login/logout cells, as KinD doesn't support authentication using token
203-
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
204-
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
205-
# Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
206-
sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
207-
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
208-
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
209-
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
210-
# Set explicit namespace as SDK need it (currently) to resolve local queues
211-
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" ray_job_client.ipynb
212-
sed -i "s/worker_memory_requests=4,/worker_memory_requests=1,/" ray_job_client.ipynb
213-
sed -i "s/worker_memory_limits=4,/worker_memory_limits=1,/" ray_job_client.ipynb
214-
sed -i "s/'Authorization': .*/'Authorization': None\",/" ray_job_client.ipynb
215-
sed -i "s/num_workers=2/num_workers=1/" ray_job_client.ipynb
216-
sed -i "s/RayJobClient(address=ray_dashboard, headers=header, verify=True)/RayJobClient(address=ray_dashboard, verify=False)/" ray_job_client.ipynb
217-
# Run notebook
218-
poetry run papermill ray_job_client.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
219-
env:
220-
GRPC_DNS_RESOLVER: "native"
221-
working-directory: demo-notebooks/additional-demos
222-
223-
- name: Print CodeFlare operator logs
224-
if: always() && steps.deploy.outcome == 'success'
225-
run: |
226-
echo "Printing CodeFlare operator logs"
227-
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
228-
229-
- name: Print Kueue operator logs
230-
if: always() && steps.deploy.outcome == 'success'
231-
run: |
232-
echo "Printing Kueue operator logs"
233-
KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
234-
kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
235-
236-
- name: Print KubeRay operator logs
237-
if: always() && steps.deploy.outcome == 'success'
238-
run: |
239-
echo "Printing KubeRay operator logs"
240-
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
241-
242-
- name: Export all KinD pod logs
243-
uses: ./common/github-actions/kind-export-logs
244-
if: always() && steps.deploy.outcome == 'success'
245-
with:
246-
output-directory: ${TEMP_DIR}
247-
248-
- name: Upload logs
249-
uses: actions/upload-artifact@v4
250-
if: always() && steps.deploy.outcome == 'success'
251-
with:
252-
name: logs-ray_job_client
253-
retention-days: 10
254-
path: |
255-
${{ env.TEMP_DIR }}/**/*.log
49+
echo "::notice::SKIPPED: verify-ray_job_client test is currently disabled."
50+
echo ""
51+
echo "=============================================================================="
52+
echo " TEST SKIPPED: ray_job_client.ipynb"
53+
echo "=============================================================================="
54+
echo ""
55+
echo " Reason: This notebook requires mTLS (mutual TLS) certificates for"
56+
echo " interactive Ray connections via the Ray Job Client."
57+
echo ""
58+
echo " The mTLS CA secret was previously created by codeflare-operator, which has"
59+
echo " been removed from the RHOAI 3.x stack. While opendatahub-io/kuberay includes"
60+
echo " some mTLS features, they require OpenShift-specific components that are not"
61+
echo " available in KinD."
62+
echo ""
63+
echo " This test should be run on a full OpenShift cluster with RHOAI installed."
64+
echo "=============================================================================="

.github/workflows/coverage-badge.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
- name: Set up Python 3.12
2020
uses: actions/setup-python@v5
2121
with:
22-
python-version: 3.11
22+
python-version: 3.12
2323
- name: Install dependencies
2424
run: |
2525
python -m pip install --upgrade pip

0 commit comments

Comments
 (0)