Skip to content

Commit 603f4d3

Browse files
RHOAIENG-69301: fix disconnected env job submission: tier1 (3.4)
1 parent 1bc3451 commit 603f4d3

4 files changed

Lines changed: 57 additions & 97 deletions

File tree

tests/e2e/mnist_raycluster_sdk_oauth_test.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,10 @@ def assert_jobsubmit_withoutLogin(self, cluster):
142142
# API endpoint is directly under the hostname
143143
api_url = dashboard_url + "/api/jobs/"
144144

145+
job_spec = get_mnist_job_submission_spec()
145146
jobdata = {
146-
"entrypoint": "python mnist.py",
147-
"runtime_env": {
148-
"working_dir": "./tests/e2e/",
149-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
150-
"env_vars": get_setup_env_variables(),
151-
},
147+
"entrypoint": job_spec["entrypoint"],
148+
"runtime_env": job_spec["runtime_env"],
152149
}
153150

154151
# Try to submit a job without authentication
@@ -382,13 +379,11 @@ def assert_jobsubmit_withlogin(self, cluster):
382379
"Verified: No jobs exist from the previous unauthenticated submission attempt."
383380
)
384381

382+
job_spec = get_mnist_job_submission_spec()
383+
print(f"Submitting job: {job_spec['entrypoint']}")
385384
submission_id = client.submit_job(
386-
entrypoint="python mnist.py",
387-
runtime_env={
388-
"working_dir": "./tests/e2e/",
389-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
390-
"env_vars": get_setup_env_variables(),
391-
},
385+
entrypoint=job_spec["entrypoint"],
386+
runtime_env=job_spec["runtime_env"],
392387
entrypoint_num_cpus=1,
393388
)
394389
print(f"Submitted job with ID: {submission_id}")
@@ -414,9 +409,10 @@ def assert_jobsubmit_withlogin(self, cluster):
414409
client.delete_job(submission_id)
415410

416411
def assert_job_completion(self, status):
417-
if status == "SUCCEEDED":
418-
print(f"Job has completed: '{status}'")
412+
status_value = getattr(status, "value", status)
413+
if status_value == "SUCCEEDED":
414+
print(f"Job has completed: '{status_value}'")
419415
assert True
420416
else:
421-
print(f"Job has completed: '{status}'")
417+
print(f"Job has completed: '{status_value}'")
422418
assert False

tests/e2e/support.py

Lines changed: 40 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -165,44 +165,12 @@ def _env_flag_disabled(name):
165165
return os.environ.get(name, "").strip().lower() in ("0", "false", "no", "off")
166166

167167

168-
def _has_registry_mirror_configured():
169-
"""
170-
Detect mirror-only registry layout typical of disconnected OpenShift installs.
171-
172-
Checks ImageDigestMirrorSet (OCP 4.14+) and ImageContentSourcePolicy (legacy).
173-
"""
174-
try:
175-
from kubernetes import client as k8s_client
176-
177-
custom_api = k8s_client.CustomObjectsApi()
178-
mirror_types = [
179-
("config.openshift.io", "v1", "imagedigestmirrorsets"),
180-
("operator.openshift.io", "v1alpha1", "imagecontentsourcepolicies"),
181-
]
182-
for group, version, plural in mirror_types:
183-
try:
184-
result = custom_api.list_cluster_custom_object(
185-
group=group,
186-
version=version,
187-
plural=plural,
188-
)
189-
if result.get("items"):
190-
return True
191-
except Exception:
192-
continue
193-
except Exception:
194-
pass
195-
return False
196-
197-
198168
def _disconnected_cluster_signals():
199169
"""Return True when the cluster is likely disconnected / air-gapped."""
200170
if _env_flag_enabled("DISCONNECTED_CLUSTER") or _env_flag_enabled(
201171
"IS_DISCONNECTED_CLUSTER"
202172
):
203173
return True
204-
if _has_registry_mirror_configured():
205-
return True
206174
try:
207175
server = (run_oc_command(["whoami", "--show-server=true"]) or "").lower()
208176
if "-dis-" in server or "disconnected" in server:
@@ -212,7 +180,7 @@ def _disconnected_cluster_signals():
212180
return False
213181

214182

215-
def _upgrade_mnist_prerequisites_met():
183+
def _mnist_prerequisites_met():
216184
"""
217185
Return True when full MNIST can run (pip packages + dataset reachable).
218186
@@ -229,49 +197,42 @@ def _upgrade_mnist_prerequisites_met():
229197
if pip_ok and aws_ok:
230198
print(
231199
"Disconnected cluster with PIP mirror and S3 endpoint configured; "
232-
"using full MNIST upgrade job"
200+
"using full MNIST job"
233201
)
234202
return True
235203
return False
236204

237205

238-
def use_upgrade_smoke_job():
206+
def use_smoke_job():
239207
"""
240-
Use a lightweight Ray job for upgrade tests when full MNIST is not viable.
241-
242-
Full MNIST pulls torch/pytorch via pip and may download datasets from the
243-
public internet — both fail on disconnected clusters without mirrors.
208+
Use a lightweight Ray job when full MNIST is not viable.
244209
245210
Detection order (first match wins):
246-
1. UPGRADE_USE_SMOKE_JOB=true|false (explicit override)
211+
1. USE_SMOKE_JOB / UPGRADE_USE_SMOKE_JOB=true|false (explicit override)
247212
2. Full MNIST prerequisites met (connected, or disconnected with mirrors)
248213
3. DISCONNECTED_CLUSTER / IS_DISCONNECTED_CLUSTER env (Jenkins)
249-
4. ImageDigestMirrorSet / ImageContentSourcePolicy on cluster
250-
5. API server URL heuristic (-dis- / disconnected), last resort
214+
4. API server URL heuristic (-dis- / disconnected), last resort
215+
216+
ImageDigestMirrorSet / ICSP are intentionally not used: many connected
217+
OpenShift clusters mirror container registries without blocking pip/PyPI.
251218
"""
252-
if _env_flag_enabled("UPGRADE_USE_SMOKE_JOB"):
253-
print("UPGRADE_USE_SMOKE_JOB enabled; using upgrade smoke job")
254-
return True
255-
if _env_flag_disabled("UPGRADE_USE_SMOKE_JOB"):
256-
print("UPGRADE_USE_SMOKE_JOB disabled; using full MNIST upgrade job")
257-
return False
219+
for name in ("USE_SMOKE_JOB", "UPGRADE_USE_SMOKE_JOB"):
220+
if _env_flag_enabled(name):
221+
print(f"{name} enabled; using smoke job (no pip install)")
222+
return True
223+
if _env_flag_disabled(name):
224+
print(f"{name} disabled; using full MNIST job")
225+
return False
258226

259-
if _upgrade_mnist_prerequisites_met():
227+
if _mnist_prerequisites_met():
260228
return False
261229

262230
if _env_flag_enabled("DISCONNECTED_CLUSTER") or _env_flag_enabled(
263231
"IS_DISCONNECTED_CLUSTER"
264232
):
265233
print(
266234
"Disconnected cluster env set without PIP/S3 mirrors; "
267-
"using upgrade smoke job (no pip install)"
268-
)
269-
return True
270-
271-
if _has_registry_mirror_configured():
272-
print(
273-
"Registry mirror detected (ImageDigestMirrorSet/ICSP) without "
274-
"PIP/S3 mirrors; using upgrade smoke job (no pip install)"
235+
"using smoke job (no pip install)"
275236
)
276237
return True
277238

@@ -280,7 +241,7 @@ def use_upgrade_smoke_job():
280241
if "-dis-" in server or "disconnected" in server:
281242
print(
282243
"Detected disconnected cluster from API server URL; "
283-
"using upgrade smoke job (no pip install)"
244+
"using smoke job (no pip install)"
284245
)
285246
return True
286247
except Exception:
@@ -289,28 +250,37 @@ def use_upgrade_smoke_job():
289250
return False
290251

291252

292-
def get_upgrade_job_submission_spec():
293-
"""
294-
Return entrypoint and runtime_env for post-upgrade job submission tests.
295-
"""
296-
if use_upgrade_smoke_job():
253+
def use_upgrade_smoke_job():
254+
"""Backward-compatible alias for upgrade tests."""
255+
return use_smoke_job()
256+
257+
258+
def get_mnist_job_submission_spec(**kwargs):
259+
"""Return entrypoint and runtime_env for tier1 / upgrade MNIST job submission tests."""
260+
env_vars = get_setup_env_variables(**kwargs)
261+
if use_smoke_job():
297262
return {
298263
"entrypoint": "python upgrade_job_smoke.py",
299264
"runtime_env": {
300265
"working_dir": "./tests/e2e/",
301-
"env_vars": get_setup_env_variables(),
266+
"env_vars": env_vars,
302267
},
303268
}
304269
return {
305270
"entrypoint": "python mnist.py",
306271
"runtime_env": {
307272
"working_dir": "./tests/e2e/",
308273
"pip": "./tests/e2e/mnist_pip_requirements.txt",
309-
"env_vars": get_setup_env_variables(),
274+
"env_vars": env_vars,
310275
},
311276
}
312277

313278

279+
def get_upgrade_job_submission_spec(**kwargs):
280+
"""Backward-compatible alias for post-upgrade job submission tests."""
281+
return get_mnist_job_submission_spec(**kwargs)
282+
283+
314284
def random_choice():
315285
alphabet = string.ascii_lowercase + string.digits
316286
return "".join(random.choices(alphabet, k=5))
@@ -831,18 +801,12 @@ def assert_get_cluster_and_jobsubmit(
831801
client = cluster.job_client
832802

833803
# Submit a job and get the submission ID
834-
env_vars = (
835-
get_setup_env_variables(ACCELERATOR=accelerator)
836-
if accelerator
837-
else get_setup_env_variables()
838-
)
804+
spec_kwargs = {"ACCELERATOR": accelerator} if accelerator else {}
805+
job_spec = get_mnist_job_submission_spec(**spec_kwargs)
806+
print(f"Submitting job: {job_spec['entrypoint']}")
839807
submission_id = client.submit_job(
840-
entrypoint="python mnist.py",
841-
runtime_env={
842-
"working_dir": "./tests/e2e/",
843-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
844-
"env_vars": env_vars,
845-
},
808+
entrypoint=job_spec["entrypoint"],
809+
runtime_env=job_spec["runtime_env"],
846810
entrypoint_num_cpus=1 if number_of_gpus is None else None,
847811
entrypoint_num_gpus=number_of_gpus,
848812
)

tests/upgrade/01_raycluster_sdk_upgrade_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def assert_jobsubmit_withlogin(self, cluster):
249249
client = RayJobClient(address=ray_dashboard, headers=header, verify=False)
250250

251251
job_spec = get_upgrade_job_submission_spec()
252-
print(f"Submitting upgrade job: {job_spec['entrypoint']}")
252+
print(f"Submitting job: {job_spec['entrypoint']}")
253253

254254
# Submit the job
255255
submission_id = client.submit_job(

tests/upgrade/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,22 @@ Post-upgrade job tests use `is_byoidc_cluster_detected()` from `tests/e2e/suppor
4949

5050
On **htpasswd/LDAP** QE clusters (e.g. ods-qe-psi-17), job tests use `oc whoami --show-token=true` with `OCP_ADMIN_USER_*` credentials — not Keycloak password grant.
5151

52-
## Disconnected clusters (post-upgrade job submission)
52+
## Disconnected clusters (post-upgrade and tier1 job submission)
5353

5454
Full MNIST installs torch via pip and may fetch datasets from the public internet. On disconnected clusters that fails unless mirrors are configured.
5555

56-
Post-upgrade job tests use `get_upgrade_job_submission_spec()` from `tests/e2e/support.py`:
56+
Post-upgrade and tier1 MNIST job tests use `get_mnist_job_submission_spec()` from `tests/e2e/support.py`:
5757

5858
| Job | When |
5959
|-----|------|
6060
| **`upgrade_job_smoke.py`** (no pip) | Disconnected cluster without both a PyPI mirror (`PIP_INDEX_URL`) and an S3 endpoint (`AWS_DEFAULT_ENDPOINT`) configured |
6161
| **`mnist.py`** (full pip + training) | Connected cluster, or disconnected with `PIP_INDEX_URL` (non-pypi.org) **and** `AWS_DEFAULT_ENDPOINT` set |
6262

63-
**Detection order:** `UPGRADE_USE_SMOKE_JOB` override → MNIST prerequisites → `DISCONNECTED_CLUSTER` / `IS_DISCONNECTED_CLUSTER` (Jenkins) → `ImageDigestMirrorSet` / `ImageContentSourcePolicy` → API URL `-dis-` heuristic (last resort).
63+
**Detection order:** `USE_SMOKE_JOB` / `UPGRADE_USE_SMOKE_JOB` override → MNIST prerequisites → `DISCONNECTED_CLUSTER` / `IS_DISCONNECTED_CLUSTER` (Jenkins) → API URL `-dis-` heuristic (last resort).
6464

65-
Set `UPGRADE_USE_SMOKE_JOB=false` to force full MNIST when mirrors are configured. Set `DISCONNECTED_CLUSTER=true` in the test env file for explicit lab config (aligns with Jenkins `IS_DISCONNECTED_CLUSTER`).
65+
Registry image mirrors (`ImageDigestMirrorSet` / `ICSP`) are **not** used for detection — connected OpenShift clusters often have them without blocking pip.
6666

67-
**Porting:** apply the same helpers (`use_upgrade_smoke_job`, `upgrade_job_smoke.py`, `get_upgrade_job_submission_spec` usage) to **`main`** (3.5 / `2.x-3.x-upgrade-tests-post-upgrade`) and **`3.3`** upgrade branches.
67+
Set `USE_SMOKE_JOB=false` or `UPGRADE_USE_SMOKE_JOB=false` to force full MNIST when mirrors are configured; when both variables hold a recognized true/false value, `USE_SMOKE_JOB` is checked first and takes precedence. Set `DISCONNECTED_CLUSTER=true` in the test env file for explicit lab config (aligns with Jenkins `IS_DISCONNECTED_CLUSTER`).
6868

6969
## Migration script
7070

0 commit comments

Comments
 (0)