Skip to content

Commit b71cdb9

Browse files
RHOAIENG-69301: fix disconnected env job submission: tier1 (3.3)
1 parent: 29cbdba · commit: b71cdb9

3 files changed

Lines changed: 56 additions & 80 deletions

File tree

tests/e2e/mnist_raycluster_sdk_oauth_test.py

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -89,13 +89,10 @@ def assert_jobsubmit_withoutLogin(self, cluster):
8989
# API endpoint is directly under the hostname
9090
api_url = dashboard_url + "/api/jobs/"
9191

92+
job_spec = get_mnist_job_submission_spec()
9293
jobdata = {
93-
"entrypoint": "python mnist.py",
94-
"runtime_env": {
95-
"working_dir": "./tests/e2e/",
96-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
97-
"env_vars": get_setup_env_variables(),
98-
},
94+
"entrypoint": job_spec["entrypoint"],
95+
"runtime_env": job_spec["runtime_env"],
9996
}
10097

10198
# Try to submit a job without authentication
@@ -189,13 +186,11 @@ def assert_jobsubmit_withlogin(self, cluster):
189186
"Verified: No jobs exist from the previous unauthenticated submission attempt."
190187
)
191188

189+
job_spec = get_mnist_job_submission_spec()
190+
print(f"Submitting job: {job_spec['entrypoint']}")
192191
submission_id = client.submit_job(
193-
entrypoint="python mnist.py",
194-
runtime_env={
195-
"working_dir": "./tests/e2e/",
196-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
197-
"env_vars": get_setup_env_variables(),
198-
},
192+
entrypoint=job_spec["entrypoint"],
193+
runtime_env=job_spec["runtime_env"],
199194
entrypoint_num_cpus=1,
200195
)
201196
print(f"Submitted job with ID: {submission_id}")
@@ -221,9 +216,10 @@ def assert_jobsubmit_withlogin(self, cluster):
221216
client.delete_job(submission_id)
222217

223218
def assert_job_completion(self, status):
224-
if status == "SUCCEEDED":
225-
print(f"Job has completed: '{status}'")
219+
status_value = getattr(status, "value", status)
220+
if status_value == "SUCCEEDED":
221+
print(f"Job has completed: '{status_value}'")
226222
assert True
227223
else:
228-
print(f"Job has completed: '{status}'")
224+
print(f"Job has completed: '{status_value}'")
229225
assert False

tests/e2e/support.py

Lines changed: 44 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -151,40 +151,12 @@ def _env_flag_disabled(name):
151151
return os.environ.get(name, "").strip().lower() in ("0", "false", "no", "off")
152152

153153

154-
def _has_registry_mirror_configured():
155-
"""
156-
Detect mirror-only registry layout typical of disconnected OpenShift installs.
157-
"""
158-
try:
159-
custom_api = client.CustomObjectsApi()
160-
mirror_types = [
161-
("config.openshift.io", "v1", "imagedigestmirrorsets"),
162-
("operator.openshift.io", "v1alpha1", "imagecontentsourcepolicies"),
163-
]
164-
for group, version, plural in mirror_types:
165-
try:
166-
result = custom_api.list_cluster_custom_object(
167-
group=group,
168-
version=version,
169-
plural=plural,
170-
)
171-
if result.get("items"):
172-
return True
173-
except Exception:
174-
continue
175-
except Exception:
176-
pass
177-
return False
178-
179-
180154
def _disconnected_cluster_signals():
181155
"""Return True when the cluster is likely disconnected / air-gapped."""
182156
if _env_flag_enabled("DISCONNECTED_CLUSTER") or _env_flag_enabled(
183157
"IS_DISCONNECTED_CLUSTER"
184158
):
185159
return True
186-
if _has_registry_mirror_configured():
187-
return True
188160
try:
189161
server = (run_oc_command(["whoami", "--show-server=true"]) or "").lower()
190162
if "-dis-" in server or "disconnected" in server:
@@ -194,7 +166,7 @@ def _disconnected_cluster_signals():
194166
return False
195167

196168

197-
def _upgrade_mnist_prerequisites_met():
169+
def _mnist_prerequisites_met():
198170
"""
199171
Return True when full MNIST can run (pip packages + dataset reachable).
200172
"""
@@ -208,39 +180,42 @@ def _upgrade_mnist_prerequisites_met():
208180
if pip_ok and aws_ok:
209181
print(
210182
"Disconnected cluster with PIP mirror and S3 endpoint configured; "
211-
"using full MNIST upgrade job"
183+
"using full MNIST job"
212184
)
213185
return True
214186
return False
215187

216188

217-
def use_upgrade_smoke_job():
189+
def use_smoke_job():
218190
"""
219-
Use a lightweight Ray job for upgrade tests when full MNIST is not viable.
191+
Use a lightweight Ray job when full MNIST is not viable.
192+
193+
Detection order (first match wins):
194+
1. USE_SMOKE_JOB / UPGRADE_USE_SMOKE_JOB=true|false (explicit override)
195+
2. Full MNIST prerequisites met (connected, or disconnected with mirrors)
196+
3. DISCONNECTED_CLUSTER / IS_DISCONNECTED_CLUSTER env (Jenkins)
197+
4. API server URL heuristic (-dis- / disconnected), last resort
198+
199+
ImageDigestMirrorSet / ICSP are intentionally not used: many connected
200+
OpenShift clusters mirror container registries without blocking pip/PyPI.
220201
"""
221-
if _env_flag_enabled("UPGRADE_USE_SMOKE_JOB"):
222-
print("UPGRADE_USE_SMOKE_JOB enabled; using upgrade smoke job")
223-
return True
224-
if _env_flag_disabled("UPGRADE_USE_SMOKE_JOB"):
225-
print("UPGRADE_USE_SMOKE_JOB disabled; using full MNIST upgrade job")
226-
return False
202+
for name in ("USE_SMOKE_JOB", "UPGRADE_USE_SMOKE_JOB"):
203+
if _env_flag_enabled(name):
204+
print(f"{name} enabled; using smoke job (no pip install)")
205+
return True
206+
if _env_flag_disabled(name):
207+
print(f"{name} disabled; using full MNIST job")
208+
return False
227209

228-
if _upgrade_mnist_prerequisites_met():
210+
if _mnist_prerequisites_met():
229211
return False
230212

231213
if _env_flag_enabled("DISCONNECTED_CLUSTER") or _env_flag_enabled(
232214
"IS_DISCONNECTED_CLUSTER"
233215
):
234216
print(
235217
"Disconnected cluster env set without PIP/S3 mirrors; "
236-
"using upgrade smoke job (no pip install)"
237-
)
238-
return True
239-
240-
if _has_registry_mirror_configured():
241-
print(
242-
"Registry mirror detected (ImageDigestMirrorSet/ICSP) without "
243-
"PIP/S3 mirrors; using upgrade smoke job (no pip install)"
218+
"using smoke job (no pip install)"
244219
)
245220
return True
246221

@@ -249,7 +224,7 @@ def use_upgrade_smoke_job():
249224
if "-dis-" in server or "disconnected" in server:
250225
print(
251226
"Detected disconnected cluster from API server URL; "
252-
"using upgrade smoke job (no pip install)"
227+
"using smoke job (no pip install)"
253228
)
254229
return True
255230
except Exception:
@@ -258,26 +233,37 @@ def use_upgrade_smoke_job():
258233
return False
259234

260235

261-
def get_upgrade_job_submission_spec():
262-
"""Return entrypoint and runtime_env for post-upgrade job submission tests."""
263-
if use_upgrade_smoke_job():
236+
def use_upgrade_smoke_job():
237+
"""Backward-compatible alias for upgrade tests."""
238+
return use_smoke_job()
239+
240+
241+
def get_mnist_job_submission_spec(**kwargs):
242+
"""Return entrypoint and runtime_env for tier1 / upgrade MNIST job submission tests."""
243+
env_vars = get_setup_env_variables(**kwargs)
244+
if use_smoke_job():
264245
return {
265246
"entrypoint": "python upgrade_job_smoke.py",
266247
"runtime_env": {
267248
"working_dir": "./tests/e2e/",
268-
"env_vars": get_setup_env_variables(),
249+
"env_vars": env_vars,
269250
},
270251
}
271252
return {
272253
"entrypoint": "python mnist.py",
273254
"runtime_env": {
274255
"working_dir": "./tests/e2e/",
275256
"pip": "./tests/e2e/mnist_pip_requirements.txt",
276-
"env_vars": get_setup_env_variables(),
257+
"env_vars": env_vars,
277258
},
278259
}
279260

280261

262+
def get_upgrade_job_submission_spec(**kwargs):
263+
"""Backward-compatible alias for post-upgrade job submission tests."""
264+
return get_mnist_job_submission_spec(**kwargs)
265+
266+
281267
def random_choice():
282268
alphabet = string.ascii_lowercase + string.digits
283269
return "".join(random.choices(alphabet, k=5))
@@ -721,18 +707,12 @@ def assert_get_cluster_and_jobsubmit(
721707
client = cluster.job_client
722708

723709
# Submit a job and get the submission ID
724-
env_vars = (
725-
get_setup_env_variables(ACCELERATOR=accelerator)
726-
if accelerator
727-
else get_setup_env_variables()
728-
)
710+
spec_kwargs = {"ACCELERATOR": accelerator} if accelerator else {}
711+
job_spec = get_mnist_job_submission_spec(**spec_kwargs)
712+
print(f"Submitting job: {job_spec['entrypoint']}")
729713
submission_id = client.submit_job(
730-
entrypoint="python mnist.py",
731-
runtime_env={
732-
"working_dir": "./tests/e2e/",
733-
"pip": "./tests/e2e/mnist_pip_requirements.txt",
734-
"env_vars": env_vars,
735-
},
714+
entrypoint=job_spec["entrypoint"],
715+
runtime_env=job_spec["runtime_env"],
736716
entrypoint_num_cpus=1 if number_of_gpus is None else None,
737717
entrypoint_num_gpus=number_of_gpus,
738718
)

tests/upgrade/01_raycluster_sdk_upgrade_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ def assert_jobsubmit_withlogin(self, cluster):
263263
client = RayJobClient(address=ray_dashboard, headers=header, verify=False)
264264

265265
job_spec = get_upgrade_job_submission_spec()
266-
print(f"Submitting upgrade job: {job_spec['entrypoint']}")
266+
print(f"Submitting job: {job_spec['entrypoint']}")
267267

268268
submission_id = client.submit_job(
269269
entrypoint=job_spec["entrypoint"],

0 commit comments

Comments
 (0)