Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions images/tests/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ COPY --from=builder /codeflare-sdk/.venv /codeflare-sdk/.venv
COPY pyproject.toml poetry.lock* ./
COPY README.md ./
COPY src/ ./src/
COPY scripts/migration/ ./scripts/migration/
COPY tests/ ./tests/

# Copy test runner script, entrypoint, and RBAC file
Expand Down
26 changes: 11 additions & 15 deletions tests/e2e/mnist_raycluster_sdk_oauth_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,10 @@ def assert_jobsubmit_withoutLogin(self, cluster):
# API endpoint is directly under the hostname
api_url = dashboard_url + "/api/jobs/"

job_spec = get_mnist_job_submission_spec()
jobdata = {
"entrypoint": "python mnist.py",
"runtime_env": {
"working_dir": "./tests/e2e/",
"pip": "./tests/e2e/mnist_pip_requirements.txt",
"env_vars": get_setup_env_variables(),
},
"entrypoint": job_spec["entrypoint"],
"runtime_env": job_spec["runtime_env"],
}

# Try to submit a job without authentication
Expand Down Expand Up @@ -382,13 +379,11 @@ def assert_jobsubmit_withlogin(self, cluster):
"Verified: No jobs exist from the previous unauthenticated submission attempt."
)

job_spec = get_mnist_job_submission_spec()
print(f"Submitting job: {job_spec['entrypoint']}")
submission_id = client.submit_job(
entrypoint="python mnist.py",
runtime_env={
"working_dir": "./tests/e2e/",
"pip": "./tests/e2e/mnist_pip_requirements.txt",
"env_vars": get_setup_env_variables(),
},
entrypoint=job_spec["entrypoint"],
runtime_env=job_spec["runtime_env"],
entrypoint_num_cpus=1,
)
print(f"Submitted job with ID: {submission_id}")
Expand All @@ -414,9 +409,10 @@ def assert_jobsubmit_withlogin(self, cluster):
client.delete_job(submission_id)

def assert_job_completion(self, status):
if status == "SUCCEEDED":
print(f"Job has completed: '{status}'")
status_value = getattr(status, "value", status)
if status_value == "SUCCEEDED":
print(f"Job has completed: '{status_value}'")
assert True
else:
print(f"Job has completed: '{status}'")
print(f"Job has completed: '{status_value}'")
assert False
178 changes: 147 additions & 31 deletions tests/e2e/support.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import random
import string
Expand Down Expand Up @@ -156,6 +157,130 @@ def get_setup_env_variables(**kwargs):
return env_vars


def _env_flag_enabled(name):
    """Return True when environment variable *name* holds a truthy flag value.

    The raw value is stripped of surrounding whitespace and lower-cased
    before comparison. Accepted spellings are ``1``, ``true``, ``yes`` and
    ``on``. An unset variable, an empty value, or any other text yields
    False.

    Args:
        name: Name of the environment variable to inspect.

    Returns:
        bool: True only for one of the accepted truthy spellings.
    """
    # "on" mirrors the "off" spelling accepted by _env_flag_disabled, so that
    # FLAG=on / FLAG=off behave symmetrically instead of "on" being ignored.
    return os.environ.get(name, "").strip().lower() in ("1", "true", "yes", "on")


def _env_flag_disabled(name):
    """Tell whether environment variable *name* is explicitly switched off.

    The value is trimmed and lower-cased, then matched against the falsy
    spellings ``0``, ``false``, ``no`` and ``off``. An unset or empty
    variable is NOT considered disabled; it simply carries no opinion.

    Args:
        name: Name of the environment variable to inspect.

    Returns:
        bool: True only when the variable holds one of the falsy spellings.
    """
    falsy_spellings = ("0", "false", "no", "off")
    raw_value = os.environ.get(name, "")
    normalized = raw_value.strip().lower()
    return normalized in falsy_spellings


def _disconnected_cluster_signals():
    """Report whether the cluster looks disconnected / air-gapped.

    Two signals are consulted, in order:

    1. The ``DISCONNECTED_CLUSTER`` or ``IS_DISCONNECTED_CLUSTER`` environment
       flag being enabled.
    2. The API server URL reported by ``oc whoami --show-server=true``
       containing ``-dis-`` or ``disconnected`` (case-insensitive).

    Returns:
        bool: True when either signal fires, False otherwise.
    """
    env_flags = ("DISCONNECTED_CLUSTER", "IS_DISCONNECTED_CLUSTER")
    if any(_env_flag_enabled(flag) for flag in env_flags):
        return True

    try:
        api_server = (run_oc_command(["whoami", "--show-server=true"]) or "").lower()
        looks_disconnected = "-dis-" in api_server or "disconnected" in api_server
    except Exception:
        # Best-effort heuristic: any failure while probing the API server is
        # treated as "no signal" rather than an error.
        looks_disconnected = False
    return looks_disconnected


def _mnist_prerequisites_met():
    """
    Decide whether the full MNIST job can run (pip packages + dataset reachable).

    A cluster that shows no disconnected signal always qualifies. A
    disconnected cluster qualifies only when both of these are configured:

    * ``PIP_INDEX_URL`` is non-empty and does not point at ``pypi.org``.
    * ``AWS_DEFAULT_ENDPOINT`` is non-empty.

    Returns:
        bool: True when the full MNIST job is considered viable.
    """
    if not _disconnected_cluster_signals():
        return True

    pip_index = (os.environ.get("PIP_INDEX_URL") or "").strip()
    s3_endpoint = (os.environ.get("AWS_DEFAULT_ENDPOINT") or "").strip()

    # A mirror only counts when it is set and is not the public index.
    has_pip_mirror = pip_index != "" and "pypi.org" not in pip_index
    has_s3_endpoint = s3_endpoint != ""

    if not (has_pip_mirror and has_s3_endpoint):
        return False

    print(
        "Disconnected cluster with PIP mirror and S3 endpoint configured; "
        "using full MNIST job"
    )
    return True


def use_smoke_job():
    """
    Use a lightweight Ray job when full MNIST is not viable.

    Detection order (first match wins):
      1. USE_SMOKE_JOB / UPGRADE_USE_SMOKE_JOB set to an enabled or disabled
         flag value (explicit override).
      2. Full MNIST prerequisites met (connected, or disconnected with
         mirrors) -> full MNIST job.
      3. Otherwise the cluster has been detected as disconnected without
         PIP/S3 mirrors -> smoke job. The log message distinguishes the
         DISCONNECTED_CLUSTER / IS_DISCONNECTED_CLUSTER env signal from the
         API server URL heuristic.

    ImageDigestMirrorSet / ICSP are intentionally not used: many connected
    OpenShift clusters mirror container registries without blocking pip/PyPI.

    Returns:
        bool: True to submit the smoke job, False to submit the full MNIST job.
    """
    for name in ("USE_SMOKE_JOB", "UPGRADE_USE_SMOKE_JOB"):
        if _env_flag_enabled(name):
            print(f"{name} enabled; using smoke job (no pip install)")
            return True
        if _env_flag_disabled(name):
            print(f"{name} disabled; using full MNIST job")
            return False

    if _mnist_prerequisites_met():
        return False

    # From here on the prerequisites check has already classified the cluster
    # as disconnected without mirrors, so the answer is always "smoke job".
    # Only the explanation differs.
    if _env_flag_enabled("DISCONNECTED_CLUSTER") or _env_flag_enabled(
        "IS_DISCONNECTED_CLUSTER"
    ):
        print(
            "Disconnected cluster env set without PIP/S3 mirrors; "
            "using smoke job (no pip install)"
        )
        return True

    # The env flags are off, so the API server URL heuristic inside the
    # prerequisites check must have fired. Do not probe `oc whoami` a second
    # time: a transient failure of that repeat call would otherwise fall
    # through to the full MNIST job on a cluster that cannot run it.
    print(
        "Detected disconnected cluster from API server URL; "
        "using smoke job (no pip install)"
    )
    return True


def use_upgrade_smoke_job():
    """Legacy name kept for upgrade tests; forwards to ``use_smoke_job``."""
    smoke_selected = use_smoke_job()
    return smoke_selected


def get_mnist_job_submission_spec(**kwargs):
    """Build the entrypoint and runtime_env for tier1 / upgrade MNIST job tests.

    Args:
        **kwargs: Forwarded unchanged to ``get_setup_env_variables``.

    Returns:
        dict: ``{"entrypoint": ..., "runtime_env": ...}``. When
        ``use_smoke_job()`` is true the entrypoint is the smoke script and the
        runtime_env carries no ``pip`` entry; otherwise the full MNIST
        entrypoint and its pip requirements file are used.
    """
    job_env_vars = get_setup_env_variables(**kwargs)
    smoke = use_smoke_job()

    entrypoint = "python upgrade_job_smoke.py" if smoke else "python mnist.py"

    # Keys are inserted in the order working_dir, pip, env_vars.
    runtime_env = {"working_dir": "./tests/e2e/"}
    if not smoke:
        runtime_env["pip"] = "./tests/e2e/mnist_pip_requirements.txt"
    runtime_env["env_vars"] = job_env_vars

    return {"entrypoint": entrypoint, "runtime_env": runtime_env}


def get_upgrade_job_submission_spec(**kwargs):
    """Legacy name kept for post-upgrade job submission tests.

    Forwards every keyword argument to ``get_mnist_job_submission_spec`` and
    returns its result unchanged.
    """
    job_spec = get_mnist_job_submission_spec(**kwargs)
    return job_spec


def random_choice():
    """Return a random 5-character string of lowercase ASCII letters and digits."""
    pool = string.ascii_lowercase + string.digits
    picks = random.choices(pool, k=5)
    return "".join(picks)
Expand Down Expand Up @@ -611,38 +736,35 @@ def is_byoidc_cluster_detected():

spec = auth_resource.get("spec", {})

# Check oidcProviders for BYOIDC-specific issuer URL patterns
# BYOIDC clusters register Authentication.spec.type as OIDC
if (spec.get("type") or "").upper() == "OIDC":
print("Detected BYOIDC cluster: Authentication spec.type is OIDC")
return True

# Check oidcProviders for Keycloak / QE BYOIDC issuer URLs
if "oidcProviders" in spec and spec["oidcProviders"]:
for provider in spec["oidcProviders"]:
issuer_url = provider.get("issuer", {}).get("issuerURL", "")
if (
"keycloak" in issuer_url.lower()
and (
"rh-ods.com" in issuer_url or "qe.rh-ods.com" in issuer_url
)
) or "realms/openshift" in issuer_url:
if "keycloak" in issuer_url.lower() and (
"rh-ods.com" in issuer_url or "qe.rh-ods.com" in issuer_url
):
print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}")
return True

# Check webhookTokenAuthenticators
if (
"webhookTokenAuthenticators" in spec
and spec["webhookTokenAuthenticators"]
):
# Check webhookTokenAuthenticators (external OIDC token review)
if spec.get("webhookTokenAuthenticators"):
for webhook in spec["webhookTokenAuthenticators"]:
if webhook.get("kubeConfig", {}):
print("Detected BYOIDC cluster with webhook token authenticator")
return True

# Check status.oidcClients for cli component (BYOIDC-specific).
# clientID is nested under currentOIDCClients[]; componentName=="cli" is
# simpler and always present when BYOIDC is active.
# status.oidcClients with componentName "cli" is NOT BYOIDC-specific (false positive
# on standard OpenShift). BYOIDC registers client id "oc-cli" — see run-tests.sh.
status = auth_resource.get("status", {})
if "oidcClients" in status and status["oidcClients"]:
for oidc_client in status["oidcClients"]:
if oidc_client.get("componentName") == "cli":
print("Detected BYOIDC cluster from status.oidcClients (cli component)")
return True
oidc_clients_blob = json.dumps(status.get("oidcClients", []))
if "oc-cli" in oidc_clients_blob:
print("Detected BYOIDC cluster from status.oidcClients (oc-cli client)")
return True

print("No BYOIDC indicators found in cluster Authentication resource")
return False
Expand Down Expand Up @@ -679,18 +801,12 @@ def assert_get_cluster_and_jobsubmit(
client = cluster.job_client

# Submit a job and get the submission ID
env_vars = (
get_setup_env_variables(ACCELERATOR=accelerator)
if accelerator
else get_setup_env_variables()
)
spec_kwargs = {"ACCELERATOR": accelerator} if accelerator else {}
job_spec = get_mnist_job_submission_spec(**spec_kwargs)
print(f"Submitting job: {job_spec['entrypoint']}")
submission_id = client.submit_job(
entrypoint="python mnist.py",
runtime_env={
"working_dir": "./tests/e2e/",
"pip": "./tests/e2e/mnist_pip_requirements.txt",
"env_vars": env_vars,
},
entrypoint=job_spec["entrypoint"],
runtime_env=job_spec["runtime_env"],
entrypoint_num_cpus=1 if number_of_gpus is None else None,
entrypoint_num_gpus=number_of_gpus,
)
Expand Down
16 changes: 16 additions & 0 deletions tests/e2e/upgrade_job_smoke.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright 2024 IBM, Red Hat
#
# Minimal Ray job for upgrade qualification on disconnected clusters.
# Validates job submission and execution without pip installs or external datasets.

import sys


def main() -> int:
    """Print the start and finish markers of the smoke job and return 0."""
    for stage in ("job started", "job finished successfully"):
        print(f"upgrade-job-smoke: {stage}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)
43 changes: 43 additions & 0 deletions tests/upgrade/00_ray_migration_post_upgrade_finalize_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2024 IBM, Red Hat
#
# First post_upgrade step: migrate the qualification RayCluster after RHOAI OLM upgrade.

import pytest

from tests.upgrade.constants import CLUSTER_NAME, NAMESPACE
from tests.upgrade.migration_support import (
assert_raycluster_migrated_if_present,
assert_raycluster_ready_after_post_upgrade,
run_migration_post_upgrade,
)


@pytest.mark.post_upgrade
class TestRayMigrationPostUpgradeFinalize:
    """
    Opening test of the post_upgrade suite (ordering is handled by conftest
    collection ordering).

    Runs the post-upgrade migration step for the seeded mnist cluster
    identified by NAMESPACE / CLUSTER_NAME. Per the suite notes, clusters that
    are already migrated (3.x→3.x) are skipped, and this step is required
    before the job/UI post tests when upgrading from 2.x with legacy TLS/OAuth
    on the RayCluster CR.
    """

    def test_ray_migration_post_upgrade_finalize(self):
        """Run the migration, then verify the RayCluster state regardless of exit code."""
        migration = run_migration_post_upgrade(
            namespace=NAMESPACE,
            cluster_name=CLUSTER_NAME,
        )

        # A non-zero exit is only reported; the assertions below decide
        # whether the cluster actually ended up in the expected state.
        exit_code = migration.returncode
        if exit_code != 0:
            notice = (
                f"Migration post-upgrade exited with code {exit_code}; "
                "verifying RayCluster state anyway."
            )
            print(notice)

        assert_raycluster_migrated_if_present()
        assert_raycluster_ready_after_post_upgrade()

        banner = (
            "\n=== Ray migration post-upgrade finalize complete. "
            "Proceed with post_upgrade job and UI tests. ===\n"
        )
        print(banner)
Loading
Loading