From f22fdfa315018fd6c97ab470414e1228b4a58150 Mon Sep 17 00:00:00 2001 From: Pawel Paszki Date: Wed, 13 May 2026 13:32:50 +0100 Subject: [PATCH 1/3] RHOAIENG-62238: fix sdk tests (byoidc and non-byoidc) --- images/tests/run-tests.sh | 212 +++++++++++++++--- tests/e2e/mnist.py | 2 +- tests/e2e/mnist_raycluster_sdk_oauth_test.py | 108 +++------ tests/e2e/support.py | 109 +++++---- .../upgrade/01_raycluster_sdk_upgrade_test.py | 155 +++---------- 5 files changed, 305 insertions(+), 281 deletions(-) diff --git a/images/tests/run-tests.sh b/images/tests/run-tests.sh index a62732c00..6b67d518f 100644 --- a/images/tests/run-tests.sh +++ b/images/tests/run-tests.sh @@ -149,16 +149,27 @@ echo "Detecting cluster authentication type from cluster configuration..." # Detect BYOIDC from cluster configuration alone CLUSTER_IS_BYOIDC=false -# Method 1: Check Authentication resource type (most reliable for OIDC clusters) -AUTH_TYPE=$(oc get authentication cluster -o jsonpath='{.spec.type}' 2>/dev/null) -if [ "$AUTH_TYPE" = "OIDC" ]; then - echo "Detected BYOIDC cluster: Authentication spec.type = OIDC" +# Method 0: Check kubeconfig exec plugin format (no API call — safe before auth setup) +# Handles clusters where the kubeconfig uses oc get-token with OIDC (e.g., PSI BYOIDC with exec plugin) +EXEC_ARGS=$(oc config view --minify -o jsonpath='{.users[0].user.exec.args}' 2>/dev/null) || true +if echo "$EXEC_ARGS" | grep -qi "oc-cli\|realms/openshift"; then + echo "Detected BYOIDC cluster: kubeconfig uses OIDC exec plugin (oc-cli / realms/openshift)" CLUSTER_IS_BYOIDC=true fi +# Method 1: Check Authentication resource type (most reliable for OIDC clusters) +# Uses timeout to avoid hanging when kubeconfig requires interactive token refresh +if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then + AUTH_TYPE=$(timeout 10 oc get authentication cluster -o jsonpath='{.spec.type}' 2>/dev/null) || true + if [ "$AUTH_TYPE" = "OIDC" ]; then + echo "Detected BYOIDC cluster: Authentication spec.type = OIDC" + CLUSTER_IS_BYOIDC=true + fi +fi + # Method 2: Check for OIDC providers in Authentication resource (fallback) if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then - OIDC_ISSUER=$(oc get authentication cluster -o jsonpath='{.spec.oidcProviders[*].issuer.issuerURL}' 2>/dev/null) + OIDC_ISSUER=$(timeout 10 oc get authentication cluster -o jsonpath='{.spec.oidcProviders[*].issuer.issuerURL}' 2>/dev/null) || true if [ -n "$OIDC_ISSUER" ]; then echo "Detected BYOIDC cluster: Authentication has oidcProviders with issuerURL: $OIDC_ISSUER" CLUSTER_IS_BYOIDC=true @@ -167,7 +178,7 @@ fi # Method 3: Check for oidcClients in Authentication status (another fallback) if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then - if oc get authentication cluster -o jsonpath='{.status.oidcClients}' 2>/dev/null | grep -q "oc-cli"; then + if timeout 10 oc get authentication cluster -o jsonpath='{.status.oidcClients}' 2>/dev/null | grep -q "oc-cli"; then echo "Detected BYOIDC cluster: Authentication status has oidcClients with oc-cli" CLUSTER_IS_BYOIDC=true fi @@ -175,7 +186,7 @@ fi # Method 4: Check OAuth resource for openID identity provider (legacy OIDC setup) if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then - if oc get oauth cluster -o jsonpath='{.spec.identityProviders[*].type}' 2>/dev/null | grep -qi "OpenID"; then + if timeout 10 oc get oauth cluster -o jsonpath='{.spec.identityProviders[*].type}' 2>/dev/null | grep -qi "OpenID"; then echo "Detected BYOIDC cluster: OAuth has OpenID identity provider" CLUSTER_IS_BYOIDC=true fi @@ -374,7 +385,8 @@ set_kueue_management_state() { # ============================================================================ echo "Extracting OpenShift API URL from active oc session..." # Try to get URL from active oc session first (if already logged in) -OCP_API_URL=$(oc whoami --show-server 2>/dev/null) +# Use timeout to avoid hanging when kubeconfig uses an exec plugin (e.g., BYOIDC with oc get-token) +OCP_API_URL=$(timeout 10 oc whoami --show-server 2>/dev/null) || true if [ -z "$OCP_API_URL" ]; then echo "No active oc session found, extracting from kubeconfig..." @@ -403,6 +415,9 @@ fi # ============================================================================ echo "Setting up authentication for RBAC policies..." +# Save original kubeconfig directory for token cache lookup later +ORIGINAL_KUBE_DIR=$(dirname "${KUBECONFIG:-/codeflare-sdk/tests/.kube/config}") + # Create a temporary kubeconfig (since the mounted one is read-only) TEMP_KUBECONFIG="/tmp/kubeconfig-$$" cp "${KUBECONFIG}" "${TEMP_KUBECONFIG}" 2>/dev/null || { @@ -464,17 +479,158 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then NEEDS_CONVERSION=true fi + # Allow a pre-extracted token to be injected via BYOIDC_ADMIN_TOKEN env var. + # Useful for local runs on BYOIDC clusters where the exec plugin can't run + # inside the container (no token cache, no browser). + # Obtain it on the host: python3 -c "import json,glob,os; [print(json.load(open(f))['id_token']) for f in glob.glob(os.path.expanduser('~/.kube/cache/oc/*')) if 'id_token' in json.load(open(f))]" | head -1 + if [ -z "$KUBECONFIG_TOKEN" ] && [ -n "${BYOIDC_ADMIN_TOKEN:-}" ]; then + echo "Using pre-extracted token from BYOIDC_ADMIN_TOKEN" + KUBECONFIG_TOKEN="$BYOIDC_ADMIN_TOKEN" + NEEDS_CONVERSION=true + fi + # Check for exec plugin format (oc-oidc plugin) if [ -z "$KUBECONFIG_TOKEN" ]; then HAS_EXEC_PLUGIN=$(oc config view --minify -o jsonpath='{.users[0].user.exec.command}' 2>/dev/null) if [ -n "$HAS_EXEC_PLUGIN" ]; then echo "Detected exec-plugin format ($HAS_EXEC_PLUGIN), searching for cached token..." - # Try to extract a token from cached locations - if [ -f ~/.kube/oidc-login.cache ]; then + # Check oc's built-in token cache (~/.kube/cache/oc/ JSON files). + # oc stores tokens at /cache/oc/ where is the + # directory containing the kubeconfig. This is populated when ~/.kube/ is + # mounted into the container (not just ~/.kube/config). + OC_CACHE_DIR="${ORIGINAL_KUBE_DIR}/cache/oc" + if [ -z "$KUBECONFIG_TOKEN" ] && [ -d "$OC_CACHE_DIR" ]; then + for cache_file in "$OC_CACHE_DIR"/*; do + if [ -f "$cache_file" ]; then + TOKEN=$(grep -o '"id_token":"[^"]*"' "$cache_file" 2>/dev/null | head -1 | cut -d'"' -f4) + if [ -n "$TOKEN" ]; then + KUBECONFIG_TOKEN="$TOKEN" + echo "Found cached OIDC token in oc cache: $(basename "$cache_file")" + break + fi + fi + done + fi + + # Also check legacy oidc-login cache location + if [ -z "$KUBECONFIG_TOKEN" ] && [ -f ~/.kube/oidc-login.cache ]; then KUBECONFIG_TOKEN=$(cat ~/.kube/oidc-login.cache 2>/dev/null | grep -o '"id_token":"[^"]*"' | cut -d'"' -f4) fi - NEEDS_CONVERSION=true + + # Last resort: try running the exec plugin directly to get a fresh token. + # Works when oc can refresh the token non-interactively (e.g. valid refresh_token + # is in the cache AND the cache directory is mounted into the container). + if [ -z "$KUBECONFIG_TOKEN" ]; then + echo "No cached token found, attempting to run exec plugin directly..." + mapfile -t EXEC_PLUGIN_ARGS < <(oc config view --minify -o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null) + if [ ${#EXEC_PLUGIN_ARGS[@]} -gt 0 ]; then + EXEC_OUTPUT=$(timeout 30 "$HAS_EXEC_PLUGIN" "${EXEC_PLUGIN_ARGS[@]}" 2>/dev/null) || true + if [ -n "$EXEC_OUTPUT" ]; then + KUBECONFIG_TOKEN=$(echo "$EXEC_OUTPUT" | grep -o '"token":"[^"]*"' | head -1 | cut -d'"' -f4) + [ -n "$KUBECONFIG_TOKEN" ] && echo "Obtained token from exec plugin" + fi + fi + fi + + if [ -n "$KUBECONFIG_TOKEN" ]; then + NEEDS_CONVERSION=true + else + # No cached token found. Use the same approach as Jenkins loginByoidcUser: + # call Keycloak's token endpoint directly with grant_type=password, then + # inject id-token + refresh-token into the kubeconfig via auth-provider. + # The OIDC issuer URL and client ID are already in the exec plugin args. + if [ -n "${OCP_ADMIN_USER_USERNAME:-}" ] && [ -n "${OCP_ADMIN_USER_PASSWORD:-}" ]; then + echo "Attempting Keycloak password grant (same method as Jenkins loginByoidcUser)..." + + # Extract issuer URL and client ID directly from exec plugin args in kubeconfig + OIDC_ISSUER=$(oc config view --minify \ + -o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null \ + | grep -- '--issuer-url=' | sed 's/--issuer-url=//' | tr -d '[:space:]') + OIDC_CLIENT_ID=$(oc config view --minify \ + -o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null \ + | grep -- '--client-id=' | sed 's/--client-id=//' | tr -d '[:space:]') + + # Allow env overrides (consistent with Jenkins configData fields) + OIDC_ISSUER="${CLUSTER_OIDC_ISSUER:-$OIDC_ISSUER}" + OIDC_CLIENT_ID="${CLIENT_ID_OC_CLI:-${OIDC_CLIENT_ID:-oc-cli}}" + OIDC_TOKEN_ENDPOINT="${CLUSTER_OIDC_TOKEN_ENDPOINT:-${OIDC_ISSUER}/protocol/openid-connect/token}" + + if [ -n "$OIDC_ISSUER" ]; then + echo " OIDC issuer: $OIDC_ISSUER" + echo " Admin user: $OCP_ADMIN_USER_USERNAME" + + # Use OIDC well-known discovery to find the correct token endpoint + # (mirrors opendatahub-tests get_oidc_token_endpoint()) + if [ -z "${CLUSTER_OIDC_TOKEN_ENDPOINT:-}" ]; then + WELL_KNOWN=$(curl -sk --max-time 10 "${OIDC_ISSUER}/.well-known/openid-configuration" 2>/dev/null) || true + if [ -n "$WELL_KNOWN" ]; then + DISCOVERED_ENDPOINT=$(echo "$WELL_KNOWN" | python3 -c \ + "import json,sys; print(json.load(sys.stdin).get('token_endpoint',''))" \ + 2>/dev/null || true) + [ -n "$DISCOVERED_ENDPOINT" ] && OIDC_TOKEN_ENDPOINT="$DISCOVERED_ENDPOINT" + fi + fi + echo " Token endpoint: $OIDC_TOKEN_ENDPOINT" + + # mirrors Jenkins loginByoidcUser / opendatahub-tests get_oidc_tokens(): + # --data-urlencode safely handles special characters in credentials. + # scope matches Jenkins OIDC_LOGIN_SCOPE (default "openid"). + OIDC_SCOPE="${OIDC_LOGIN_SCOPE:-openid}" + TOKENS=$(curl -sk -L -X POST "$OIDC_TOKEN_ENDPOINT" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -H "User-Agent: python-requests" \ + --data-urlencode "username=${OCP_ADMIN_USER_USERNAME}" \ + --data-urlencode "password=${OCP_ADMIN_USER_PASSWORD}" \ + -d "grant_type=password" \ + -d "client_id=${OIDC_CLIENT_ID}" \ + -d "scope=${OIDC_SCOPE}" 2>/dev/null) || true + + if [ -n "$TOKENS" ]; then + ID_TOKEN=$(echo "$TOKENS" | python3 -c \ + "import json,sys; print(json.load(sys.stdin).get('id_token',''))" \ + 2>/dev/null || echo "$TOKENS" | grep -o '"id_token":"[^"]*"' | cut -d'"' -f4) + REFRESH_TOKEN_VAL=$(echo "$TOKENS" | python3 -c \ + "import json,sys; print(json.load(sys.stdin).get('refresh_token',''))" \ + 2>/dev/null || echo "$TOKENS" | grep -o '"refresh_token":"[^"]*"' | cut -d'"' -f4) + + if [ -n "$ID_TOKEN" ] && [ "$ID_TOKEN" != "None" ] && [ "$ID_TOKEN" != "" ]; then + # Inject tokens into kubeconfig using auth-provider format + # (identical to Jenkins loginByoidcUser kubectl config set-credentials call) + oc config set-credentials "${OCP_ADMIN_USER_USERNAME}" \ + --auth-provider=oidc \ + --auth-provider-arg=idp-issuer-url="${OIDC_ISSUER}" \ + --auth-provider-arg=client-id="${OIDC_CLIENT_ID}" \ + --auth-provider-arg=client-secret="" \ + --auth-provider-arg=refresh-token="${REFRESH_TOKEN_VAL}" \ + --auth-provider-arg=id-token="${ID_TOKEN}" 2>/dev/null + oc config set-context --current --user="${OCP_ADMIN_USER_USERNAME}" 2>/dev/null + cp "${TEMP_KUBECONFIG}" ~/.kube/config 2>/dev/null || true + echo " ✓ Kubeconfig updated with Keycloak id-token + refresh-token" + KUBECONFIG_TOKEN="$ID_TOKEN" + NEEDS_CONVERSION=false + else + ERR=$(echo "$TOKENS" | python3 -c \ + "import json,sys; d=json.load(sys.stdin); print(d.get('error','?') + ': ' + d.get('error_description',''))" \ + 2>/dev/null || echo "$TOKENS" | head -c 200) + echo " WARNING: Keycloak token request failed: $ERR" + echo " Falling back to kubeconfig exec plugin as-is" + fi + else + echo " WARNING: No response from Keycloak token endpoint" + fi + else + echo " WARNING: Could not determine OIDC issuer from exec plugin args" + fi + fi + + if [ -z "$KUBECONFIG_TOKEN" ]; then + # Last resort: use kubeconfig with exec plugin as-is. + # When running interactively (-it), oc get-token may prompt for device/browser auth. + echo "Using kubeconfig with exec plugin as-is (no static token conversion)" + echo "oc commands will invoke the exec plugin to authenticate when needed" + fi + fi fi fi @@ -529,15 +685,6 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then # Update ~/.kube/config after conversion cp "${TEMP_KUBECONFIG}" ~/.kube/config 2>/dev/null || true echo "✓ Converted to token-based authentication" - else - echo "ERROR: Cannot extract token from kubeconfig" - echo "The kubeconfig uses exec-plugin or auth-provider format but no token could be extracted." - echo "" - echo "Solution: Jenkins should mount a kubeconfig with a static token." - echo "You can verify the Jenkins kubeconfig by running:" - echo " kubectl config view --minify -o jsonpath='{.users[0].user}'" - rm -f "${TEMP_KUBECONFIG}" - exit 1 fi fi @@ -560,23 +707,30 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then echo "Note: oc whoami not available on this cluster (external OIDC mode)" echo "Using alternative authentication verification..." - # Method 1: Try to get API server version (basic connectivity test) - if oc version 2>/dev/null | grep -q "Server"; then - echo " ✓ API server connectivity verified" + # Method 1: Raw connectivity check — /version is unauthenticated, avoids exec plugin + if [ -n "${OCP_API_URL:-}" ] && \ + curl -sk --max-time 10 "${OCP_API_URL}/version" 2>/dev/null | grep -q '"major"'; then + echo " ✓ API server connectivity verified (unauthenticated /version endpoint)" + elif [ -z "$KUBECONFIG_TOKEN" ]; then + # No static token — exec plugin as-is; connectivity may still work through Python client. + # Don't abort: the Python kubernetes client handles exec plugins in-process and may succeed. + echo " WARNING: Could not verify API connectivity via /version (exec plugin as-is mode)" + echo " Tests will attempt to run; Python's kubernetes client invokes the exec plugin natively." + echo " If tests fail with 401, ensure BYOIDC_ADMIN_PASSWORD is correct in your env file," + echo " or extract the id_token from the oc cache on your host and set BYOIDC_ADMIN_TOKEN:" + echo " python3 -c \"import json,glob,os; [print(json.load(open(f))['id_token']) for f in glob.glob(os.path.expanduser('~/.kube/cache/oc/*')) if 'id_token' in json.load(open(f))]\" | head -1" else - echo "ERROR: Cannot connect to API server" + echo "ERROR: Cannot connect to API server at ${OCP_API_URL:-[not set]}" rm -f "${TEMP_KUBECONFIG}" exit 1 fi - # Method 2: Check if we can perform basic API calls + # Method 2: Check if we can perform basic API calls (requires valid token) if oc auth can-i get namespaces 2>/dev/null | grep -q "yes"; then echo " ✓ Authentication verified (can get namespaces)" else - echo "ERROR: Cannot verify authentication - no namespace access" - echo "oc whoami output: $WHOAMI_OUTPUT" - rm -f "${TEMP_KUBECONFIG}" - exit 1 + echo " WARNING: Cannot verify API authentication via oc auth can-i" + echo " This is expected when the exec plugin is used without a cached token." fi # Method 3: Try to extract username from token (JWT sub claim) diff --git a/tests/e2e/mnist.py b/tests/e2e/mnist.py index b8135d0fc..f4c9667dc 100644 --- a/tests/e2e/mnist.py +++ b/tests/e2e/mnist.py @@ -17,7 +17,7 @@ import torch import requests from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.callbacks.progress import TQDMProgressBar +from pytorch_lightning.callbacks import TQDMProgressBar from torch import nn from torch.nn import functional as F from torch.utils.data import DataLoader, random_split, RandomSampler diff --git a/tests/e2e/mnist_raycluster_sdk_oauth_test.py b/tests/e2e/mnist_raycluster_sdk_oauth_test.py index 0ce0a3a12..cf5905311 100644 --- a/tests/e2e/mnist_raycluster_sdk_oauth_test.py +++ b/tests/e2e/mnist_raycluster_sdk_oauth_test.py @@ -221,10 +221,10 @@ def assert_jobsubmit_withoutLogin(self, cluster): def _is_byoidc_cluster(self): """ - Simple BYOIDC cluster detection by checking for OIDC issuer in cluster Authentication resource. + BYOIDC cluster detection by checking OpenShift cluster Authentication resource. + Detection is based solely on cluster state — no environment variable fallback. """ try: - # Check if cluster has OIDC authentication configured auth_resource = self.custom_api.get_cluster_custom_object( group="config.openshift.io", version="v1", @@ -232,70 +232,37 @@ def _is_byoidc_cluster(self): name="cluster", ) - # Look for OIDC issuer URL in the authentication spec spec = auth_resource.get("spec", {}) - # Check oidcProviders first - must be BYOIDC-specific + # Any non-empty spec.oidcProviders.issuerURL means external OIDC is configured. + # This field is only populated on BYOIDC clusters — never on standard OAuth clusters. if "oidcProviders" in spec and spec["oidcProviders"]: for provider in spec["oidcProviders"]: - issuer_url = provider.get("issuer", {}).get("url", "") - # More specific check for BYOIDC patterns - if ( - "keycloak" in issuer_url.lower() - and ( - "rh-ods.com" in issuer_url or "qe.rh-ods.com" in issuer_url - ) - ) or "realms/openshift" in issuer_url: + issuer_url = provider.get("issuer", {}).get("issuerURL", "") + if issuer_url: print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}") return True - # Also check for webhookTokenAuthenticators (alternative OIDC config) + # Check webhookTokenAuthenticators if ( "webhookTokenAuthenticators" in spec and spec["webhookTokenAuthenticators"] ): for webhook in spec["webhookTokenAuthenticators"]: - kubeconfig = webhook.get("kubeConfig", {}) - if kubeconfig: + if webhook.get("kubeConfig", {}): print( "Detected BYOIDC cluster with webhook token authenticator" ) return True - # Check status for BYOIDC-specific OIDC clients - status = auth_resource.get("status", {}) - if "oidcClients" in status and status["oidcClients"]: - # Check if oc-cli client exists (BYOIDC-specific) - for client in status["oidcClients"]: - if client.get("clientID") == "oc-cli": - print( - "Detected BYOIDC cluster from status.oidcClients (oc-cli)" - ) - return True - - # Fallback: check if we can detect OIDC from environment or other indicators - # This is a simple heuristic - if we're using Jenkins vault credentials for BYOIDC - import os + # Do NOT check status.oidcClients — it is present on standard OpenShift 4.14+ + # clusters too and causes false positives. - if os.getenv("BYOIDC_ADMIN_USERNAME") or os.getenv( - "TEST_USER_USERNAME", "" - ).startswith("odh-"): - print("Detected BYOIDC cluster from environment variables") - return True - - print("No BYOIDC OIDC providers found in cluster Authentication resource") + print("No BYOIDC indicators found in cluster Authentication resource") return False except Exception as e: print(f"Could not check cluster authentication method: {e}") - # Fallback: check environment variables as last resort - import os - - if os.getenv("BYOIDC_ADMIN_USERNAME") or os.getenv( - "TEST_USER_USERNAME", "" - ).startswith("odh-"): - print("Detected BYOIDC cluster from environment variables (fallback)") - return True return False def assert_jobsubmit_withlogin(self, cluster): @@ -305,50 +272,33 @@ def assert_jobsubmit_withlogin(self, cluster): ray_cluster_auth_enabled = self._is_ray_cluster_auth_enabled(cluster) print(f"Ray cluster authentication enabled: {ray_cluster_auth_enabled}") - # For BYOIDC clusters, skip the Ray Dashboard test entirely - # BYOIDC uses different authentication mechanisms (OIDC tokens, cookies) that are - # not compatible with the simple bearer token approach used in this test - # Simple detection: check if we have OIDC issuer in cluster is_byoidc_cluster = self._is_byoidc_cluster() + if is_byoidc_cluster: - print("Skipping Ray Dashboard authentication test for BYOIDC cluster") - print( - "BYOIDC authentication is handled differently and requires browser-based OIDC flow" - ) - # Instead, just verify that the cluster is working by checking its status - print("Verifying cluster is accessible via Kubernetes API...") - ray_cluster = get_ray_cluster(cluster.config.name, cluster.config.namespace) - if ray_cluster and ray_cluster.get("status", {}).get("state") == "ready": - print( - "✓ Ray cluster is ready and accessible - BYOIDC authentication test passed" - ) - return - else: - cluster_state = ( - ray_cluster.get("status", {}).get("state", "unknown") - if ray_cluster - else "not found" + # On BYOIDC clusters oc whoami --show-token=true is unavailable. + # Obtain an OIDC id_token via Keycloak password grant (same approach + # as Jenkins loginByoidcUser / opendatahub-tests get_oidc_tokens). + username = os.environ.get("OCP_ADMIN_USER_USERNAME", "") + password = os.environ.get("OCP_ADMIN_USER_PASSWORD", "") + if not username or not password: + raise RuntimeError( + "OCP_ADMIN_USER_USERNAME and OCP_ADMIN_USER_PASSWORD must be set " + "for BYOIDC job submission" ) - print( - f"✗ Ray cluster is not ready or not accessible (state: {cluster_state})" + issuer_url = get_byoidc_issuer_url() + id_token, _ = get_oidc_tokens(username, password, issuer_url) + if not id_token: + raise RuntimeError( + "Failed to obtain OIDC token for Ray Dashboard authentication. " + "Check OCP_ADMIN_USER_PASSWORD." ) - print("This may be due to:") - print(" - ServiceAccount creation delays (known product issue)") - print(" - Authentication controller timing issues") - print(" - Cluster resource constraints") - assert ( - False - ), f"Ray cluster is not accessible via Kubernetes API (state: {cluster_state})" - - if not ray_cluster_auth_enabled: - # If Ray cluster doesn't have authentication enabled, don't send auth headers + header = {"Authorization": f"Bearer {id_token}"} + elif not ray_cluster_auth_enabled: print( "Ray cluster authentication is disabled - proceeding without auth headers" ) header = {} else: - # For authenticated clusters, try to get token via oc command - # This works for both legacy and kubeconfig-based authentication try: auth_token = run_oc_command(["whoami", "--show-token=true"]) if auth_token: diff --git a/tests/e2e/support.py b/tests/e2e/support.py index 00f6ed013..d7d1f5862 100644 --- a/tests/e2e/support.py +++ b/tests/e2e/support.py @@ -600,48 +600,49 @@ def get_tolerations_from_flavor(self, flavor_name): def is_byoidc_cluster_detected(): """ - Simple BYOIDC cluster detection by checking environment variables. - This is a fallback method for support functions that don't have access to self. + BYOIDC cluster detection by checking OpenShift cluster Authentication resource. + Detection is based solely on cluster state — no environment variable fallback. """ try: - import os + from kubernetes import client as k8s_client - # Check environment variables as indicator of BYOIDC - if os.getenv("BYOIDC_ADMIN_USERNAME") or os.getenv( - "TEST_USER_USERNAME", "" - ).startswith("odh-"): - print("Detected BYOIDC cluster from environment variables") - return True + custom_api = k8s_client.CustomObjectsApi() + auth_resource = custom_api.get_cluster_custom_object( + group="config.openshift.io", + version="v1", + plural="authentications", + name="cluster", + ) - # Try to check cluster authentication if possible - try: - from kubernetes import client - - custom_api = client.CustomObjectsApi() - auth_resource = custom_api.get_cluster_custom_object( - group="config.openshift.io", - version="v1", - plural="authentications", - name="cluster", - ) + spec = auth_resource.get("spec", {}) - # Check status for BYOIDC-specific OIDC clients - status = auth_resource.get("status", {}) - if "oidcClients" in status and status["oidcClients"]: - # Check if oc-cli client exists (BYOIDC-specific) - for client in status["oidcClients"]: - if client.get("clientID") == "oc-cli": - print( - "Detected BYOIDC cluster from status.oidcClients (oc-cli)" - ) - return True + # Any non-empty spec.oidcProviders.issuerURL means external OIDC is configured. + # This field is only populated on BYOIDC clusters — never on standard OAuth clusters. + if "oidcProviders" in spec and spec["oidcProviders"]: + for provider in spec["oidcProviders"]: + issuer_url = provider.get("issuer", {}).get("issuerURL", "") + if issuer_url: + print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}") + return True + + # Check webhookTokenAuthenticators + if ( + "webhookTokenAuthenticators" in spec + and spec["webhookTokenAuthenticators"] + ): + for webhook in spec["webhookTokenAuthenticators"]: + if webhook.get("kubeConfig", {}): + print("Detected BYOIDC cluster with webhook token authenticator") + return True - except Exception: - pass # Ignore API errors, fall back to environment detection + # Do NOT check status.oidcClients — it is present on standard OpenShift 4.14+ + # clusters too and causes false positives. + print("No BYOIDC indicators found in cluster Authentication resource") return False - except Exception: + except Exception as e: + print(f"Could not check cluster authentication method: {e}") return False @@ -653,23 +654,35 @@ def assert_get_cluster_and_jobsubmit( cluster.details() - # Check if this is a BYOIDC cluster - skip Ray Dashboard job submission for BYOIDC is_byoidc_cluster = is_byoidc_cluster_detected() if is_byoidc_cluster: - print("Skipping Ray Dashboard job submission test for BYOIDC cluster") - print( - "BYOIDC authentication requires browser-based OIDC flow for Ray Dashboard" - ) - # Just verify cluster is accessible and clean up - print("✓ Ray cluster retrieved and accessible via Kubernetes API") - print( - "Note: Skipping due to known BYOIDC/Ray Dashboard compatibility limitations" - ) - cluster.down() - return + # On BYOIDC clusters cluster.job_client uses oc whoami --show-token=true which + # is unavailable. Obtain an OIDC id_token via Keycloak password grant instead. + username = os.environ.get("OCP_ADMIN_USER_USERNAME", "") + password = os.environ.get("OCP_ADMIN_USER_PASSWORD", "") + if not username or not password: + raise RuntimeError( + "OCP_ADMIN_USER_USERNAME and OCP_ADMIN_USER_PASSWORD must be set " + "for BYOIDC job submission" + ) + issuer_url = get_byoidc_issuer_url() + id_token, _ = get_oidc_tokens(username, password, issuer_url) + if not id_token: + raise RuntimeError( + "Failed to obtain OIDC token for Ray Dashboard authentication. " + "Check OCP_ADMIN_USER_PASSWORD." + ) + from codeflare_sdk.ray.client import RayJobClient - # Initialize the job client - client = cluster.job_client + ray_dashboard = cluster.cluster_dashboard_uri() + client = RayJobClient( + address=ray_dashboard, + headers={"Authorization": f"Bearer {id_token}"}, + verify=False, + ) + else: + # Initialize the job client + client = cluster.job_client # Submit a job and get the submission ID env_vars = ( @@ -1938,7 +1951,7 @@ def get_byoidc_issuer_url(): spec = auth_resource.get("spec", {}) if "oidcProviders" in spec and spec["oidcProviders"]: for provider in spec["oidcProviders"]: - issuer_url = provider.get("issuer", {}).get("url", "") + issuer_url = provider.get("issuer", {}).get("issuerURL", "") if issuer_url: return issuer_url diff --git a/tests/upgrade/01_raycluster_sdk_upgrade_test.py b/tests/upgrade/01_raycluster_sdk_upgrade_test.py index 28fd6dc48..4ad2685f6 100644 --- a/tests/upgrade/01_raycluster_sdk_upgrade_test.py +++ b/tests/upgrade/01_raycluster_sdk_upgrade_test.py @@ -1,3 +1,4 @@ +import os import pytest import requests from time import sleep @@ -125,10 +126,10 @@ def setup_method(self): def _is_byoidc_cluster(self): """ - Simple BYOIDC cluster detection by checking for OIDC issuer in cluster Authentication resource. + BYOIDC cluster detection by checking OpenShift cluster Authentication resource. + Detection is based solely on cluster state — no environment variable fallback. """ try: - # Check if cluster has OIDC authentication configured auth_resource = self.custom_api.get_cluster_custom_object( group="config.openshift.io", version="v1", @@ -136,70 +137,37 @@ def _is_byoidc_cluster(self): name="cluster", ) - # Look for OIDC issuer URL in the authentication spec spec = auth_resource.get("spec", {}) - # Check oidcProviders first - must be BYOIDC-specific + # Any non-empty spec.oidcProviders.issuerURL means external OIDC is configured. + # This field is only populated on BYOIDC clusters — never on standard OAuth clusters. if "oidcProviders" in spec and spec["oidcProviders"]: for provider in spec["oidcProviders"]: - issuer_url = provider.get("issuer", {}).get("url", "") - # More specific check for BYOIDC patterns - if ( - "keycloak" in issuer_url.lower() - and ( - "rh-ods.com" in issuer_url or "qe.rh-ods.com" in issuer_url - ) - ) or "realms/openshift" in issuer_url: + issuer_url = provider.get("issuer", {}).get("issuerURL", "") + if issuer_url: print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}") return True - # Also check for webhookTokenAuthenticators (alternative OIDC config) + # Check webhookTokenAuthenticators if ( "webhookTokenAuthenticators" in spec and spec["webhookTokenAuthenticators"] ): for webhook in spec["webhookTokenAuthenticators"]: - kubeconfig = webhook.get("kubeConfig", {}) - if kubeconfig: + if webhook.get("kubeConfig", {}): print( "Detected BYOIDC cluster with webhook token authenticator" ) return True - # Check status for BYOIDC-specific OIDC clients - status = auth_resource.get("status", {}) - if "oidcClients" in status and status["oidcClients"]: - # Check if oc-cli client exists (BYOIDC-specific) - for client in status["oidcClients"]: - if client.get("clientID") == "oc-cli": - print( - "Detected BYOIDC cluster from status.oidcClients (oc-cli)" - ) - return True - - # Fallback: check if we can detect OIDC from environment or other indicators - # This is a simple heuristic - if we're using Jenkins vault credentials for BYOIDC - import os - - if os.getenv("BYOIDC_ADMIN_USERNAME") or os.getenv( - "TEST_USER_USERNAME", "" - ).startswith("odh-"): - print("Detected BYOIDC cluster from environment variables") - return True + # Do NOT check status.oidcClients — it is present on standard OpenShift 4.14+ + # clusters too and causes false positives. - print("No BYOIDC OIDC providers found in cluster Authentication resource") + print("No BYOIDC indicators found in cluster Authentication resource") return False except Exception as e: print(f"Could not check cluster authentication method: {e}") - # Fallback: check environment variables as last resort - import os - - if os.getenv("BYOIDC_ADMIN_USERNAME") or os.getenv( - "TEST_USER_USERNAME", "" - ).startswith("odh-"): - print("Detected BYOIDC cluster from environment variables (fallback)") - return True return False def test_mnist_job_submission(self): @@ -331,93 +299,32 @@ def assert_jobsubmit_withoutLogin(self, cluster): def assert_jobsubmit_withlogin(self, cluster): ray_dashboard = cluster.cluster_dashboard_uri() - # Check if this is a BYOIDC cluster - skip Ray Dashboard job submission for BYOIDC is_byoidc_cluster = self._is_byoidc_cluster() - # For BYOIDC clusters, skip Ray Dashboard job submission due to authentication incompatibility if is_byoidc_cluster: - print("Skipping Ray Dashboard job submission test for BYOIDC cluster") - print( - "BYOIDC authentication requires browser-based OIDC flow for Ray Dashboard" - ) - print("Verifying cluster is accessible via Kubernetes API instead...") - - # Verify cluster is accessible via Kubernetes API as an alternative test - try: - cluster_details = cluster.details() - if ( - cluster_details - and hasattr(cluster_details, "status") - and cluster_details.status - ): - print( - f"✓ Ray cluster is accessible and has status: {cluster_details.status}" - ) - print( - "✓ BYOIDC authentication test passed - cluster is accessible via Kubernetes API" - ) - return - else: - raise RuntimeError( - "Cluster details could not be retrieved or cluster is not ready" - ) - except Exception as e: + # On BYOIDC clusters oc whoami --show-token=true is unavailable. + # Obtain an OIDC id_token via Keycloak password grant (same approach + # as Jenkins loginByoidcUser / opendatahub-tests get_oidc_tokens). + username = os.environ.get("OCP_ADMIN_USER_USERNAME", "") + password = os.environ.get("OCP_ADMIN_USER_PASSWORD", "") + if not username or not password: raise RuntimeError( - f"Failed to verify cluster accessibility via Kubernetes API: {e}" + "OCP_ADMIN_USER_USERNAME and OCP_ADMIN_USER_PASSWORD must be set " + "for BYOIDC job submission" ) - - # For legacy authentication, proceed with Ray Dashboard job submission - print("Using legacy authentication for Ray Dashboard job submission...") - - try: - # For legacy auth, use oc command to get token - auth_token = run_oc_command(["whoami", "--show-token=true"]) - header = {"Authorization": f"Bearer {auth_token}"} - except Exception as e: - print(f"Warning: Could not get auth token via oc command: {e}") - print("Attempting to use kubeconfig token...") - # Try to extract token from kubeconfig - try: - import yaml - - kubeconfig_path = os.getenv( - "KUBECONFIG", os.path.expanduser("~/.kube/config") - ) - with open(kubeconfig_path, "r") as f: - kubeconfig = yaml.safe_load(f) - - # Find current context and extract token - current_context = kubeconfig.get("current-context") - user_name = None - for context in kubeconfig.get("contexts", []): - if context["name"] == current_context: - user_name = context["context"]["user"] - break - - auth_token = None - for user in kubeconfig.get("users", []): - if user["name"] == user_name: - user_info = user.get("user", {}) - auth_token = user_info.get("token") - if not auth_token and "auth-provider" in user_info: - # Handle auth-provider token - auth_provider = user_info["auth-provider"] - if "config" in auth_provider: - auth_token = auth_provider["config"].get("access-token") - break - - if auth_token: - header = {"Authorization": f"Bearer {auth_token}"} - else: - # Fall back to no auth header (kubeconfig should handle auth) - header = {} - print("Warning: Using RayJobClient without explicit auth header") - except Exception as token_error: - print( - f"Warning: Could not extract token from kubeconfig: {token_error}" + issuer_url = get_byoidc_issuer_url() + id_token, _ = get_oidc_tokens(username, password, issuer_url) + if not id_token: + raise RuntimeError( + "Failed to obtain OIDC token for Ray Dashboard authentication. " + "Check OCP_ADMIN_USER_PASSWORD." ) - header = {} + auth_token = id_token + else: + # For non-BYOIDC clusters use the OpenShift OAuth token + auth_token = run_oc_command(["whoami", "--show-token=true"]) + header = {"Authorization": f"Bearer {auth_token}"} client = RayJobClient(address=ray_dashboard, headers=header, verify=False) # Submit the job From 65a89f1108a502607c00c528de47c25493ce48c2 Mon Sep 17 00:00:00 2001 From: Pawel Paszki Date: Wed, 13 May 2026 13:33:43 +0100 Subject: [PATCH 2/3] fix: lint --- tests/e2e/support.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/e2e/support.py b/tests/e2e/support.py index d7d1f5862..ae04ecadc 100644 --- a/tests/e2e/support.py +++ b/tests/e2e/support.py @@ -626,10 +626,7 @@ def is_byoidc_cluster_detected(): return True # Check webhookTokenAuthenticators - if ( - "webhookTokenAuthenticators" in spec - and spec["webhookTokenAuthenticators"] - ): + if "webhookTokenAuthenticators" in spec and spec["webhookTokenAuthenticators"]: for webhook in spec["webhookTokenAuthenticators"]: if webhook.get("kubeConfig", {}): print("Detected BYOIDC cluster with webhook token authenticator") From 3803d0ac1f1c343eb6d450e367db36f30e6af616 Mon Sep 17 00:00:00 2001 From: Pawel Paszki Date: Wed, 13 May 2026 13:58:52 +0100 Subject: [PATCH 3/3] test: address PR comment --- tests/e2e/mnist_raycluster_sdk_oauth_test.py | 48 +----------------- .../upgrade/01_raycluster_sdk_upgrade_test.py | 50 +------------------ 2 files changed, 3 insertions(+), 95 deletions(-) diff --git a/tests/e2e/mnist_raycluster_sdk_oauth_test.py b/tests/e2e/mnist_raycluster_sdk_oauth_test.py index cf5905311..f981f6de9 100644 --- a/tests/e2e/mnist_raycluster_sdk_oauth_test.py +++ b/tests/e2e/mnist_raycluster_sdk_oauth_test.py @@ -219,52 +219,6 @@ def assert_jobsubmit_withoutLogin(self, cluster): assert True, "Job submission without authentication was correctly blocked" - def _is_byoidc_cluster(self): - """ - BYOIDC cluster detection by checking OpenShift cluster Authentication resource. - Detection is based solely on cluster state — no environment variable fallback. - """ - try: - auth_resource = self.custom_api.get_cluster_custom_object( - group="config.openshift.io", - version="v1", - plural="authentications", - name="cluster", - ) - - spec = auth_resource.get("spec", {}) - - # Any non-empty spec.oidcProviders.issuerURL means external OIDC is configured. - # This field is only populated on BYOIDC clusters — never on standard OAuth clusters. - if "oidcProviders" in spec and spec["oidcProviders"]: - for provider in spec["oidcProviders"]: - issuer_url = provider.get("issuer", {}).get("issuerURL", "") - if issuer_url: - print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}") - return True - - # Check webhookTokenAuthenticators - if ( - "webhookTokenAuthenticators" in spec - and spec["webhookTokenAuthenticators"] - ): - for webhook in spec["webhookTokenAuthenticators"]: - if webhook.get("kubeConfig", {}): - print( - "Detected BYOIDC cluster with webhook token authenticator" - ) - return True - - # Do NOT check status.oidcClients — it is present on standard OpenShift 4.14+ - # clusters too and causes false positives. - - print("No BYOIDC indicators found in cluster Authentication resource") - return False - - except Exception as e: - print(f"Could not check cluster authentication method: {e}") - return False - def assert_jobsubmit_withlogin(self, cluster): ray_dashboard = cluster.cluster_dashboard_uri() @@ -272,7 +226,7 @@ def assert_jobsubmit_withlogin(self, cluster): ray_cluster_auth_enabled = self._is_ray_cluster_auth_enabled(cluster) print(f"Ray cluster authentication enabled: {ray_cluster_auth_enabled}") - is_byoidc_cluster = self._is_byoidc_cluster() + is_byoidc_cluster = is_byoidc_cluster_detected() if is_byoidc_cluster: # On BYOIDC clusters oc whoami --show-token=true is unavailable. diff --git a/tests/upgrade/01_raycluster_sdk_upgrade_test.py b/tests/upgrade/01_raycluster_sdk_upgrade_test.py index 4ad2685f6..e9ea083c0 100644 --- a/tests/upgrade/01_raycluster_sdk_upgrade_test.py +++ b/tests/upgrade/01_raycluster_sdk_upgrade_test.py @@ -124,52 +124,6 @@ def setup_method(self): if not self.cluster: raise RuntimeError("TestRayClusterUp needs to be run before this test") - def _is_byoidc_cluster(self): - """ - BYOIDC cluster detection by checking OpenShift cluster Authentication resource. - Detection is based solely on cluster state — no environment variable fallback. - """ - try: - auth_resource = self.custom_api.get_cluster_custom_object( - group="config.openshift.io", - version="v1", - plural="authentications", - name="cluster", - ) - - spec = auth_resource.get("spec", {}) - - # Any non-empty spec.oidcProviders.issuerURL means external OIDC is configured. - # This field is only populated on BYOIDC clusters — never on standard OAuth clusters. - if "oidcProviders" in spec and spec["oidcProviders"]: - for provider in spec["oidcProviders"]: - issuer_url = provider.get("issuer", {}).get("issuerURL", "") - if issuer_url: - print(f"Detected BYOIDC cluster with OIDC issuer: {issuer_url}") - return True - - # Check webhookTokenAuthenticators - if ( - "webhookTokenAuthenticators" in spec - and spec["webhookTokenAuthenticators"] - ): - for webhook in spec["webhookTokenAuthenticators"]: - if webhook.get("kubeConfig", {}): - print( - "Detected BYOIDC cluster with webhook token authenticator" - ) - return True - - # Do NOT check status.oidcClients — it is present on standard OpenShift 4.14+ - # clusters too and causes false positives. - - print("No BYOIDC indicators found in cluster Authentication resource") - return False - - except Exception as e: - print(f"Could not check cluster authentication method: {e}") - return False - def test_mnist_job_submission(self): self.assert_jobsubmit_withoutLogin(self.cluster) self.assert_jobsubmit_withlogin(self.cluster) @@ -179,7 +133,7 @@ def assert_jobsubmit_withoutLogin(self, cluster): dashboard_url = cluster.cluster_dashboard_uri() # Check if this is a BYOIDC cluster - is_byoidc_cluster = self._is_byoidc_cluster() + is_byoidc_cluster = is_byoidc_cluster_detected() # For BYOIDC clusters, authentication is enforced at the gateway level if is_byoidc_cluster: @@ -299,7 +253,7 @@ def assert_jobsubmit_withoutLogin(self, cluster): def assert_jobsubmit_withlogin(self, cluster): ray_dashboard = cluster.cluster_dashboard_uri() - is_byoidc_cluster = self._is_byoidc_cluster() + is_byoidc_cluster = is_byoidc_cluster_detected() if is_byoidc_cluster: # On BYOIDC clusters oc whoami --show-token=true is unavailable.