Skip to content

Commit cf2b4c0

Browse files
authored
RHOAIENG-62238: fix sdk tests (byoidc and non-byoidc) (#1087)
* RHOAIENG-62238: fix sdk tests (byoidc and non-byoidc)
1 parent 1b46de2 commit cf2b4c0

5 files changed

Lines changed: 283 additions & 354 deletions

File tree

images/tests/run-tests.sh

Lines changed: 183 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -149,16 +149,27 @@ echo "Detecting cluster authentication type from cluster configuration..."
149149
# Detect BYOIDC from cluster configuration alone
150150
CLUSTER_IS_BYOIDC=false
151151

152-
# Method 1: Check Authentication resource type (most reliable for OIDC clusters)
153-
AUTH_TYPE=$(oc get authentication cluster -o jsonpath='{.spec.type}' 2>/dev/null)
154-
if [ "$AUTH_TYPE" = "OIDC" ]; then
155-
echo "Detected BYOIDC cluster: Authentication spec.type = OIDC"
152+
# Method 0: Check kubeconfig exec plugin format (no API call — safe before auth setup)
153+
# Handles clusters where the kubeconfig uses oc get-token with OIDC (e.g., PSI BYOIDC with exec plugin)
154+
EXEC_ARGS=$(oc config view --minify -o jsonpath='{.users[0].user.exec.args}' 2>/dev/null) || true
155+
if echo "$EXEC_ARGS" | grep -qi "oc-cli\|realms/openshift"; then
156+
echo "Detected BYOIDC cluster: kubeconfig uses OIDC exec plugin (oc-cli / realms/openshift)"
156157
CLUSTER_IS_BYOIDC=true
157158
fi
158159

160+
# Method 1: Check Authentication resource type (most reliable for OIDC clusters)
161+
# Uses timeout to avoid hanging when kubeconfig requires interactive token refresh
162+
if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then
163+
AUTH_TYPE=$(timeout 10 oc get authentication cluster -o jsonpath='{.spec.type}' 2>/dev/null) || true
164+
if [ "$AUTH_TYPE" = "OIDC" ]; then
165+
echo "Detected BYOIDC cluster: Authentication spec.type = OIDC"
166+
CLUSTER_IS_BYOIDC=true
167+
fi
168+
fi
169+
159170
# Method 2: Check for OIDC providers in Authentication resource (fallback)
160171
if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then
161-
OIDC_ISSUER=$(oc get authentication cluster -o jsonpath='{.spec.oidcProviders[*].issuer.issuerURL}' 2>/dev/null)
172+
OIDC_ISSUER=$(timeout 10 oc get authentication cluster -o jsonpath='{.spec.oidcProviders[*].issuer.issuerURL}' 2>/dev/null) || true
162173
if [ -n "$OIDC_ISSUER" ]; then
163174
echo "Detected BYOIDC cluster: Authentication has oidcProviders with issuerURL: $OIDC_ISSUER"
164175
CLUSTER_IS_BYOIDC=true
@@ -167,15 +178,15 @@ fi
167178

168179
# Method 3: Check for oidcClients in Authentication status (another fallback)
169180
if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then
170-
if oc get authentication cluster -o jsonpath='{.status.oidcClients}' 2>/dev/null | grep -q "oc-cli"; then
181+
if timeout 10 oc get authentication cluster -o jsonpath='{.status.oidcClients}' 2>/dev/null | grep -q "oc-cli"; then
171182
echo "Detected BYOIDC cluster: Authentication status has oidcClients with oc-cli"
172183
CLUSTER_IS_BYOIDC=true
173184
fi
174185
fi
175186

176187
# Method 4: Check OAuth resource for openID identity provider (legacy OIDC setup)
177188
if [ "$CLUSTER_IS_BYOIDC" = "false" ]; then
178-
if oc get oauth cluster -o jsonpath='{.spec.identityProviders[*].type}' 2>/dev/null | grep -qi "OpenID"; then
189+
if timeout 10 oc get oauth cluster -o jsonpath='{.spec.identityProviders[*].type}' 2>/dev/null | grep -qi "OpenID"; then
179190
echo "Detected BYOIDC cluster: OAuth has OpenID identity provider"
180191
CLUSTER_IS_BYOIDC=true
181192
fi
@@ -374,7 +385,8 @@ set_kueue_management_state() {
374385
# ============================================================================
375386
echo "Extracting OpenShift API URL from active oc session..."
376387
# Try to get URL from active oc session first (if already logged in)
377-
OCP_API_URL=$(oc whoami --show-server 2>/dev/null)
388+
# Use timeout to avoid hanging when kubeconfig uses an exec plugin (e.g., BYOIDC with oc get-token)
389+
OCP_API_URL=$(timeout 10 oc whoami --show-server 2>/dev/null) || true
378390

379391
if [ -z "$OCP_API_URL" ]; then
380392
echo "No active oc session found, extracting from kubeconfig..."
@@ -403,6 +415,9 @@ fi
403415
# ============================================================================
404416
echo "Setting up authentication for RBAC policies..."
405417

418+
# Save original kubeconfig directory for token cache lookup later
419+
ORIGINAL_KUBE_DIR=$(dirname "${KUBECONFIG:-/codeflare-sdk/tests/.kube/config}")
420+
406421
# Create a temporary kubeconfig (since the mounted one is read-only)
407422
TEMP_KUBECONFIG="/tmp/kubeconfig-$$"
408423
cp "${KUBECONFIG}" "${TEMP_KUBECONFIG}" 2>/dev/null || {
@@ -464,17 +479,158 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then
464479
NEEDS_CONVERSION=true
465480
fi
466481

482+
# Allow a pre-extracted token to be injected via BYOIDC_ADMIN_TOKEN env var.
483+
# Useful for local runs on BYOIDC clusters where the exec plugin can't run
484+
# inside the container (no token cache, no browser).
485+
# Obtain it on the host: python3 -c "import json,glob,os; [print(json.load(open(f))['id_token']) for f in glob.glob(os.path.expanduser('~/.kube/cache/oc/*')) if 'id_token' in json.load(open(f))]" | head -1
486+
if [ -z "$KUBECONFIG_TOKEN" ] && [ -n "${BYOIDC_ADMIN_TOKEN:-}" ]; then
487+
echo "Using pre-extracted token from BYOIDC_ADMIN_TOKEN"
488+
KUBECONFIG_TOKEN="$BYOIDC_ADMIN_TOKEN"
489+
NEEDS_CONVERSION=true
490+
fi
491+
467492
# Check for exec plugin format (oc-oidc plugin)
468493
if [ -z "$KUBECONFIG_TOKEN" ]; then
469494
HAS_EXEC_PLUGIN=$(oc config view --minify -o jsonpath='{.users[0].user.exec.command}' 2>/dev/null)
470495
if [ -n "$HAS_EXEC_PLUGIN" ]; then
471496
echo "Detected exec-plugin format ($HAS_EXEC_PLUGIN), searching for cached token..."
472497

473-
# Try to extract a token from cached locations
474-
if [ -f ~/.kube/oidc-login.cache ]; then
498+
# Check oc's built-in token cache (~/.kube/cache/oc/<hash> JSON files).
499+
# oc stores tokens at <kube-dir>/cache/oc/<hash> where <kube-dir> is the
500+
# directory containing the kubeconfig. This is populated when ~/.kube/ is
501+
# mounted into the container (not just ~/.kube/config).
502+
OC_CACHE_DIR="${ORIGINAL_KUBE_DIR}/cache/oc"
503+
if [ -z "$KUBECONFIG_TOKEN" ] && [ -d "$OC_CACHE_DIR" ]; then
504+
for cache_file in "$OC_CACHE_DIR"/*; do
505+
if [ -f "$cache_file" ]; then
506+
TOKEN=$(grep -o '"id_token":"[^"]*"' "$cache_file" 2>/dev/null | head -1 | cut -d'"' -f4)
507+
if [ -n "$TOKEN" ]; then
508+
KUBECONFIG_TOKEN="$TOKEN"
509+
echo "Found cached OIDC token in oc cache: $(basename "$cache_file")"
510+
break
511+
fi
512+
fi
513+
done
514+
fi
515+
516+
# Also check legacy oidc-login cache location
517+
if [ -z "$KUBECONFIG_TOKEN" ] && [ -f ~/.kube/oidc-login.cache ]; then
475518
KUBECONFIG_TOKEN=$(cat ~/.kube/oidc-login.cache 2>/dev/null | grep -o '"id_token":"[^"]*"' | cut -d'"' -f4)
476519
fi
477-
NEEDS_CONVERSION=true
520+
521+
# Next fallback: try running the exec plugin directly to get a fresh token.
522+
# Works when oc can refresh the token non-interactively (e.g. valid refresh_token
523+
# is in the cache AND the cache directory is mounted into the container).
524+
if [ -z "$KUBECONFIG_TOKEN" ]; then
525+
echo "No cached token found, attempting to run exec plugin directly..."
526+
mapfile -t EXEC_PLUGIN_ARGS < <(oc config view --minify -o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null)
527+
if [ ${#EXEC_PLUGIN_ARGS[@]} -gt 0 ]; then
528+
EXEC_OUTPUT=$(timeout 30 "$HAS_EXEC_PLUGIN" "${EXEC_PLUGIN_ARGS[@]}" 2>/dev/null) || true
529+
if [ -n "$EXEC_OUTPUT" ]; then
530+
KUBECONFIG_TOKEN=$(echo "$EXEC_OUTPUT" | grep -o '"token":"[^"]*"' | head -1 | cut -d'"' -f4)
531+
[ -n "$KUBECONFIG_TOKEN" ] && echo "Obtained token from exec plugin"
532+
fi
533+
fi
534+
fi
535+
536+
if [ -n "$KUBECONFIG_TOKEN" ]; then
537+
NEEDS_CONVERSION=true
538+
else
539+
# No token from the caches or the exec plugin. Use the same approach as Jenkins loginByoidcUser:
540+
# call Keycloak's token endpoint directly with grant_type=password, then
541+
# inject id-token + refresh-token into the kubeconfig via auth-provider.
542+
# The OIDC issuer URL and client ID are already in the exec plugin args.
543+
if [ -n "${OCP_ADMIN_USER_USERNAME:-}" ] && [ -n "${OCP_ADMIN_USER_PASSWORD:-}" ]; then
544+
echo "Attempting Keycloak password grant (same method as Jenkins loginByoidcUser)..."
545+
546+
# Extract issuer URL and client ID directly from exec plugin args in kubeconfig
547+
OIDC_ISSUER=$(oc config view --minify \
548+
-o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null \
549+
| grep -- '--issuer-url=' | sed 's/--issuer-url=//' | tr -d '[:space:]')
550+
OIDC_CLIENT_ID=$(oc config view --minify \
551+
-o jsonpath='{range .users[0].user.exec.args[*]}{@}{"\n"}{end}' 2>/dev/null \
552+
| grep -- '--client-id=' | sed 's/--client-id=//' | tr -d '[:space:]')
553+
554+
# Allow env overrides (consistent with Jenkins configData fields)
555+
OIDC_ISSUER="${CLUSTER_OIDC_ISSUER:-$OIDC_ISSUER}"
556+
OIDC_CLIENT_ID="${CLIENT_ID_OC_CLI:-${OIDC_CLIENT_ID:-oc-cli}}"
557+
OIDC_TOKEN_ENDPOINT="${CLUSTER_OIDC_TOKEN_ENDPOINT:-${OIDC_ISSUER}/protocol/openid-connect/token}"
558+
559+
if [ -n "$OIDC_ISSUER" ]; then
560+
echo " OIDC issuer: $OIDC_ISSUER"
561+
echo " Admin user: $OCP_ADMIN_USER_USERNAME"
562+
563+
# Use OIDC well-known discovery to find the correct token endpoint
564+
# (mirrors opendatahub-tests get_oidc_token_endpoint())
565+
if [ -z "${CLUSTER_OIDC_TOKEN_ENDPOINT:-}" ]; then
566+
WELL_KNOWN=$(curl -sk --max-time 10 "${OIDC_ISSUER}/.well-known/openid-configuration" 2>/dev/null) || true
567+
if [ -n "$WELL_KNOWN" ]; then
568+
DISCOVERED_ENDPOINT=$(echo "$WELL_KNOWN" | python3 -c \
569+
"import json,sys; print(json.load(sys.stdin).get('token_endpoint',''))" \
570+
2>/dev/null || true)
571+
[ -n "$DISCOVERED_ENDPOINT" ] && OIDC_TOKEN_ENDPOINT="$DISCOVERED_ENDPOINT"
572+
fi
573+
fi
574+
echo " Token endpoint: $OIDC_TOKEN_ENDPOINT"
575+
576+
# Mirrors Jenkins loginByoidcUser / opendatahub-tests get_oidc_tokens():
577+
# --data-urlencode safely handles special characters in credentials.
578+
# scope matches Jenkins OIDC_LOGIN_SCOPE (default "openid").
579+
OIDC_SCOPE="${OIDC_LOGIN_SCOPE:-openid}"
580+
TOKENS=$(curl -sk -L -X POST "$OIDC_TOKEN_ENDPOINT" \
581+
-H "Content-Type: application/x-www-form-urlencoded" \
582+
-H "User-Agent: python-requests" \
583+
--data-urlencode "username=${OCP_ADMIN_USER_USERNAME}" \
584+
--data-urlencode "password=${OCP_ADMIN_USER_PASSWORD}" \
585+
-d "grant_type=password" \
586+
-d "client_id=${OIDC_CLIENT_ID}" \
587+
-d "scope=${OIDC_SCOPE}" 2>/dev/null) || true
588+
589+
if [ -n "$TOKENS" ]; then
590+
ID_TOKEN=$(echo "$TOKENS" | python3 -c \
591+
"import json,sys; print(json.load(sys.stdin).get('id_token',''))" \
592+
2>/dev/null || echo "$TOKENS" | grep -o '"id_token":"[^"]*"' | cut -d'"' -f4)
593+
REFRESH_TOKEN_VAL=$(echo "$TOKENS" | python3 -c \
594+
"import json,sys; print(json.load(sys.stdin).get('refresh_token',''))" \
595+
2>/dev/null || echo "$TOKENS" | grep -o '"refresh_token":"[^"]*"' | cut -d'"' -f4)
596+
597+
if [ -n "$ID_TOKEN" ] && [ "$ID_TOKEN" != "None" ] && [ "$ID_TOKEN" != "" ]; then
598+
# Inject tokens into kubeconfig using auth-provider format
599+
# (identical to Jenkins loginByoidcUser kubectl config set-credentials call)
600+
oc config set-credentials "${OCP_ADMIN_USER_USERNAME}" \
601+
--auth-provider=oidc \
602+
--auth-provider-arg=idp-issuer-url="${OIDC_ISSUER}" \
603+
--auth-provider-arg=client-id="${OIDC_CLIENT_ID}" \
604+
--auth-provider-arg=client-secret="" \
605+
--auth-provider-arg=refresh-token="${REFRESH_TOKEN_VAL}" \
606+
--auth-provider-arg=id-token="${ID_TOKEN}" 2>/dev/null
607+
oc config set-context --current --user="${OCP_ADMIN_USER_USERNAME}" 2>/dev/null
608+
cp "${TEMP_KUBECONFIG}" ~/.kube/config 2>/dev/null || true
609+
echo " ✓ Kubeconfig updated with Keycloak id-token + refresh-token"
610+
KUBECONFIG_TOKEN="$ID_TOKEN"
611+
NEEDS_CONVERSION=false
612+
else
613+
ERR=$(echo "$TOKENS" | python3 -c \
614+
"import json,sys; d=json.load(sys.stdin); print(d.get('error','?') + ': ' + d.get('error_description',''))" \
615+
2>/dev/null || echo "$TOKENS" | head -c 200)
616+
echo " WARNING: Keycloak token request failed: $ERR"
617+
echo " Falling back to kubeconfig exec plugin as-is"
618+
fi
619+
else
620+
echo " WARNING: No response from Keycloak token endpoint"
621+
fi
622+
else
623+
echo " WARNING: Could not determine OIDC issuer from exec plugin args"
624+
fi
625+
fi
626+
627+
if [ -z "$KUBECONFIG_TOKEN" ]; then
628+
# Last resort: use kubeconfig with exec plugin as-is.
629+
# When running interactively (-it), oc get-token may prompt for device/browser auth.
630+
echo "Using kubeconfig with exec plugin as-is (no static token conversion)"
631+
echo "oc commands will invoke the exec plugin to authenticate when needed"
632+
fi
633+
fi
478634
fi
479635
fi
480636

@@ -529,15 +685,6 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then
529685
# Update ~/.kube/config after conversion
530686
cp "${TEMP_KUBECONFIG}" ~/.kube/config 2>/dev/null || true
531687
echo "✓ Converted to token-based authentication"
532-
else
533-
echo "ERROR: Cannot extract token from kubeconfig"
534-
echo "The kubeconfig uses exec-plugin or auth-provider format but no token could be extracted."
535-
echo ""
536-
echo "Solution: Jenkins should mount a kubeconfig with a static token."
537-
echo "You can verify the Jenkins kubeconfig by running:"
538-
echo " kubectl config view --minify -o jsonpath='{.users[0].user}'"
539-
rm -f "${TEMP_KUBECONFIG}"
540-
exit 1
541688
fi
542689
fi
543690

@@ -560,23 +707,30 @@ elif [ "$AUTH_METHOD" = "byoidc" ]; then
560707
echo "Note: oc whoami not available on this cluster (external OIDC mode)"
561708
echo "Using alternative authentication verification..."
562709

563-
# Method 1: Try to get API server version (basic connectivity test)
564-
if oc version 2>/dev/null | grep -q "Server"; then
565-
echo " ✓ API server connectivity verified"
710+
# Method 1: Raw connectivity check — /version is unauthenticated, avoids exec plugin
711+
if [ -n "${OCP_API_URL:-}" ] && \
712+
curl -sk --max-time 10 "${OCP_API_URL}/version" 2>/dev/null | grep -q '"major"'; then
713+
echo " ✓ API server connectivity verified (unauthenticated /version endpoint)"
714+
elif [ -z "$KUBECONFIG_TOKEN" ]; then
715+
# No static token — exec plugin as-is; connectivity may still work through Python client.
716+
# Don't abort: the Python kubernetes client handles exec plugins in-process and may succeed.
717+
echo " WARNING: Could not verify API connectivity via /version (exec plugin as-is mode)"
718+
echo " Tests will attempt to run; Python's kubernetes client invokes the exec plugin natively."
719+
echo " If tests fail with 401, ensure BYOIDC_ADMIN_PASSWORD is correct in your env file,"
720+
echo " or extract the id_token from the oc cache on your host and set BYOIDC_ADMIN_TOKEN:"
721+
echo " python3 -c \"import json,glob,os; [print(json.load(open(f))['id_token']) for f in glob.glob(os.path.expanduser('~/.kube/cache/oc/*')) if 'id_token' in json.load(open(f))]\" | head -1"
566722
else
567-
echo "ERROR: Cannot connect to API server"
723+
echo "ERROR: Cannot connect to API server at ${OCP_API_URL:-[not set]}"
568724
rm -f "${TEMP_KUBECONFIG}"
569725
exit 1
570726
fi
571727

572-
# Method 2: Check if we can perform basic API calls
728+
# Method 2: Check if we can perform basic API calls (requires valid token)
573729
if oc auth can-i get namespaces 2>/dev/null | grep -q "yes"; then
574730
echo " ✓ Authentication verified (can get namespaces)"
575731
else
576-
echo "ERROR: Cannot verify authentication - no namespace access"
577-
echo "oc whoami output: $WHOAMI_OUTPUT"
578-
rm -f "${TEMP_KUBECONFIG}"
579-
exit 1
732+
echo " WARNING: Cannot verify API authentication via oc auth can-i"
733+
echo " This is expected when the exec plugin is used without a cached token."
580734
fi
581735

582736
# Method 3: Try to extract username from token (JWT sub claim)

tests/e2e/mnist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import torch
1818
import requests
1919
from pytorch_lightning import LightningModule, Trainer
20-
from pytorch_lightning.callbacks.progress import TQDMProgressBar
20+
from pytorch_lightning.callbacks import TQDMProgressBar
2121
from torch import nn
2222
from torch.nn import functional as F
2323
from torch.utils.data import DataLoader, random_split, RandomSampler

0 commit comments

Comments
 (0)