diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index b61a6d3a23..800c0faf01 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -15,9 +15,8 @@
 #                           probe → live inference. Validates the multi-agent architecture.
 #   skip-permissions-e2e    Validates --dangerously-skip-permissions activates the permissive
 #                           policy (not stuck in Pending) and sandbox egress works (not 403).
-#   gpu-e2e                 Local Ollama inference on a GPU self-hosted runner.
-#                           Controlled by the GPU_E2E_ENABLED repository variable.
-#                           Set vars.GPU_E2E_ENABLED to "true" in repo settings to enable.
+#   gpu-e2e                 Local Ollama inference on an NVKS ephemeral GPU runner.
+#   gpu-double-onboard-e2e  Ollama proxy token consistency after re-onboard (#2553).
 #   notify-on-failure       Auto-creates a GitHub issue when any E2E job fails.
 #
 # Runs directly on the runner (not inside Docker) because OpenShell bootstraps
@@ -545,15 +544,12 @@ jobs:
           if-no-files-found: ignore
 
   # ── GPU E2E (Ollama local inference) ──────────────────────────
-  # Enable by setting repository variable GPU_E2E_ENABLED=true
-  # (Settings → Secrets and variables → Actions → Variables)
-  #
-  # Runner labels: using 'self-hosted' for now. Refine to
-  # [self-hosted, linux, x64, gpu] once NVIDIA runner labels are confirmed.
+  # Runs on an NVKS ephemeral GPU runner (RTX Pro 6000, 36 GB VRAM).
+  # Each job gets a fresh VM — no state leakage between runs.
   gpu-e2e:
-    if: github.repository == 'NVIDIA/NemoClaw' && vars.GPU_E2E_ENABLED == 'true'
-    runs-on: self-hosted
-    timeout-minutes: 60
+    if: github.repository == 'NVIDIA/NemoClaw'
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: 30
     env:
       NEMOCLAW_NON_INTERACTIVE: "1"
       NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
@@ -594,6 +590,62 @@ jobs:
           path: /tmp/nemoclaw-gpu-e2e-test.log
           if-no-files-found: ignore
 
+  # ── GPU Double-Onboard E2E (Ollama token consistency) ────────
+  # Reproduces issue #2553: re-onboard with Ollama must not leave the
+  # proxy running with a different token than what's persisted to disk.
+  # Runs on its own ephemeral VM — no dependency on gpu-e2e.
+  gpu-double-onboard-e2e:
+    if: github.repository == 'NVIDIA/NemoClaw'
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: 30
+    env:
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_SANDBOX_NAME: "e2e-gpu-double-onboard"
+      NEMOCLAW_RECREATE_SANDBOX: "1"
+      NEMOCLAW_PROVIDER: "ollama"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Verify GPU availability
+        run: |
+          echo "=== GPU Info ==="
+          nvidia-smi
+          echo ""
+          echo "=== VRAM ==="
+          nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
+          echo ""
+          echo "=== Docker ==="
+          docker info --format '{{.ServerVersion}}'
+
+      - name: Run GPU double-onboard E2E test
+        run: bash test/e2e/test-gpu-double-onboard.sh
+
+      - name: Upload install log on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: gpu-double-onboard-install-log
+          path: /tmp/nemoclaw-gpu-double-onboard-install.log
+          if-no-files-found: ignore
+
+      - name: Upload re-onboard log on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: gpu-double-onboard-reonboard-log
+          path: /tmp/nemoclaw-gpu-double-onboard-reonboard.log
+          if-no-files-found: ignore
+
+      - name: Upload test log on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: gpu-double-onboard-test-log
+          path: /tmp/nemoclaw-gpu-double-onboard-test.log
+          if-no-files-found: ignore
+
   notify-on-failure:
     runs-on: ubuntu-latest
     needs:
@@ -616,6 +668,7 @@ jobs:
         rebuild-hermes-e2e,
         overlayfs-autofix-e2e,
         gpu-e2e,
+        gpu-double-onboard-e2e,
       ]
     if: ${{ always() && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }}
     permissions:
diff --git a/test/e2e/test-gpu-double-onboard.sh b/test/e2e/test-gpu-double-onboard.sh
new file mode 100755
index 0000000000..4d528262c7
--- /dev/null
+++ b/test/e2e/test-gpu-double-onboard.sh
@@ -0,0 +1,571 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# GPU Double-Onboard E2E: Ollama proxy token consistency after re-onboard.
+#
+# Reproduces the exact scenario from issue #2553 — the Ollama proxy token
+# divergence bug where re-running onboard left the proxy running with a
+# different token than what was persisted to disk, causing silent HTTP 401
+# on all inference.
+#
+# Flow:
+#   1. Prerequisites — Docker, nvidia-smi, env vars
+#   2. Install Ollama binary (do NOT start it — onboard handles that)
+#   3. First onboard — install.sh --non-interactive with NEMOCLAW_PROVIDER=ollama
+#   4. Verify sandbox, proxy, token file, inference through sandbox
+#   5. Second onboard (re-onboard) — nemoclaw onboard --non-interactive
+#   6. Token consistency verification (the core of this test):
+#      - Read ~/.nemoclaw/ollama-proxy-token
+#      - Verify proxy accepts that token (not 401)
+#      - Verify inference through sandbox succeeds (not 401)
+#   7. Destroy and cleanup
+#
+# Key differences from test-gpu-e2e.sh:
+#   - Adds a second onboard + token consistency check
+#   - Uses nemoclaw onboard CLI directly for re-onboard (not install.sh)
+#   - Distinct sandbox name e2e-gpu-double-onboard
+#
+# Key differences from test-double-onboard.sh:
+#   - Uses NEMOCLAW_PROVIDER=ollama (real GPU inference)
+#   - Tests token consistency explicitly
+#   - Runs on NVKS ephemeral GPU runner (RTX Pro 6000)
+#
+# Prerequisites:
+#   - NVIDIA GPU with drivers (nvidia-smi works)
+#   - Docker
+#   - NEMOCLAW_NON_INTERACTIVE=1
+#   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
+#   - Internet access (ollama.com for install, registry.ollama.ai for model pull)
+#
+# Usage:
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
+#     bash test/e2e/test-gpu-double-onboard.sh
+
+set -uo pipefail  # no -e: checks record failures via fail() and the script keeps going
+
+export NEMOCLAW_E2E_DEFAULT_TIMEOUT=1800
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+# shellcheck source=test/e2e/e2e-timeout.sh
+source "${SCRIPT_DIR}/e2e-timeout.sh"
+
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+
+pass() {
+  ((PASS++))
+  ((TOTAL++))
+  printf '\033[32m PASS: %s\033[0m\n' "$1"
+}
+fail() {
+  ((FAIL++))
+  ((TOTAL++))
+  printf '\033[31m FAIL: %s\033[0m\n' "$1"
+}
+# shellcheck disable=SC2329
+skip() {
+  ((SKIP++))
+  ((TOTAL++))
+  printf '\033[33m SKIP: %s\033[0m\n' "$1"
+}
+section() {
+  echo ""
+  printf '\033[1;36m=== %s ===\033[0m\n' "$1"
+}
+info() { printf '\033[1;34m [info]\033[0m %s\n' "$1"; }
+
+# Parse a chat completion JSON on stdin; prints content, else reasoning_content, else reasoning
+parse_chat_content() {
+  python3 -c "
+import json, sys
+try:
+    r = json.load(sys.stdin)
+    c = r['choices'][0]['message']
+    content = c.get('content') or c.get('reasoning_content') or c.get('reasoning') or ''
+    print(content.strip())
+except Exception as e:
+    print(f'PARSE_ERROR: {e}', file=sys.stderr)
+    sys.exit(1)
+"
+}
+
+# Determine repo root: /workspace if it holds install.sh, else two levels above this script
+if [ -d /workspace ] && [ -f /workspace/install.sh ]; then
+  REPO="/workspace"
+elif [ -f "${SCRIPT_DIR}/../../install.sh" ]; then
+  REPO="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+else
+  echo "ERROR: Cannot find repo root."
+  exit 1
+fi
+
+SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-gpu-double-onboard}"
+TEST_LOG="/tmp/nemoclaw-gpu-double-onboard-test.log"
+INSTALL_LOG="/tmp/nemoclaw-gpu-double-onboard-install.log"
+REONBOARD_LOG="/tmp/nemoclaw-gpu-double-onboard-reonboard.log"
+PROXY_PORT="${NEMOCLAW_OLLAMA_PROXY_PORT:-11435}"
+TOKEN_FILE="$HOME/.nemoclaw/ollama-proxy-token"
+
+# Enforce Ollama provider — this script only tests local GPU inference.
+export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-ollama}"
+if [ "$NEMOCLAW_PROVIDER" != "ollama" ]; then
+  echo "ERROR: NEMOCLAW_PROVIDER must be 'ollama' for GPU double-onboard E2E (got: $NEMOCLAW_PROVIDER)"
+  exit 1
+fi
+
+exec > >(tee -a "$TEST_LOG") 2>&1  # mirror all further stdout/stderr into the test log
+
+# Best-effort cleanup on any exit (prevents dirty state on reused runners)
+# shellcheck disable=SC2329 # invoked via trap
+cleanup() {
+  info "Running exit cleanup..."
+  if command -v nemoclaw >/dev/null 2>&1; then
+    nemoclaw "$SANDBOX_NAME" destroy --yes 2>/dev/null || true
+  fi
+  if command -v openshell >/dev/null 2>&1; then
+    openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+    openshell gateway destroy -g nemoclaw 2>/dev/null || true
+  fi
+  pkill -f "ollama serve" 2>/dev/null || true
+  pkill -f "ollama-auth-proxy" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 0: Pre-cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 0: Pre-cleanup"
+info "Destroying any leftover sandbox/gateway from previous runs..."
+if command -v nemoclaw >/dev/null 2>&1; then
+  nemoclaw "$SANDBOX_NAME" destroy --yes 2>/dev/null || true
+fi
+if command -v openshell >/dev/null 2>&1; then
+  openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+  openshell gateway destroy -g nemoclaw 2>/dev/null || true
+fi
+pkill -f "ollama serve" 2>/dev/null || true
+pkill -f "ollama-auth-proxy" 2>/dev/null || true
+sleep 2
+pass "Pre-cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 1: Prerequisites
+# ══════════════════════════════════════════════════════════════════
+section "Phase 1: Prerequisites"
+
+if docker info >/dev/null 2>&1; then
+  pass "Docker is running"
+else
+  fail "Docker is not running — cannot continue"
+  exit 1
+fi
+
+if nvidia-smi >/dev/null 2>&1; then
+  VRAM_MB=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
+  pass "nvidia-smi works (GPU VRAM: ${VRAM_MB:-unknown} MB)"
+else
+  fail "nvidia-smi failed — no NVIDIA GPU available"
+  exit 1
+fi
+
+if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
+  fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
+  exit 1
+fi
+
+if [ "${NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE:-}" != "1" ]; then
+  fail "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install"
+  exit 1
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 2: Install Ollama binary
+# ══════════════════════════════════════════════════════════════════
+section "Phase 2: Install Ollama binary"
+
+# Only install the binary — do NOT start Ollama or pull models.
+# The nemoclaw onboard flow handles startup and model pull itself.
+if command -v ollama >/dev/null 2>&1; then
+  pass "Ollama already installed: $(ollama --version 2>/dev/null || echo unknown)"
+else
+  info "Installing Ollama..."
+  if curl -fsSL https://ollama.com/install.sh | sh 2>&1; then
+    pass "Ollama installed: $(ollama --version 2>/dev/null || echo unknown)"
+  else
+    fail "Ollama installation failed"
+    exit 1
+  fi
+fi
+
+# If the Ollama installer started a system service, stop it so onboard
+# can start Ollama with OLLAMA_HOST=0.0.0.0:11434 (required for containers).
+if curl -sf http://127.0.0.1:11434/api/tags >/dev/null 2>&1; then
+  info "Ollama service is running — attempting to stop for clean onboard..."
+  systemctl --user stop ollama 2>/dev/null || true
+  systemctl stop ollama 2>/dev/null || true
+  pkill -f "ollama serve" 2>/dev/null || true
+  sleep 2
+
+  if curl -sf http://127.0.0.1:11434/api/tags >/dev/null 2>&1; then
+    info "Could not stop existing Ollama — onboard will use it as-is"
+  else
+    pass "Existing Ollama stopped — port 11434 is free for onboard"
+  fi
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 3: First onboard — install.sh --non-interactive
+# ══════════════════════════════════════════════════════════════════
+section "Phase 3: First onboard (install.sh --non-interactive)"
+
+cd "$REPO" || {
+  fail "Could not cd to repo root: $REPO"
+  exit 1
+}
+
+info "Running install.sh --non-interactive with NEMOCLAW_PROVIDER=ollama..."
+info "Onboard will start Ollama, pull the model, and create the sandbox."
+
+bash install.sh --non-interactive >"$INSTALL_LOG" 2>&1 &
+install_pid=$!
+tail -f "$INSTALL_LOG" --pid=$install_pid 2>/dev/null &
+tail_pid=$!
+wait $install_pid
+install_exit=$?
+kill $tail_pid 2>/dev/null || true
+wait $tail_pid 2>/dev/null || true
+
+# Source shell profile to pick up nvm/PATH changes
+if [ -f "$HOME/.bashrc" ]; then
+  source "$HOME/.bashrc" 2>/dev/null || true
+fi
+export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
+[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
+  export PATH="$HOME/.local/bin:$PATH"
+fi
+
+if [ $install_exit -eq 0 ]; then
+  pass "install.sh completed (exit 0)"
+else
+  fail "install.sh failed (exit $install_exit)"
+  info "Last 30 lines of install log:"
+  tail -30 "$INSTALL_LOG"
+  exit 1
+fi
+
+if command -v nemoclaw >/dev/null 2>&1; then
+  pass "nemoclaw on PATH: $(command -v nemoclaw)"
+else
+  fail "nemoclaw not found on PATH after install"
+  exit 1
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 4: Verify first onboard
+# ══════════════════════════════════════════════════════════════════
+section "Phase 4: Verify first onboard"
+
+# 4a: Sandbox exists
+if list_output=$(nemoclaw list 2>&1); then
+  if echo "$list_output" | grep -Fq -- "$SANDBOX_NAME"; then
+    pass "nemoclaw list contains '${SANDBOX_NAME}'"
+  else
+    fail "nemoclaw list does not contain '${SANDBOX_NAME}'"
+  fi
+else
+  fail "nemoclaw list failed: ${list_output:0:200}"
+fi
+
+# 4b: Status ok
+if nemoclaw "$SANDBOX_NAME" status >/dev/null 2>&1; then
+  pass "nemoclaw ${SANDBOX_NAME} status exits 0"
+else
+  fail "nemoclaw ${SANDBOX_NAME} status failed"
+fi
+
+# 4c: Ollama is running and reachable
+if curl -sf http://127.0.0.1:11434/api/tags >/dev/null 2>&1; then
+  pass "Ollama running on 127.0.0.1:11434"
+else
+  fail "Ollama not running — onboard should have started it"
+fi
+
+# 4d: Auth proxy is running
+if curl -sf --connect-timeout 3 "http://127.0.0.1:${PROXY_PORT}/api/tags" >/dev/null 2>&1; then
+  pass "Auth proxy running on :${PROXY_PORT}"
+else
+  fail "Auth proxy not running on :${PROXY_PORT}"
+fi
+
+# 4e: Token file exists with correct permissions
+if [ -f "$TOKEN_FILE" ]; then
+  pass "Proxy token persisted at $TOKEN_FILE"
+  PERMS=$(stat -c "%a" "$TOKEN_FILE" 2>/dev/null || stat -f "%Lp" "$TOKEN_FILE" 2>/dev/null)
+  if [ "$PERMS" = "600" ]; then
+    pass "Token file permissions: 600"
+  else
+    fail "Token file permissions: expected 600, got $PERMS"
+  fi
+else
+  fail "Proxy token file missing after first onboard"
+fi
+
+# 4f: Read the first-onboard token (whitespace stripped) for the 4g auth probe
+TOKEN_AFTER_FIRST=""
+if [ -f "$TOKEN_FILE" ]; then
+  TOKEN_AFTER_FIRST=$(tr -d '[:space:]' <"$TOKEN_FILE")
+  info "Token after first onboard: ${TOKEN_AFTER_FIRST:0:8}..."
+fi
+
+# 4g: Verify proxy accepts first-onboard token
+if [ -n "$TOKEN_AFTER_FIRST" ]; then
+  FIRST_AUTH_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
+    -H "Authorization: Bearer $TOKEN_AFTER_FIRST" \
+    "http://127.0.0.1:${PROXY_PORT}/v1/models" 2>/dev/null) || FIRST_AUTH_STATUS="000"
+  if [ "$FIRST_AUTH_STATUS" = "200" ]; then
+    pass "Proxy accepts first-onboard token (200)"
+  else
+    fail "Proxy rejects first-onboard token (status: $FIRST_AUTH_STATUS)"
+  fi
+fi
+
+# 4h: Determine model for inference tests (NEMOCLAW_MODEL, else first model Ollama lists)
+CONFIGURED_MODEL="${NEMOCLAW_MODEL:-}"
+if [ -z "$CONFIGURED_MODEL" ]; then
+  CONFIGURED_MODEL=$(curl -sf http://127.0.0.1:11434/api/tags 2>/dev/null \
+    | python3 -c "import json,sys; m=json.load(sys.stdin).get('models',[]); print(m[0]['name'] if m else '')" 2>/dev/null || echo "")
+fi
+if [ -n "$CONFIGURED_MODEL" ]; then
+  info "Model for inference tests: $CONFIGURED_MODEL"
+else
+  fail "No models found in Ollama"
+fi
+
+# 4i: First-onboard inference through sandbox
+info "Testing inference through sandbox after first onboard..."
+ssh_config="$(mktemp)"  # scratch file for the sandbox ssh config; removed right after the probe
+sandbox_response=""
+
+if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then
+  sandbox_response=$(run_with_timeout 120 ssh -F "$ssh_config" \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -o ConnectTimeout=10 \
+    -o LogLevel=ERROR \
+    "openshell-${SANDBOX_NAME}" \
+    "curl -s --max-time 90 https://inference.local/v1/chat/completions \
+      -H 'Content-Type: application/json' \
+      -d '{\"model\":\"$CONFIGURED_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":200}'" \
+    2>&1) || true  # never abort here; the captured output is judged by the PONG check below
+else
+  fail "openshell sandbox ssh-config failed"
+fi
+rm -f "$ssh_config"
+
+if [ -n "$sandbox_response" ]; then
+  sandbox_content=$(echo "$sandbox_response" | parse_chat_content 2>/dev/null) || true
+  if echo "$sandbox_content" | grep -qi "PONG"; then
+    pass "First-onboard sandbox inference succeeded"
+  else
+    fail "First-onboard sandbox inference: expected PONG, got: ${sandbox_content:0:200}"
+  fi
+else
+  fail "First-onboard sandbox inference: no response"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 5: Second onboard (re-onboard)
+# ══════════════════════════════════════════════════════════════════
+section "Phase 5: Second onboard (re-onboard via nemoclaw onboard)"
+
+info "Running nemoclaw onboard --non-interactive with NEMOCLAW_RECREATE_SANDBOX=1..."
+info "This exercises the exact code path from issue #2553:"
+info " startOllamaAuthProxy() → killStaleProxy() → token generation → persistProxyToken()"
+
+export NEMOCLAW_RECREATE_SANDBOX=1
+nemoclaw onboard --non-interactive >"$REONBOARD_LOG" 2>&1 &
+reonboard_pid=$!
+tail -f "$REONBOARD_LOG" --pid=$reonboard_pid 2>/dev/null &  # stream the log to the console while onboard runs
+tail_pid=$!
+wait $reonboard_pid
+reonboard_exit=$?  # exit status of the backgrounded onboard, as reported by wait
+kill $tail_pid 2>/dev/null || true
+wait $tail_pid 2>/dev/null || true
+
+if [ $reonboard_exit -eq 0 ]; then
+  pass "Re-onboard completed (exit 0)"
+else
+  fail "Re-onboard failed (exit $reonboard_exit)"
+  info "Last 30 lines of re-onboard log:"
+  tail -30 "$REONBOARD_LOG"
+  exit 1
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 6: Token consistency verification (core of this test)
+# ══════════════════════════════════════════════════════════════════
+section "Phase 6: Token consistency verification (#2553 regression check)"
+
+info "This is the exact check that would have caught the token divergence bug."
+info "After re-onboard, the token on disk MUST match what the running proxy accepts."
+
+# 6a: Token file still exists (the remaining checks need it, so abort if missing)
+if [ -f "$TOKEN_FILE" ]; then
+  pass "Proxy token file exists after re-onboard"
+else
+  fail "Proxy token file missing after re-onboard"
+  exit 1
+fi
+
+# 6b: Read the post-re-onboard token (whitespace stripped)
+TOKEN_AFTER_SECOND=$(tr -d '[:space:]' <"$TOKEN_FILE")
+info "Token after re-onboard: ${TOKEN_AFTER_SECOND:0:8}..."
+
+# 6c: Token file permissions preserved (GNU stat first, BSD stat as fallback)
+PERMS=$(stat -c "%a" "$TOKEN_FILE" 2>/dev/null || stat -f "%Lp" "$TOKEN_FILE" 2>/dev/null)
+if [ "$PERMS" = "600" ]; then
+  pass "Token file permissions preserved: 600"
+else
+  fail "Token file permissions: expected 600, got $PERMS"
+fi
+
+# 6d: Auth proxy is running after re-onboard
+if curl -sf --connect-timeout 3 "http://127.0.0.1:${PROXY_PORT}/api/tags" >/dev/null 2>&1; then
+  pass "Auth proxy running on :${PROXY_PORT} after re-onboard"
+else
+  fail "Auth proxy not running after re-onboard"
+fi
+
+# 6e: THE CRITICAL CHECK — proxy accepts the persisted token (not 401)
+# This is the exact failure mode from #2553: the proxy was running with
+# a NEW token in memory, but the OLD token was persisted to disk.
+TOKEN_AUTH_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
+  -H "Authorization: Bearer $TOKEN_AFTER_SECOND" \
+  "http://127.0.0.1:${PROXY_PORT}/v1/models" 2>/dev/null) || TOKEN_AUTH_STATUS="000"
+if [ "$TOKEN_AUTH_STATUS" = "200" ]; then
+  pass "Proxy accepts persisted token after re-onboard (200 — not 401)"
+else
+  fail "PROXY TOKEN DIVERGENCE DETECTED (#2553 regression)"
+  info "Token on disk does not match running proxy (status: $TOKEN_AUTH_STATUS)"  # detail only, so one failed check counts once
+  info "This is the exact bug from #2553 — the proxy has a different token than what's on disk."
+fi
+
+# 6f: Proxy rejects unauthenticated requests (sanity check; no Authorization header sent)
+UNAUTH_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+  "http://127.0.0.1:${PROXY_PORT}/api/generate" -d '{}' 2>/dev/null) || UNAUTH_STATUS="000"
+if [ "$UNAUTH_STATUS" = "401" ]; then
+  pass "Proxy rejects unauthenticated POST after re-onboard (401)"
+else
+  fail "Proxy should reject unauthenticated POST, got $UNAUTH_STATUS"
+fi
+
+# 6g: Proxy rejects a wrong token (sanity check; bearer value is a timestamped dummy)
+WRONG_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
+  -H "Authorization: Bearer wrong-token-$(date +%s)" \
+  -X POST "http://127.0.0.1:${PROXY_PORT}/api/generate" -d '{}' 2>/dev/null) || WRONG_STATUS="000"
+if [ "$WRONG_STATUS" = "401" ]; then
+  pass "Proxy rejects wrong token after re-onboard (401)"
+else
+  fail "Proxy should reject wrong token, got $WRONG_STATUS"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 7: Inference through sandbox after re-onboard
+# ══════════════════════════════════════════════════════════════════
+section "Phase 7: Inference through sandbox after re-onboard"
+
+info "Verifying end-to-end inference still works after re-onboard..."
+info "Path: sandbox → openshell gateway → auth proxy (:${PROXY_PORT}) → Ollama GPU (:11434)"
+
+ssh_config="$(mktemp)"  # scratch file for the sandbox ssh config; removed right after the probe
+sandbox_response=""
+
+if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then
+  sandbox_response=$(run_with_timeout 120 ssh -F "$ssh_config" \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -o ConnectTimeout=10 \
+    -o LogLevel=ERROR \
+    "openshell-${SANDBOX_NAME}" \
+    "curl -s --max-time 90 https://inference.local/v1/chat/completions \
+      -H 'Content-Type: application/json' \
+      -d '{\"model\":\"$CONFIGURED_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":200}'" \
+    2>&1) || true  # never abort here; the captured output is judged by the PONG check below
+else
+  fail "openshell sandbox ssh-config failed after re-onboard"
+fi
+rm -f "$ssh_config"
+
+if [ -n "$sandbox_response" ]; then
+  sandbox_content=$(echo "$sandbox_response" | parse_chat_content 2>/dev/null) || true
+  if echo "$sandbox_content" | grep -qi "PONG"; then
+    pass "Sandbox inference after re-onboard succeeded"
+    info "Full path proven: sandbox → gateway → auth proxy (:${PROXY_PORT}) → Ollama GPU (:11434)"
+  else
+    # Heuristic: any "401" substring in the raw response is reported as token divergence
+    if echo "$sandbox_response" | grep -q "401"; then
+      fail "SANDBOX INFERENCE RETURNED 401 — token divergence (#2553 regression)"
+    else
+      fail "Sandbox inference after re-onboard: expected PONG, got: ${sandbox_content:0:200}"
+    fi
+  fi
+else
+  fail "Sandbox inference after re-onboard: no response"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 8: Destroy and cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 8: Destroy and cleanup"
+
+info "Destroying sandbox ${SANDBOX_NAME}..."
+nemoclaw "$SANDBOX_NAME" destroy --yes 2>&1 | tail -5 || true  # show only the last 5 output lines; never abort
+
+# Verify against the registry file directly (see test-gpu-e2e.sh comment).
+registry_file="${HOME}/.nemoclaw/sandboxes.json"  # searched for the quoted sandbox name after destroy
+if [ -f "$registry_file" ] && grep -Fq "\"${SANDBOX_NAME}\"" "$registry_file"; then
+  fail "Sandbox ${SANDBOX_NAME} still in registry after destroy"
+else
+  pass "Sandbox ${SANDBOX_NAME} removed from registry"
+fi
+
+openshell gateway destroy -g nemoclaw 2>/dev/null || true  # best-effort; errors are ignored
+
+info "Stopping Ollama..."
+pkill -f "ollama serve" 2>/dev/null || true
+pkill -f "ollama-auth-proxy" 2>/dev/null || true
+pass "Cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Summary
+# ══════════════════════════════════════════════════════════════════
+echo ""
+echo "========================================"
+echo " GPU Double-Onboard E2E Results (Ollama Token Consistency):"
+echo " Passed: $PASS"
+echo " Failed: $FAIL"
+echo " Skipped: $SKIP"
+echo " Total: $TOTAL"
+echo "========================================"
+echo ""
+echo " What this tested (issue #2553 regression):"
+echo " - GPU detection (nvidia-smi)"
+echo " - Ollama binary install"
+echo " - First onboard: install.sh → Ollama + auth proxy + sandbox + inference"
+echo " - Second onboard (re-onboard): nemoclaw onboard --non-interactive"
+echo " - TOKEN CONSISTENCY: persisted token matches running proxy after re-onboard"
+echo " - Proxy auth enforcement: accept correct token, reject unauth + wrong token"
+echo " - End-to-end inference through sandbox after re-onboard"
+echo " - Destroy + cleanup"
+echo ""
+
+if [ "$FAIL" -eq 0 ]; then  # exit status reflects whether any check called fail()
+  printf '\n\033[1;32m GPU DOUBLE-ONBOARD E2E PASSED — Ollama proxy token consistency verified.\033[0m\n'
+  exit 0
+else
+  printf '\n\033[1;31m %d test(s) failed.\033[0m\n' "$FAIL"
+  exit 1
+fi