From 6878c7a7aca34ab2da4cf96d0c708130a02eed77 Mon Sep 17 00:00:00 2001 From: Evan Takahashi Date: Mon, 20 Apr 2026 17:19:25 -0700 Subject: [PATCH 1/5] test(e2e): add dashboard reachability coverage Adds test/e2e/test-dashboard-reachability.sh validating that the OpenClaw dashboard is reachable from the host on the forwarded port after onboard: port bound (polled), HTTP 200 (polled), HTML body signature (soft marker check). Wires it into nightly-e2e.yaml as a new top-level job with a 30-minute timeout and adds it to notify-on-failure. Closes #2100 Signed-off-by: Evan Takahashi --- .github/workflows/nightly-e2e.yaml | 29 ++ test/e2e/test-dashboard-reachability.sh | 343 ++++++++++++++++++++++++ 2 files changed, 372 insertions(+) create mode 100755 test/e2e/test-dashboard-reachability.sh diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml index be6206d0b7..1630d674f2 100644 --- a/.github/workflows/nightly-e2e.yaml +++ b/.github/workflows/nightly-e2e.yaml @@ -256,6 +256,34 @@ jobs: path: test-sandbox-operations-*.log if-no-files-found: ignore + # ── Dashboard reachability ─────────────────────────────────── + # Validates the OpenClaw dashboard is reachable from the host on the + # forwarded port after onboard: port bound, HTTP 200, HTML body signature. + dashboard-reachability-e2e: + if: github.repository == 'NVIDIA/NemoClaw' + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Run dashboard reachability E2E test + env: + NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + NEMOCLAW_NON_INTERACTIVE: "1" + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" + NEMOCLAW_POLICY_TIER: "open" + GITHUB_TOKEN: ${{ github.token }} + run: bash test/e2e/test-dashboard-reachability.sh + + - name: Upload test log on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: dashboard-reachability-test-log + path: test-dashboard-reachability-*.log + if-no-files-found: ignore + # ── Inference routing (credential isolation + error classification) ── # TC-INF-05: real API key absent from sandbox env/process/filesystem # TC-INF-06: invalid API key → classified credential error (PR-safe) @@ -572,6 +600,7 @@ jobs: hermes-e2e, skip-permissions-e2e, sandbox-operations-e2e, + dashboard-reachability-e2e, inference-routing-e2e, network-policy-e2e, deployment-services-e2e, diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh new file mode 100755 index 0000000000..2e75c176b2 --- /dev/null +++ b/test/e2e/test-dashboard-reachability.sh @@ -0,0 +1,343 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# ============================================================================= +# test-dashboard-reachability.sh +# NemoClaw OpenClaw Dashboard Reachability E2E Test +# +# Covers: TC-DASH-01 through TC-DASH-03 +# Verifies the host → pod serving chain for the OpenClaw dashboard (default +# port 18789): port bound on host, HTTP 200 response, and a body-signature +# check so an unrelated process binding the port cannot silently pass. +# ============================================================================= + +set -euo pipefail + +# ── Overall timeout (prevents hung CI jobs) ────────────────────────────────── +if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then + export NEMOCLAW_E2E_NO_TIMEOUT=1 + TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-1800}" + if command -v timeout >/dev/null 2>&1; then + exec timeout -s TERM "$TIMEOUT_SECONDS" bash "$0" "$@" + elif command -v gtimeout >/dev/null 2>&1; then + exec gtimeout -s TERM "$TIMEOUT_SECONDS" bash "$0" "$@" + fi +fi + +# ── Config ─────────────────────────────────────────────────────────────────── +SANDBOX="test-dash" +DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-18789}" +DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/" +POLL_ATTEMPTS=30 +POLL_INTERVAL=1 +LOG_FILE="test-dashboard-reachability-$(date +%Y%m%d-%H%M%S).log" + +# macOS uses gtimeout (from coreutils); Linux uses timeout +if command -v gtimeout &>/dev/null; then + TIMEOUT_CMD="gtimeout" +elif command -v timeout &>/dev/null; then + TIMEOUT_CMD="timeout" +else + echo "ERROR: Neither timeout nor gtimeout found. Install coreutils: brew install coreutils" + exit 1 +fi + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +# ── Counters ───────────────────────────────────────────────────────────────── +PASS=0 +FAIL=0 +TOTAL=0 + +# ── Helpers ────────────────────────────────────────────────────────────────── +log() { echo -e "${CYAN}[$(date +%H:%M:%S)]${NC} $*" | tee -a "$LOG_FILE"; } +pass() { + ((PASS += 1)) + ((TOTAL += 1)) + echo -e "${GREEN} PASS${NC} $1" | tee -a "$LOG_FILE" +} +fail() { + ((FAIL += 1)) + ((TOTAL += 1)) + echo -e "${RED} FAIL${NC} $1 — $2" | tee -a "$LOG_FILE" +} + +# Onboard the test sandbox in non-interactive mode. Returns 0 if the sandbox +# appears in nemoclaw list. +onboard_sandbox() { + local name="$1" + log " Onboarding sandbox '$name'..." + + rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true + + local onboard_exit=0 + NEMOCLAW_SANDBOX_NAME="$name" \ + NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ + NEMOCLAW_RECREATE_SANDBOX=1 \ + nemoclaw onboard --non-interactive --yes-i-accept-third-party-software \ + 2>&1 | tee -a "$LOG_FILE" || onboard_exit=$? + + if [[ $onboard_exit -ne 0 ]]; then + log " [onboard_sandbox] nemoclaw onboard exited with code $onboard_exit" + return 1 + fi + + if ! nemoclaw list 2>/dev/null | grep -q "$name"; then + log " [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard" + return 1 + fi + return 0 +} + +# ── Resolve repo root ──────────────────────────────────────────────────────── +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +if [ -f "$SCRIPT_DIR/../../install.sh" ]; then + REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +elif [ -f "./install.sh" ]; then + REPO_ROOT="$(pwd)" +else + echo "ERROR: Cannot find install.sh — run from the repo root or test/e2e/" + exit 1 +fi + +# ── Install NemoClaw if not present ────────────────────────────────────────── +install_nemoclaw() { + if command -v nemoclaw &>/dev/null; then + log "nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')" + return 0 + fi + + log "=== Installing NemoClaw via install.sh ===" + + local install_exit=0 + bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \ + 2>&1 | tee -a "$LOG_FILE" || install_exit=$? + + if [ -f "$HOME/.bashrc" ]; then + # shellcheck source=/dev/null + source "$HOME/.bashrc" 2>/dev/null || true + fi + export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" + if [ -s "$NVM_DIR/nvm.sh" ]; then + # shellcheck source=/dev/null + . "$NVM_DIR/nvm.sh" + fi + if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then + export PATH="$HOME/.local/bin:$PATH" + fi + + if [[ $install_exit -ne 0 ]]; then + echo -e "${RED}FATAL: install.sh failed (exit $install_exit)${NC}" + exit 1 + fi + + if ! command -v nemoclaw &>/dev/null; then + echo -e "${RED}FATAL: nemoclaw not found on PATH after install${NC}" + exit 1 + fi + + log "nemoclaw installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')" + + local install_sandbox + install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}" + if nemoclaw list 2>/dev/null | grep -q "$install_sandbox"; then + log "Destroying install sandbox '$install_sandbox'..." + nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true + fi +} + +# ── Pre-flight ─────────────────────────────────────────────────────────────── +preflight() { + log "=== Pre-flight checks ===" + + if ! docker info &>/dev/null; then + echo -e "${RED}ERROR: Docker is not running.${NC}" + exit 1 + fi + log "Docker is running" + + if [[ -z "${NVIDIA_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then + echo -e "${YELLOW}WARNING: No API key detected.${NC}" + fi + + install_nemoclaw + + log "nemoclaw: $(nemoclaw --version 2>/dev/null || echo 'unknown')" + log "openshell: $(openshell --version 2>&1 | head -1 || echo 'unknown')" + log "dashboard port: $DASHBOARD_PORT" + log "timeout: $TIMEOUT_CMD" + + if [[ -f "$HOME/.nemoclaw/onboard.lock" ]]; then + log "Removing stale onboard lock" + rm -f "$HOME/.nemoclaw/onboard.lock" + fi + + if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then + log "Cleaning up leftover sandbox: $SANDBOX" + nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true + fi + + log "Pre-flight complete" + echo "" +} + +# ── Setup: Onboard the test sandbox ───────────────────────────────────────── +setup_sandbox() { + log "=== Setup: Onboarding sandbox '$SANDBOX' ===" + log "This may take a few minutes..." + + if ! onboard_sandbox "$SANDBOX"; then + echo -e "${RED}FATAL: Onboard failed — sandbox '$SANDBOX' not found.${NC}" + exit 1 + fi + + # Defensively re-establish the port-forward. nemoclaw onboard already + # starts it, but an earlier crashed run can leave a stale entry and the + # dashboard test is meaningless without a live forward. + log "Ensuring port-forward on $DASHBOARD_PORT..." + openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \ + >>"$LOG_FILE" 2>&1 || log " forward start returned non-zero (may already be running)" + + log "Sandbox '$SANDBOX' onboarded successfully" + echo "" +} + +# ============================================================================= +# Test cases +# ============================================================================= + +# ── TC-DASH-01: Dashboard port bound on host ───────────────────────────────── +# Confirms the port-forward exists before we try HTTP. Separating this from +# the HTTP check gives a clearer failure signal: if the port is not bound at +# all, it's a forward-layer problem, not a gateway-process problem. +test_dash_01_port_bound() { + log "=== TC-DASH-01: Dashboard port bound on host ===" + + if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then + pass "TC-DASH-01: Port $DASHBOARD_PORT is bound" + else + fail "TC-DASH-01: Dashboard port bound" \ + "Nothing listening on $DASHBOARD_PORT — port-forward not established" + fi +} + +# ── TC-DASH-02: Dashboard returns HTTP 200 ────────────────────────────────── +# Polls the dashboard up to POLL_ATTEMPTS × POLL_INTERVAL seconds. The +# gateway can take several seconds after onboard to start accepting +# connections, so a single-shot check would be flaky. +test_dash_02_http_200() { + log "=== TC-DASH-02: Dashboard returns HTTP 200 ===" + + local status="" + local i + for i in $(seq 1 "$POLL_ATTEMPTS"); do + status=$(curl -s -o /dev/null -w '%{http_code}' \ + --max-time 5 "$DASHBOARD_URL" 2>/dev/null || echo "000") + if [[ "$status" == "200" ]]; then + pass "TC-DASH-02: HTTP 200 after ${i}s" + return + fi + sleep "$POLL_INTERVAL" + done + + fail "TC-DASH-02: Dashboard HTTP 200" \ + "Last status after ${POLL_ATTEMPTS}s: $status (expected 200)" +} + +# ── TC-DASH-03: Response body signature ───────────────────────────────────── +# Guards against an unrelated process binding the dashboard port. The real +# OpenClaw dashboard is an HTML page identifying itself in the body; any +# other service returning 200 would not match. +test_dash_03_body_signature() { + log "=== TC-DASH-03: Response body signature ===" + + local body + body=$(curl -s --max-time 10 "$DASHBOARD_URL" 2>/dev/null || true) + + if [[ -z "$body" ]]; then + fail "TC-DASH-03: Body signature" "Empty response body" + return + fi + + # Primary: looks like HTML. + if ! echo "$body" | grep -qiE ' contains an OpenClaw / Control UI marker. + if echo "$body" | grep -qiE 'openclaw|control[- ]?ui|nemoclaw'; then + pass "TC-DASH-03: Response body identifies as OpenClaw dashboard" + else + fail "TC-DASH-03: Body signature" \ + "HTML served but no OpenClaw/Control-UI marker in body" + fi +} + +# ── Teardown ───────────────────────────────────────────────────────────────── +teardown() { + # Disable errexit during teardown — cleanup must be best-effort + set +e + log "" + log "=== Teardown ===" + openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true + if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then + log "Destroying sandbox '$SANDBOX'..." + nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true + fi + openshell gateway destroy -g nemoclaw 2>/dev/null || true + rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true + log "Teardown complete" + set -e +} + +# ── Summary ────────────────────────────────────────────────────────────────── +summary() { + echo "" + echo "============================================================" + echo " TEST SUMMARY" + echo "============================================================" + echo -e " ${GREEN}PASS: $PASS${NC}" + echo -e " ${RED}FAIL: $FAIL${NC}" + echo " TOTAL: $TOTAL" + echo "============================================================" + echo " Log: $LOG_FILE" + echo "============================================================" + echo "" + + if [[ $FAIL -gt 0 ]]; then + exit 1 + fi + exit 0 +} + +# ── Main ───────────────────────────────────────────────────────────────────── +main() { + echo "" + echo "============================================================" + echo " NemoClaw Dashboard Reachability E2E Test" + echo " $(date)" + echo "============================================================" + echo "" + + preflight + setup_sandbox + + test_dash_01_port_bound + test_dash_02_http_200 + test_dash_03_body_signature + + trap - EXIT + teardown + summary +} + +trap teardown EXIT +main "$@" From 71fe95374497093e6c03847dd80953d7ff41dc11 Mon Sep 17 00:00:00 2001 From: Evan Takahashi Date: Mon, 20 Apr 2026 17:34:08 -0700 Subject: [PATCH 2/5] test(e2e): address review feedback on dashboard reachability Poll port-bound check (TC-DASH-01) since `openshell forward start --background` forks and returns before the port is bound. Soften body-marker check (TC-DASH-03) to WARN+pass on missing marker so SPA shells don't trip the assertion while still hard-failing on non-HTML responses. Remove dead TIMEOUT_CMD detection. Signed-off-by: Evan Takahashi --- test/e2e/test-dashboard-reachability.sh | 44 +++++++++++-------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh index 2e75c176b2..1b68556060 100755 --- a/test/e2e/test-dashboard-reachability.sh +++ b/test/e2e/test-dashboard-reachability.sh @@ -33,16 +33,6 @@ POLL_ATTEMPTS=30 POLL_INTERVAL=1 LOG_FILE="test-dashboard-reachability-$(date +%Y%m%d-%H%M%S).log" -# macOS uses gtimeout (from coreutils); Linux uses timeout -if command -v gtimeout &>/dev/null; then - TIMEOUT_CMD="gtimeout" -elif command -v timeout &>/dev/null; then - TIMEOUT_CMD="timeout" -else - echo "ERROR: Neither timeout nor gtimeout found. Install coreutils: brew install coreutils" - exit 1 -fi - RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' @@ -171,7 +161,6 @@ preflight() { log "nemoclaw: $(nemoclaw --version 2>/dev/null || echo 'unknown')" log "openshell: $(openshell --version 2>&1 | head -1 || echo 'unknown')" log "dashboard port: $DASHBOARD_PORT" - log "timeout: $TIMEOUT_CMD" if [[ -f "$HOME/.nemoclaw/onboard.lock" ]]; then log "Removing stale onboard lock" @@ -216,15 +205,24 @@ setup_sandbox() { # Confirms the port-forward exists before we try HTTP. Separating this from # the HTTP check gives a clearer failure signal: if the port is not bound at # all, it's a forward-layer problem, not a gateway-process problem. +# +# Polls because `openshell forward start --background` forks and returns +# before the child has actually bound the port (see src/lib/onboard.ts, +# ensureDashboardForward). test_dash_01_port_bound() { log "=== TC-DASH-01: Dashboard port bound on host ===" - if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then - pass "TC-DASH-01: Port $DASHBOARD_PORT is bound" - else - fail "TC-DASH-01: Dashboard port bound" \ - "Nothing listening on $DASHBOARD_PORT — port-forward not established" - fi + local i + for i in $(seq 1 "$POLL_ATTEMPTS"); do + if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then + pass "TC-DASH-01: Port $DASHBOARD_PORT is bound (after ${i}s)" + return + fi + sleep "$POLL_INTERVAL" + done + + fail "TC-DASH-01: Dashboard port bound" \ + "Nothing listening on $DASHBOARD_PORT after ${POLL_ATTEMPTS}s — port-forward not established" } # ── TC-DASH-02: Dashboard returns HTTP 200 ────────────────────────────────── @@ -251,9 +249,9 @@ test_dash_02_http_200() { } # ── TC-DASH-03: Response body signature ───────────────────────────────────── -# Guards against an unrelated process binding the dashboard port. The real -# OpenClaw dashboard is an HTML page identifying itself in the body; any -# other service returning 200 would not match. +# Guards against an unrelated process binding the dashboard port. The +# structural HTML check is the fail gate; the marker check is soft because +# the dashboard may be an SPA whose raw HTML has no visible branding. test_dash_03_body_signature() { log "=== TC-DASH-03: Response body signature ===" @@ -265,19 +263,17 @@ test_dash_03_body_signature() { return fi - # Primary: looks like HTML. if ! echo "$body" | grep -qiE ' contains an OpenClaw / Control UI marker. if echo "$body" | grep -qiE 'openclaw|control[- ]?ui|nemoclaw'; then pass "TC-DASH-03: Response body identifies as OpenClaw dashboard" else - fail "TC-DASH-03: Body signature" \ - "HTML served but no OpenClaw/Control-UI marker in body" + log " ${YELLOW}WARN${NC} No OpenClaw/Control-UI marker in HTML body (may be SPA shell)" + pass "TC-DASH-03: HTML served on $DASHBOARD_PORT" fi } From bbb40e11046ff729284bb5d4e39f4fe2dec757c3 Mon Sep 17 00:00:00 2001 From: Evan Takahashi Date: Mon, 20 Apr 2026 23:52:57 -0700 Subject: [PATCH 3/5] test(e2e): use exact-match grep for sandbox name checks Harden `nemoclaw list` existence checks with `grep -Fqw --` so a sandbox name cannot be satisfied by a substring/prefix match (e.g. `test-dash` matching `test-dashboard-foo`). Addresses CodeRabbit review feedback on PR #2123. Signed-off-by: Evan Takahashi --- test/e2e/test-dashboard-reachability.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh index 1b68556060..6b8f4e3d6d 100755 --- a/test/e2e/test-dashboard-reachability.sh +++ b/test/e2e/test-dashboard-reachability.sh @@ -78,7 +78,7 @@ onboard_sandbox() { return 1 fi - if ! nemoclaw list 2>/dev/null | grep -q "$name"; then + if ! nemoclaw list 2>/dev/null | grep -Fqw -- "$name"; then log " [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard" return 1 fi @@ -136,7 +136,7 @@ install_nemoclaw() { local install_sandbox install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}" - if nemoclaw list 2>/dev/null | grep -q "$install_sandbox"; then + if nemoclaw list 2>/dev/null | grep -Fqw -- "$install_sandbox"; then log "Destroying install sandbox '$install_sandbox'..." nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true fi @@ -167,7 +167,7 @@ preflight() { rm -f "$HOME/.nemoclaw/onboard.lock" fi - if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then + if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then log "Cleaning up leftover sandbox: $SANDBOX" nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true fi @@ -284,7 +284,7 @@ teardown() { log "" log "=== Teardown ===" openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true - if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then + if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then log "Destroying sandbox '$SANDBOX'..." nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true fi From 2ff54ceb0536e1a4e3e48243f5ece0c7af09e2b9 Mon Sep 17 00:00:00 2001 From: Evan Takahashi Date: Tue, 21 Apr 2026 00:00:25 -0700 Subject: [PATCH 4/5] test(e2e): exact-match sandbox names and ephemeral install sandbox Replace `grep -Fqw` existence checks with an awk first-field match helper (`sandbox_exists`) so hyphenated prefixes can no longer false-positive (e.g. `test-dash` matching `test-dash-old`). Also set an ephemeral `NEMOCLAW_SANDBOX_NAME=test-dash-install-$$` before running install.sh so cleanup can never destroy a user's real `my-assistant` sandbox when this script is run locally. Addresses CodeRabbit review feedback on PR #2123. Signed-off-by: Evan Takahashi --- test/e2e/test-dashboard-reachability.sh | 27 ++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh index 6b8f4e3d6d..60cd6f3264 100755 --- a/test/e2e/test-dashboard-reachability.sh +++ b/test/e2e/test-dashboard-reachability.sh @@ -78,13 +78,22 @@ onboard_sandbox() { return 1 fi - if ! nemoclaw list 2>/dev/null | grep -Fqw -- "$name"; then + if ! sandbox_exists "$name"; then log " [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard" return 1 fi return 0 } +# ── Exact sandbox-name match helper ────────────────────────────────────────── +# `nemoclaw list` prints one indented sandbox name per line (optionally followed +# by " *" for the default). `grep -Fw` word-boundary matches still accept +# hyphenated prefixes (e.g. "test-dash" matches "test-dash-old"), so we match +# the first whitespace-delimited field exactly instead. +sandbox_exists() { + nemoclaw list 2>/dev/null | awk -v n="$1" '$1==n { found=1; exit } END { exit !found }' +} + # ── Resolve repo root ──────────────────────────────────────────────────────── SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" if [ -f "$SCRIPT_DIR/../../install.sh" ]; then @@ -98,6 +107,11 @@ fi # ── Install NemoClaw if not present ────────────────────────────────────────── install_nemoclaw() { + # Use an ephemeral, test-only install-sandbox name so cleanup can never + # destroy a user's real 'my-assistant' sandbox when this script is run + # locally outside CI. + local install_sandbox="test-dash-install-$$" + if command -v nemoclaw &>/dev/null; then log "nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')" return 0 @@ -106,7 +120,8 @@ install_nemoclaw() { log "=== Installing NemoClaw via install.sh ===" local install_exit=0 - bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \ + NEMOCLAW_SANDBOX_NAME="$install_sandbox" \ + bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \ 2>&1 | tee -a "$LOG_FILE" || install_exit=$? if [ -f "$HOME/.bashrc" ]; then @@ -134,9 +149,7 @@ install_nemoclaw() { log "nemoclaw installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')" - local install_sandbox - install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}" - if nemoclaw list 2>/dev/null | grep -Fqw -- "$install_sandbox"; then + if sandbox_exists "$install_sandbox"; then log "Destroying install sandbox '$install_sandbox'..." nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true fi @@ -167,7 +180,7 @@ preflight() { rm -f "$HOME/.nemoclaw/onboard.lock" fi - if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then + if sandbox_exists "$SANDBOX"; then log "Cleaning up leftover sandbox: $SANDBOX" nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true fi @@ -284,7 +297,7 @@ teardown() { log "" log "=== Teardown ===" openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true - if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then + if sandbox_exists "$SANDBOX"; then log "Destroying sandbox '$SANDBOX'..." nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true fi From cea7e2a60365d55645041d24754c64b3c60745fa Mon Sep 17 00:00:00 2001 From: Evan Takahashi Date: Tue, 21 Apr 2026 00:13:38 -0700 Subject: [PATCH 5/5] test(e2e): ephemeral main sandbox name, fail fast on forward start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make SANDBOX an ephemeral per-run name (NEMOCLAW_E2E_SANDBOX_NAME override, defaulting to `test-dash-$$`) so cleanup can never destroy a user's unrelated `test-dash` sandbox on local runs — same class of fix already applied to the install sandbox. Harden the defensive forward re-establishment: stop any existing forward first, then hard-fail if `openshell forward start` fails. Previously we swallowed the non-zero exit with `|| log ...`, which meant TC-DASH-0{1,2,3} could pass spuriously against a stale listener from another process. Signed-off-by: Evan Takahashi --- test/e2e/test-dashboard-reachability.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh index 60cd6f3264..001c6bd0f3 100755 --- a/test/e2e/test-dashboard-reachability.sh +++ b/test/e2e/test-dashboard-reachability.sh @@ -26,7 +26,7 @@ if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then fi # ── Config ─────────────────────────────────────────────────────────────────── -SANDBOX="test-dash" +SANDBOX="${NEMOCLAW_E2E_SANDBOX_NAME:-test-dash-$$}" DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-18789}" DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/" POLL_ATTEMPTS=30 @@ -201,10 +201,17 @@ setup_sandbox() { # Defensively re-establish the port-forward. nemoclaw onboard already # starts it, but an earlier crashed run can leave a stale entry and the - # dashboard test is meaningless without a live forward. + # dashboard test is meaningless without a live forward. Stop any existing + # forward first so `forward start` is never a no-op against a stale + # listener — if the fresh start fails we must fail the suite, since + # TC-DASH-0{1,2,3} against a lingering listener would pass spuriously. log "Ensuring port-forward on $DASHBOARD_PORT..." - openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \ - >>"$LOG_FILE" 2>&1 || log " forward start returned non-zero (may already be running)" + openshell forward stop "$DASHBOARD_PORT" >/dev/null 2>&1 || true + if ! openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \ + >>"$LOG_FILE" 2>&1; then + echo -e "${RED}FATAL: failed to establish dashboard forward on $DASHBOARD_PORT${NC}" + exit 1 + fi log "Sandbox '$SANDBOX' onboarded successfully" echo ""