From 6878c7a7aca34ab2da4cf96d0c708130a02eed77 Mon Sep 17 00:00:00 2001
From: Evan Takahashi <evan10takahashi@gmail.com>
Date: Mon, 20 Apr 2026 17:19:25 -0700
Subject: [PATCH 1/5] test(e2e): add dashboard reachability coverage

Adds test/e2e/test-dashboard-reachability.sh validating that the
OpenClaw dashboard is reachable from the host on the forwarded port
after onboard: port bound, HTTP 200 (polled), HTML body signature
(HTML structure plus OpenClaw marker check). Wires it into
nightly-e2e.yaml as a new top-level job with a 30-minute timeout and
adds it to notify-on-failure.

Closes #2100

Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
---
 .github/workflows/nightly-e2e.yaml      |  29 ++
 test/e2e/test-dashboard-reachability.sh | 343 ++++++++++++++++++++++++
 2 files changed, 372 insertions(+)
 create mode 100755 test/e2e/test-dashboard-reachability.sh

diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index be6206d0b7..1630d674f2 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -256,6 +256,34 @@ jobs:
           path: test-sandbox-operations-*.log
           if-no-files-found: ignore
 
+  # ── Dashboard reachability ───────────────────────────────────
+  # Validates the OpenClaw dashboard is reachable from the host on the
+  # forwarded port after onboard: port bound, HTTP 200, HTML body signature.
+  dashboard-reachability-e2e:
+    if: github.repository == 'NVIDIA/NemoClaw'
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Run dashboard reachability E2E test
+        env:
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NEMOCLAW_NON_INTERACTIVE: "1"
+          NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+          NEMOCLAW_POLICY_TIER: "open"
+          GITHUB_TOKEN: ${{ github.token }}
+        run: bash test/e2e/test-dashboard-reachability.sh
+
+      - name: Upload test log on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: dashboard-reachability-test-log
+          path: test-dashboard-reachability-*.log
+          if-no-files-found: ignore
+
   # ── Inference routing (credential isolation + error classification) ──
   # TC-INF-05: real API key absent from sandbox env/process/filesystem
   # TC-INF-06: invalid API key → classified credential error (PR-safe)
@@ -572,6 +600,7 @@ jobs:
         hermes-e2e,
         skip-permissions-e2e,
         sandbox-operations-e2e,
+        dashboard-reachability-e2e,
         inference-routing-e2e,
         network-policy-e2e,
         deployment-services-e2e,
diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
new file mode 100755
index 0000000000..2e75c176b2
--- /dev/null
+++ b/test/e2e/test-dashboard-reachability.sh
@@ -0,0 +1,343 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# =============================================================================
+# test-dashboard-reachability.sh
+# NemoClaw OpenClaw Dashboard Reachability E2E Test
+#
+# Covers: TC-DASH-01 through TC-DASH-03
+# Verifies the host → pod serving chain for the OpenClaw dashboard (default
+# port 18789): port bound on host, HTTP 200 response, and a body-signature
+# check so an unrelated process binding the port cannot silently pass.
+# =============================================================================
+
+set -euo pipefail
+
+# ── Overall timeout (prevents hung CI jobs) ──────────────────────────────────
+if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then
+  export NEMOCLAW_E2E_NO_TIMEOUT=1
+  TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-1800}"
+  if command -v timeout >/dev/null 2>&1; then
+    exec timeout -s TERM "$TIMEOUT_SECONDS" bash "$0" "$@"
+  elif command -v gtimeout >/dev/null 2>&1; then
+    exec gtimeout -s TERM "$TIMEOUT_SECONDS" bash "$0" "$@"
+  fi
+fi
+
+# ── Config ───────────────────────────────────────────────────────────────────
+SANDBOX="test-dash"
+DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-18789}"
+DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/"
+POLL_ATTEMPTS=30
+POLL_INTERVAL=1
+LOG_FILE="test-dashboard-reachability-$(date +%Y%m%d-%H%M%S).log"
+
+# macOS uses gtimeout (from coreutils); Linux uses timeout
+if command -v gtimeout &>/dev/null; then
+  TIMEOUT_CMD="gtimeout"
+elif command -v timeout &>/dev/null; then
+  TIMEOUT_CMD="timeout"
+else
+  echo "ERROR: Neither timeout nor gtimeout found. Install coreutils: brew install coreutils"
+  exit 1
+fi
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ── Counters ─────────────────────────────────────────────────────────────────
+PASS=0
+FAIL=0
+TOTAL=0
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+log() { echo -e "${CYAN}[$(date +%H:%M:%S)]${NC} $*" | tee -a "$LOG_FILE"; }
+pass() {
+  ((PASS += 1))
+  ((TOTAL += 1))
+  echo -e "${GREEN}  PASS${NC} $1" | tee -a "$LOG_FILE"
+}
+fail() {
+  ((FAIL += 1))
+  ((TOTAL += 1))
+  echo -e "${RED}  FAIL${NC} $1 — $2" | tee -a "$LOG_FILE"
+}
+
+# Onboard the test sandbox in non-interactive mode. Returns 0 only if the
+# onboard command exits 0 and the sandbox then appears in nemoclaw list.
+onboard_sandbox() {
+  local name="$1"
+  log "  Onboarding sandbox '$name'..."
+
+  rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
+
+  local onboard_exit=0
+  NEMOCLAW_SANDBOX_NAME="$name" \
+    NEMOCLAW_NON_INTERACTIVE=1 \
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
+    NEMOCLAW_RECREATE_SANDBOX=1 \
+    nemoclaw onboard --non-interactive --yes-i-accept-third-party-software \
+    2>&1 | tee -a "$LOG_FILE" || onboard_exit=$?
+
+  if [[ $onboard_exit -ne 0 ]]; then
+    log "  [onboard_sandbox] nemoclaw onboard exited with code $onboard_exit"
+    return 1
+  fi
+
+  if ! nemoclaw list 2>/dev/null | grep -q "$name"; then
+    log "  [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard"
+    return 1
+  fi
+  return 0
+}
+
+# ── Resolve repo root ────────────────────────────────────────────────────────
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+if [ -f "$SCRIPT_DIR/../../install.sh" ]; then
+  REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+elif [ -f "./install.sh" ]; then
+  REPO_ROOT="$(pwd)"
+else
+  echo "ERROR: Cannot find install.sh — run from the repo root or test/e2e/"
+  exit 1
+fi
+
+# ── Install NemoClaw if not present ──────────────────────────────────────────
+install_nemoclaw() {
+  if command -v nemoclaw &>/dev/null; then
+    log "nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+    return 0
+  fi
+
+  log "=== Installing NemoClaw via install.sh ==="
+
+  local install_exit=0
+  bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
+    2>&1 | tee -a "$LOG_FILE" || install_exit=$?
+
+  if [ -f "$HOME/.bashrc" ]; then
+    # shellcheck source=/dev/null
+    source "$HOME/.bashrc" 2>/dev/null || true
+  fi
+  export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
+  if [ -s "$NVM_DIR/nvm.sh" ]; then
+    # shellcheck source=/dev/null
+    . "$NVM_DIR/nvm.sh"
+  fi
+  if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
+    export PATH="$HOME/.local/bin:$PATH"
+  fi
+
+  if [[ $install_exit -ne 0 ]]; then
+    echo -e "${RED}FATAL: install.sh failed (exit $install_exit)${NC}"
+    exit 1
+  fi
+
+  if ! command -v nemoclaw &>/dev/null; then
+    echo -e "${RED}FATAL: nemoclaw not found on PATH after install${NC}"
+    exit 1
+  fi
+
+  log "nemoclaw installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+
+  local install_sandbox
+  install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}"
+  if nemoclaw list 2>/dev/null | grep -q "$install_sandbox"; then
+    log "Destroying install sandbox '$install_sandbox'..."
+    nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true
+  fi
+}
+
+# ── Pre-flight ───────────────────────────────────────────────────────────────
+preflight() {
+  log "=== Pre-flight checks ==="
+
+  if ! docker info &>/dev/null; then
+    echo -e "${RED}ERROR: Docker is not running.${NC}"
+    exit 1
+  fi
+  log "Docker is running"
+
+  if [[ -z "${NVIDIA_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
+    echo -e "${YELLOW}WARNING: No API key detected.${NC}"
+  fi
+
+  install_nemoclaw
+
+  log "nemoclaw: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+  log "openshell: $(openshell --version 2>&1 | head -1 || echo 'unknown')"
+  log "dashboard port: $DASHBOARD_PORT"
+  log "timeout: $TIMEOUT_CMD"
+
+  if [[ -f "$HOME/.nemoclaw/onboard.lock" ]]; then
+    log "Removing stale onboard lock"
+    rm -f "$HOME/.nemoclaw/onboard.lock"
+  fi
+
+  if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then
+    log "Cleaning up leftover sandbox: $SANDBOX"
+    nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
+  fi
+
+  log "Pre-flight complete"
+  echo ""
+}
+
+# ── Setup: Onboard the test sandbox ─────────────────────────────────────────
+setup_sandbox() {
+  log "=== Setup: Onboarding sandbox '$SANDBOX' ==="
+  log "This may take a few minutes..."
+
+  if ! onboard_sandbox "$SANDBOX"; then
+    echo -e "${RED}FATAL: Onboard failed — sandbox '$SANDBOX' not found.${NC}"
+    exit 1
+  fi
+
+  # Defensively re-establish the port-forward. nemoclaw onboard already
+  # starts it, but an earlier crashed run can leave a stale entry and the
+  # dashboard test is meaningless without a live forward.
+  log "Ensuring port-forward on $DASHBOARD_PORT..."
+  openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \
+    >>"$LOG_FILE" 2>&1 || log "  forward start returned non-zero (may already be running)"
+
+  log "Sandbox '$SANDBOX' onboarded successfully"
+  echo ""
+}
+
+# =============================================================================
+# Test cases
+# =============================================================================
+
+# ── TC-DASH-01: Dashboard port bound on host ─────────────────────────────────
+# Confirms the port-forward exists before we try HTTP. Separating this from
+# the HTTP check gives a clearer failure signal: if the port is not bound at
+# all, it's a forward-layer problem, not a gateway-process problem.
+test_dash_01_port_bound() {
+  log "=== TC-DASH-01: Dashboard port bound on host ==="
+
+  if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+    pass "TC-DASH-01: Port $DASHBOARD_PORT is bound"
+  else
+    fail "TC-DASH-01: Dashboard port bound" \
+      "Nothing listening on $DASHBOARD_PORT — port-forward not established"
+  fi
+}
+
+# ── TC-DASH-02: Dashboard returns HTTP 200 ──────────────────────────────────
+# Polls up to POLL_ATTEMPTS times, sleeping POLL_INTERVAL seconds between
+# attempts (each curl is capped at 5s). The gateway can take several seconds
+# after onboard to start accepting connections, so one shot would be flaky.
+test_dash_02_http_200() {
+  log "=== TC-DASH-02: Dashboard returns HTTP 200 ==="
+
+  local status="" i
+  # curl -w already prints 000 when no response arrives; only default if empty.
+  for ((i = 1; i <= POLL_ATTEMPTS; i++)); do
+    status=$(curl -s -o /dev/null -w '%{http_code}' \
+      --max-time 5 "$DASHBOARD_URL" 2>/dev/null) || status="${status:-000}"
+    if [[ "$status" == "200" ]]; then
+      pass "TC-DASH-02: HTTP 200 after ${i} attempt(s)"
+      return
+    fi
+    sleep "$POLL_INTERVAL"
+  done
+
+  fail "TC-DASH-02: Dashboard HTTP 200" \
+    "Last status after ${POLL_ATTEMPTS} attempts: $status (expected 200)"
+}
+
+# ── TC-DASH-03: Response body signature ─────────────────────────────────────
+# Guards against an unrelated process binding the dashboard port. The real
+# OpenClaw dashboard is an HTML page identifying itself in the body; any
+# other service returning 200 would not match.
+test_dash_03_body_signature() {
+  log "=== TC-DASH-03: Response body signature ==="
+
+  local body
+  body=$(curl -s --max-time 10 "$DASHBOARD_URL" 2>/dev/null || true)
+
+  if [[ -z "$body" ]]; then
+    fail "TC-DASH-03: Body signature" "Empty response body"
+    return
+  fi
+
+  # Primary: looks like HTML.
+  if ! echo "$body" | grep -qiE '<html|<!doctype'; then
+    fail "TC-DASH-03: Body signature" \
+      "Response is not HTML — something else is bound to $DASHBOARD_PORT"
+    return
+  fi
+
+  # Secondary: body or <title> contains an OpenClaw / Control UI marker.
+  if echo "$body" | grep -qiE 'openclaw|control[- ]?ui|nemoclaw'; then
+    pass "TC-DASH-03: Response body identifies as OpenClaw dashboard"
+  else
+    fail "TC-DASH-03: Body signature" \
+      "HTML served but no OpenClaw/Control-UI marker in body"
+  fi
+}
+
+# ── Teardown ─────────────────────────────────────────────────────────────────
+teardown() {
+  # Disable errexit during teardown — cleanup must be best-effort
+  set +e
+  log ""
+  log "=== Teardown ==="
+  openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true
+  if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then
+    log "Destroying sandbox '$SANDBOX'..."
+    nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
+  fi
+  openshell gateway destroy -g nemoclaw 2>/dev/null || true
+  rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
+  log "Teardown complete"
+  set -e
+}
+
+# ── Summary ──────────────────────────────────────────────────────────────────
+summary() {
+  echo ""
+  echo "============================================================"
+  echo "  TEST SUMMARY"
+  echo "============================================================"
+  echo -e "  ${GREEN}PASS: $PASS${NC}"
+  echo -e "  ${RED}FAIL: $FAIL${NC}"
+  echo "  TOTAL: $TOTAL"
+  echo "============================================================"
+  echo "  Log: $LOG_FILE"
+  echo "============================================================"
+  echo ""
+
+  if [[ $FAIL -gt 0 ]]; then
+    exit 1
+  fi
+  exit 0
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+main() {
+  echo ""
+  echo "============================================================"
+  echo "  NemoClaw Dashboard Reachability E2E Test"
+  echo "  $(date)"
+  echo "============================================================"
+  echo ""
+
+  preflight
+  setup_sandbox
+
+  test_dash_01_port_bound
+  test_dash_02_http_200
+  test_dash_03_body_signature
+
+  trap - EXIT
+  teardown
+  summary
+}
+
+trap teardown EXIT
+main "$@"

From 71fe95374497093e6c03847dd80953d7ff41dc11 Mon Sep 17 00:00:00 2001
From: Evan Takahashi <evan10takahashi@gmail.com>
Date: Mon, 20 Apr 2026 17:34:08 -0700
Subject: [PATCH 2/5] test(e2e): address review feedback on dashboard
 reachability

Poll port-bound check (TC-DASH-01) since `openshell forward start
--background` forks and returns before the port is bound. Soften
body-marker check (TC-DASH-03) to WARN+pass on missing marker so
SPA shells don't trip the assertion while still hard-failing on
non-HTML responses. Remove dead TIMEOUT_CMD detection.

Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
---
 test/e2e/test-dashboard-reachability.sh | 44 +++++++++++--------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
index 2e75c176b2..1b68556060 100755
--- a/test/e2e/test-dashboard-reachability.sh
+++ b/test/e2e/test-dashboard-reachability.sh
@@ -33,16 +33,6 @@ POLL_ATTEMPTS=30
 POLL_INTERVAL=1
 LOG_FILE="test-dashboard-reachability-$(date +%Y%m%d-%H%M%S).log"
 
-# macOS uses gtimeout (from coreutils); Linux uses timeout
-if command -v gtimeout &>/dev/null; then
-  TIMEOUT_CMD="gtimeout"
-elif command -v timeout &>/dev/null; then
-  TIMEOUT_CMD="timeout"
-else
-  echo "ERROR: Neither timeout nor gtimeout found. Install coreutils: brew install coreutils"
-  exit 1
-fi
-
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
@@ -171,7 +161,6 @@ preflight() {
   log "nemoclaw: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
   log "openshell: $(openshell --version 2>&1 | head -1 || echo 'unknown')"
   log "dashboard port: $DASHBOARD_PORT"
-  log "timeout: $TIMEOUT_CMD"
 
   if [[ -f "$HOME/.nemoclaw/onboard.lock" ]]; then
     log "Removing stale onboard lock"
@@ -216,15 +205,24 @@ setup_sandbox() {
 # Confirms the port-forward exists before we try HTTP. Separating this from
 # the HTTP check gives a clearer failure signal: if the port is not bound at
 # all, it's a forward-layer problem, not a gateway-process problem.
+#
+# Polls because `openshell forward start --background` forks and returns
+# before the child has actually bound the port (see src/lib/onboard.ts,
+# ensureDashboardForward).
 test_dash_01_port_bound() {
   log "=== TC-DASH-01: Dashboard port bound on host ==="
 
-  if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
-    pass "TC-DASH-01: Port $DASHBOARD_PORT is bound"
-  else
-    fail "TC-DASH-01: Dashboard port bound" \
-      "Nothing listening on $DASHBOARD_PORT — port-forward not established"
-  fi
+  local i
+  for i in $(seq 1 "$POLL_ATTEMPTS"); do
+    if lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+      pass "TC-DASH-01: Port $DASHBOARD_PORT is bound (after ${i}s)"
+      return
+    fi
+    sleep "$POLL_INTERVAL"
+  done
+
+  fail "TC-DASH-01: Dashboard port bound" \
+    "Nothing listening on $DASHBOARD_PORT after ${POLL_ATTEMPTS}s — port-forward not established"
 }
 
 # ── TC-DASH-02: Dashboard returns HTTP 200 ──────────────────────────────────
@@ -251,9 +249,9 @@ test_dash_02_http_200() {
 }
 
 # ── TC-DASH-03: Response body signature ─────────────────────────────────────
-# Guards against an unrelated process binding the dashboard port. The real
-# OpenClaw dashboard is an HTML page identifying itself in the body; any
-# other service returning 200 would not match.
+# Guards against an unrelated process binding the dashboard port. The
+# structural HTML check is the fail gate; the marker check is soft because
+# the dashboard may be an SPA whose raw HTML has no visible branding.
 test_dash_03_body_signature() {
   log "=== TC-DASH-03: Response body signature ==="
 
@@ -265,19 +263,17 @@ test_dash_03_body_signature() {
     return
   fi
 
-  # Primary: looks like HTML.
   if ! echo "$body" | grep -qiE '<html|<!doctype'; then
     fail "TC-DASH-03: Body signature" \
       "Response is not HTML — something else is bound to $DASHBOARD_PORT"
     return
   fi
 
-  # Secondary: body or <title> contains an OpenClaw / Control UI marker.
   if echo "$body" | grep -qiE 'openclaw|control[- ]?ui|nemoclaw'; then
     pass "TC-DASH-03: Response body identifies as OpenClaw dashboard"
   else
-    fail "TC-DASH-03: Body signature" \
-      "HTML served but no OpenClaw/Control-UI marker in body"
+    log "  ${YELLOW}WARN${NC} No OpenClaw/Control-UI marker in HTML body (may be SPA shell)"
+    pass "TC-DASH-03: HTML served on $DASHBOARD_PORT"
   fi
 }
 

From bbb40e11046ff729284bb5d4e39f4fe2dec757c3 Mon Sep 17 00:00:00 2001
From: Evan Takahashi <evan10takahashi@gmail.com>
Date: Mon, 20 Apr 2026 23:52:57 -0700
Subject: [PATCH 3/5] test(e2e): use exact-match grep for sandbox name checks

Harden `nemoclaw list` existence checks with `grep -Fqw --` so a
sandbox name cannot be satisfied by a substring/prefix match
(e.g. `test-dash` matching `test-dashboard-foo`). Addresses
CodeRabbit review feedback on PR #2123.

Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
---
 test/e2e/test-dashboard-reachability.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
index 1b68556060..6b8f4e3d6d 100755
--- a/test/e2e/test-dashboard-reachability.sh
+++ b/test/e2e/test-dashboard-reachability.sh
@@ -78,7 +78,7 @@ onboard_sandbox() {
     return 1
   fi
 
-  if ! nemoclaw list 2>/dev/null | grep -q "$name"; then
+  if ! nemoclaw list 2>/dev/null | grep -Fqw -- "$name"; then
     log "  [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard"
     return 1
   fi
@@ -136,7 +136,7 @@ install_nemoclaw() {
 
   local install_sandbox
   install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}"
-  if nemoclaw list 2>/dev/null | grep -q "$install_sandbox"; then
+  if nemoclaw list 2>/dev/null | grep -Fqw -- "$install_sandbox"; then
     log "Destroying install sandbox '$install_sandbox'..."
     nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true
   fi
@@ -167,7 +167,7 @@ preflight() {
     rm -f "$HOME/.nemoclaw/onboard.lock"
   fi
 
-  if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then
+  if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then
     log "Cleaning up leftover sandbox: $SANDBOX"
     nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
   fi
@@ -284,7 +284,7 @@ teardown() {
   log ""
   log "=== Teardown ==="
   openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true
-  if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then
+  if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then
     log "Destroying sandbox '$SANDBOX'..."
     nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
   fi

From 2ff54ceb0536e1a4e3e48243f5ece0c7af09e2b9 Mon Sep 17 00:00:00 2001
From: Evan Takahashi <evan10takahashi@gmail.com>
Date: Tue, 21 Apr 2026 00:00:25 -0700
Subject: [PATCH 4/5] test(e2e): exact-match sandbox names and ephemeral
 install sandbox

Replace `grep -Fqw` existence checks with an awk first-field match helper
(`sandbox_exists`) so hyphenated prefixes can no longer false-positive
(e.g. `test-dash` matching `test-dash-old`). Also set an ephemeral
`NEMOCLAW_SANDBOX_NAME=test-dash-install-$$` before running install.sh
so cleanup can never destroy a user's real `my-assistant` sandbox when
this script is run locally. Addresses CodeRabbit review feedback on
PR #2123.

Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
---
 test/e2e/test-dashboard-reachability.sh | 27 ++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
index 6b8f4e3d6d..60cd6f3264 100755
--- a/test/e2e/test-dashboard-reachability.sh
+++ b/test/e2e/test-dashboard-reachability.sh
@@ -78,13 +78,22 @@ onboard_sandbox() {
     return 1
   fi
 
-  if ! nemoclaw list 2>/dev/null | grep -Fqw -- "$name"; then
+  if ! sandbox_exists "$name"; then
     log "  [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard"
     return 1
   fi
   return 0
 }
 
+# ── Exact sandbox-name match helper ──────────────────────────────────────────
+# `nemoclaw list` prints one indented sandbox name per line (optionally followed
+# by " *" for the default). `grep -Fw` still accepts hyphenated prefixes (e.g.
+# "test-dash" matches "test-dash-old"), so compare the first field exactly. awk
+# reads all input (no early exit) so pipefail never sees a SIGPIPE'd producer.
+sandbox_exists() {
+  nemoclaw list 2>/dev/null | awk -v n="$1" '$1==n { found=1 } END { exit !found }'
+}
+
 # ── Resolve repo root ────────────────────────────────────────────────────────
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 if [ -f "$SCRIPT_DIR/../../install.sh" ]; then
@@ -98,6 +107,11 @@ fi
 
 # ── Install NemoClaw if not present ──────────────────────────────────────────
 install_nemoclaw() {
+  # Use an ephemeral, test-only install-sandbox name so cleanup can never
+  # destroy a user's real 'my-assistant' sandbox when this script is run
+  # locally outside CI.
+  local install_sandbox="test-dash-install-$$"
+
   if command -v nemoclaw &>/dev/null; then
     log "nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
     return 0
@@ -106,7 +120,8 @@ install_nemoclaw() {
   log "=== Installing NemoClaw via install.sh ==="
 
   local install_exit=0
-  bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
+  NEMOCLAW_SANDBOX_NAME="$install_sandbox" \
+    bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
     2>&1 | tee -a "$LOG_FILE" || install_exit=$?
 
   if [ -f "$HOME/.bashrc" ]; then
@@ -134,9 +149,7 @@ install_nemoclaw() {
 
   log "nemoclaw installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
 
-  local install_sandbox
-  install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}"
-  if nemoclaw list 2>/dev/null | grep -Fqw -- "$install_sandbox"; then
+  if sandbox_exists "$install_sandbox"; then
     log "Destroying install sandbox '$install_sandbox'..."
     nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true
   fi
@@ -167,7 +180,7 @@ preflight() {
     rm -f "$HOME/.nemoclaw/onboard.lock"
   fi
 
-  if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then
+  if sandbox_exists "$SANDBOX"; then
     log "Cleaning up leftover sandbox: $SANDBOX"
     nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
   fi
@@ -284,7 +297,7 @@ teardown() {
   log ""
   log "=== Teardown ==="
   openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true
-  if nemoclaw list 2>/dev/null | grep -Fqw -- "$SANDBOX"; then
+  if sandbox_exists "$SANDBOX"; then
     log "Destroying sandbox '$SANDBOX'..."
     nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
   fi

From cea7e2a60365d55645041d24754c64b3c60745fa Mon Sep 17 00:00:00 2001
From: Evan Takahashi <evan10takahashi@gmail.com>
Date: Tue, 21 Apr 2026 00:13:38 -0700
Subject: [PATCH 5/5] test(e2e): ephemeral main sandbox name, fail fast on
 forward start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make SANDBOX an ephemeral per-run name (NEMOCLAW_E2E_SANDBOX_NAME
override, defaulting to `test-dash-$$`) so cleanup can never destroy
a user's unrelated `test-dash` sandbox on local runs — same class of
fix already applied to the install sandbox.

Harden the defensive forward re-establishment: stop any existing
forward first, then hard-fail if `openshell forward start` fails.
Previously we swallowed the non-zero exit with `|| log ...`, which
meant TC-DASH-0{1,2,3} could pass spuriously against a stale
listener from another process.

Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
---
 test/e2e/test-dashboard-reachability.sh | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
index 60cd6f3264..001c6bd0f3 100755
--- a/test/e2e/test-dashboard-reachability.sh
+++ b/test/e2e/test-dashboard-reachability.sh
@@ -26,7 +26,7 @@ if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then
 fi
 
 # ── Config ───────────────────────────────────────────────────────────────────
-SANDBOX="test-dash"
+SANDBOX="${NEMOCLAW_E2E_SANDBOX_NAME:-test-dash-$$}"
 DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-18789}"
 DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/"
 POLL_ATTEMPTS=30
@@ -201,10 +201,17 @@ setup_sandbox() {
 
   # Defensively re-establish the port-forward. nemoclaw onboard already
   # starts it, but an earlier crashed run can leave a stale entry and the
-  # dashboard test is meaningless without a live forward.
+  # dashboard test is meaningless without a live forward. Stop any existing
+  # forward first so `forward start` is never a no-op against a stale
+  # listener — if the fresh start fails we must fail the suite, since
+  # TC-DASH-0{1,2,3} against a lingering listener would pass spuriously.
   log "Ensuring port-forward on $DASHBOARD_PORT..."
-  openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \
-    >>"$LOG_FILE" 2>&1 || log "  forward start returned non-zero (may already be running)"
+  openshell forward stop "$DASHBOARD_PORT" >/dev/null 2>&1 || true
+  if ! openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \
+    >>"$LOG_FILE" 2>&1; then
+    echo -e "${RED}FATAL: failed to establish dashboard forward on $DASHBOARD_PORT${NC}"
+    exit 1
+  fi
 
   log "Sandbox '$SANDBOX' onboarded successfully"
   echo ""