test(e2e): add dashboard reachability coverage

evantakahashi · evantakahashi · commit 4e4a05d72149 · 2026-04-20T17:19:25.000-07:00
Adds test/e2e/test-dashboard-reachability.sh validating that the OpenClaw dashboard is reachable from the host on the forwarded port after onboard: port bound (polled), HTTP 200 (polled), HTML body signature (soft marker check). Wires it into nightly-e2e.yaml as a new top-level job with a 30-minute timeout and adds it to notify-on-failure. Closes #2100 Signed-off-by: Evan Takahashi <evan10takahashi@gmail.com>
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
@@ -356,6 +356,34 @@ jobs:
           path: test-sandbox-operations-*.log
           if-no-files-found: ignore
 
+  # ── Dashboard reachability ───────────────────────────────────
+  # Validates the OpenClaw dashboard is reachable from the host on the
+  # forwarded port after onboard: port bound, HTTP 200, HTML body signature.
+  dashboard-reachability-e2e:
+    if: github.repository == 'NVIDIA/NemoClaw'
+    runs-on: ubuntu-latest
+    # Hard ceiling for the whole job (checkout + test + log upload).
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Run dashboard reachability E2E test
+        env:
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NEMOCLAW_NON_INTERACTIVE: "1"
+          NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+          NEMOCLAW_POLICY_TIER: "open"
+          # The script wraps itself in a timeout that defaults to 1800s, the
+          # same as the job limit above. Keep it shorter so a hung run ends
+          # as a failed step (and the failure-only log upload below can run)
+          # instead of racing the job-level limit.
+          NEMOCLAW_E2E_TIMEOUT_SECONDS: "1500"
+          GITHUB_TOKEN: ${{ github.token }}
+        run: bash test/e2e/test-dashboard-reachability.sh
+
+      - name: Upload test log on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: dashboard-reachability-test-log
+          # Matches the timestamped LOG_FILE the script writes to its
+          # working directory.
+          path: test-dashboard-reachability-*.log
+          if-no-files-found: ignore
+
   # ── Inference routing (credential isolation + error classification) ──
   # TC-INF-05: real API key absent from sandbox env/process/filesystem
   # TC-INF-06: invalid API key → classified credential error (PR-safe)
@@ -610,8 +638,8 @@ jobs:
 
   notify-on-failure:
     runs-on: ubuntu-latest
-    needs: [cloud-e2e, cloud-experimental-e2e, messaging-providers-e2e, token-rotation-e2e, sandbox-survival-e2e, hermes-e2e, skip-permissions-e2e, sandbox-operations-e2e, inference-routing-e2e, network-policy-e2e, snapshot-commands-e2e, shields-config-e2e, rebuild-openclaw-e2e, upgrade-stale-sandbox-e2e, rebuild-hermes-e2e, gpu-e2e]
-    if: ${{ always() && (needs.cloud-e2e.result == 'failure' || needs.cloud-experimental-e2e.result == 'failure' || needs.messaging-providers-e2e.result == 'failure' || needs.token-rotation-e2e.result == 'failure' || needs.sandbox-survival-e2e.result == 'failure' || needs.hermes-e2e.result == 'failure' || needs.skip-permissions-e2e.result == 'failure' || needs.sandbox-operations-e2e.result == 'failure' || needs.inference-routing-e2e.result == 'failure' || needs.network-policy-e2e.result == 'failure' || needs.snapshot-commands-e2e.result == 'failure' || needs.shields-config-e2e.result == 'failure' || needs.rebuild-openclaw-e2e.result == 'failure' || needs.upgrade-stale-sandbox-e2e.result == 'failure' || needs.rebuild-hermes-e2e.result == 'failure' || needs.gpu-e2e.result == 'failure') }}
+    needs: [cloud-e2e, cloud-experimental-e2e, messaging-providers-e2e, token-rotation-e2e, sandbox-survival-e2e, hermes-e2e, skip-permissions-e2e, sandbox-operations-e2e, dashboard-reachability-e2e, inference-routing-e2e, network-policy-e2e, snapshot-commands-e2e, shields-config-e2e, rebuild-openclaw-e2e, upgrade-stale-sandbox-e2e, rebuild-hermes-e2e, gpu-e2e]
+    if: ${{ always() && (needs.cloud-e2e.result == 'failure' || needs.cloud-experimental-e2e.result == 'failure' || needs.messaging-providers-e2e.result == 'failure' || needs.token-rotation-e2e.result == 'failure' || needs.sandbox-survival-e2e.result == 'failure' || needs.hermes-e2e.result == 'failure' || needs.skip-permissions-e2e.result == 'failure' || needs.sandbox-operations-e2e.result == 'failure' || needs.dashboard-reachability-e2e.result == 'failure' || needs.inference-routing-e2e.result == 'failure' || needs.network-policy-e2e.result == 'failure' || needs.snapshot-commands-e2e.result == 'failure' || needs.shields-config-e2e.result == 'failure' || needs.rebuild-openclaw-e2e.result == 'failure' || needs.upgrade-stale-sandbox-e2e.result == 'failure' || needs.rebuild-hermes-e2e.result == 'failure' || needs.gpu-e2e.result == 'failure') }}
     permissions:
       issues: write
     steps:
diff --git a/test/e2e/test-dashboard-reachability.sh b/test/e2e/test-dashboard-reachability.sh
@@ -0,0 +1,343 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# =============================================================================
+# test-dashboard-reachability.sh
+# NemoClaw OpenClaw Dashboard Reachability E2E Test
+#
+# Covers: TC-DASH-01 through TC-DASH-03
+# Verifies the host → pod serving chain for the OpenClaw dashboard (default
+# port 18789): port bound on host, HTTP 200 response, and a body-signature
+# check so an unrelated process binding the port cannot silently pass.
+# =============================================================================
+
+set -euo pipefail
+
+# ── Overall timeout (prevents hung CI jobs) ──────────────────────────────────
+# On first entry, re-exec this script under timeout (or gtimeout) so the whole
+# run is bounded by NEMOCLAW_E2E_TIMEOUT_SECONDS (default 1800). The exported
+# NEMOCLAW_E2E_NO_TIMEOUT marker stops the re-exec'd copy from wrapping itself
+# again. When neither binary is found, control falls through to the rest of
+# the script.
+if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then
+  export NEMOCLAW_E2E_NO_TIMEOUT=1
+  TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-1800}"
+  for _timeout_bin in timeout gtimeout; do
+    if command -v "$_timeout_bin" >/dev/null 2>&1; then
+      exec "$_timeout_bin" -s TERM "$TIMEOUT_SECONDS" bash "$0" "$@"
+    fi
+  done
+fi
+
+# ── Config ───────────────────────────────────────────────────────────────────
+# Name of the sandbox this test creates and destroys.
+SANDBOX="test-dash"
+# Host port probed for the dashboard; override with NEMOCLAW_DASHBOARD_PORT.
+DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-18789}"
+DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/"
+# Polling budget: number of attempts and seconds slept between them.
+POLL_ATTEMPTS=30
+POLL_INTERVAL=1
+# Timestamped so repeated runs in one directory get separate log files.
+LOG_FILE="test-dashboard-reachability-$(date +%Y%m%d-%H%M%S).log"
+
+# Pick a timeout binary, trying gtimeout (coreutils on macOS) before timeout.
+TIMEOUT_CMD=""
+for _candidate in gtimeout timeout; do
+  if command -v "$_candidate" &>/dev/null; then
+    TIMEOUT_CMD="$_candidate"
+    break
+  fi
+done
+if [[ -z "$TIMEOUT_CMD" ]]; then
+  echo "ERROR: Neither timeout nor gtimeout found. Install coreutils: brew install coreutils"
+  exit 1
+fi
+
+# ANSI colour escapes; expanded by `echo -e` where they are printed.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+# ── Counters ─────────────────────────────────────────────────────────────────
+# Updated by pass()/fail(); reported by summary().
+PASS=0
+FAIL=0
+TOTAL=0
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+log() { echo -e "${CYAN}[$(date +%H:%M:%S)]${NC} $*" | tee -a "$LOG_FILE"; }
+pass() {
+  ((PASS += 1))
+  ((TOTAL += 1))
+  echo -e "${GREEN}  PASS${NC} $1" | tee -a "$LOG_FILE"
+}
+fail() {
+  ((FAIL += 1))
+  ((TOTAL += 1))
+  echo -e "${RED}  FAIL${NC} $1 — $2" | tee -a "$LOG_FILE"
+}
+
+# Onboard the test sandbox in non-interactive mode.
+#   $1  sandbox name
+# Returns 0 when `nemoclaw onboard` exits 0 and the sandbox then appears in
+# `nemoclaw list`; returns 1 otherwise.
+onboard_sandbox() {
+  local name="$1"
+  log "  Onboarding sandbox '$name'..."
+
+  # A lock left behind by an earlier run must not block this onboard.
+  rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
+
+  # With pipefail set, the pipeline reports nemoclaw's status when tee succeeds.
+  local onboard_exit=0
+  NEMOCLAW_SANDBOX_NAME="$name" \
+    NEMOCLAW_NON_INTERACTIVE=1 \
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
+    NEMOCLAW_RECREATE_SANDBOX=1 \
+    nemoclaw onboard --non-interactive --yes-i-accept-third-party-software \
+    2>&1 | tee -a "$LOG_FILE" || onboard_exit=$?
+
+  if [[ $onboard_exit -ne 0 ]]; then
+    log "  [onboard_sandbox] nemoclaw onboard exited with code $onboard_exit"
+    return 1
+  fi
+
+  # Capture the listing before matching. Piping straight into `grep -q` lets
+  # grep exit on the first match; under pipefail a resulting SIGPIPE in the
+  # producer would be misread as "sandbox missing". -F matches the name
+  # literally instead of as a regex.
+  local listing=""
+  if ! listing="$(nemoclaw list 2>/dev/null)" || ! grep -qF -- "$name" <<<"$listing"; then
+    log "  [onboard_sandbox] Sandbox '$name' not found in nemoclaw list after onboard"
+    return 1
+  fi
+  return 0
+}
+
+# ── Resolve repo root ────────────────────────────────────────────────────────
+# install.sh marks the repo root: look two levels above this script first,
+# then in the current working directory. Abort if it is in neither place.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+if [ ! -f "$SCRIPT_DIR/../../install.sh" ] && [ ! -f "./install.sh" ]; then
+  echo "ERROR: Cannot find install.sh — run from the repo root or test/e2e/"
+  exit 1
+fi
+if [ -f "$SCRIPT_DIR/../../install.sh" ]; then
+  REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+else
+  REPO_ROOT="$(pwd)"
+fi
+
+# ── Install NemoClaw if not present ──────────────────────────────────────────
+install_nemoclaw() {
+  if command -v nemoclaw &>/dev/null; then
+    log "nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+    return 0
+  fi
+
+  log "=== Installing NemoClaw via install.sh ==="
+
+  local install_exit=0
+  bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
+    2>&1 | tee -a "$LOG_FILE" || install_exit=$?
+
+  if [ -f "$HOME/.bashrc" ]; then
+    # shellcheck source=/dev/null
+    source "$HOME/.bashrc" 2>/dev/null || true
+  fi
+  export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
+  if [ -s "$NVM_DIR/nvm.sh" ]; then
+    # shellcheck source=/dev/null
+    . "$NVM_DIR/nvm.sh"
+  fi
+  if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
+    export PATH="$HOME/.local/bin:$PATH"
+  fi
+
+  if [[ $install_exit -ne 0 ]]; then
+    echo -e "${RED}FATAL: install.sh failed (exit $install_exit)${NC}"
+    exit 1
+  fi
+
+  if ! command -v nemoclaw &>/dev/null; then
+    echo -e "${RED}FATAL: nemoclaw not found on PATH after install${NC}"
+    exit 1
+  fi
+
+  log "nemoclaw installed: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+
+  local install_sandbox
+  install_sandbox="${NEMOCLAW_SANDBOX_NAME:-my-assistant}"
+  if nemoclaw list 2>/dev/null | grep -q "$install_sandbox"; then
+    log "Destroying install sandbox '$install_sandbox'..."
+    nemoclaw "$install_sandbox" destroy --yes 2>/dev/null || true
+  fi
+}
+
+# ── Pre-flight ───────────────────────────────────────────────────────────────
+preflight() {
+  log "=== Pre-flight checks ==="
+
+  if ! docker info &>/dev/null; then
+    echo -e "${RED}ERROR: Docker is not running.${NC}"
+    exit 1
+  fi
+  log "Docker is running"
+
+  if [[ -z "${NVIDIA_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
+    echo -e "${YELLOW}WARNING: No API key detected.${NC}"
+  fi
+
+  install_nemoclaw
+
+  log "nemoclaw: $(nemoclaw --version 2>/dev/null || echo 'unknown')"
+  log "openshell: $(openshell --version 2>&1 | head -1 || echo 'unknown')"
+  log "dashboard port: $DASHBOARD_PORT"
+  log "timeout: $TIMEOUT_CMD"
+
+  if [[ -f "$HOME/.nemoclaw/onboard.lock" ]]; then
+    log "Removing stale onboard lock"
+    rm -f "$HOME/.nemoclaw/onboard.lock"
+  fi
+
+  if nemoclaw list 2>/dev/null | grep -q "$SANDBOX"; then
+    log "Cleaning up leftover sandbox: $SANDBOX"
+    nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
+  fi
+
+  log "Pre-flight complete"
+  echo ""
+}
+
+# ── Setup: Onboard the test sandbox ─────────────────────────────────────────
+setup_sandbox() {
+  log "=== Setup: Onboarding sandbox '$SANDBOX' ==="
+  log "This may take a few minutes..."
+
+  if ! onboard_sandbox "$SANDBOX"; then
+    echo -e "${RED}FATAL: Onboard failed — sandbox '$SANDBOX' not found.${NC}"
+    exit 1
+  fi
+
+  # Defensively re-establish the port-forward. nemoclaw onboard already
+  # starts it, but an earlier crashed run can leave a stale entry and the
+  # dashboard test is meaningless without a live forward.
+  log "Ensuring port-forward on $DASHBOARD_PORT..."
+  openshell forward start --background "$DASHBOARD_PORT" "$SANDBOX" \
+    >>"$LOG_FILE" 2>&1 || log "  forward start returned non-zero (may already be running)"
+
+  log "Sandbox '$SANDBOX' onboarded successfully"
+  echo ""
+}
+
+# =============================================================================
+# Test cases
+# =============================================================================
+
+# ── TC-DASH-01: Dashboard port bound on host ─────────────────────────────────
+# Confirms the port-forward exists before we try HTTP. Separating this from
+# the HTTP check gives a clearer failure signal: if the port is not bound at
+# all, it's a forward-layer problem, not a gateway-process problem.
+
+# Succeeds when lsof reports a LISTEN socket on the dashboard port, or,
+# failing that (lsof missing or unable to see the socket), when a TCP
+# connect to 127.0.0.1 on that port succeeds.
+_dash_port_listening() {
+  lsof -iTCP:"$DASHBOARD_PORT" -sTCP:LISTEN >/dev/null 2>&1 \
+    || (exec 3<>"/dev/tcp/127.0.0.1/$DASHBOARD_PORT") 2>/dev/null
+}
+
+# Polls up to POLL_ATTEMPTS times, POLL_INTERVAL seconds apart, because the
+# forward is started in the background and may not be listening yet.
+# Records one pass or one fail.
+test_dash_01_port_bound() {
+  log "=== TC-DASH-01: Dashboard port bound on host ==="
+
+  local attempt
+  for ((attempt = 1; attempt <= POLL_ATTEMPTS; attempt++)); do
+    if _dash_port_listening; then
+      pass "TC-DASH-01: Port $DASHBOARD_PORT is bound"
+      return
+    fi
+    # No point sleeping after the final attempt.
+    if ((attempt < POLL_ATTEMPTS)); then
+      sleep "$POLL_INTERVAL"
+    fi
+  done
+
+  fail "TC-DASH-01: Dashboard port bound" \
+    "Nothing listening on $DASHBOARD_PORT — port-forward not established"
+}
+
+# ── TC-DASH-02: Dashboard returns HTTP 200 ──────────────────────────────────
+# Polls the dashboard up to POLL_ATTEMPTS × POLL_INTERVAL seconds. The
+# gateway can take several seconds after onboard to start accepting
+# connections, so a single-shot check would be flaky.
+test_dash_02_http_200() {
+  log "=== TC-DASH-02: Dashboard returns HTTP 200 ==="
+
+  local status=""
+  local i
+  for i in $(seq 1 "$POLL_ATTEMPTS"); do
+    status=$(curl -s -o /dev/null -w '%{http_code}' \
+      --max-time 5 "$DASHBOARD_URL" 2>/dev/null || echo "000")
+    if [[ "$status" == "200" ]]; then
+      pass "TC-DASH-02: HTTP 200 after ${i}s"
+      return
+    fi
+    sleep "$POLL_INTERVAL"
+  done
+
+  fail "TC-DASH-02: Dashboard HTTP 200" \
+    "Last status after ${POLL_ATTEMPTS}s: $status (expected 200)"
+}
+
+# ── TC-DASH-03: Response body signature ─────────────────────────────────────
+# Guards against an unrelated process binding the dashboard port. The real
+# OpenClaw dashboard is an HTML page identifying itself in the body; any
+# other service returning 200 would not match.
+# Records exactly one pass or one fail.
+test_dash_03_body_signature() {
+  log "=== TC-DASH-03: Response body signature ==="
+
+  local body
+  body=$(curl -s --max-time 10 "$DASHBOARD_URL" 2>/dev/null || true)
+
+  if [[ -z "$body" ]]; then
+    fail "TC-DASH-03: Body signature" "Empty response body"
+    return
+  fi
+
+  # The body is fed to grep through a here-string rather than `echo | grep -q`:
+  # with pipefail, grep -q exiting on an early match can SIGPIPE the echo on a
+  # large body and turn a successful match into a failed pipeline.
+
+  # Primary: looks like HTML.
+  if ! grep -qiE '<html|<!doctype' <<<"$body"; then
+    fail "TC-DASH-03: Body signature" \
+      "Response is not HTML — something else is bound to $DASHBOARD_PORT"
+    return
+  fi
+
+  # Secondary: body or <title> contains an OpenClaw / Control UI marker.
+  if grep -qiE 'openclaw|control[- ]?ui|nemoclaw' <<<"$body"; then
+    pass "TC-DASH-03: Response body identifies as OpenClaw dashboard"
+  else
+    fail "TC-DASH-03: Body signature" \
+      "HTML served but no OpenClaw/Control-UI marker in body"
+  fi
+}
+
+# ── Teardown ─────────────────────────────────────────────────────────────────
+# Best-effort cleanup: stops the port-forward, destroys the test sandbox if
+# it is listed, destroys the nemoclaw gateway, and removes the onboard lock.
+# Every step ignores failures. errexit is switched off for the duration and
+# back on at the end.
+teardown() {
+  # Disable errexit during teardown — cleanup must be best-effort
+  set +e
+  log ""
+  log "=== Teardown ==="
+  openshell forward stop "$DASHBOARD_PORT" 2>/dev/null || true
+  # Capture the listing first: pipefail stays on here, and `list | grep -q`
+  # can report a false "not found" (grep exits early, the producer gets
+  # SIGPIPE), which would leak the sandbox. -F matches the name literally.
+  local listing
+  listing="$(nemoclaw list 2>/dev/null || true)"
+  if grep -qF -- "$SANDBOX" <<<"$listing"; then
+    log "Destroying sandbox '$SANDBOX'..."
+    nemoclaw "$SANDBOX" destroy --yes 2>/dev/null || true
+  fi
+  openshell gateway destroy -g nemoclaw 2>/dev/null || true
+  rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
+  log "Teardown complete"
+  set -e
+}
+
+# ── Summary ──────────────────────────────────────────────────────────────────
+summary() {
+  echo ""
+  echo "============================================================"
+  echo "  TEST SUMMARY"
+  echo "============================================================"
+  echo -e "  ${GREEN}PASS: $PASS${NC}"
+  echo -e "  ${RED}FAIL: $FAIL${NC}"
+  echo "  TOTAL: $TOTAL"
+  echo "============================================================"
+  echo "  Log: $LOG_FILE"
+  echo "============================================================"
+  echo ""
+
+  if [[ $FAIL -gt 0 ]]; then
+    exit 1
+  fi
+  exit 0
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+main() {
+  echo ""
+  echo "============================================================"
+  echo "  NemoClaw Dashboard Reachability E2E Test"
+  echo "  $(date)"
+  echo "============================================================"
+  echo ""
+
+  preflight
+  setup_sandbox
+
+  test_dash_01_port_bound
+  test_dash_02_http_200
+  test_dash_03_body_signature
+
+  trap - EXIT
+  teardown
+  summary
+}
+
+trap teardown EXIT
+main "$@"