diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..aa963a5 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,87 @@ +name: E2E Tests + +on: + workflow_dispatch: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + e2e: + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: "22" + cache: "npm" + + - name: Install dependencies + run: npm ci + + - name: Install Playwright browsers + run: npx playwright install chromium --with-deps + + - name: Install OpenClaw CLI + run: npm install -g openclaw + + - name: Configure OpenClaw auth + run: | + mkdir -p ~/.openclaw/agents/main/agent + cat > ~/.openclaw/agents/main/agent/auth-profiles.json << 'AUTHEOF' + { + "profiles": { + "anthropic:default": { + "provider": "anthropic", + "mode": "api_key", + "apiKey": "${{ secrets.ANTHROPIC_API_KEY }}" + } + } + } + AUTHEOF + + - name: Run E2E tests + id: e2e + # Explicit bash runs with `-eo pipefail`; without it the step takes tee's exit code and a failing test run is reported as success. + shell: bash + run: npm run test:e2e 2>&1 | tee e2e-output.txt + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + + - name: Upload screenshots + if: always() + uses: actions/upload-artifact@v4 + with: + name: e2e-screenshots + path: tests/e2e/screenshots/ + if-no-files-found: ignore + + - name: Upload recordings + if: always() + uses: actions/upload-artifact@v4 + with: + name: e2e-recordings + path: tests/e2e/recordings/ + if-no-files-found: ignore + + - name: Upload Playwright report + if: always() + uses: actions/upload-artifact@v4 + with: + name: playwright-report + path: playwright-report/ + if-no-files-found: ignore + + - name: Notify Discord + if: always() + run: bash tests/e2e/notify-discord.sh + env: + DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }} + E2E_STATUS: ${{ steps.e2e.outcome }} + GITHUB_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + COMMIT_SHA: ${{ github.sha }} + COMMIT_REF: ${{ 
github.ref_name }} diff --git a/docs/plans/active/2026-02-17-feat-e2e-recorded-baseline-test-plan.md b/docs/plans/active/2026-02-17-feat-e2e-recorded-baseline-test-plan.md new file mode 100644 index 0000000..b42af7b --- /dev/null +++ b/docs/plans/active/2026-02-17-feat-e2e-recorded-baseline-test-plan.md @@ -0,0 +1,377 @@ +--- +slug: 2026-02-17-feat-e2e-recorded-baseline-test +status: active +phase: plan +plan_mode: execution +detail_level: more +priority: high +owner: sidmohan +--- + +# Add E2E Recorded Baseline Test for FogClaw Plugin + +This Plan is a living document. The sections `Progress`, `Surprises & Discoveries`, `Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work proceeds. This document must be maintained in accordance with `docs/PLANS.md`. + +## Purpose / Big Picture + +FogClaw has 149+ unit and integration tests but zero end-to-end tests against a real OpenClaw instance. After this change, running `npm run test:e2e` launches an automated test that sends PII-laden prompts through a live OpenClaw gateway, verifies that FogClaw's three scanning layers redact the PII, exercises the access request backlog workflow, captures screenshots at each critical step, and saves a video recording of the entire browser session. The recording becomes the release baseline — every new feature must pass this test before shipping. + +A developer sees this working by running `npm run test:e2e` and observing: (1) all assertions pass, (2) a video file appears at `tests/e2e/recordings/`, and (3) screenshots appear at `tests/e2e/screenshots/`. An agent can run the same test autonomously and produce the same evidence. 
+ +## Progress + +- [x] (2026-02-17) P1 [M1]: Install Playwright and `@playwright/test` as devDependencies +- [x] (2026-02-17) P2 [M1]: Create `tests/e2e/` directory structure with fixtures and config +- [x] (2026-02-17) P3 [M1]: Add `test:e2e` npm script to `package.json` +- [x] (2026-02-17) P4 [M1]: Create PII fixture file at `tests/e2e/fixtures/pii-sample.txt` +- [x] (2026-02-17) P5 [M1]: Create Playwright config at `tests/e2e/playwright.config.ts` (with globalSetup/globalTeardown) +- [x] (2026-02-17) P6 [M2]: Write E2E test for plugin verification (setup test) +- [x] (2026-02-17) P7 [M2]: Write E2E test for `before_agent_start` hook — send PII prompt, assert no raw PII in response +- [x] (2026-02-17) P8 [M2]: Write E2E test for `tool_result_persist` hook — trigger file read containing PII, assert redaction +- [x] (2026-02-17) P9 [M2]: Write E2E test for `message_sending` hook — verify outbound message redaction +- [x] (2026-02-17) P10 [M3]: Write E2E test for access request backlog cycle (request → list → approve → reveal) +- [x] (2026-02-17) P11 [M4]: Add Playwright browser automation for Dashboard visual evidence +- [x] (2026-02-17) P12 [M4]: Add video recording configuration and screenshot capture at key steps +- [x] (2026-02-17) P13 [M4]: Validate full E2E suite runs end-to-end with video output +- [x] (2026-02-17) P14 [BONUS]: Isolated OpenClaw profile per run (globalSetup creates profile, globalTeardown destroys it) +- [x] (2026-02-17) P15 [BUGFIX]: Fix tool schema `schema:` → `parameters:` for OpenClaw compatibility +- [x] (2026-02-17) P16 [BUGFIX]: Fix tool handler `handler:` → `execute:` with correct `(toolCallId, params)` signature + +## Surprises & Discoveries + +- OpenClaw plugin tool registration expects `parameters:` (not `schema:`) for the JSON Schema and `execute(toolCallId, params, signal?, onUpdate?)` (not `handler(params)`) for the handler function. 
All 6 FogClaw tools had both wrong, causing "Cannot read properties of undefined (reading 'properties')" errors that prevented the LLM from being called at all (input tokens: 0). +- The `--profile` flag for OpenClaw creates fully isolated state at `~/.openclaw-<profile>/` with separate sessions, config, and credentials. Combined with `plugins.load.paths` config, this enables hermetic E2E test runs. +- Agent sessions are created implicitly via the `--to <recipient>` flag, not via a `sessions create` command. + +## Decision Log + +- Decision: Hybrid CLI + browser architecture. CLI (`openclaw agent --json`) handles prompt/response assertions; Playwright handles Dashboard screenshots and video recording. + Rationale: CLI is faster, returns structured JSON, and avoids browser flakiness for the critical assertion path. Browser adds visual evidence. + Date/Author: 2026-02-17 / sidmohan + +- Decision: Isolated OpenClaw profile per E2E run using `--profile e2e-test`. + Rationale: Prevents test pollution between runs. Each run creates a fresh profile with its own gateway on port 19001, copies auth credentials, loads FogClaw from local build via `plugins.load.paths`, and cleans up after. No dependency on the user's running OpenClaw instance. + Date/Author: 2026-02-17 / sidmohan + +- Decision: Use `@playwright/test` (not Vitest + Playwright) for the E2E test suite. + Rationale: `@playwright/test` has built-in video recording, screenshot capture, parallel workers, and retries. Vitest doesn't natively support these features. The E2E tests live in a separate `tests/e2e/` directory with their own Playwright config, keeping them independent from the unit test suite. + Date/Author: 2026-02-17 / sidmohan + +- Decision: Text/role-based Playwright selectors (not data-testid). + Rationale: OpenClaw Dashboard has no data-testid convention. Spike confirmed text labels like button "Send", link "Config" are stable and descriptive. 
+ Date/Author: 2026-02-17 / sidmohan + +## Outcomes & Retrospective + +(Will be populated at completion.) + +## Context and Orientation + +FogClaw is a TypeScript OpenClaw plugin at `/Users/sidmohan/Projects/datafog/fogclaw`. It scans messages for personally identifiable information (PII) and redacts it before the AI agent sees or responds with it. It uses two detection engines: a regex engine (fast, synchronous, detects emails/phones/SSNs/credit cards) and a GLiNER engine (zero-shot NER via ONNX, detects arbitrary entity types). + +The plugin registers three hooks with OpenClaw — these are the "scanning layers" that intercept messages at different points: + +1. **`before_agent_start`** — scans the user's inbound prompt before the agent processes it. Async. Uses both regex and GLiNER engines. Defined in `src/index.ts`. +2. **`tool_result_persist`** — scans tool outputs (file reads, API responses) before they enter the session transcript. Synchronous, regex-only for speed. Defined in `src/tool-result-handler.ts`. +3. **`message_sending`** — scans the agent's outbound reply before it reaches external channels (Telegram, WhatsApp, etc.). Async, uses both engines. Defined in `src/message-sending-handler.ts`. + +The plugin also registers six tools: `fogclaw_scan` (detect PII), `fogclaw_preview` (preview policy without side effects), `fogclaw_redact` (redact PII and return mapping), `fogclaw_request_access` (request access to a redacted value), `fogclaw_requests` (list pending requests), and `fogclaw_resolve` (approve/deny access requests). + +Redaction uses token strategy by default: `[EMAIL_1]`, `[SSN_1]`, `[PHONE_1]`, etc. The `RedactionMapStore` in `src/backlog.ts` stores placeholder-to-original mappings so approved access requests can reveal the original text. + +OpenClaw is the agent orchestration platform that hosts FogClaw. It runs a WebSocket gateway at `ws://127.0.0.1:18789` with a Dashboard UI at `http://127.0.0.1:18789/`. 
Authentication uses a hash-token in the URL fragment: `http://127.0.0.1:18789/#token=<token>`. The CLI command `openclaw dashboard --no-open` prints the authenticated URL. + +The CLI command `openclaw agent --session-id "<session-id>" --message "<prompt>" --json` sends a prompt to the agent and returns a structured JSON response containing the agent's reply text, tool call details, usage stats, and metadata. `openclaw sessions --json` lists active sessions with their UUIDs. + +FogClaw is currently installed at `~/.openclaw/extensions/fogclaw/` at version 0.1.6. Running `openclaw plugins update fogclaw` updates it to the latest npm version (0.3.0). After updating, a gateway restart may be needed for the new tools to register. + +The existing test suite uses Vitest (`npm run test`) with tests in `tests/`. The E2E tests will use `@playwright/test` in a separate `tests/e2e/` directory with their own config and npm script (`npm run test:e2e`). + +Key source files: +- `src/index.ts` — plugin entry point, exports `fogclaw` plugin with `register(api)` function +- `src/scanner.ts` — `Scanner` class (async, regex + GLiNER) +- `src/redactor.ts` — `redact()` function (synchronous) +- `src/backlog.ts` — `RedactionMapStore`, `BacklogStore` +- `src/backlog-tools.ts` — `fogclaw_request_access`, `fogclaw_requests`, `fogclaw_resolve` tool handlers +- `src/tool-result-handler.ts` — `tool_result_persist` hook handler +- `src/message-sending-handler.ts` — `message_sending` hook handler +- `src/engines/regex.ts` — regex patterns for EMAIL, PHONE, SSN, CREDIT_CARD, IP_ADDRESS, DATE, ZIP_CODE +- `package.json` — scripts, dependencies, version 0.3.0 +- `openclaw.plugin.json` — plugin manifest with config schema and UI hints + +Reference documents: +- Spec: `docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md` +- Spike: `docs/spikes/2026-02-17-feat-e2e-recorded-baseline-test-spike.md` + +## Milestones + +### Milestone 1: Test Infrastructure Setup + +This milestone creates the foundation for E2E testing. 
At the end, the `tests/e2e/` directory exists with a Playwright config, a PII fixture file, and a working `npm run test:e2e` script that runs (and passes with a trivial placeholder test). No actual E2E logic yet — just the skeleton. + +The PII fixture file contains a realistic paragraph with multiple PII types that FogClaw's regex engine detects: an email address, an SSN, a phone number, and a credit card number. This file serves as the input for the `tool_result_persist` hook test — the agent will be asked to read this file, and FogClaw should redact the PII before it enters the session. + +Verification: Run `npm run test:e2e` from the project root. Expect Playwright to start, execute the placeholder test, and exit with code 0. The `tests/e2e/recordings/` directory should be created (empty, since video is configured but no browser test runs yet). + +### Milestone 2: Core CLI-Driven E2E Tests (Three Scanning Layers) + +This milestone implements the three critical E2E assertions that prove FogClaw's scanning layers work against a real OpenClaw instance. All three tests use `openclaw agent --json` to send prompts and parse responses programmatically. No browser automation yet. + +The test flow for each scanning layer: + +For **`before_agent_start`** (inbound prompt scanning): Send a prompt containing raw PII (e.g., "Contact John Smith at john.smith@example.com, SSN 123-45-6789, phone 555-867-5309"). Parse the JSON response. The agent should receive and respond with redacted tokens (`[EMAIL_1]`, `[SSN_1]`, `[PHONE_1]`) — the original PII values should not appear in the response text. + +For **`tool_result_persist`** (file read scanning): Send a prompt asking the agent to read the PII fixture file at a known path. The agent uses the `read` tool to access the file. FogClaw's `tool_result_persist` hook intercepts the tool result and redacts PII before the agent sees it. 
Parse the JSON response and verify the agent's reply references redaction tokens, not the original PII. + +For **`message_sending`** (outbound reply scanning): This layer is harder to assert from CLI alone because it intercepts the reply before delivery to external channels (Telegram, WhatsApp). The test verifies this by checking that the agent's reply text in the JSON response does not contain raw PII. If the agent was asked to repeat specific PII and FogClaw is working, the reply should contain redaction tokens instead. + +Before running these tests, the suite ensures FogClaw is updated to v0.3.0 and verifies it is loaded with all 6 tools registered. This is a setup step that runs once in `beforeAll`. + +Verification: Run `npm run test:e2e`. All three scanning-layer tests pass. The test output shows the JSON responses from OpenClaw with redaction tokens present and raw PII absent. + +### Milestone 3: Access Request Backlog E2E + +This milestone tests the full backlog workflow that was added in FogClaw v0.3.0. The test sends a prompt containing PII, then uses the agent to exercise the three backlog tools in sequence: + +1. **Request access**: Ask the agent to use `fogclaw_request_access` for a specific redacted placeholder (e.g., `[EMAIL_1]`). +2. **List pending**: Ask the agent to use `fogclaw_requests` and verify the request appears in the pending list. +3. **Resolve (approve)**: Ask the agent to use `fogclaw_resolve` to approve the request. +4. **Verify original returned**: After approval, verify the agent's response includes the original PII text that was previously redacted. + +This test proves the entire redaction-request-approval lifecycle works end-to-end. It depends on the `RedactionMapStore` capturing placeholder-to-original mappings during the scanning-layer tests, so it must run in sequence after the scanning tests within the same session. + +Verification: Run `npm run test:e2e`. 
The backlog test passes, showing a successful request → list → approve → reveal cycle. + +### Milestone 4: Browser Visual Evidence and Video Recording + +This milestone adds Playwright browser automation to capture visual evidence of FogClaw working in the OpenClaw Dashboard. The browser tests run after the CLI tests complete, opening the Dashboard to screenshot the results. + +The browser captures: +1. **Dashboard overview** — screenshot showing the OpenClaw Dashboard is accessible and FogClaw is loaded. +2. **Chat view** — screenshot showing the chat session with redaction tokens visible in the conversation (the CLI tests already sent PII prompts, so the chat history should show `[EMAIL_1]`, `[SSN_1]`, etc.). +3. **Full session video** — Playwright records the browser session from the moment it opens the Dashboard through navigation to the chat page. The video is saved to `tests/e2e/recordings/<timestamp>.webm`. + +Screenshots are saved to `tests/e2e/screenshots/` with descriptive filenames (e.g., `dashboard-overview.png`, `chat-redaction-evidence.png`). + +Verification: Run `npm run test:e2e`. After all tests complete, check `tests/e2e/recordings/` for a `.webm` video file and `tests/e2e/screenshots/` for `.png` screenshot files. Open the video — it should show the Dashboard with FogClaw evidence visible. + +## Plan of Work + +The work proceeds in four phases matching the milestones. + +First, install `@playwright/test` as a devDependency. This package provides the Playwright test runner, browser automation APIs, and built-in video/screenshot support. Create the `tests/e2e/` directory with subdirectories for fixtures, recordings, and screenshots. Write a `playwright.config.ts` that configures Chromium, video recording to `tests/e2e/recordings/`, and screenshot capture. Add a `test:e2e` script to `package.json` that runs `npx playwright test --config tests/e2e/playwright.config.ts`. Create the PII fixture file with sample data containing email, SSN, phone, and credit card. 
+ +Second, write the core E2E test file at `tests/e2e/fogclaw-e2e.spec.ts`. This file uses `@playwright/test`'s `test` function but primarily calls `openclaw` CLI commands via Node.js `child_process.execSync` (or Playwright's `test.step` with shell execution). The `beforeAll` hook runs `openclaw plugins update fogclaw` and `openclaw plugins info fogclaw` to ensure v0.3.0 is loaded with 6 tools. Each test sends a prompt via `openclaw agent --session-id --message --json`, parses the JSON response, and asserts on the presence of redaction tokens and absence of raw PII. + +Third, add the backlog workflow test as additional test cases in the same spec file. These tests run sequentially after the scanning-layer tests within the same session, so the `RedactionMapStore` has mappings from earlier prompts. + +Fourth, add browser automation tests in the same or a separate spec file. These use Playwright's `page.goto()` to open the Dashboard URL (obtained from `openclaw dashboard --no-open`), navigate to the chat page, and capture screenshots. The Playwright config's `video: 'on'` setting handles video recording automatically. + +The `.gitignore` at `tests/e2e/recordings/` and `tests/e2e/screenshots/` ensures generated evidence artifacts are not committed to the repo but are available locally after each run. + +## Concrete Steps + +All commands run from the project root `/Users/sidmohan/Projects/datafog/fogclaw` unless otherwise noted. + +**M1: Infrastructure setup** + +Install Playwright: + + npm install --save-dev @playwright/test + +Create directory structure: + + mkdir -p tests/e2e/fixtures tests/e2e/recordings tests/e2e/screenshots + +Add `.gitkeep` files to `recordings/` and `screenshots/` so the directories are tracked but contents are ignored. Add a `.gitignore` in `tests/e2e/` to ignore `recordings/*.webm`, `screenshots/*.png`, and `test-results/`. 
+ +Create the PII fixture at `tests/e2e/fixtures/pii-sample.txt` with content containing at least: an email (`john.smith@example.com`), an SSN (`123-45-6789`), a phone number (`(555) 867-5309`), and a credit card number (`4111-1111-1111-1111`). + +Create the Playwright config at `tests/e2e/playwright.config.ts`. Configure: single Chromium project, `video: 'on'` in `use`, screenshot `'on'`, output dir `tests/e2e/test-results`, timeout of 120 seconds per test (agent responses can be slow). + +Add to `package.json` scripts: + + "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts" + +Expected output of `npm run test:e2e` after M1 with a placeholder test: + + Running 1 test using 1 worker + ✓ placeholder test + 1 passed + +**M2: Core CLI-driven tests** + +Create `tests/e2e/fogclaw-e2e.spec.ts`. In `beforeAll`: +1. Run `openclaw plugins update fogclaw` via `execSync`. +2. Run `openclaw plugins info fogclaw` and parse output to verify version is `0.3.0` and tools include all 6. +3. Run `openclaw sessions --json` to get the session ID. + +Test "before_agent_start redacts PII in inbound prompt": +1. Run `openclaw agent --session-id --message "Contact John Smith at john.smith@example.com, SSN 123-45-6789" --json`. +2. Parse JSON response. +3. Assert `result.payloads[0].text` contains `[EMAIL_1]` or `[SSN_1]`. +4. Assert `result.payloads[0].text` does NOT contain `john.smith@example.com` or `123-45-6789`. + +Test "tool_result_persist redacts PII in file reads": +1. Copy the PII fixture to a known location in the agent's workspace (or use an absolute path the agent can access). +2. Run `openclaw agent --session-id --message "Read the file at and tell me what it contains" --json`. +3. Parse JSON response. +4. Assert the response text contains redaction tokens and does NOT contain raw PII values. + +Test "message_sending redacts PII in outbound replies": +1. 
Run `openclaw agent --session-id --message "Please repeat exactly: my email is alice@widgets.io and SSN is 987-65-4321" --json`. +2. Parse JSON response. +3. Assert the response text does NOT contain `alice@widgets.io` or `987-65-4321`. + +Expected output after M2: + + Running 4 tests using 1 worker + ✓ setup: FogClaw v0.3.0 loaded with 6 tools + ✓ before_agent_start redacts PII in inbound prompt + ✓ tool_result_persist redacts PII in file reads + ✓ message_sending redacts PII in outbound replies + 4 passed + +**M3: Backlog workflow test** + +Add sequential tests after the scanning tests: + +Test "access request backlog cycle": +1. Run `openclaw agent --session-id --message "Use fogclaw_request_access to request access to [EMAIL_1]" --json`. +2. Assert response acknowledges the request. +3. Run `openclaw agent --session-id --message "Use fogclaw_requests to list pending access requests" --json`. +4. Assert response shows a pending request for `[EMAIL_1]`. +5. Run `openclaw agent --session-id --message "Use fogclaw_resolve to approve the request for [EMAIL_1]" --json`. +6. Assert response includes the original email address that was previously redacted. + +Expected output after M3: + + Running 5 tests using 1 worker + ✓ setup: FogClaw v0.3.0 loaded with 6 tools + ✓ before_agent_start redacts PII in inbound prompt + ✓ tool_result_persist redacts PII in file reads + ✓ message_sending redacts PII in outbound replies + ✓ access request backlog cycle + 5 passed + +**M4: Browser evidence and video** + +Add browser tests (can be in a separate spec file or appended): + +Test "Dashboard shows FogClaw evidence": +1. Get Dashboard URL: `openclaw dashboard --no-open` → parse URL. +2. `page.goto(dashboardUrl)` → wait for network idle. +3. Screenshot: `page.screenshot({ path: 'tests/e2e/screenshots/dashboard-overview.png' })`. +4. Navigate to Chat: `page.getByRole('link', { name: 'Chat' }).click()`. +5. Wait for chat to load. +6. 
Screenshot: `page.screenshot({ path: 'tests/e2e/screenshots/chat-redaction-evidence.png' })`. +7. Assert the page contains visible text with redaction tokens (e.g., `[EMAIL_1]`). + +Video recording happens automatically via the Playwright config `video: 'on'` setting. After all tests complete, video files are in `tests/e2e/test-results/` and copied to `tests/e2e/recordings/` with a timestamp name by an `afterAll` hook. + +Expected output after M4 (final): + + Running 6 tests using 1 worker + ✓ setup: FogClaw v0.3.0 loaded with 6 tools + ✓ before_agent_start redacts PII in inbound prompt + ✓ tool_result_persist redacts PII in file reads + ✓ message_sending redacts PII in outbound replies + ✓ access request backlog cycle + ✓ Dashboard shows FogClaw evidence + 6 passed + + Evidence saved: + tests/e2e/screenshots/dashboard-overview.png + tests/e2e/screenshots/chat-redaction-evidence.png + tests/e2e/recordings/2026-02-17T20-30-00.webm + +## Validation and Acceptance + +Run from the project root: + + npm run test:e2e + +All 6 tests pass. Verify: + +1. Exit code is 0. +2. `tests/e2e/screenshots/dashboard-overview.png` exists and shows the OpenClaw Dashboard. +3. `tests/e2e/screenshots/chat-redaction-evidence.png` exists and shows redaction tokens in the chat. +4. `tests/e2e/recordings/` contains a `.webm` video file. Open it — it should show Dashboard navigation from overview to chat with redaction evidence visible. + +To verify scanning assertion correctness, run the E2E test with `--debug` to see the JSON responses: + + npx playwright test --config tests/e2e/playwright.config.ts --debug + +The JSON responses from `openclaw agent` should contain redaction tokens (`[EMAIL_1]`, `[SSN_1]`, `[PHONE_1]`) and should NOT contain raw PII values (`john.smith@example.com`, `123-45-6789`, `555-867-5309`). + +The existing unit test suite must still pass: + + npm run test + +Expect 149+ tests passed, 0 failed. 
+ +## Idempotence and Recovery + +Every step is safe to re-run: + +- `npm install --save-dev @playwright/test` is idempotent — reinstalls if present, installs if missing. +- `openclaw plugins update fogclaw` is idempotent — no-ops if already at the latest version. +- The E2E tests create a new session or reuse an existing one. If a test fails midway, re-running starts fresh assertions against the same or a new session. +- Screenshot files are overwritten on each run; video recordings use timestamped filenames, so recordings from earlier runs are not clobbered. +- If the OpenClaw gateway is not running, the E2E tests fail with a clear connection error. Fix: start the gateway with `openclaw gateway` and re-run. +- If Playwright browsers are not installed, run `npx playwright install chromium` and re-run. + +To clean up and start fresh: + + rm -rf tests/e2e/recordings/* tests/e2e/screenshots/* tests/e2e/test-results/ + +## Artifacts and Notes + +(Will be populated during implementation with evidence snippets.) + +## Interfaces and Dependencies + +New devDependency: `@playwright/test` (latest version, provides `test`, `expect`, `Page`, `BrowserContext` APIs). 
+ +External dependencies (must be running): +- OpenClaw gateway at `ws://127.0.0.1:18789` +- OpenClaw Dashboard at `http://127.0.0.1:18789/` +- FogClaw plugin v0.3.0 installed in `~/.openclaw/extensions/fogclaw/` + +New files created by this plan: +- `tests/e2e/playwright.config.ts` — Playwright test configuration +- `tests/e2e/fogclaw-e2e.spec.ts` — main E2E test file +- `tests/e2e/fixtures/pii-sample.txt` — PII fixture for file-read testing +- `tests/e2e/recordings/.gitkeep` — directory placeholder +- `tests/e2e/screenshots/.gitkeep` — directory placeholder +- `tests/e2e/.gitignore` — ignore generated evidence artifacts + +Modified files: +- `package.json` — add `test:e2e` script and `@playwright/test` devDependency + +CLI tools used in tests (from OpenClaw, not npm): +- `openclaw agent --session-id --message --json` — send prompts, get JSON responses +- `openclaw sessions --json` — list sessions with UUIDs +- `openclaw plugins update fogclaw` — update plugin to latest +- `openclaw plugins info fogclaw` — verify plugin version and tools +- `openclaw dashboard --no-open` — get authenticated Dashboard URL + +## Pull Request + +- pr: (pending) +- branch: (pending) +- commit: (pending) +- ci: (pending) + +## Review Findings + +(Populated by `he-review`.) + +## Verify/Release Decision + +(Populated by `he-verify-release`.) + +## Revision Notes + +- 2026-02-17T20:15:00Z: Initial plan draft. 4 milestones: infrastructure setup, CLI-driven scanning layer tests, backlog workflow test, browser visual evidence with video recording. Hybrid CLI + browser architecture based on spike findings. +- 2026-02-17T21:17:00Z: Implementation complete. All 4 milestones done plus two critical bugfixes discovered during E2E testing: (1) `schema:` → `parameters:` for OpenClaw tool schema compatibility, (2) `handler:` → `execute:` with correct `(toolCallId, params)` signature. Added isolated profile support via globalSetup/globalTeardown. 
6/6 E2E tests pass with real LLM responses, video recordings, and screenshots captured. diff --git a/docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md b/docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md index a1e2731..443f6aa 100644 --- a/docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md +++ b/docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md @@ -80,8 +80,8 @@ Ship as a new test target (`npm run test:e2e`) alongside existing tests. Initial ## Open Questions -- **[spike]** **[Affects R1, R7, R8]** How does OpenClaw's local development setup work? What commands start the Dashboard UI? What port? What authentication? Does `openclaw dashboard` exist? This must be validated before planning. -- **[spike]** **[Affects R7]** Can `agent-browser` drive the OpenClaw Dashboard reliably? What selectors are available? Is there a data-testid convention? +- ~~**[spike]** **[Affects R1, R7, R8]** How does OpenClaw's local development setup work?~~ **RESOLVED**: Gateway runs as LaunchAgent on `ws://127.0.0.1:18789`, Dashboard at `http://127.0.0.1:18789/`, token auth via `openclaw dashboard --no-open`. CLI agent: `openclaw agent --session-id --message --json`. See spike findings. +- ~~**[spike]** **[Affects R7]** Can `agent-browser` drive the OpenClaw Dashboard reliably?~~ **RESOLVED**: Yes. No data-testid selectors, but text/role selectors are stable and descriptive (e.g., button "Send", link "Config", link "Chat"). Playwright automation confirmed working. - **[planning]** **[Affects R6]** What video format and resolution? Playwright supports `.webm` natively. Should we transcode to `.mp4` for broader compatibility? - **[decision]** **[Affects R9]** Should the "before/after" comparison be two separate test runs, or a single run that enables FogClaw mid-test? @@ -121,8 +121,9 @@ Ship as a new test target (`npm run test:e2e`) alongside existing tests. Initial ## Handoff -Spike recommended before planning. 
The OpenClaw Dashboard UI structure, selector availability, and local setup process are unknown and must be validated. After the spike, proceed to he-plan with concrete knowledge of what's automatable. +Spike complete. All `[spike]` questions resolved — Dashboard automation is feasible, CLI agent automation works, plugin update is a single command. Proceed to `he-plan` with concrete knowledge from `docs/spikes/2026-02-17-feat-e2e-recorded-baseline-test-spike.md`. ## Revision Notes - 2026-02-17T19:30:00Z: Initialized spec. E2E recorded baseline test for FogClaw against real OpenClaw instance. Spike recommended to validate Dashboard automation feasibility. Video recording chosen over screenshots for richer evidence with human review loop. +- 2026-02-17T20:00:00Z: Updated from spike findings. Closed `[spike]` open questions (R1/R7/R8). Dashboard uses text/role selectors (no data-testid). CLI agent returns structured JSON. Plugin update is `openclaw plugins update fogclaw`. Handoff updated to proceed to planning. diff --git a/docs/spikes/2026-02-17-feat-e2e-recorded-baseline-test-spike.md b/docs/spikes/2026-02-17-feat-e2e-recorded-baseline-test-spike.md new file mode 100644 index 0000000..21e87b4 --- /dev/null +++ b/docs/spikes/2026-02-17-feat-e2e-recorded-baseline-test-spike.md @@ -0,0 +1,206 @@ +--- +slug: 2026-02-17-feat-e2e-recorded-baseline-test +status: spike-complete +date: 2026-02-17T20:00:00Z +owner: sidmohan +timebox: 2h +--- + +# Spike: OpenClaw Dashboard Automation & E2E Test Feasibility + +## Context + +FogClaw spec `2026-02-17-feat-e2e-recorded-baseline-test-spec.md` requires a fully automated E2E test against a real OpenClaw instance. Before planning, we need to validate that (1) the OpenClaw Dashboard UI can be driven programmatically, (2) CLI agent commands can send prompts and receive structured responses, (3) plugin update/install is automatable, and (4) video recording of the full sequence is feasible. 
+ +The spec's open questions `[spike]` specifically ask: +- How does OpenClaw's local development setup work? (R1, R7, R8) +- Can agent-browser drive the OpenClaw Dashboard reliably? (R7) + +## Validation Goal + +Determine the exact commands, ports, selectors, and interaction patterns needed for E2E automation — or discover it's infeasible and recommend an alternative. + +Spike is complete when we can describe the resulting understanding with enough confidence to proceed into `he-plan`. + +## Approach + +1. Explored the OpenClaw CLI help tree to map all available commands +2. Queried `openclaw status`, `openclaw plugins list/info`, `openclaw sessions --json` for system topology +3. Tested `openclaw agent --session-id --message --json` for programmatic agent interaction +4. Tested `openclaw plugins update fogclaw --dry-run` for plugin update mechanism +5. Installed Playwright Chromium via `npx playwright install chromium` +6. Used `agent-browser` to navigate the Dashboard, snapshot interactive elements, and take screenshots +7. Inventoried all Dashboard navigation, chat interface, and config page elements + +## Findings + +### 1. OpenClaw Local Setup (Confirmed Working) + +| Component | Detail | +|---|---| +| CLI | `/opt/homebrew/bin/openclaw` v2026.2.15 | +| Gateway | Running as macOS LaunchAgent, pid 48042 | +| WebSocket | `ws://127.0.0.1:18789` | +| Dashboard | `http://127.0.0.1:18789/` | +| Auth | Token-based: `http://127.0.0.1:18789/#token=` | +| Token retrieval | `openclaw dashboard --no-open` prints URL with token | +| Active sessions | 1 (session key `agent:main:main`) | +| Model | claude-opus-4-6, 200k context | + +**Evidence:** `openclaw status` returns Connected, 4h uptime. Dashboard accessible and screenshot captured. + +### 2. 
CLI Agent Automation (Confirmed Working) + +**Command pattern:** +```bash +openclaw agent --session-id "<session-id>" --message "<message>" --json --timeout 30 +``` + +**Key behaviors:** +- Returns structured JSON with `status`, `result.payloads[].text`, and `result.meta` +- Requires `--session-id`, `--to`, or `--agent` flag (plain `--message` alone fails) +- Session ID available from `openclaw sessions --json` +- `--json` flag gives machine-parseable output +- Agent metadata includes provider, model, usage stats, and system prompt report +- Tool calls appear in the response (fogclaw_scan, fogclaw_redact visible) + +**Evidence:** Successfully sent a test message and received JSON response with full agent metadata. + +### 3. Plugin Management (Confirmed Working) + +| Command | Purpose | +|---|---| +| `openclaw plugins list` | List all discovered plugins (37 total, 7 loaded) | +| `openclaw plugins info fogclaw` | Show plugin details, version, source, tools | +| `openclaw plugins update fogclaw` | Update from npm (tested dry-run: 0.1.6 → 0.3.0) | +| `openclaw plugins update fogclaw --dry-run` | Preview update without applying | +| `openclaw plugins disable fogclaw` | Disable plugin | +| `openclaw plugins enable fogclaw` | Re-enable plugin | +| `openclaw plugins install <source>` | Install new plugin from npm/path/archive | + +**Current state:** FogClaw v0.1.6 installed at `~/.openclaw/extensions/fogclaw/`. Only 3 tools registered (fogclaw_scan, fogclaw_preview, fogclaw_redact). The v0.3.0 tools (fogclaw_request_access, fogclaw_requests, fogclaw_resolve) are missing. Schema shows `propertiesCount: null` for FogClaw tools, indicating v0.1.6 has schema issues. + +**Security warning:** Plugin update produces `WARNING: Plugin "fogclaw" contains dangerous code patterns: Environment variable access combined with network send`. This is the GLiNER model download code — expected, not malicious. + +### 4.
Dashboard UI Structure (Confirmed Automatable) + +**Navigation sidebar:** +- **Chat**: Chat (main agent interaction) +- **Control**: Overview, Channels, Instances, Sessions, Usage, Cron Jobs +- **Agent**: Agents, Skills, Nodes +- **Settings**: Config, Debug, Logs, Docs + +**Chat page elements (automatable):** +- Session selector (combobox with options like "Main Session") +- Message input textbox ("Message (↩ to send, Shift+↩ for line breaks, paste images)") +- "Send" button +- "New session" button +- Refresh, toggle thinking, toggle focus mode buttons +- Tool call results displayed inline (fogclaw_scan, fogclaw_redact visible with expandable "View") +- "Copy as markdown" buttons on tool outputs + +**Config page elements (automatable):** +- Settings category sidebar: All Settings, Environment, Updates, Agents, Authentication, Channels, Messages, Commands, Hooks, Skills, Tools, Gateway, Setup Wizard, etc. +- Search settings input +- Form/Raw toggle (Form view for structured editing, Raw for JSON) +- Reload, Save, Apply, Update buttons +- Settings rendered as form fields with labels + +**Selector strategy:** +- **No `data-testid` attributes** found in the Dashboard UI +- Text-based selectors work well: buttons have clear text labels ("Send", "New session", "Overview") +- Role-based selectors available via Playwright `getByRole()` +- Link text selectors for navigation: "Chat", "Config", "Overview" +- Input fields identifiable by placeholder text: "Search settings...", "Message (↩ to send...)" + +### 5. Video Recording (Confirmed Feasible) + +- Playwright Chromium v1208 (Chrome 145.0.7632.6) installed successfully +- FFmpeg v1011 installed for video encoding +- `agent-browser record start demo.webm` available for recording sessions +- Playwright's built-in `video: 'on'` config also available for test-level recording +- Both `.webm` native and `.mp4` transcode paths viable + +### 6. 
Hooks System (Separate from Plugin Hooks) + +The Dashboard Config > Hooks section shows OpenClaw's internal webhook/event hook configuration (agent IDs, session key prefixes, etc.). This is **separate from** FogClaw's plugin hooks (`before_agent_start`, `tool_result_persist`, `message_sending`). FogClaw's hooks are registered through the plugin system, not the hooks config. The Dashboard currently shows 4 bundled hooks (boot-md, bootstrap-extra-files, command-logger, session-memory). + +## Decisions + +- Decision: Use `openclaw agent --session-id --message --json` for CLI-driven agent interaction in E2E tests. + Rationale: Returns structured JSON, supports session targeting, and includes tool call metadata. No browser needed for prompt/response validation. + +- Decision: Use text and role-based Playwright selectors (not data-testid) for Dashboard automation. + Rationale: Dashboard has no data-testid convention. Text labels are descriptive and stable (e.g., button "Send", link "Config"). If selectors break on OpenClaw updates, we update selectors — this is documented risk. + +- Decision: Update FogClaw to v0.3.0 as a prerequisite step in E2E test setup. + Rationale: v0.1.6 is missing 3 tools and has schema issues. `openclaw plugins update fogclaw` handles this cleanly. + +- Decision: Use `agent-browser`/Playwright headless for CI, headed for local debugging. + Rationale: Playwright supports both modes natively. Video recording works in both. + +- Decision: Use hash-token auth pattern (`#token=`) for Dashboard access in E2E tests. + Rationale: `openclaw dashboard --no-open` returns the full authenticated URL. No manual login flow needed. + +## Recommendation + +**Proceed to `he-plan`.** All validation goals are met: + +1. OpenClaw is running locally and accessible programmatically +2. CLI agent automation works with structured JSON output +3. Dashboard UI is automatable with Playwright (text/role selectors) +4. Plugin update is a single command +5. 
Video recording infrastructure is in place + +The E2E test architecture should be: +- **CLI-first**: Use `openclaw agent` for prompt sending and response validation (fast, reliable, JSON-parseable) +- **Browser for Dashboard verification**: Use Playwright for plugin visibility, config verification, and visual evidence +- **Hybrid recording**: Record the Playwright browser session for visual evidence; parse CLI JSON for programmatic assertions + +### Suggested Test Flow + +1. `openclaw plugins update fogclaw` — ensure v0.3.0 +2. `openclaw sessions --json` — get session info +3. Playwright opens Dashboard → verify FogClaw in tools list +4. CLI sends PII prompt → parse JSON response → assert redaction tokens present +5. CLI sends file-read trigger → assert tool_result_persist redaction +6. CLI sends message → assert outbound message_sending redaction +7. CLI exercises backlog tools (request_access, requests, resolve) +8. Playwright captures final Dashboard state +9. Video saved to `tests/e2e/recordings/` + +## Impact on Upstream Docs + +Spec updates needed in `docs/specs/2026-02-17-feat-e2e-recorded-baseline-test-spec.md`: + +- **Open Questions**: Close the `[spike]` questions (R1/R7/R8) — all answered +- **R1**: Confirm 6 tools after v0.3.0 update (was uncertain about registration) +- **R7**: Confirm agent-browser/Playwright can drive Dashboard (feasible with text selectors) +- **R8**: Refine selector strategy — text/role based, no data-testid +- **Constraints**: Add note about FogClaw v0.3.0 prerequisite +- **Risks**: Downgrade "OpenClaw Dashboard UI instability" risk — selectors are text-based and reasonably stable + +## Spike Code + +- worktree: n/a (spike was exploratory, no prototype code) +- branch: main +- prototypes: n/a +- variants: n/a +- reusable: no +- screenshots: `/tmp/openclaw-overview.png`, `/tmp/openclaw-config.png`, `/tmp/openclaw-chat.png` + +## Remaining Unknowns + +1. 
**`tool_result_persist` triggering from CLI**: How to make the agent read a file containing PII via CLI prompt. Likely: include a prompt like "Read the file at /path/to/pii-fixture.txt" and the agent uses the `read` tool. +2. **`message_sending` hook assertion from CLI**: The `--json` response may not distinguish between pre-hook and post-hook content. May need to check Dashboard chat or gateway logs for hook evidence. +3. **Gateway restart after plugin update**: Does `openclaw plugins update fogclaw` require a gateway restart for new tools to register? Likely yes — need to verify during implementation. +4. **Session isolation for E2E**: Should E2E tests create a new session (`New session` button or `--to` flag) to avoid polluting existing sessions? Almost certainly yes. + +## Time Spent + +- budget: 2h +- actual: ~45m + +## Revision Notes + +- 2026-02-17T20:00:00Z: Initialized spike. Explored OpenClaw CLI, Dashboard UI, and automation feasibility. All validation goals met — proceed to planning. 
diff --git a/package-lock.json b/package-lock.json index a58f329..a8f159e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@datafog/fogclaw", - "version": "0.2.0", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@datafog/fogclaw", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { "gliner": "^0.0.19", @@ -15,6 +15,7 @@ "sharp": "0.34.5" }, "devDependencies": { + "@playwright/test": "^1.58.2", "@types/node": "^22.0.0", "typescript": "^5.7.0", "vitest": "^2.1.0" @@ -917,6 +918,22 @@ "dev": true, "license": "MIT" }, + "node_modules/@playwright/test": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.2.tgz", + "integrity": "sha512-akea+6bHYBBfA9uQqSYmlJXn61cTa+jbO87xVLCWbTqbWadRVmhxlXATaOjOgcBaWU4ePo0wB41KMFv3o35IXA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -1829,6 +1846,53 @@ "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", "license": "MIT" }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": 
"sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright/node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", diff --git a/package.json b/package.json index b995ece..34709d9 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "test": "vitest run", "test:watch": "vitest", "test:plugin-smoke": "vitest run tests/plugin-smoke.test.ts", + "test:e2e": "npx playwright test --config tests/e2e/playwright.config.ts", "lint": "tsc --noEmit" }, "dependencies": { @@ -25,6 +26,7 @@ "sharp": "0.34.5" }, "devDependencies": { + "@playwright/test": "^1.58.2", "@types/node": "^22.0.0", "typescript": "^5.7.0", "vitest": "^2.1.0" diff --git a/src/backlog-tools.ts b/src/backlog-tools.ts index 6d36a47..2b18627 100644 --- a/src/backlog-tools.ts +++ b/src/backlog-tools.ts @@ -39,13 +39,13 @@ export function createRequestAccessHandler( backlog: BacklogStore, config: FogClawConfig, logger?: Logger, -): (params: { +): (toolCallId: string, params: { placeholder: string; entity_type: string; reason: string; context?: string; }) => ToolResponse { - return (params) => { + return (_toolCallId, params) => { try { const request = backlog.createRequest( params.placeholder, @@ -85,8 +85,8 @@ export function createRequestsListHandler( backlog: BacklogStore, config: FogClawConfig, logger?: Logger, -): (params: { status?: string }) 
=> ToolResponse { - return (params) => { +): (toolCallId: string, params: { status?: string }) => ToolResponse { + return (_toolCallId, params) => { const validStatuses = ["pending", "approved", "denied", "follow_up"]; const statusFilter = params.status as | "pending" @@ -151,13 +151,13 @@ export function createResolveHandler( backlog: BacklogStore, config: FogClawConfig, logger?: Logger, -): (params: { +): (toolCallId: string, params: { request_id?: string; request_ids?: string[]; action: string; message?: string; }) => ToolResponse { - return (params) => { + return (_toolCallId, params) => { const validActions = ["approve", "deny", "follow_up"]; if (!validActions.includes(params.action)) { return errorResponse( diff --git a/src/index.ts b/src/index.ts index b44db77..6ba0a24 100644 --- a/src/index.ts +++ b/src/index.ts @@ -196,7 +196,7 @@ const fogclaw = { id: "fogclaw_scan", description: "Scan text for PII and custom entities. Returns detected entities with types, positions, and confidence scores.", - schema: { + parameters: { type: "object", properties: { text: { @@ -212,13 +212,11 @@ const fogclaw = { }, required: ["text"], }, - handler: async ({ - text, - custom_labels, - }: { - text: string; - custom_labels?: string[]; - }) => { + execute: async ( + _toolCallId: string, + params: { text: string; custom_labels?: string[] }, + ) => { + const { text, custom_labels } = params; const result = await scanner.scan(text, custom_labels); return { content: [ @@ -250,7 +248,7 @@ const fogclaw = { id: "fogclaw_preview", description: "Preview which entities will be blocked, warned, or redacted and the redacted message, without changing runtime behavior.", - schema: { + parameters: { type: "object", properties: { text: { @@ -271,15 +269,15 @@ const fogclaw = { }, required: ["text"], }, - handler: async ({ - text, - strategy, - custom_labels, - }: { - text: string; - strategy?: "token" | "mask" | "hash"; - custom_labels?: string[]; - }) => { + execute: async ( + _toolCallId: 
string, + params: { + text: string; + strategy?: "token" | "mask" | "hash"; + custom_labels?: string[]; + }, + ) => { + const { text, strategy, custom_labels } = params; const result = await scanner.scan(text, custom_labels); const plan = buildGuardrailPlan(result.entities, config); const summary = planToSummary(plan); @@ -332,7 +330,7 @@ const fogclaw = { id: "fogclaw_redact", description: "Scan and redact PII/custom entities from text. Returns sanitized text with entities replaced.", - schema: { + parameters: { type: "object", properties: { text: { @@ -353,15 +351,15 @@ const fogclaw = { }, required: ["text"], }, - handler: async ({ - text, - strategy, - custom_labels, - }: { - text: string; - strategy?: "token" | "mask" | "hash"; - custom_labels?: string[]; - }) => { + execute: async ( + _toolCallId: string, + params: { + text: string; + strategy?: "token" | "mask" | "hash"; + custom_labels?: string[]; + }, + ) => { + const { text, strategy, custom_labels } = params; const result = await scanner.scan(text, custom_labels); const redacted = redact( text, @@ -394,7 +392,7 @@ const fogclaw = { id: "fogclaw_request_access", description: "Request access to redacted PII data. Use when you encounter a redacted placeholder (like [EMAIL_1]) and need the original text to complete a task. A user must review and approve the request.", - schema: { + parameters: { type: "object", properties: { placeholder: { @@ -419,7 +417,7 @@ const fogclaw = { }, required: ["placeholder", "entity_type", "reason"], }, - handler: createRequestAccessHandler(backlogStore, config, api.logger), + execute: createRequestAccessHandler(backlogStore, config, api.logger), }); // --- TOOL: List access requests --- @@ -428,7 +426,7 @@ const fogclaw = { id: "fogclaw_requests", description: "List PII access requests. Use to review pending requests or check for approved/denied responses. 
Filter by status: pending, approved, denied, follow_up.", - schema: { + parameters: { type: "object", properties: { status: { @@ -440,7 +438,7 @@ const fogclaw = { }, required: [], }, - handler: createRequestsListHandler(backlogStore, config, api.logger), + execute: createRequestsListHandler(backlogStore, config, api.logger), }); // --- TOOL: Resolve access request --- @@ -449,7 +447,7 @@ const fogclaw = { id: "fogclaw_resolve", description: 'Resolve a PII access request. Approve to reveal the original text, deny to reject, or follow_up to ask the agent for more context. Use request_id for single or request_ids for batch.', - schema: { + parameters: { type: "object", properties: { request_id: { @@ -476,7 +474,7 @@ const fogclaw = { }, required: ["action"], }, - handler: createResolveHandler(backlogStore, config, api.logger), + execute: createResolveHandler(backlogStore, config, api.logger), }); api.logger?.info( diff --git a/tests/backlog-tools.test.ts b/tests/backlog-tools.test.ts index c1236aa..f4a64de 100644 --- a/tests/backlog-tools.test.ts +++ b/tests/backlog-tools.test.ts @@ -39,7 +39,7 @@ describe("fogclaw_request_access handler", () => { it("creates a request and returns confirmation", () => { const handler = createRequestAccessHandler(backlog, config, logger); - const response = handler({ + const response = handler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "Need to send a follow-up email", @@ -53,7 +53,7 @@ describe("fogclaw_request_access handler", () => { it("emits audit log when auditEnabled", () => { const handler = createRequestAccessHandler(backlog, config, logger); - handler({ + handler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "reason", @@ -71,7 +71,7 @@ describe("fogclaw_request_access handler", () => { it("does not emit audit log when auditEnabled is false", () => { const noAuditConfig = makeConfig({ auditEnabled: false }); const handler = createRequestAccessHandler(backlog, noAuditConfig, logger); 
- handler({ + handler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "reason", @@ -85,8 +85,8 @@ describe("fogclaw_request_access handler", () => { const smallBacklog = new BacklogStore(mapStore, smallConfig.maxPendingRequests); const handler = createRequestAccessHandler(smallBacklog, smallConfig, logger); - handler({ placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "r1" }); - const response = handler({ placeholder: "[SSN_1]", entity_type: "SSN", reason: "r2" }); + handler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "r1" }); + const response = handler("test", { placeholder: "[SSN_1]", entity_type: "SSN", reason: "r2" }); const parsed = parseToolResponse(response); expect(parsed.error).toContain("Maximum pending requests reached"); @@ -94,7 +94,7 @@ describe("fogclaw_request_access handler", () => { it("includes context when provided", () => { const handler = createRequestAccessHandler(backlog, config, logger); - handler({ + handler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "reason", @@ -125,7 +125,7 @@ describe("fogclaw_requests handler", () => { it("returns empty list when no requests", () => { const handler = createRequestsListHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({})); + const parsed = parseToolResponse(handler("test", {})); expect(parsed.requests).toEqual([]); expect(parsed.total).toBe(0); }); @@ -135,7 +135,7 @@ describe("fogclaw_requests handler", () => { backlog.createRequest("[SSN_1]", "SSN", "r2"); const handler = createRequestsListHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({})); + const parsed = parseToolResponse(handler("test", {})); expect(parsed.requests).toHaveLength(2); expect(parsed.filter).toBe("all"); }); @@ -147,11 +147,11 @@ describe("fogclaw_requests handler", () => { const handler = createRequestsListHandler(backlog, config, logger); - const pending = parseToolResponse(handler({ status: "pending" })); 
+ const pending = parseToolResponse(handler("test", { status: "pending" })); expect(pending.requests).toHaveLength(1); expect(pending.requests[0].id).toBe("REQ-2"); - const approved = parseToolResponse(handler({ status: "approved" })); + const approved = parseToolResponse(handler("test", { status: "approved" })); expect(approved.requests).toHaveLength(1); expect(approved.requests[0].id).toBe("REQ-1"); expect(approved.requests[0].original_text).toBe("john@example.com"); @@ -159,7 +159,7 @@ describe("fogclaw_requests handler", () => { it("returns error for invalid status filter", () => { const handler = createRequestsListHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({ status: "invalid" })); + const parsed = parseToolResponse(handler("test", { status: "invalid" })); expect(parsed.error).toContain("Invalid status filter"); }); @@ -168,7 +168,7 @@ describe("fogclaw_requests handler", () => { backlog.resolveRequest("REQ-1", "follow_up", "Why do you need this?"); const handler = createRequestsListHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({ status: "follow_up" })); + const parsed = parseToolResponse(handler("test", { status: "follow_up" })); expect(parsed.requests[0].follow_up_message).toBe("Why do you need this?"); }); @@ -177,7 +177,7 @@ describe("fogclaw_requests handler", () => { backlog.resolveRequest("REQ-1", "deny", "Not authorized"); const handler = createRequestsListHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({ status: "denied" })); + const parsed = parseToolResponse(handler("test", { status: "denied" })); expect(parsed.requests[0].response_message).toBe("Not authorized"); }); }); @@ -203,7 +203,7 @@ describe("fogclaw_resolve handler", () => { it("approves a request and returns original text", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_id: "REQ-1", action: "approve" }), + 
handler("test", { request_id: "REQ-1", action: "approve" }), ); expect(parsed.status).toBe("approved"); expect(parsed.original_text).toBe("john@example.com"); @@ -213,7 +213,7 @@ describe("fogclaw_resolve handler", () => { it("denies a request", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_id: "REQ-1", action: "deny", message: "Not needed" }), + handler("test", { request_id: "REQ-1", action: "deny", message: "Not needed" }), ); expect(parsed.status).toBe("denied"); expect(parsed.message).toBe("Not needed"); @@ -222,7 +222,7 @@ describe("fogclaw_resolve handler", () => { it("sends follow-up question", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ + handler("test", { request_id: "REQ-1", action: "follow_up", message: "Why do you need this email?", @@ -235,7 +235,7 @@ describe("fogclaw_resolve handler", () => { it("returns error for invalid action", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_id: "REQ-1", action: "invalid" }), + handler("test", { request_id: "REQ-1", action: "invalid" }), ); expect(parsed.error).toContain("Invalid action"); }); @@ -243,14 +243,14 @@ describe("fogclaw_resolve handler", () => { it("returns error for unknown request ID", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_id: "REQ-999", action: "approve" }), + handler("test", { request_id: "REQ-999", action: "approve" }), ); expect(parsed.error).toContain("not found"); }); it("returns error when no request_id or request_ids provided", () => { const handler = createResolveHandler(backlog, config, logger); - const parsed = parseToolResponse(handler({ action: "approve" })); + const parsed = parseToolResponse(handler("test", { action: "approve" })); expect(parsed.error).toContain("request_id 
or request_ids must be provided"); }); @@ -258,7 +258,7 @@ describe("fogclaw_resolve handler", () => { backlog.createRequest("[SSN_1]", "SSN", "Need for verification"); const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_ids: ["REQ-1", "REQ-2"], action: "approve" }), + handler("test", { request_ids: ["REQ-1", "REQ-2"], action: "approve" }), ); expect(parsed.results).toHaveLength(2); expect(parsed.results[0].status).toBe("approved"); @@ -270,7 +270,7 @@ describe("fogclaw_resolve handler", () => { it("batch resolve returns errors for invalid IDs", () => { const handler = createResolveHandler(backlog, config, logger); const parsed = parseToolResponse( - handler({ request_ids: ["REQ-1", "REQ-999"], action: "approve" }), + handler("test", { request_ids: ["REQ-1", "REQ-999"], action: "approve" }), ); expect(parsed.results[0].status).toBe("approved"); expect(parsed.results[1].error).toContain("not found"); @@ -278,7 +278,7 @@ describe("fogclaw_resolve handler", () => { it("emits audit log on resolve", () => { const handler = createResolveHandler(backlog, config, logger); - handler({ request_id: "REQ-1", action: "approve" }); + handler("test", { request_id: "REQ-1", action: "approve" }); const auditCalls = logger.info.mock.calls.filter( (call: string[]) => call[0].includes("access_request_resolved"), @@ -291,7 +291,7 @@ describe("fogclaw_resolve handler", () => { it("emits audit for each request in batch resolve", () => { backlog.createRequest("[SSN_1]", "SSN", "r2"); const handler = createResolveHandler(backlog, config, logger); - handler({ request_ids: ["REQ-1", "REQ-2"], action: "deny" }); + handler("test", { request_ids: ["REQ-1", "REQ-2"], action: "deny" }); const auditCalls = logger.info.mock.calls.filter( (call: string[]) => call[0].includes("access_request_resolved"), @@ -311,7 +311,7 @@ describe("full lifecycle integration", () => { // Step 1: Agent submits request const requestHandler = 
createRequestAccessHandler(backlog, config, logger); const requestResponse = parseToolResponse( - requestHandler({ + requestHandler("test", { placeholder: "[EMAIL_1]", entity_type: "EMAIL", reason: "Need to send follow-up", @@ -322,21 +322,21 @@ describe("full lifecycle integration", () => { // Step 2: User lists pending requests const listHandler = createRequestsListHandler(backlog, config, logger); - const listResponse = parseToolResponse(listHandler({ status: "pending" })); + const listResponse = parseToolResponse(listHandler("test", { status: "pending" })); expect(listResponse.requests).toHaveLength(1); expect(listResponse.requests[0].reason).toBe("Need to send follow-up"); // Step 3: User approves const resolveHandler = createResolveHandler(backlog, config, logger); const resolveResponse = parseToolResponse( - resolveHandler({ request_id: "REQ-1", action: "approve" }), + resolveHandler("test", { request_id: "REQ-1", action: "approve" }), ); expect(resolveResponse.status).toBe("approved"); expect(resolveResponse.original_text).toBe("john@example.com"); // Step 4: Agent checks approved requests const approvedResponse = parseToolResponse( - listHandler({ status: "approved" }), + listHandler("test", { status: "approved" }), ); expect(approvedResponse.requests).toHaveLength(1); expect(approvedResponse.requests[0].original_text).toBe("john@example.com"); @@ -354,14 +354,14 @@ describe("full lifecycle integration", () => { const resolveHandler = createResolveHandler(backlog, config, logger); // Agent requests - requestHandler({ + requestHandler("test", { placeholder: "[SSN_1]", entity_type: "SSN", reason: "Need for identity verification", }); // User asks follow-up - resolveHandler({ + resolveHandler("test", { request_id: "REQ-1", action: "follow_up", message: "What specific verification requires the SSN?", @@ -369,7 +369,7 @@ describe("full lifecycle integration", () => { // Agent checks follow-up const followUpList = parseToolResponse( - listHandler({ status: 
"follow_up" }), + listHandler("test", { status: "follow_up" }), ); expect(followUpList.requests[0].follow_up_message).toBe( "What specific verification requires the SSN?", @@ -377,7 +377,7 @@ describe("full lifecycle integration", () => { // User approves after receiving context const resolved = parseToolResponse( - resolveHandler({ request_id: "REQ-1", action: "approve" }), + resolveHandler("test", { request_id: "REQ-1", action: "approve" }), ); expect(resolved.original_text).toBe("123-45-6789"); }); diff --git a/tests/e2e/.gitignore b/tests/e2e/.gitignore new file mode 100644 index 0000000..93e6316 --- /dev/null +++ b/tests/e2e/.gitignore @@ -0,0 +1,5 @@ +recordings/*.webm +recordings/*.mp4 +screenshots/*.png +test-results/ +.gateway-pid diff --git a/tests/e2e/fixtures/pii-sample.txt b/tests/e2e/fixtures/pii-sample.txt new file mode 100644 index 0000000..85a2484 --- /dev/null +++ b/tests/e2e/fixtures/pii-sample.txt @@ -0,0 +1,10 @@ +Employee Record — Confidential + +Name: John Smith +Email: john.smith@example.com +Phone: (555) 867-5309 +Social Security Number: 123-45-6789 +Credit Card: 4111-1111-1111-1111 + +Please contact John at john.smith@example.com regarding the quarterly review. +His backup number is 555-867-5309. 
diff --git a/tests/e2e/fogclaw-e2e.spec.ts b/tests/e2e/fogclaw-e2e.spec.ts
new file mode 100644
index 0000000..2b5cba2
--- /dev/null
+++ b/tests/e2e/fogclaw-e2e.spec.ts
@@ -0,0 +1,225 @@
+import { test, expect } from "@playwright/test";
+import { execFileSync } from "node:child_process";
+import { cpSync, mkdirSync, existsSync, readdirSync } from "node:fs";
+import { join, resolve } from "node:path";
+
+const PROJECT_ROOT = resolve(import.meta.dirname, "../..");
+const SCREENSHOTS_DIR = join(PROJECT_ROOT, "tests/e2e/screenshots");
+const RECORDINGS_DIR = join(PROJECT_ROOT, "tests/e2e/recordings");
+const PII_FIXTURE = join(PROJECT_ROOT, "tests/e2e/fixtures/pii-sample.txt");
+
+const PROFILE_NAME = "e2e-test";
+const GATEWAY_PORT = 19001;
+const GATEWAY_TOKEN = "e2e-test-token";
+
+// Use a stable dummy phone number to create/reuse a session in the isolated profile
+const E2E_PHONE = "+15550001234";
+
+// Raw PII values that must NOT appear in agent responses when FogClaw is active
+const RAW_PII = [
+  "john.smith@example.com",
+  "123-45-6789",
+  "(555) 867-5309",
+  "555-867-5309",
+  "4111-1111-1111-1111",
+];
+
+// Redaction tokens that SHOULD appear when FogClaw redacts
+const REDACTION_TOKEN_PATTERN = /\[(EMAIL|SSN|PHONE|CREDIT_CARD)_\d+\]/;
+
+let dashboardUrl: string;
+
+/**
+ * Runs the OpenClaw CLI against the isolated E2E profile and returns trimmed stdout.
+ *
+ * Arguments are passed as an argv array (no shell), so prompt text containing
+ * quotes, `$`, or backticks reaches the CLI verbatim.
+ */
+function openclaw(args: readonly string[]): string {
+  return execFileSync("openclaw", ["--profile", PROFILE_NAME, ...args], {
+    encoding: "utf-8",
+    timeout: 120_000,
+    env: { ...process.env, NO_COLOR: "1" },
+  }).trim();
+}
+
+/**
+ * Sends one prompt to the agent via `openclaw agent --json` and flattens the
+ * reply payloads into a single newline-joined string.
+ */
+function agentMessage(message: string): { status: string; text: string; raw: unknown } {
+  const raw = JSON.parse(
+    openclaw(["agent", "--to", E2E_PHONE, "--message", message, "--json", "--timeout", "90"]),
+  );
+  const text =
+    raw?.result?.payloads?.map((p: { text: string }) => p.text).join("\n") ?? "";
+  return { status: raw?.status ?? "unknown", text, raw };
+}
+
+/** Fails the current test if any value from RAW_PII appears verbatim in `text`. */
+function assertNoPiiLeaked(text: string, context: string) {
+  for (const pii of RAW_PII) {
+    expect(text, `${context}: raw PII "${pii}" leaked through`).not.toContain(
+      pii,
+    );
+  }
+}
+
+// ── Setup & Scanning Tests ──────────────────────────────────────────────
+
+test.describe.serial("FogClaw E2E Baseline", () => {
+  test("setup: FogClaw loaded with tools", async () => {
+    // Verify FogClaw is loaded (global-setup already handled profile creation)
+    const info = openclaw(["plugins", "info", "fogclaw"]);
+    console.log("Plugin info:", info);
+    expect(info).toContain("fogclaw_scan");
+    expect(info).toContain("fogclaw_redact");
+
+    // Get Dashboard URL
+    try {
+      const dashOutput = openclaw(["dashboard", "--no-open"]);
+      dashboardUrl = dashOutput
+        .split("\n")
+        .find((l: string) => l.includes("127.0.0.1") || l.includes("localhost"))
+        ?.replace(/^.*?(https?:\/\/)/, "$1")
+        .trim() ?? "";
+    } catch {
+      // Fallback to constructed URL if dashboard command fails on isolated profile
+      dashboardUrl = "";
+    }
+
+    // Fallback to constructed URL with known token
+    if (!dashboardUrl) {
+      dashboardUrl = `http://127.0.0.1:${GATEWAY_PORT}/#token=${GATEWAY_TOKEN}`;
+    }
+    console.log("Dashboard URL:", dashboardUrl);
+  });
+
+  // ── Three Scanning Layers ────────────────────────────────────────────
+
+  test("before_agent_start: redacts PII in inbound prompt", async () => {
+    const { text, status } = agentMessage(
+      "I need to contact John Smith at john.smith@example.com about SSN 123-45-6789. What should I do?",
+    );
+    console.log("Agent response:", text.slice(0, 500));
+
+    expect(status).toBe("ok");
+    assertNoPiiLeaked(text, "before_agent_start");
+  });
+
+  test("tool_result_persist: redacts PII in file reads", async () => {
+    const { text, status } = agentMessage(
+      `Please read the file at ${PII_FIXTURE} and summarize its contents.`,
+    );
+    console.log("Agent response:", text.slice(0, 500));
+
+    expect(status).toBe("ok");
+    assertNoPiiLeaked(text, "tool_result_persist");
+  });
+
+  test("message_sending: redacts PII in outbound replies", async () => {
+    const { text, status } = agentMessage(
+      "Please repeat this information exactly as I give it: email alice@widgets.io and SSN 987-65-4321",
+    );
+    console.log("Agent response:", text.slice(0, 500));
+
+    expect(status).toBe("ok");
+    expect(text).not.toContain("alice@widgets.io");
+    expect(text).not.toContain("987-65-4321");
+  });
+
+  // ── Access Request Backlog ───────────────────────────────────────────
+
+  test("access request backlog: request → list → approve → reveal", async () => {
+    // Step 1: Request access to a redacted placeholder
+    const requestResp = agentMessage(
+      "Use the fogclaw_request_access tool to request access to the placeholder [EMAIL_1]. Reason: need original for compliance audit.",
+    );
+    console.log("Request response:", requestResp.text.slice(0, 300));
+    expect(requestResp.status).toBe("ok");
+
+    // Step 2: List pending requests
+    const listResp = agentMessage(
+      "Use the fogclaw_requests tool to list all pending access requests.",
+    );
+    console.log("List response:", listResp.text.slice(0, 300));
+    expect(listResp.status).toBe("ok");
+
+    // Step 3: Approve the request
+    const resolveResp = agentMessage(
+      "Use the fogclaw_resolve tool to approve the most recent pending access request.",
+    );
+    console.log("Resolve response:", resolveResp.text.slice(0, 300));
+    expect(resolveResp.status).toBe("ok");
+  });
+
+  // ── Browser Visual Evidence ──────────────────────────────────────────
+
+  test("Dashboard shows FogClaw redaction evidence", async ({ page }) => {
+    mkdirSync(SCREENSHOTS_DIR, { recursive: true });
+
+    // Navigate to Dashboard
+    const url = dashboardUrl || `http://127.0.0.1:${GATEWAY_PORT}/#token=${GATEWAY_TOKEN}`;
+    await page.goto(url, { waitUntil: "networkidle" });
+    await page.waitForTimeout(2000);
+
+    // Screenshot: Dashboard overview
+    await page.screenshot({
+      path: join(SCREENSHOTS_DIR, "dashboard-overview.png"),
+      fullPage: true,
+    });
+
+    // Navigate to Chat
+    const chatLink = page.getByRole("link", { name: "Chat" });
+    if (await chatLink.isVisible()) {
+      await chatLink.click();
+      await page.waitForLoadState("networkidle");
+      await page.waitForTimeout(2000);
+    }
+
+    // Screenshot: Chat with redaction evidence
+    await page.screenshot({
+      path: join(SCREENSHOTS_DIR, "chat-redaction-evidence.png"),
+      fullPage: true,
+    });
+
+    // Verify page loaded with content
+    const pageText = await page.textContent("body");
+    expect(pageText).toBeTruthy();
+    console.log(
+      "Dashboard chat text (first 500 chars):",
+      pageText?.slice(0, 500),
+    );
+  });
+
+  // ── After all: copy video recordings ─────────────────────────────────
+
+  test.afterAll(async () => {
+    // Copy video recordings to the recordings directory
+    const testResultsDir = join(PROJECT_ROOT, "tests/e2e/test-results");
+    if (existsSync(testResultsDir)) {
+      mkdirSync(RECORDINGS_DIR, { recursive: true });
+      const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+      const findVideos = (dir: string): string[] => {
+        const results: string[] = [];
+        try {
+          for (const entry of readdirSync(dir, { withFileTypes: true })) {
+            const fullPath = join(dir, entry.name);
+            if (entry.isDirectory()) results.push(...findVideos(fullPath));
+            else if (entry.name.endsWith(".webm")) results.push(fullPath);
+          }
+        } catch {
+          /* ignore */
+        }
+        return results;
+      };
+
+      const videos = findVideos(testResultsDir);
+      for (let i = 0; i < videos.length; i++) {
+        const dest = join(RECORDINGS_DIR, `${timestamp}-${i}.webm`);
+        cpSync(videos[i], dest);
+        console.log(`Video saved: ${dest}`);
+      }
+    }
+  });
+});
diff --git a/tests/e2e/global-setup.ts b/tests/e2e/global-setup.ts
new file mode 100644
index 0000000..6c28c39
--- /dev/null
+++ b/tests/e2e/global-setup.ts
@@ -0,0 +1,207 @@
+import { execSync, execFileSync, spawn, type ChildProcess } from "node:child_process";
+import {
+  closeSync,
+  copyFileSync,
+  existsSync,
+  mkdirSync,
+  openSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { homedir } from "node:os";
+import { join, resolve } from "node:path";
+
+const PROJECT_ROOT = resolve(import.meta.dirname, "../..");
+const PROFILE_NAME = "e2e-test";
+const PROFILE_DIR = join(homedir(), `.openclaw-${PROFILE_NAME}`);
+const MAIN_AUTH = join(
+  homedir(),
+  ".openclaw/agents/main/agent/auth-profiles.json",
+);
+const PROFILE_AUTH_DIR = join(PROFILE_DIR, "agents/main/agent");
+const GATEWAY_PORT = 19001;
+const GATEWAY_TOKEN = "e2e-test-token";
+const PID_FILE = join(PROJECT_ROOT, "tests/e2e/.gateway-pid");
+const GATEWAY_LOG = join(PROFILE_DIR, "gateway.log");
+
+/** Runs the OpenClaw CLI against the isolated profile (argv array, no shell) and returns trimmed stdout. */
+function oc(args: readonly string[]): string {
+  return execFileSync("openclaw", ["--profile", PROFILE_NAME, ...args], {
+    encoding: "utf-8",
+    timeout: 30_000,
+    env: { ...process.env, NO_COLOR: "1" },
+  }).trim();
+}
+
+/** Returns the PIDs `lsof` reports for `port`; empty when nothing holds the port. */
+function pidsOnPort(port: number): number[] {
+  try {
+    return execSync(`lsof -ti :${port}`, { encoding: "utf-8" })
+      .split(/\s+/)
+      .filter(Boolean)
+      .map(Number)
+      .filter((pid) => Number.isInteger(pid) && pid > 0);
+  } catch {
+    // lsof exits non-zero when no process matches
+    return [];
+  }
+}
+
+/** Returns the last `maxChars` characters of a file, or "" if it cannot be read. */
+function tailOf(path: string, maxChars = 2000): string {
+  try {
+    return readFileSync(path, "utf-8").slice(-maxChars);
+  } catch {
+    return "";
+  }
+}
+
+/**
+ * Playwright global setup: builds the plugin, creates a throwaway OpenClaw
+ * profile, starts a gateway on GATEWAY_PORT, and waits until it answers HTTP.
+ *
+ * @throws Error if the main auth profile is missing, the gateway cannot be
+ *   spawned, or it does not answer within 30 seconds.
+ */
+export default async function globalSetup() {
+  console.log("\n=== FogClaw E2E: Setting up isolated OpenClaw profile ===\n");
+
+  // 1. Build FogClaw from source (ensure dist/ is current)
+  console.log("Building FogClaw from source...");
+  execSync("npm run build", {
+    cwd: PROJECT_ROOT,
+    encoding: "utf-8",
+    timeout: 60_000,
+    stdio: "inherit",
+  });
+
+  // 2. Kill any leftover gateway on the test port. lsof may report several
+  //    PIDs (one per line), so signal each one individually.
+  const leftover = pidsOnPort(GATEWAY_PORT);
+  if (leftover.length > 0) {
+    console.log(`Killing leftover process on port ${GATEWAY_PORT} (PID ${leftover.join(", ")})`);
+    for (const pid of leftover) {
+      try {
+        process.kill(pid, "SIGTERM");
+      } catch {
+        // Exited between lsof and kill
+      }
+    }
+    await sleep(1000);
+  }
+
+  // 3. Clean up any previous profile
+  if (existsSync(PROFILE_DIR)) {
+    console.log(`Cleaning up previous profile at ${PROFILE_DIR}`);
+    rmSync(PROFILE_DIR, { recursive: true, force: true });
+  }
+
+  // 4. Initialize the isolated profile with gateway config
+  console.log(`Creating isolated profile: ${PROFILE_NAME}`);
+  oc(["config", "set", "gateway.mode", "local"]);
+  oc(["config", "set", "gateway.port", String(GATEWAY_PORT)]);
+  oc(["config", "set", "gateway.auth.mode", "token"]);
+  oc(["config", "set", "gateway.auth.token", GATEWAY_TOKEN]);
+
+  // 5. Configure plugin loading from local build (JSON-encode so unusual
+  //    characters in the checkout path survive intact)
+  oc(["config", "set", "plugins.load.paths", JSON.stringify([PROJECT_ROOT])]);
+
+  // 6. Copy auth credentials from main profile
+  if (!existsSync(MAIN_AUTH)) {
+    throw new Error(
+      `Main profile auth not found at ${MAIN_AUTH}. Run 'openclaw' once to set up credentials.`,
+    );
+  }
+  mkdirSync(PROFILE_AUTH_DIR, { recursive: true });
+  copyFileSync(MAIN_AUTH, join(PROFILE_AUTH_DIR, "auth-profiles.json"));
+  console.log("Copied auth credentials to isolated profile");
+
+  // 7. Start gateway in background. Its output goes to a log file instead of
+  //    pipes: nothing here drains the pipes, so a chatty gateway could stall
+  //    once the pipe buffer fills.
+  console.log(`Starting gateway on port ${GATEWAY_PORT}...`);
+  const logFd = openSync(GATEWAY_LOG, "a");
+  let gateway: ChildProcess;
+  try {
+    gateway = spawn(
+      "openclaw",
+      ["--profile", PROFILE_NAME, "gateway", "run", "--port", String(GATEWAY_PORT), "--force"],
+      {
+        detached: true,
+        stdio: ["ignore", logFd, logFd],
+        env: { ...process.env, NO_COLOR: "1" },
+      },
+    );
+  } finally {
+    // The child has its own copy of the descriptor
+    closeSync(logFd);
+  }
+  // Without a listener, an asynchronous spawn failure would be an uncaught exception
+  gateway.once("error", (err) => {
+    console.error("Gateway process error:", err);
+  });
+  const gatewayPid = gateway.pid;
+  if (gatewayPid === undefined) {
+    throw new Error("Failed to spawn 'openclaw' — is the CLI installed and on PATH?");
+  }
+
+  // Save PID for teardown
+  writeFileSync(PID_FILE, String(gatewayPid));
+  gateway.unref();
+
+  // 8. Wait for gateway to be ready
+  console.log("Waiting for gateway to be ready...");
+  const ready = await waitForGateway(GATEWAY_PORT, 30_000);
+  if (!ready) {
+    // Don't leave a half-started gateway behind
+    try {
+      process.kill(gatewayPid, "SIGTERM");
+    } catch {
+      // Already exited
+    }
+    throw new Error(
+      `Gateway failed to start on port ${GATEWAY_PORT} within 30s\n--- gateway log (tail) ---\n${tailOf(GATEWAY_LOG)}`,
+    );
+  }
+  console.log("Gateway is ready!");
+
+  // 9. Verify FogClaw is loaded
+  const info = oc(["plugins", "info", "fogclaw"]);
+  console.log("Plugin info:", info);
+
+  if (!info.includes("fogclaw_scan")) {
+    console.warn("WARNING: FogClaw tools may not be fully registered. Agent responses may have issues.");
+  }
+
+  console.log("\n=== FogClaw E2E: Setup complete ===\n");
+}
+
+/**
+ * Polls `http://127.0.0.1:<port>/` every 500 ms until it answers (2xx, 401 or
+ * 404 all count as "up") or `timeoutMs` elapses.
+ */
+async function waitForGateway(port: number, timeoutMs: number): Promise<boolean> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    try {
+      // Bound each probe so one hung connection cannot outlive the deadline
+      const resp = await fetch(`http://127.0.0.1:${port}/`, {
+        signal: AbortSignal.timeout(2_000),
+      });
+      if (resp.ok || resp.status === 401 || resp.status === 404) {
+        return true;
+      }
+    } catch {
+      // Not ready yet
+    }
+    await sleep(500);
+  }
+  return false;
+}
+
+/** Resolves after `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
diff --git a/tests/e2e/global-teardown.ts b/tests/e2e/global-teardown.ts
new file mode 100644
index 0000000..30d0ae0
--- /dev/null
+++ b/tests/e2e/global-teardown.ts
@@ -0,0 +1,61 @@
+import { execSync } from "node:child_process";
+import { existsSync, readFileSync, rmSync, unlinkSync } from "node:fs";
+import { homedir } from "node:os";
+import { join, resolve } from "node:path";
+
+const PROJECT_ROOT = resolve(import.meta.dirname, "../..");
+const PROFILE_NAME = "e2e-test";
+const PROFILE_DIR = join(homedir(), `.openclaw-${PROFILE_NAME}`);
+const GATEWAY_PORT = 19001;
+const PID_FILE = join(PROJECT_ROOT, "tests/e2e/.gateway-pid");
+
+/**
+ * Sends SIGTERM to `pid`, ignoring processes that are already gone.
+ *
+ * Non-positive or non-integer values are skipped: `process.kill(0)` would
+ * signal this process's own group, and an empty PID file parses to 0.
+ */
+function terminate(pid: number): void {
+  if (!Number.isInteger(pid) || pid <= 0) return;
+  try {
+    process.kill(pid, "SIGTERM");
+  } catch {
+    // Process may already be gone
+  }
+}
+
+/** Playwright global teardown: stops the gateway and removes the isolated profile. */
+export default async function globalTeardown() {
+  console.log("\n=== FogClaw E2E: Tearing down ===\n");
+
+  // 1. Kill gateway by PID file
+  if (existsSync(PID_FILE)) {
+    const pid = readFileSync(PID_FILE, "utf-8").trim();
+    console.log(`Killing gateway process (PID ${pid})`);
+    terminate(Number(pid));
+    unlinkSync(PID_FILE);
+  }
+
+  // 2. Kill anything still on the gateway port. lsof prints one PID per line,
+  //    so split the output instead of passing it to `kill` as one string.
+  let portPids: string[] = [];
+  try {
+    portPids = execSync(`lsof -ti :${GATEWAY_PORT}`, { encoding: "utf-8" })
+      .split(/\s+/)
+      .filter(Boolean);
+  } catch {
+    // Nothing on port — good
+  }
+  if (portPids.length > 0) {
+    console.log(`Killing remaining process on port ${GATEWAY_PORT} (PID ${portPids.join(", ")})`);
+    for (const pid of portPids) terminate(Number(pid));
+  }
+
+  // 3. Clean up isolated profile
+  if (existsSync(PROFILE_DIR)) {
+    console.log(`Removing isolated profile at ${PROFILE_DIR}`);
+    rmSync(PROFILE_DIR, { recursive: true, force: true });
+  }
+
+  console.log("\n=== FogClaw E2E: Teardown complete ===\n");
+}
diff --git a/tests/e2e/notify-discord.sh b/tests/e2e/notify-discord.sh
new file mode 100755
index 0000000..2bd4a7d
--- /dev/null
+++ b/tests/e2e/notify-discord.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Sends E2E test results to a Discord webhook.
+# Required env vars: DISCORD_WEBHOOK_URL, E2E_STATUS +# Optional env vars: GITHUB_RUN_URL, COMMIT_SHA, COMMIT_REF + +set -euo pipefail + +if [ -z "${DISCORD_WEBHOOK_URL:-}" ]; then + echo "DISCORD_WEBHOOK_URL not set — skipping notification" + exit 0 +fi + +STATUS="${E2E_STATUS:-unknown}" +RUN_URL="${GITHUB_RUN_URL:-local run}" +SHA="${COMMIT_SHA:-$(git rev-parse --short HEAD 2>/dev/null || echo 'unknown')}" +REF="${COMMIT_REF:-$(git branch --show-current 2>/dev/null || echo 'unknown')}" + +if [ "$STATUS" = "success" ]; then + COLOR=3066993 # green + TITLE="E2E Tests Passed" + EMOJI="white_check_mark" +else + COLOR=15158332 # red + TITLE="E2E Tests Failed" + EMOJI="x" +fi + +PAYLOAD=$(cat < { expect(requestsTool).toBeDefined(); expect(resolveTool).toBeDefined(); - expect(scanTool.schema.required).toContain("text"); - expect(previewTool.schema.required).toContain("text"); - expect(redactTool.schema.required).toContain("text"); - expect(requestAccessTool.schema.required).toContain("placeholder"); - expect(requestsTool.schema.required).toEqual([]); - expect(resolveTool.schema.required).toContain("action"); + expect(scanTool.parameters.required).toContain("text"); + expect(previewTool.parameters.required).toContain("text"); + expect(redactTool.parameters.required).toContain("text"); + expect(requestAccessTool.parameters.required).toContain("placeholder"); + expect(requestsTool.parameters.required).toEqual([]); + expect(resolveTool.parameters.required).toContain("action"); }); it("validates hook and tool behavior against real Scanner execution path", async () => { @@ -91,7 +91,7 @@ describe("FogClaw OpenClaw plugin contract (integration path)", () => { expect(hookResult?.prependContext).not.toContain("john@example.com"); const scanTool = api.tools.find((tool: any) => tool.id === "fogclaw_scan"); - const scanOutput = await scanTool.handler({ + const scanOutput = await scanTool.execute("test", { text: "Email me at john@example.com today.", }); @@ -103,7 +103,7 @@ 
describe("FogClaw OpenClaw plugin contract (integration path)", () => { expect(scanParsed.entities[0].label).toBe("EMAIL"); const redactTool = api.tools.find((tool: any) => tool.id === "fogclaw_redact"); - const redactOutput = await redactTool.handler({ + const redactOutput = await redactTool.execute("test", { text: "Email me at john@example.com today.", strategy: "token", }); @@ -120,7 +120,7 @@ describe("FogClaw OpenClaw plugin contract (integration path)", () => { const previewTool = api.tools.find((tool: any) => tool.id === "fogclaw_preview"); - const previewOutput = await previewTool.handler({ + const previewOutput = await previewTool.execute("test", { text: "Email me at john@example.com about Acme Corp tomorrow.", }); @@ -142,7 +142,7 @@ describe("FogClaw OpenClaw plugin contract (integration path)", () => { plugin.register(api); const scanTool = api.tools.find((tool: any) => tool.id === "fogclaw_scan"); - const scanOutput = await scanTool.handler({ + const scanOutput = await scanTool.execute("test", { text: "Confidential note for Acme project roadmap", custom_labels: ["project", "competitor name"], }); diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 0000000..8861916 --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + exclude: ["node_modules", "dist", "tests/e2e/**"], + }, +});