From 45eb151d202caa73761ab468f479d33ea48999de Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Wed, 29 Apr 2026 11:15:14 -0700 Subject: [PATCH 1/6] =?UTF-8?q?Add=20browser-reverse=20skill=20=E2=80=94?= =?UTF-8?q?=20discover=20OpenAPI=203.1=20from=20browser-trace=20captures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumes a browser-trace run (.o11y//), pairs CDP request/response events, templatizes paths, infers JSON schemas from samples, and emits an OpenAPI 3.1 document with a coverage report and confidence metadata. Pipeline: load → filter → normalize → infer → emit. Each stage is a discrete script writing to intermediate/ for debuggability. Optional --bodies flag joins a `browse network on` capture by CDP requestId so response bodies feed into schema inference. E2E tested against Hacker News, jsonplaceholder, derekmeegan.com, browserbase.com, browser-use.com, reddit.com. Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/browser-reverse/BODY-CAPTURE-LIFT.md | 118 ++++++ skills/browser-reverse/REFERENCE.md | 240 ++++++++++++ skills/browser-reverse/SKILL.md | 136 +++++++ skills/browser-reverse/package.json | 6 + skills/browser-reverse/scripts/discover.mjs | 93 +++++ skills/browser-reverse/scripts/emit.mjs | 356 ++++++++++++++++++ skills/browser-reverse/scripts/filter.mjs | 66 ++++ skills/browser-reverse/scripts/infer.mjs | 139 +++++++ skills/browser-reverse/scripts/lib/io.mjs | 58 +++ .../scripts/lib/path-template.mjs | 86 +++++ skills/browser-reverse/scripts/lib/redact.mjs | 73 ++++ .../scripts/lib/schema-merge.mjs | 175 +++++++++ skills/browser-reverse/scripts/lib/yaml.mjs | 87 +++++ skills/browser-reverse/scripts/load.mjs | 170 +++++++++ skills/browser-reverse/scripts/normalize.mjs | 128 +++++++ 15 files changed, 1931 insertions(+) create mode 100644 skills/browser-reverse/BODY-CAPTURE-LIFT.md create mode 100644 skills/browser-reverse/REFERENCE.md create mode 100644 skills/browser-reverse/SKILL.md create 
mode 100644 skills/browser-reverse/package.json create mode 100644 skills/browser-reverse/scripts/discover.mjs create mode 100644 skills/browser-reverse/scripts/emit.mjs create mode 100644 skills/browser-reverse/scripts/filter.mjs create mode 100644 skills/browser-reverse/scripts/infer.mjs create mode 100644 skills/browser-reverse/scripts/lib/io.mjs create mode 100644 skills/browser-reverse/scripts/lib/path-template.mjs create mode 100644 skills/browser-reverse/scripts/lib/redact.mjs create mode 100644 skills/browser-reverse/scripts/lib/schema-merge.mjs create mode 100644 skills/browser-reverse/scripts/lib/yaml.mjs create mode 100644 skills/browser-reverse/scripts/load.mjs create mode 100644 skills/browser-reverse/scripts/normalize.mjs diff --git a/skills/browser-reverse/BODY-CAPTURE-LIFT.md b/skills/browser-reverse/BODY-CAPTURE-LIFT.md new file mode 100644 index 00000000..514f686e --- /dev/null +++ b/skills/browser-reverse/BODY-CAPTURE-LIFT.md @@ -0,0 +1,118 @@ +# Adding Response Body Capture to `browser-trace` — Lift Estimate + +> Grounded in the real source as of `browserbase/skills@main`. I read `SKILL.md`, +> `REFERENCE.md`, `lib.mjs`, `start-capture.mjs`, `snapshot-loop.mjs`, `bisect-cdp.mjs`, +> `bb-capture.mjs`, `bb-finalize.mjs`, `stop-capture.mjs`. + +--- + +## 1. Why this is harder than it looks + +`browser-trace` today does the simplest possible thing: it shells out to `browse cdp --domain Network --domain Console ...`, which emits one CDP event per line to stdout, and that stream is captured verbatim into `cdp/raw.ndjson`. **No CDP commands are issued back into the session.** The capture is fully one-way and stateless. + +Response bodies break that model. Bodies aren't pushed by CDP — they have to be **pulled** with a `Network.getResponseBody` request, keyed by `requestId`, **before the renderer evicts the resource**. Eviction is non-deterministic but typically happens within seconds of the response completing on a busy page. 
That means body capture has to be: + +- **Live** — runs concurrently with the trace, can't be done from `raw.ndjson` after the fact. +- **Bidirectional** — issues CDP commands, not just reads events. +- **Fast** — the gap between `Network.loadingFinished` and the `getResponseBody` call must be small. +- **Selective** — fetching every body would 10–100x the disk footprint and add real load on the renderer. + +This is a meaningful expansion of the skill's current architecture, not a tweak. + +--- + +## 2. The lift, by component + +### 2.1 New companion script — `scripts/body-capture.mjs` — **NEW, ~200 lines** + +The `browse cdp` subprocess can't be modified (it's an external binary), so body capture has to be a **second CDP client** running in parallel, attached to the same target. Same model as `snapshot-loop.mjs`, but instead of polling screenshots it subscribes to `Network.responseReceived` + `Network.loadingFinished` and issues `Network.getResponseBody` for matching requests. + +Responsibilities: + +- Open its own WebSocket to the CDP target (or use `browse --ws ...` if it supports request/response, which from the snapshot loop it does for one-shot commands — body capture is a long-lived subscription, so likely a raw `ws://` client). +- Maintain an in-memory map of `requestId → { url, method, contentType, status, type }` keyed off `Network.requestWillBeSent` + `Network.responseReceived`. +- On `Network.loadingFinished`: if the request matches the filter (default: `fetch`/`xhr` resourceType, JSON or form content-type, size cap), call `Network.getResponseBody` and write the result to `<run-dir>/cdp/network/bodies/<requestId>.json`. +- Track failures (eviction races, out-of-process iframes that can't be addressed, sizes over the cap) in a sidecar `bodies/_skipped.jsonl`. +- SIGTERM-clean shutdown so `stop-capture.mjs` doesn't have to know about it specifically (it would just need to also kill `.bodies.pid`). + +**Risk:** `Network.getResponseBody` requires a session-attached target.
For OOPIFs (cross-origin iframes), you have to use `Target.attachToTarget` first and route the command on the resulting session. Non-trivial. Realistic v1 punts on iframes and just records the skip reason. + +**Dependencies:** zero — Node stdlib has `ws` via `undici` /`WebSocket` (Node 22+) or you bundle a tiny WS client. The skill is currently zero-dep, so this constraint matters. + +### 2.2 `start-capture.mjs` — **MODIFIED, ~10 lines** + +Add an optional third detached subprocess: if `O11Y_BODIES=1` (or a `--bodies` flag), spawn `body-capture.mjs` the same way `snapshot-loop.mjs` is spawned, write `.bodies.pid`. Default off so existing users see no change. + +### 2.3 `stop-capture.mjs` — **MODIFIED, ~3 lines** + +Already loops over `['.cdp.pid', '.loop.pid']`. Add `'.bodies.pid'` to the list. Trivial. + +### 2.4 `bisect-cdp.mjs` — **MODIFIED, ~15 lines** + +Currently the only "network" buckets are CDP **events** (`requestWillBeSent`, `responseReceived`, `loadingFinished`, `loadingFailed`, `webSocket`). Bodies are content, not events, so they don't fit the existing `BUCKETS` predicate model. + +Two sensible places to expose them: + +1. **As-is on disk** — `cdp/network/bodies/.json` already exists from body-capture; bisect doesn't have to do anything. Per-page slicing (`cdp/pages//network/bodies/`) is the only real work: walk `network/responses.jsonl` for each page, find the matching body files, hard-link or copy them into the per-page dir. ~10 lines. +2. **Index** — emit `cdp/network/bodies-index.jsonl` mapping `{requestId, url, method, status, contentType, sizeBytes, bodyPath}` so query/grep tools don't have to walk the dir. ~5 lines. + +### 2.5 `lib.mjs` — **MODIFIED, ~5 lines** + +Add a helper `readBody(runDir, requestId) → { contentType, body, base64? }`. Useful for the new skill's `infer.mjs` and for `query.mjs`. 
+ +### 2.6 `query.mjs` — **MODIFIED, ~20 lines** + +Add a `bodies` subcommand: list captured bodies, filter by URL/status/content-type, dump a body to stdout. Optional but cheap. + +### 2.7 `bb-capture.mjs` / `bb-finalize.mjs` — **NO CHANGES** + +They delegate to `start-capture.mjs` / `stop-capture.mjs`. Inherits body capture for free. + +### 2.8 `SKILL.md` / `REFERENCE.md` — **MODIFIED, ~50 lines** + +Document: +- The new flag/env var. +- New on-disk layout (`cdp/network/bodies/`, `bodies-index.jsonl`). +- Caveats: eviction races, OOPIF gaps, size cap, default-off. +- Filter knobs (`O11Y_BODY_TYPES`, `O11Y_BODY_MAX_KB`, `O11Y_BODY_INCLUDE_PATTERN`). +- Privacy implication: bodies can contain user data. Off by default for a reason. + +--- + +## 3. Total lift + +| Component | Type | Lines | Risk | +|---|---|---|---| +| `scripts/body-capture.mjs` | new | ~200 | **medium** — WS client, eviction races, OOPIF | +| `scripts/start-capture.mjs` | modify | ~10 | low | +| `scripts/stop-capture.mjs` | modify | ~3 | low | +| `scripts/bisect-cdp.mjs` | modify | ~15 | low | +| `scripts/lib.mjs` | modify | ~5 | low | +| `scripts/query.mjs` | modify | ~20 | low | +| `SKILL.md` + `REFERENCE.md` | modify | ~50 | low | +| **Total** | | **~300 LOC** | | + +**Calendar estimate for one engineer who knows CDP:** ~2–3 days. +- Day 1: WS client + filter + happy-path body capture against Chromium local. +- Day 2: OOPIF target attachment, size cap, skip-tracking, integration with `start`/`stop`. +- Day 3: bisect integration, query subcommand, docs, end-to-end test against a Browserbase remote session. + +**Calendar estimate without prior CDP fluency:** ~1 week. The eviction race and OOPIF target plumbing are the parts that bite. + +--- + +## 4. Risks worth calling out in the PR + +1. **Privacy.** Bodies can contain bearer tokens, PII, partial PII even when redacted at the header layer. Default-off + an opt-in flag is non-negotiable. 
The redaction story has to live in the consuming skill (e.g. `browser-reverse`), not in the capture layer — capture should write what it sees. +2. **Performance.** `Network.getResponseBody` blocks on the renderer. For a page making 200 XHR requests, naive capture serializes every one of them. Mitigations: hard cap on concurrent in-flight `getResponseBody` calls (e.g. 8), aggressive content-type filter, default size cap (256 KB). +3. **Disk.** A 10-minute Browserbase session with body capture on can easily produce 100–500 MB of bodies. The skill should default to JSON-only + 256 KB cap and let users opt into more. +4. **Eviction races.** Some bodies will fail with `-32000 No data found for resource`. This is normal. `bodies/_skipped.jsonl` should record them so consumers know coverage isn't 100%. +5. **WebSocket frame data.** `Network.webSocketFrameSent` / `Received` already include the payload inline — no `getResponseBody` needed. v1 should explicitly punt on WebSocket bodies (already in the events bucket) to scope down. + +--- + +## 5. Recommendation + +Building this **into** `browser-trace` is the right call **if** the maintainers are willing to add a (default-off) feature with privacy and disk caveats. Putting it in a sibling skill is also viable but less clean — every consumer skill (api-spec, security audits, etc.) would have to reinvent the WS plumbing. + +The cleanest framing: **bodies are part of the trace, off by default, on with a flag.** Same shape as how Chrome DevTools handles "Preserve log" / "Disable cache" — capture options, not a separate tool. diff --git a/skills/browser-reverse/REFERENCE.md b/skills/browser-reverse/REFERENCE.md new file mode 100644 index 00000000..e8a9ba3c --- /dev/null +++ b/skills/browser-reverse/REFERENCE.md @@ -0,0 +1,240 @@ +# Browser Reverse — Reference + +Technical reference for the discovery pipeline, file formats, and configuration.
+ +## Pipeline + +``` +browser-trace run discover.mjs +.o11y//cdp/network/ ┌─────────┐ ┌────────┐ ┌──────────┐ ┌─────────┐ ┌──────┐ + requests.jsonl ──────────▶ │ load │ ─▶ │ filter │ ─▶ │ normalize│ ─▶ │ infer │ ─▶ │ emit │ + responses.jsonl └─────────┘ └────────┘ └──────────┘ └─────────┘ └──────┘ + paired filtered endpoints endpoints openapi + .jsonl .jsonl .jsonl .with- .yaml + schemas report.md + .jsonl +``` + +Each stage is a discrete script that reads a file and writes a file. `discover.mjs` is the dispatcher; pass `--stage ` to run a single stage for debugging. + +## Scripts + +All scripts are Node ESM (`type: module`). They depend only on the Node standard library. + +### `discover.mjs --run [flags]` + +Top-level dispatcher. Runs `load → filter → normalize → infer → emit` in order. With `--stage `, runs only that stage (assumes prior stages already wrote their intermediate file). + +### `load.mjs [bodies-dir]` + +- Reads `cdp/network/requests.jsonl` and `cdp/network/responses.jsonl`. +- Pairs by `requestId`. Drops `OPTIONS` (CORS preflight) and pure redirects (status 3xx with `Location` and no body — recorded as metadata on the *next* request in the chain when the requestId carries forward, otherwise dropped). +- Drops resource types that are not `XHR`, `Fetch`, or `Document` (skips `Image`, `Stylesheet`, `Font`, `Media`, `Manifest`, `Other`, `Script` unless the URL clearly looks like an API endpoint). +- **Body join**: if a `browse network` capture dir is provided (via `--bodies` or auto-detected at `/cdp/network/bodies/`), each subdir's `request.json` + `response.json` are read and joined to paired rows by `requestId`. The browse-network `id` field IS the CDP requestId for XHR/Fetch resource types, so the join is exact (not URL-or-timestamp matching). Bodies that look like JSON are parsed; otherwise the raw string is preserved. 
+- Output: `intermediate/paired.jsonl` — one row per pair with `{ method, url, status, reqHeaders, reqBody, respHeaders, respBody, contentType, type, ts }`. + +### `filter.mjs ` + +- Reads `intermediate/paired.jsonl`. +- Applies `--include` / `--exclude` / `--origins`. +- Applies built-in exclude list (analytics hosts, sourcemaps, service workers, fonts/CSS that snuck through). +- Output: `intermediate/filtered.jsonl`. + +### `normalize.mjs ` + +- Templatizes paths. Detection order per segment: + 1. UUID v1–v5 → `{id}` (`string`, `format: uuid`). + 2. Pure integer → `{id}` (`integer`). + 3. Hex/base62 ≥ 8 chars → `{id}` (`string`). + 4. If the same position varies across multiple samples and is short alpha → `{slug}` (`string`). + 5. Otherwise the segment is left static. +- Groups paired samples by `(origin, method, templatedPath)`. +- Collects query parameters across samples; marks `required: true` only when every sample carries the param. +- If two pre-normalization templates would collapse but yield divergent response status/content-type signatures, they're kept split and flagged. +- Output: `intermediate/endpoints.jsonl` — one row per endpoint with `{ origin, method, path, samples[], queryParams, statusCodes, normalizationFlags }`. + +### `infer.mjs ` + +- For each endpoint, runs JSON-Schema inference across request bodies and (when present) response bodies. +- Merge rules: required = present-in-all, types = union of observed types, arrays infer item schema, enum detected when ≤ 8 distinct values across ≥ 5 samples. +- Format hints: `date-time` (ISO-ish), `uri`, `email`, `uuid`. +- Picks a representative sample (most-recent successful 2xx) and writes redacted request/response example to `samples/`. +- Output: `intermediate/endpoints.with-schemas.jsonl`. + +### `emit.mjs ` + +- Builds the OpenAPI 3.1 document. 
+- Hoists structurally-identical schemas into `components.schemas` keyed by structural hash, with names derived from path tokens (`Item`, `Item_List`, etc.) — falls back to `Schema1`, `Schema2` if no path hint applies. +- Writes `openapi.yaml`, `openapi.json`, `report.md`, `confidence.json`. + +## File formats + +### `intermediate/paired.jsonl` + +```json +{ + "requestId": "12345.678", + "method": "GET", + "url": "https://api.example.com/v1/items/42?page=2", + "origin": "https://api.example.com", + "path": "/v1/items/42", + "query": { "page": "2" }, + "status": 200, + "type": "Fetch", + "contentType": "application/json", + "reqHeaders": { "accept": "application/json" }, + "reqBody": null, + "respHeaders": { "content-type": "application/json" }, + "respBody": null, + "ts": 1714400000000 +} +``` + +`reqBody` is the verbatim `postData` from `Network.requestWillBeSent` (parsed if JSON). `respBody` is `null` unless a `browse network` capture dir was joined in (see below) — `browse cdp` does not embed bodies. + +### Joining `browse network` bodies + +`browse network on` is a separate command from the `browse` CLI that writes per-request `request.json` + `response.json` files (with full bodies) to a temp directory. Discover joins these into the trace by `requestId`. + +Workflow: + +```bash +# during capture, alongside browser-trace +browse network on +# ...drive... +# IMPORTANT: snapshot the dir before it gets reused +cp -r "$(browse network path | jq -r .path)" .o11y//cdp/network/bodies/ +browse network off +``` + +Internals (matched in `lib/io.mjs` + `load.mjs`): + +- The browse-network entry's `request.json.id` field equals the CDP `requestId` for XHR/Fetch resource types. The join is by exact `requestId`, not URL or timestamp. +- For Document loads, the `id` field is a non-CDP UUID and won't match — those bodies are silently skipped (Documents aren't useful for API spec inference anyway). 
+- `response.json` from `browse network` may have empty `status` / `headers` / `mimeType` for some loads — that's fine, those are taken from the CDP firehose. Only `body` is read. +- The capture dir is shared per `browse` daemon session (`/tmp/.../browse-default-network/`). Run `browse network on` then snapshot the dir before another `browse network on` overwrites it. + +### `intermediate/endpoints.jsonl` + +```json +{ + "endpointKey": "GET https://api.example.com/v1/items/{id}", + "origin": "https://api.example.com", + "method": "GET", + "path": "/v1/items/{id}", + "rawPaths": ["/v1/items/42", "/v1/items/97"], + "pathParams": [{ "name": "id", "in": "path", "schema": { "type": "integer" } }], + "queryParams": [{ "name": "page", "in": "query", "required": false, "schema": { "type": "string" } }], + "statusCodes": [200, 200, 404], + "samples": [/* indices into paired.jsonl */], + "normalizationFlags": [] +} +``` + +### `confidence.json` + +```json +{ + "endpoints": [ + { + "key": "GET /v1/items/{id}", + "samples": 7, + "statusCodes": [200, 404], + "responseBodyKnown": false, + "requestBodyKnown": false, + "normalizationFlags": [], + "confidence": "medium" + } + ] +} +``` + +`confidence` is a coarse bucket: `low` (1–2 samples or normalization flags), `medium` (3–9 samples, no flags), `high` (≥ 10 samples, multi-status, no flags). + +## CLI flags (full) + +| Flag | Default | Notes | +|---|---|---| +| `--run ` | required | Resolves `cdp/network/{requests,responses}.jsonl` underneath | +| `--out ` | `/api-spec` | | +| `--bodies ` | auto | `browse network` capture dir to join into the trace. Auto-detected from `/cdp/network/bodies/` when present | +| `--include ` | none | Repeatable. ORed together. Applied after `--origins` | +| `--exclude ` | (defaults) | Repeatable. Combined with built-in defaults | +| `--origins ` | none | Comma-separated. 
If set, anything *not* matching is dropped before include/exclude | +| `--format ` | `both` | Format of the emitted spec | +| `--title ` | derived | `info.title` in the OpenAPI doc | +| `--redact ` | (defaults) | Comma-separated extra header names / JSON keys to scrub. Adds to defaults; never replaces | +| `--min-samples ` | `1` | Drop endpoints below this threshold (still listed in the report) | +| `--stage ` | (all) | One of `load`, `filter`, `normalize`, `infer`, `emit` | + +## Default exclude list + +URLs matching these patterns are dropped before any analysis (regex, applied to the full URL): + +- Analytics: `segment\.(io\|com)`, `mixpanel\.com`, `google-analytics\.com`, `googletagmanager\.com`, `datadog(hq)?\.com`, `sentry\.io`, `amplitude\.com`, `fullstory\.com`, `hotjar\.com`, `intercom\.io`, `clarity\.ms`, `cloudflareinsights\.com`, `doubleclick\.net`, `facebook\.com/tr` +- Static-only file extensions: `\.(png|jpe?g|gif|svg|webp|ico|woff2?|ttf|eot|otf|css|map|mp4|webm|mp3)(\?|$)` +- Service worker / metadata: `/sw\.js`, `/service-worker\.js`, `/manifest\.json$`, `/robots\.txt$`, `/favicon\.ico$` + +Override granularly via `--include` (which wins over default `--exclude`). + +## Default redactions + +Headers (case-insensitive): `authorization`, `cookie`, `set-cookie`, `x-csrf-token`, `x-xsrf-token`, `x-api-key`, `proxy-authorization`, plus any header name matching `*token*`, `*secret*`, `*signature*`. + +Body keys: `password`, `token`, `secret`, `api_key`, `apiKey`, `accessToken`, `refreshToken`, `creditCard`, `ssn`. + +Body values (regex): JWTs (`^eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$`), email addresses (`@` + TLD), phone numbers (E.164-ish). + +Redacted values are replaced with `""` so type information is preserved for schema inference. 
+ +## Path templating heuristics + +Per-segment classifier in `scripts/lib/path-template.mjs`: + +| Pattern | Replacement | OpenAPI schema | +|---|---|---| +| 8-4-4-4-12 hex (UUID) | `{id}` | `{ type: string, format: uuid }` | +| `\d+` | `{id}` | `{ type: integer }` | +| `[A-Za-z0-9]{8,}` (no vowels-only / dictionary check) | `{id}` | `{ type: string }` | +| Same-position alpha tokens varying across ≥ 2 samples | `{slug}` | `{ type: string }` | + +When multiple variable segments exist in one path, names are suffixed: `{id}`, `{id2}`, `{id3}`. The `--name-params` flag (future) will use sibling segment hints (`/products/42` → `{productId}`). + +## Confidence flags + +Possible entries in `normalizationFlags`: + +- `divergent-response-shape` — pre-normalization paths collapsed to the same template but had structurally different responses. The skill keeps them split and emits both. +- `single-sample` — endpoint observed exactly once. +- `single-status` — only one status code observed; spec lists only that response. +- `mixed-content-types` — different `content-type` values across samples. +- `request-body-only-on-some-samples` — POST/PUT seen with and without a body. + +## OpenAPI extensions + +The emitter writes a few `x-*` extensions on each operation: + +- `x-confidence`: `{ samples, statusCodes, normalizationFlags }` +- `x-origin`: the origin this operation was observed on (when multiple servers are listed) +- `x-observed-auth`: array of auth-shaped header names seen on this endpoint (e.g. `["authorization", "x-api-key"]`) +- `x-sample-count`: total number of paired samples backing the operation + +These extensions are stripped from `report.md` (which is human-facing) but preserved in the YAML/JSON. + +## Configuration via env + +| Var | Default | Effect | +|---|---|---| +| `O11Y_ROOT` | `.o11y` | Inherited from `browser-trace`. 
Used only when `--run` is bare run id rather than a full path | +| `DISCOVER_ENUM_MAX_DISTINCT` | `8` | Max distinct values to consider a field an enum | +| `DISCOVER_ENUM_MIN_SAMPLES` | `5` | Min samples before enum detection runs | + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| `paired.jsonl` is empty | trace contains no `Network.requestWillBeSent` events for XHR/Fetch | re-run `browser-trace` exercising the dynamic flows; static-only sites won't yield endpoints | +| `openapi.yaml` has only `paths: {}` | every paired request was filtered out | check `--origins` and the default exclude list; pass `--include '.*'` to bypass filtering | +| Path templating collapses too aggressively | numeric IDs being misread as enums, or dictionary words misread as slugs | add `--exclude` for the noisy paths and re-run, or file an issue with the trace | +| Schemas show `type: "string"` for everything | request/response bodies aren't valid JSON or weren't captured | check `paired.jsonl` for `reqBody`/`respBody` content — if `null`, bodies weren't in the trace | +| Spec validator complains about `info.version` | derived version is `0.1.0-discovered` which some tools dislike | pass `--version 0.1.0` (TODO) or post-edit the file | diff --git a/skills/browser-reverse/SKILL.md b/skills/browser-reverse/SKILL.md new file mode 100644 index 00000000..6bb98eac --- /dev/null +++ b/skills/browser-reverse/SKILL.md @@ -0,0 +1,136 @@ +--- +name: browser-reverse +description: Reverse-engineer a website's HTTP API into a best-effort OpenAPI 3.1 spec by analyzing a `browser-trace` capture. Use when the user wants to discover/extract API endpoints from a browser session, build an OpenAPI doc from network traffic, or document a third-party site's XHR/fetch surface for client integration. +compatibility: "Requires Node 18+ and a `browser-trace` run directory (`.o11y//`) produced by the sibling `browser-trace` skill. 
The scripts use only the Node standard library — no `npm install` step. `jq` is referenced in docs for ad-hoc querying but is not required by the scripts." +license: MIT +allowed-tools: Bash, Read, Grep +--- + +# Browser Reverse + +Replay-driven API reverse-engineering. Consume a `browser-trace` capture, pair its CDP request / response events, templatize observed URLs, infer JSON schemas from samples, and emit an **OpenAPI 3.1** document plus a human-readable coverage report. + +This skill **does not capture traffic**. It is purely offline post-processing on top of `browser-trace`'s `cdp/network/*.jsonl` buckets. The two skills compose: + +``` +browser-trace → .o11y/<run>/cdp/network/{requests,responses}.jsonl +browser-reverse → .o11y/<run>/api-spec/openapi.yaml + report.md +``` + +## When to use + +- The user wants an OpenAPI document for a third-party or undocumented website API. +- The user has a `browser-trace` run and wants endpoints + schemas extracted from it. +- The user is building a client/SDK against a site that doesn't publish a spec. +- The user wants a coverage report showing which flows would broaden the spec. + +If the user wants to **capture** traffic, send them to `browser-trace` first. + +## Two-step workflow + +### 1. Capture with `browser-trace` (and optionally bodies via `browse network on`) + +```bash +# Local Chrome example (see browser-trace SKILL.md for Browserbase variant) +"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-spec about:blank & + +node ../browser-trace/scripts/start-capture.mjs 9222 my-site +browse env local 9222 +browse network on # capture request/response bodies +browse open https://example.com +# ...drive whatever flows you want covered...
+ +# Snapshot the bodies dir BEFORE turning capture off (the temp dir is shared +# per-session, so subsequent `browse network on` runs would mix your bodies +# with whatever a future capture writes if you skip this step). +cp -r "$(browse network path | jq -r .path)" .o11y/my-site/cdp/network/bodies/ +browse network off + +node ../browser-trace/scripts/stop-capture.mjs my-site +node ../browser-trace/scripts/bisect-cdp.mjs my-site +``` + +`browse network on` is **optional but strongly recommended** — without it, the spec has no response-body schemas (the CDP firehose used by `browse cdp` does not embed bodies). With it, both request bodies (already captured by CDP) *and* response bodies are joined into the trace by CDP `requestId`. + +### 2. Generate the spec + +```bash +node scripts/discover.mjs --run .o11y/my-site +# → .o11y/my-site/api-spec/openapi.yaml +# .o11y/my-site/api-spec/openapi.json +# .o11y/my-site/api-spec/report.md +# .o11y/my-site/api-spec/confidence.json +# .o11y/my-site/api-spec/samples/*.json +# .o11y/my-site/api-spec/intermediate/*.jsonl +``` + +`discover.mjs` auto-detects `/cdp/network/bodies/`. To use a body capture from elsewhere (e.g. didn't snapshot, want the live `browse network` dir), pass `--bodies ` explicitly. + +Then deliver the artifacts to the user (`exec.sendFile()` for `openapi.yaml` and `report.md`). + +## CLI flags + +| Flag | Required | Meaning | +|---|---|---| +| `--run ` | yes | Path to a `browser-trace` run directory | +| `--out ` | no | Output dir; default `/api-spec/` | +| `--bodies ` | no | `browse network` capture dir to join into the trace (auto-detected from `/cdp/network/bodies/` when present) | +| `--include ` | no | Only include URLs matching regex (repeatable) | +| `--exclude ` | no | Exclude URLs matching regex (repeatable; in addition to defaults) | +| `--origins ` | no | Comma-separated origin allow-list (e.g. `api.example.com,example.com`) | +| `--format ` | no | Output format. 
Default `both` | +| `--title ` | no | OpenAPI `info.title`. Default derived from primary origin | +| `--redact ` | no | Extra header names / JSON keys to redact (comma-separated) | +| `--min-samples ` | no | Minimum samples per endpoint to include. Default `1` | +| `--stage ` | no | Run only one stage: `load`, `filter`, `normalize`, `infer`, `emit` | + +## Output layout + +``` +/api-spec/ +├── openapi.yaml primary deliverable +├── openapi.json mirror +├── report.md human-readable summary + coverage caveats +├── confidence.json per-endpoint confidence + normalization flags +├── samples/ redacted request/response examples +│ └── __.json +└── intermediate/ pipeline byproducts (paired/filtered/endpoints jsonl) +``` + +## What you get from `browse cdp` and `browse network` + +Two complementary capture sources: + +| Source | Provides | Limitation | +|---|---|---| +| `browse cdp` (used by `browser-trace`) | request method/URL/headers/`postData`, response status/headers/mimeType, full event timing | **Does not embed response bodies.** Bodies must be pulled with `Network.getResponseBody`, which the firehose doesn't do. | +| `browse network on` (separate command) | request bodies AND response bodies on disk, keyed by CDP `requestId` | Capture dir is shared per `browse` session; snapshot before another `browse network on` overwrites it. | + +`discover.mjs` will pull bodies from a `browse network` dir if you pass `--bodies ` (or stash them under `/cdp/network/bodies/`, which is auto-detected). The matching is by `requestId` — `browse network` writes that into each `request.json` as `id`, and we join directly. + +What changes when bodies are present: + +- ✅ Path templating, query-param schemas, status codes, content-types — same either way. +- ✅ Request-body schemas — `postData` from CDP is enough; bodies dir is a nice-to-have for non-`postData` cases. +- ✅ **Response-body schemas** — fully inferred from real samples. 
Without bodies you get `{ description, content: }` skeletons. + +The report flags every endpoint that has no response-body sample. For a sketch of what it would take to teach `browser-trace` itself to capture response bodies natively (no separate `browse network on` step), see [BODY-CAPTURE-LIFT.md](BODY-CAPTURE-LIFT.md). + +## Limitations + +- **Coverage is bounded by the captured flow.** Endpoints not exercised in the trace will not appear. The skill cannot prove completeness. +- **Schemas are inductive, not contractual.** A field might be optional on the server even if every sample contained it. +- **Auth is observed, not specified.** The skill records auth-shaped headers in an `x-observed-auth` extension but won't claim a security scheme. +- **Path templating is heuristic.** Numeric / UUID / hex / slug patterns are detected per segment. Ambiguous URLs are flagged in `confidence.json`. +- **Redaction is best-effort.** Default redactions cover common credentials, but app-specific secrets may slip through; use `--redact` for known custom headers/keys. + +## Best practices + +1. **Drive the flows you want documented.** The richer the browser-trace, the richer the spec. +2. **Use `--origins` for noisy sites.** A marketing page hits dozens of analytics hosts; restrict to the API origin you care about. +3. **Inspect `report.md` first.** Low-sample endpoints, single-status endpoints, and missing request bodies are listed there with concrete suggestions. +4. **Bump `--min-samples` to 2+** when you want only confidently-shaped endpoints in the final doc — drop the long tail. +5. **Pair with `browse network on`** when response-body schemas matter. The CDP firehose alone has request bodies but not response bodies. + +For pipeline internals and the file format reference, see [REFERENCE.md](REFERENCE.md). 
diff --git a/skills/browser-reverse/package.json b/skills/browser-reverse/package.json new file mode 100644 index 00000000..86360e3f --- /dev/null +++ b/skills/browser-reverse/package.json @@ -0,0 +1,6 @@ +{ + "name": "browser-reverse", + "version": "0.1.0", + "private": true, + "type": "module" +} diff --git a/skills/browser-reverse/scripts/discover.mjs b/skills/browser-reverse/scripts/discover.mjs new file mode 100644 index 00000000..c349fa87 --- /dev/null +++ b/skills/browser-reverse/scripts/discover.mjs @@ -0,0 +1,93 @@ +#!/usr/bin/env node +// Top-level dispatcher: load → filter → normalize → infer → emit. +// +// Usage: +// node scripts/discover.mjs --run .o11y/ [flags] + +import path from 'node:path'; +import fs from 'node:fs'; +import { resolveRun, ensureDir } from './lib/io.mjs'; +import { load } from './load.mjs'; +import { filter } from './filter.mjs'; +import { normalize } from './normalize.mjs'; +import { infer } from './infer.mjs'; +import { emit } from './emit.mjs'; + +function parseArgs(argv) { + const opts = { + run: null, out: null, bodies: null, + include: [], exclude: [], origins: [], + format: 'both', title: null, redact: [], + minSamples: 1, stage: null, + }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + const next = () => argv[++i]; + switch (a) { + case '--run': opts.run = next(); break; + case '--out': opts.out = next(); break; + case '--bodies': opts.bodies = next(); break; + case '--include': opts.include.push(next()); break; + case '--exclude': opts.exclude.push(next()); break; + case '--origins': opts.origins = next().split(',').map(s => s.trim()).filter(Boolean); break; + case '--format': opts.format = next(); break; + case '--title': opts.title = next(); break; + case '--redact': opts.redact = next().split(',').map(s => s.trim()).filter(Boolean); break; + case '--min-samples': opts.minSamples = parseInt(next(), 10); break; + case '--stage': opts.stage = next(); break; + case '-h': case '--help': + printHelp(); 
process.exit(0); + default: + console.error(`unknown arg: ${a}`); + printHelp(); process.exit(2); + } + } + return opts; +} + +function printHelp() { + console.error(`usage: discover.mjs --run [--out ] [--bodies ] + [--include ]... [--exclude ]... + [--origins ] [--format yaml|json|both] + [--title ] [--redact ] [--min-samples ] + [--stage load|filter|normalize|infer|emit] + + --bodies Directory written by \`browse network on\`. When set, response + bodies (and request bodies for non-postData captures) are + joined into the trace by CDP requestId. Without it, the spec + has no response-body schemas (browse cdp doesn't embed bodies).`); +} + +function main() { + const opts = parseArgs(process.argv.slice(2)); + if (!opts.run) { printHelp(); process.exit(2); } + + const runPath = resolveRun(opts.run); + const outDir = opts.out ? path.resolve(opts.out) : path.join(runPath, 'api-spec'); + ensureDir(outDir); + + const stages = opts.stage ? [opts.stage] : ['load', 'filter', 'normalize', 'infer', 'emit']; + + for (const stage of stages) { + const t0 = Date.now(); + let stats; + switch (stage) { + case 'load': stats = load(runPath, outDir, { bodies: opts.bodies }); break; + case 'filter': stats = filter(outDir, { include: opts.include, exclude: opts.exclude, origins: opts.origins }); break; + case 'normalize': stats = normalize(outDir); break; + case 'infer': stats = infer(outDir, { redact: opts.redact }); break; + case 'emit': stats = emit(outDir, { minSamples: opts.minSamples, format: opts.format, title: opts.title }); break; + default: console.error(`unknown stage: ${stage}`); process.exit(2); + } + const ms = Date.now() - t0; + console.log(`[${stage}] ${ms}ms ${JSON.stringify(stats)}`); + } + + console.log(`\noutput: ${outDir}`); + for (const f of ['openapi.yaml', 'openapi.json', 'report.md', 'confidence.json']) { + const p = path.join(outDir, f); + if (fs.existsSync(p)) console.log(` ${path.relative(process.cwd(), p)}`); + } +} + +main(); diff --git 
#!/usr/bin/env node
// Stage 5 — Emit.
//
// Build the OpenAPI 3.1 document, hoist repeated schemas into components, and
// write openapi.yaml, openapi.json, report.md, confidence.json.

import path from 'node:path';
import { readJsonl, writeJson, writeText, intermediatePath, readJson } from './lib/io.mjs';
import { structuralHash } from './lib/schema-merge.mjs';
import { toYaml } from './lib/yaml.mjs';

// Grade the evidence behind one endpoint:
//   low    — ≤2 samples, or any normalization flag (a heuristic had to guess)
//   high   — ≥10 samples AND at least two distinct status codes observed
//   medium — everything in between
function confidenceBucket(ep) {
  const flagged = ep.normalizationFlags.length !== 0;
  if (flagged || ep.sampleCount <= 2) return 'low';
  const wellSampled = ep.sampleCount >= 10;
  const multiStatus = ep.statusCodes.length >= 2;
  return wellSampled && multiStatus ? 'high' : 'medium';
}
// Hoist structurally-identical inline schemas into components.schemas. We use a
// stable structural hash and bias names off the endpoint path so refs are
// readable (e.g. "Item" instead of "Schema7"). Recurses into nested object/array
// schemas so a Post that appears once at the top level and once as the items of
// a list still hoists as a single component.
function buildComponents(endpoints) {
  const byHash = new Map();   // hash -> { name, schema, hint }
  const refCount = new Map(); // hash -> count of sites referencing it

  // OpenAPI 3.1 allows `type` to be an array of types; accept either form.
  function isObjectSchema(s) {
    if (!s || typeof s !== 'object') return false;
    if (s.type === 'object') return true;
    if (Array.isArray(s.type) && s.type.includes('object')) return true;
    return false;
  }
  function isArraySchema(s) {
    if (!s || typeof s !== 'object') return false;
    if (s.type === 'array') return true;
    if (Array.isArray(s.type) && s.type.includes('array')) return true;
    return false;
  }

  // Walk a schema tree, counting every object-schema occurrence by hash and
  // remembering the first naming hint seen for it.
  function visit(schema, hint) {
    if (!schema || typeof schema !== 'object') return;
    if (isObjectSchema(schema)) {
      const h = structuralHash(schema);
      refCount.set(h, (refCount.get(h) || 0) + 1);
      if (!byHash.has(h)) byHash.set(h, { name: null, schema, hint });
      for (const [k, child] of Object.entries(schema.properties || {})) {
        visit(child, propHint(hint, k));
      }
    } else if (isArraySchema(schema) && schema.items) {
      visit(schema.items, hint);
    }
  }

  for (const ep of endpoints) {
    if (ep.requestSchema) visit(ep.requestSchema, schemaHintFromPath(ep.path) + 'Request');
    for (const [, sch] of Object.entries(ep.responseSchemas || {})) {
      visit(sch, schemaHintFromPath(ep.path));
    }
  }

  // Hoist when (a) referenced by ≥ 2 sites, OR (b) it's an object with ≥ 4 properties.
  const components = {};
  let counter = 0;
  for (const [h, info] of byHash.entries()) {
    const refs = refCount.get(h) || 0;
    const propCount = Object.keys(info.schema.properties || {}).length;
    if (refs < 2 && propCount < 4) continue;
    let name = info.hint || `Schema${++counter}`;
    // Loop (not a single `if`): with many identically-hinted schemas, one
    // rename could itself collide with an earlier `${name}_${n}`.
    while (components[name]) name = `${name}_${++counter}`;
    info.name = name;
    components[name] = info.schema;
  }

  // refOrInline rewrites a schema, replacing any nested object schema that
  // matches a hoisted component with a $ref. Arrays have their items rewritten.
  function refOrInline(schema) {
    if (!schema || typeof schema !== 'object') return schema;
    if (isObjectSchema(schema)) {
      const h = structuralHash(schema);
      const info = byHash.get(h);
      if (info && info.name) return { $ref: `#/components/schemas/${info.name}` };
      if (!schema.properties) return schema;
      const rewritten = { ...schema, properties: {} };
      for (const [k, child] of Object.entries(schema.properties)) {
        rewritten.properties[k] = refOrInline(child);
      }
      return rewritten;
    }
    if (isArraySchema(schema) && schema.items) {
      return { ...schema, items: refOrInline(schema.items) };
    }
    return schema;
  }

  // Inline-rewrite the components themselves so nested objects within
  // components also use $refs.
  for (const [name, sch] of Object.entries(components)) {
    if (sch.properties) {
      components[name] = { ...sch, properties: Object.fromEntries(
        Object.entries(sch.properties).map(([k, c]) => [k, refOrInline(c)]),
      )};
    }
  }

  return { components, refOrInline };
}

// Component-name hint derived from a property key ("created_at" -> "CreatedAt").
function propHint(parentHint, key) {
  const cap = key.replace(/[^A-Za-z0-9]/g, '').replace(/^./, c => c.toUpperCase());
  return cap || (parentHint ? parentHint + 'Inner' : 'Schema');
}

// Component-name hint from the last non-parameter path segment
// ("/api/users/{id}" -> "Users").
function schemaHintFromPath(p) {
  if (!p) return 'Schema';
  const parts = p.split('/').filter(s => s && !s.startsWith('{'));
  if (!parts.length) return 'Root';
  const last = parts[parts.length - 1];
  return last.replace(/[^A-Za-z0-9]/g, '').replace(/^./, c => c.toUpperCase()) || 'Schema';
}

// Build one OpenAPI operation object for an endpoint, including request body
// (mutating methods only), per-status responses, and x-* confidence extensions.
function makeOperation(ep, refOrInline) {
  const params = [];
  for (const p of ep.pathParams || []) params.push(p);
  for (const p of ep.queryParams || []) params.push(p);

  const op = {
    summary: `${ep.method} ${ep.path}`,
    operationId: makeOpId(ep),
  };
  if (params.length) op.parameters = params;

  if (ep.requestSchema && (ep.method === 'POST' || ep.method === 'PUT' || ep.method === 'PATCH' || ep.method === 'DELETE')) {
    op.requestBody = {
      content: {
        [ep.requestContentType || 'application/json']: {
          schema: refOrInline(ep.requestSchema),
          ...(ep.requestExample ? { example: ep.requestExample } : {}),
        },
      },
    };
  }

  const responses = {};
  const statuses = ep.statusCodes.length ? ep.statusCodes : [200];
  // infer.mjs picks `responseExample` from a 2xx sample, so attach it to the
  // first 2xx status — not blindly to statusCodes[0], which may be a 4xx.
  const exampleStatus = statuses.find(s => Number(s) >= 200 && Number(s) < 300) ?? statuses[0];
  for (const status of statuses) {
    const ct = (ep.responseContentTypes && ep.responseContentTypes[status]) || 'application/json';
    const schema = ep.responseSchemas?.[String(status)];
    const entry = { description: defaultDescriptionFor(status) };
    const media = {
      ...(schema ? { schema: refOrInline(schema) } : {}),
      ...(status === exampleStatus && ep.responseExample ? { example: ep.responseExample } : {}),
    };
    // Only attach `content` when there is something to say — previously an
    // empty media object ({}) was emitted for statuses with no schema.
    if (Object.keys(media).length) entry.content = { [ct]: media };
    responses[String(status)] = entry;
  }
  op.responses = responses;

  // Extensions
  op['x-confidence'] = {
    samples: ep.sampleCount,
    statusCodes: ep.statusCodes,
    normalizationFlags: ep.normalizationFlags,
    confidence: confidenceBucket(ep),
  };
  op['x-sample-count'] = ep.sampleCount;
  if (ep.observedAuthHeaders?.length) op['x-observed-auth'] = ep.observedAuthHeaders;
  op['x-origin'] = ep.origin;

  return op;
}

// Human description for a status code entry.
function defaultDescriptionFor(status) {
  const n = Number(status);
  if (n >= 200 && n < 300) return 'Success';
  if (n >= 300 && n < 400) return 'Redirect';
  if (n === 400) return 'Bad request';
  if (n === 401) return 'Unauthorized';
  if (n === 403) return 'Forbidden';
  if (n === 404) return 'Not found';
  if (n >= 400 && n < 500) return 'Client error';
  if (n >= 500) return 'Server error';
  return `Status ${status}`;
}

// Deterministic operationId from method + templated path.
function makeOpId(ep) {
  const parts = ep.path.split('/').filter(Boolean).map(s => s.replace(/[{}]/g, ''));
  const tail = parts.map(p => p.replace(/[^A-Za-z0-9]/g, '_')).join('_');
  return `${ep.method.toLowerCase()}_${tail || 'root'}`;
}

// Stage entry point: read endpoints.with-schemas.jsonl, apply the min-samples
// cut, assemble the OpenAPI document, and write all four deliverables.
// Returns summary stats for the dispatcher's log line.
export function emit(outDir, opts = {}) {
  // Accept only a real finite number for minSamples. (`|| 1` broke an
  // explicit 0, and `?? 1` would still let NaN through — `sampleCount >= NaN`
  // is always false and would drop every endpoint.)
  const minSamples = Number.isFinite(opts.minSamples) ? opts.minSamples : 1;
  const format = opts.format || 'both';
  const titleOverride = opts.title || null;

  const endpoints = readJsonl(intermediatePath(outDir, 'endpoints.with-schemas.jsonl'));
  const kept = endpoints.filter(e => e.sampleCount >= minSamples);
  const dropped = endpoints.filter(e => e.sampleCount < minSamples);

  // Servers: one entry per distinct origin, sorted by frequency.
  const originCounts = new Map();
  for (const e of kept) originCounts.set(e.origin, (originCounts.get(e.origin) || 0) + e.sampleCount);
  const servers = [...originCounts.entries()].sort((a, b) => b[1] - a[1]).map(([url]) => ({ url }));

  const primary = servers[0]?.url || '';
  const title = titleOverride || (primary ? `${new URL(primary).host} (discovered)` : 'Discovered API');

  const { components, refOrInline } = buildComponents(kept);

  // Build paths: one keyed entry per templated path; each method becomes an
  // operation. When the same (path, method) is observed on multiple origins
  // (common for third-party analytics endpoints fanned across vendors), keep
  // the highest-sample-count operation and record the other origins under
  // `x-also-served-from` so no data is silently dropped.
  const paths = {};
  const collisions = {}; // pathKey -> [{origin, samples}]
  for (const ep of kept) {
    const m = ep.method.toLowerCase();
    if (!paths[ep.path]) paths[ep.path] = {};
    const existing = paths[ep.path][m];
    if (!existing) {
      paths[ep.path][m] = makeOperation(ep, refOrInline);
    } else {
      const key = `${m} ${ep.path}`;
      if (!collisions[key]) collisions[key] = [{ origin: existing['x-origin'], samples: existing['x-sample-count'] }];
      collisions[key].push({ origin: ep.origin, samples: ep.sampleCount });
      if (ep.sampleCount > (existing['x-sample-count'] || 0)) {
        paths[ep.path][m] = makeOperation(ep, refOrInline);
      }
    }
  }
  for (const [key, origins] of Object.entries(collisions)) {
    // key is `${method} ${path}`; URL paths cannot contain a raw space.
    const [m, p] = key.split(' ');
    const op = paths[p][m];
    const winner = op['x-origin'];
    op['x-also-served-from'] = origins.filter(o => o.origin !== winner).map(o => o.origin);
  }

  const doc = {
    openapi: '3.1.0',
    info: {
      title,
      version: '0.1.0-discovered',
      description: 'Spec discovered from a browser-trace capture by the browser-reverse skill. Inductive, not contractual — see `report.md` and `x-confidence` extensions for caveats.',
    },
    servers,
    paths,
  };
  if (Object.keys(components).length) doc.components = { schemas: components };

  if (format === 'yaml' || format === 'both') {
    writeText(path.join(outDir, 'openapi.yaml'), toYaml(doc));
  }
  if (format === 'json' || format === 'both') {
    writeJson(path.join(outDir, 'openapi.json'), doc);
  }

  // confidence.json — includes dropped endpoints too, with includedInSpec=false.
  const confidence = {
    endpoints: endpoints.map(ep => ({
      key: ep.endpointKey,
      samples: ep.sampleCount,
      statusCodes: ep.statusCodes,
      requestBodyKnown: ep.requestBodyKnown,
      responseBodyKnown: ep.responseBodyKnown,
      normalizationFlags: ep.normalizationFlags,
      confidence: confidenceBucket(ep),
      includedInSpec: ep.sampleCount >= minSamples,
    })),
  };
  writeJson(path.join(outDir, 'confidence.json'), confidence);

  // report.md
  const redaction = readJson(intermediatePath(outDir, 'redaction-stats.json'), { headers: 0, bodyKeys: 0, bodyValues: 0 });
  writeText(path.join(outDir, 'report.md'), buildReport({ kept, dropped, servers, redaction, minSamples }));

  return {
    endpoints: kept.length,
    droppedLowSample: dropped.length,
    servers: servers.length,
    components: Object.keys(components).length,
  };
}

// Render the human-readable report: servers, endpoint table, dropped list,
// coverage caveats, redaction counts, and suggested follow-up flows.
function buildReport({ kept, dropped, servers, redaction, minSamples }) {
  const lines = [];
  lines.push('# Browser-reverse: discovered API\n');
  lines.push('## Servers\n');
  for (const s of servers) lines.push(`- ${s.url}`);
  if (!servers.length) lines.push('_(none)_');
  lines.push('');

  lines.push('## Endpoints\n');
  lines.push('| Method | Path | Samples | Statuses | Confidence | Flags |');
  lines.push('|---|---|---|---|---|---|');
  const sorted = [...kept].sort((a, b) => a.path.localeCompare(b.path) || a.method.localeCompare(b.method));
  for (const ep of sorted) {
    const flags = ep.normalizationFlags.length ? ep.normalizationFlags.join(', ') : '—';
    lines.push(`| ${ep.method} | \`${ep.path}\` | ${ep.sampleCount} | ${ep.statusCodes.join(', ') || '—'} | ${confidenceBucket(ep)} | ${flags} |`);
  }
  if (!kept.length) lines.push('| — | — | — | — | — | — |');
  lines.push('');

  if (dropped.length) {
    lines.push(`## Dropped (below --min-samples=${minSamples})\n`);
    for (const ep of dropped) lines.push(`- \`${ep.method} ${ep.path}\` (${ep.sampleCount} sample${ep.sampleCount === 1 ? '' : 's'})`);
    lines.push('');
  }

  lines.push('## Coverage caveats\n');
  const noResp = kept.filter(e => !e.responseBodyKnown);
  if (noResp.length) {
    lines.push(`- **${noResp.length}** endpoint${noResp.length === 1 ? '' : 's'} have no response-body schema. \`browse cdp\` does not embed response bodies; pair with \`browse network on\` to capture them.`);
  }
  const singleSample = kept.filter(e => e.sampleCount === 1);
  if (singleSample.length) {
    lines.push(`- **${singleSample.length}** endpoint${singleSample.length === 1 ? '' : 's'} were observed only once. Drive the same flow again to gain confidence.`);
  }
  const noBodyOnPost = kept.filter(e => ['POST', 'PUT', 'PATCH'].includes(e.method) && !e.requestBodyKnown);
  if (noBodyOnPost.length) {
    lines.push(`- **${noBodyOnPost.length}** mutation endpoint${noBodyOnPost.length === 1 ? '' : 's'} have no request body in the trace (form-encoded? non-JSON? not captured?).`);
  }

  lines.push('');
  lines.push('## Redaction\n');
  lines.push(`- Headers redacted: ${redaction.headers}`);
  lines.push(`- Body keys redacted: ${redaction.bodyKeys}`);
  lines.push(`- Body values redacted by pattern: ${redaction.bodyValues}`);
  lines.push('');

  lines.push('## Suggested follow-up flows\n');
  const status404 = kept.filter(e => e.statusCodes.includes(404));
  if (status404.length) {
    lines.push(`- Endpoints that returned 404: ${status404.slice(0, 5).map(e => '`' + e.method + ' ' + e.path + '`').join(', ')}. Re-run with valid IDs to widen the success-path schema.`);
  }
  if (singleSample.length) {
    lines.push('- Re-exercise the single-sample endpoints listed above to promote them out of `low` confidence.');
  }
  if (!status404.length && !singleSample.length) {
    lines.push('- The captured flow looks reasonably balanced. Add an authenticated session if the unauth view is what was captured.');
  }
  return lines.join('\n') + '\n';
}

// CLI entry: `node emit.mjs <outDir>` (normally invoked via discover.mjs).
if (import.meta.url === `file://${process.argv[1]}`) {
  const out = process.argv[2];
  if (!out) { console.error('usage: emit.mjs <outDir>'); process.exit(2); }
  const stats = emit(out);
  console.log(`emit: ${stats.endpoints} endpoints, ${stats.servers} server(s), ${stats.components} components${stats.droppedLowSample ? `, ${stats.droppedLowSample} dropped (low sample)` : ''}`);
}
#!/usr/bin/env node
// Stage 2 — Filter.
//
// Apply --include / --exclude / --origins on top of paired.jsonl. Default
// excludes scrub analytics, sourcemaps, fonts, and other static-asset noise
// that the load stage may have let through (e.g. when looksApiUrl matched).

import { readJsonl, writeJsonl, intermediatePath } from './lib/io.mjs';

const DEFAULT_EXCLUDES = [
  // Analytics / RUM / session replay
  /segment\.(io|com)/i,
  /mixpanel\.com/i,
  /google-analytics\.com/i,
  /googletagmanager\.com/i,
  /datadog(hq)?\.com/i,
  /sentry\.io/i,
  /amplitude\.com/i,
  /fullstory\.com/i,
  /hotjar\.com/i,
  /intercom\.io/i,
  /clarity\.ms/i,
  /cloudflareinsights\.com/i,
  /doubleclick\.net/i,
  /facebook\.com\/tr/i,
  // Static assets
  /\.(png|jpe?g|gif|svg|webp|ico|woff2?|ttf|eot|otf|css|map|mp4|webm|mp3|m4a)(\?|$)/i,
  // SW / metadata
  /\/sw\.js(\?|$)/i,
  /\/service-worker\.js(\?|$)/i,
  /\/manifest\.json(\?|$)/i,
  /\/robots\.txt(\?|$)/i,
  /\/favicon\.ico(\?|$)/i,
];

// Host of an origin string, tolerating rows whose `origin` is missing or
// malformed. Previously a single malformed (non-empty) origin made
// `new URL(...)` throw and aborted the whole stage; such rows now simply
// fail the origin match and are counted as droppedOrigin.
function hostOf(origin) {
  if (!origin) return '';
  try { return new URL(origin).host; } catch { return ''; }
}

// Read intermediate/paired.jsonl, apply origin allow-list then exclude then
// include regexes (in that order), and write intermediate/filtered.jsonl.
// Returns per-reason drop counts for the dispatcher's log line.
export function filter(outDir, opts = {}) {
  const { include = [], exclude = [], origins = [] } = opts;
  const includeRes = include.map(s => new RegExp(s));
  const excludeRes = [...DEFAULT_EXCLUDES, ...exclude.map(s => new RegExp(s))];
  // Materialize once — the per-row spread of a Set inside the loop was O(n·m).
  const originList = [...new Set(origins)];

  const paired = readJsonl(intermediatePath(outDir, 'paired.jsonl'));
  const out = [];
  let droppedOrigin = 0, droppedExclude = 0, droppedInclude = 0;

  for (const row of paired) {
    if (originList.length) {
      const host = hostOf(row.origin);
      // Exact host or any subdomain of an allowed origin matches.
      const matched = originList.some(o => host === o || host.endsWith('.' + o));
      if (!matched) { droppedOrigin++; continue; }
    }
    if (excludeRes.some(re => re.test(row.url))) { droppedExclude++; continue; }
    if (includeRes.length && !includeRes.some(re => re.test(row.url))) { droppedInclude++; continue; }
    out.push(row);
  }

  writeJsonl(intermediatePath(outDir, 'filtered.jsonl'), out);
  return { kept: out.length, droppedOrigin, droppedExclude, droppedInclude };
}

// CLI entry: `node filter.mjs <outDir>` (normally invoked via discover.mjs).
if (import.meta.url === `file://${process.argv[1]}`) {
  const out = process.argv[2];
  if (!out) { console.error('usage: filter.mjs <outDir>'); process.exit(2); }
  const stats = filter(out);
  console.log(`filter: kept ${stats.kept}, dropped ${stats.droppedExclude} (exclude) ${stats.droppedOrigin} (origin) ${stats.droppedInclude} (include)`);
}
+ +import path from 'node:path'; +import crypto from 'node:crypto'; +import { readJsonl, writeJsonl, writeJson, intermediatePath, samplePath, ensureDir } from './lib/io.mjs'; +import { newProto, ingest, toSchema } from './lib/schema-merge.mjs'; +import { makeRedactor } from './lib/redact.mjs'; + +function pathHash(method, p) { + return crypto.createHash('sha1').update(`${method} ${p}`).digest('hex').slice(0, 10); +} + +function inferAuthHeaders(samples) { + const seen = new Set(); + for (const s of samples) { + for (const k of Object.keys(s.reqHeaders || {})) { + const lk = k.toLowerCase(); + if (lk === 'authorization' || lk === 'x-api-key' || /token/.test(lk) || /^x-.*-auth/.test(lk)) { + seen.add(lk); + } + } + } + return [...seen].sort(); +} + +export function infer(outDir, opts = {}) { + const redactor = makeRedactor({ extra: opts.redact || [] }); + + const endpoints = readJsonl(intermediatePath(outDir, 'endpoints.jsonl')); + const samplesByKey = new Map(); + for (const row of readJsonl(intermediatePath(outDir, 'endpoint-samples.jsonl'))) { + samplesByKey.set(row.endpointKey, row.samples); + } + + ensureDir(path.join(outDir, 'samples')); + const enriched = []; + + for (const ep of endpoints) { + const samples = samplesByKey.get(ep.endpointKey) || []; + const reqProto = newProto(); + const respProtoByStatus = new Map(); // status -> proto + + let pickedReqExample = null; + let pickedRespExample = null; + let pickedReqStatus = null, pickedRespStatus = null; + + for (const s of samples) { + if (s.reqBody != null && typeof s.reqBody === 'object') { + ingest(reqProto, s.reqBody); + if (!pickedReqExample) { pickedReqExample = s.reqBody; pickedReqStatus = s.status; } + } + if (s.respBody != null && typeof s.respBody === 'object') { + const status = s.status ?? 
0; + let p = respProtoByStatus.get(status); + if (!p) { p = newProto(); respProtoByStatus.set(status, p); } + ingest(p, s.respBody); + if (s.status >= 200 && s.status < 300 && !pickedRespExample) { + pickedRespExample = s.respBody; + pickedRespStatus = s.status; + } + } + } + + const requestBodyKnown = reqProto.samples > 0; + const responseBodyKnown = [...respProtoByStatus.values()].some(p => p.samples > 0); + + const requestSchema = requestBodyKnown ? toSchema(reqProto) : null; + const responseSchemas = {}; + for (const [status, p] of respProtoByStatus.entries()) { + responseSchemas[String(status)] = toSchema(p); + } + + // Determine the canonical content-type per role from sample headers. + const reqCT = inferContentType(samples, 'reqHeaders'); + const respCTByStatus = {}; + for (const s of samples) { + const status = s.status ?? 0; + if (!respCTByStatus[status]) respCTByStatus[status] = inferContentType([s], 'respHeaders'); + } + + // Redact once and reuse for both the persisted sample file and the inline + // OpenAPI example. (Calling redactBody twice double-counts redactions.) + const ph = pathHash(ep.method, ep.path); + const reqExample = pickedReqExample != null ? redactor.redactBody(pickedReqExample) : null; + const respExample = pickedRespExample != null ? 
redactor.redactBody(pickedRespExample) : null; + const reqHeaders = redactor.redactHeaders(samples[0]?.reqHeaders || {}); + const respHeaders = redactor.redactHeaders(samples[0]?.respHeaders || {}); + + const example = { + endpoint: ep.endpointKey, + request: { status: pickedReqStatus, headers: reqHeaders, body: reqExample }, + response: { status: pickedRespStatus, headers: respHeaders, body: respExample }, + }; + writeJson(samplePath(outDir, ep.method, ph), example); + + enriched.push({ + ...ep, + pathHash: ph, + requestBodyKnown, + responseBodyKnown, + requestSchema, + responseSchemas, + requestContentType: reqCT, + responseContentTypes: respCTByStatus, + requestExample: reqExample, + responseExample: respExample, + observedAuthHeaders: inferAuthHeaders(samples), + }); + } + + writeJsonl(intermediatePath(outDir, 'endpoints.with-schemas.jsonl'), enriched); + + // Also persist redaction stats for the report. + writeJson(intermediatePath(outDir, 'redaction-stats.json'), redactor.counts); + + return { endpoints: enriched.length, redactor: redactor.counts }; +} + +function inferContentType(samples, headerField) { + for (const s of samples) { + const headers = s[headerField] || {}; + for (const [k, v] of Object.entries(headers)) { + if (k.toLowerCase() === 'content-type') return String(v).split(';')[0].trim(); + } + } + return null; +} + +if (import.meta.url === `file://${process.argv[1]}`) { + const out = process.argv[2]; + if (!out) { console.error('usage: infer.mjs '); process.exit(2); } + const stats = infer(out); + console.log(`infer: ${stats.endpoints} endpoints (redactions: ${stats.redactor.headers}h ${stats.redactor.bodyKeys}k ${stats.redactor.bodyValues}v)`); +} diff --git a/skills/browser-reverse/scripts/lib/io.mjs b/skills/browser-reverse/scripts/lib/io.mjs new file mode 100644 index 00000000..e6e10a82 --- /dev/null +++ b/skills/browser-reverse/scripts/lib/io.mjs @@ -0,0 +1,58 @@ +// File-IO helpers shared across the pipeline. 
// File-IO helpers shared across the pipeline. Mirrors the conventions of
// browser-trace/scripts/lib.mjs. Node stdlib only.

import fs from 'node:fs';
import path from 'node:path';

/** Create directory `p` (and any missing parents); no-op when it exists. */
export function ensureDir(p) {
  fs.mkdirSync(p, { recursive: true });
}

/** Read a .jsonl file into an array. Missing file -> []; malformed or empty
 *  lines are skipped silently (intermediate files are best-effort). */
export function readJsonl(p) {
  if (!fs.existsSync(p)) return [];
  const rows = [];
  for (const raw of fs.readFileSync(p, 'utf8').split('\n')) {
    if (!raw) continue;
    try {
      rows.push(JSON.parse(raw));
    } catch {
      /* skip malformed */
    }
  }
  return rows;
}

/** Write `items` as .jsonl: one JSON object per line, trailing newline,
 *  empty file for an empty array. Creates parent dirs. */
export function writeJsonl(p, items) {
  ensureDir(path.dirname(p));
  let payload = '';
  if (items.length > 0) {
    payload = items.map((item) => JSON.stringify(item)).join('\n') + '\n';
  }
  fs.writeFileSync(p, payload);
}

/** Read a JSON file; returns `fallback` when missing or unparseable. */
export function readJson(p, fallback = null) {
  if (!fs.existsSync(p)) return fallback;
  try {
    return JSON.parse(fs.readFileSync(p, 'utf8'));
  } catch {
    return fallback;
  }
}

/** Pretty-print `obj` to `p` (2-space indent, trailing newline). */
export function writeJson(p, obj) {
  ensureDir(path.dirname(p));
  fs.writeFileSync(p, JSON.stringify(obj, null, 2) + '\n');
}

/** Write raw text to `p`, creating parent dirs as needed. */
export function writeText(p, s) {
  ensureDir(path.dirname(p));
  fs.writeFileSync(p, s);
}

/** Resolve a run argument. An existing directory path wins; otherwise the
 *  argument is treated as a run-id under $O11Y_ROOT (default `.o11y`).
 *  Throws when neither resolves. */
export function resolveRun(runArg) {
  if (fs.existsSync(runArg) && fs.statSync(runArg).isDirectory()) {
    return path.resolve(runArg);
  }
  const root = process.env.O11Y_ROOT || '.o11y';
  const guess = path.join(root, runArg);
  if (fs.existsSync(guess)) return path.resolve(guess);
  throw new Error(`run path not found: ${runArg} (tried ${guess})`);
}

/** Path of an intermediate pipeline artifact under <outDir>/intermediate/. */
export function intermediatePath(outDir, name) {
  return path.join(outDir, 'intermediate', name);
}

/** Path of a redacted sample file under <outDir>/samples/. */
export function samplePath(outDir, method, pathHash) {
  return path.join(outDir, 'samples', `${method.toLowerCase()}__${pathHash}.json`);
}
// Templatize concrete URL paths into OpenAPI path templates.
//
// Strategy: classify each segment in isolation; collisions across samples are
// handled by the caller (normalize.mjs), which groups samples by the resulting
// templated path and falls back to keeping endpoints split when the response
// shape disagrees.

// UUID with any version/variant nibble. The previous pattern pinned version to
// [1-5] and variant to [89ab], which rejected UUIDv6/v7/v8 (RFC 9562) and the
// nil UUID — those segments then fell through every other classifier too
// (the dashes break HEX_RE and B62_RE) and were left un-templatized.
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
const HEX_RE = /^[0-9a-f]{8,}$/i;
const B62_RE = /^[A-Za-z0-9]{8,}$/;
const INT_RE = /^\d+$/;

// Static-looking segments we never template even if they're numeric/hex
// (e.g. version markers like "v1", "v2", short slugs that are real path parts).
const STATIC_HINTS = /^(v\d+|api|graphql|rest|public|private|me|self)$/i;

// Classify one path segment: either { kind: 'static' } or
// { kind: 'param', name, schema }. Checked in specificity order:
// static hints, UUID, integer, long hex, then mixed-case base62.
export function classifySegment(seg) {
  if (!seg) return { kind: 'static' };
  if (STATIC_HINTS.test(seg)) return { kind: 'static' };
  if (UUID_RE.test(seg)) return { kind: 'param', name: 'id', schema: { type: 'string', format: 'uuid' } };
  if (INT_RE.test(seg)) return { kind: 'param', name: 'id', schema: { type: 'integer' } };
  if (HEX_RE.test(seg)) return { kind: 'param', name: 'id', schema: { type: 'string' } };
  if (B62_RE.test(seg) && /[A-Z]/.test(seg) && /[a-z]/.test(seg) && /\d/.test(seg)) {
    return { kind: 'param', name: 'id', schema: { type: 'string' } };
  }
  return { kind: 'static' };
}

// Single-pass templating used during the first sweep — segments are evaluated
// independently. Returns { template, params: [{name, schema, position}] }.
export function templatize(rawPath) {
  const segs = rawPath.split('/');
  const params = [];
  let counter = 0;
  const out = segs.map((seg, i) => {
    if (!seg && i > 0) return seg; // preserve empty segments (e.g. trailing '/')
    const c = classifySegment(seg);
    if (c.kind === 'static') return seg;
    counter++;
    const name = counter === 1 ? c.name : `${c.name}${counter}`;
    params.push({ name, schema: c.schema, position: i });
    return `{${name}}`;
  });
  return { template: out.join('/'), params };
}

// Second pass: given a set of paths that share the same number of segments
// and the same statics in the obvious positions, detect "slug" segments —
// positions that are alpha and *vary* across samples but didn't trip the
// numeric/UUID/hex classifiers in pass 1. Returns the same shape as templatize.
export function templatizeWithSlugs(paths) {
  if (!paths.length) return { template: '', params: [] }; // defensive: empty group
  if (paths.length < 2) return templatize(paths[0]);
  const split = paths.map(p => p.split('/'));
  const len = split[0].length;
  if (!split.every(s => s.length === len)) return templatize(paths[0]);

  const params = [];
  let counter = 0;
  const tpl = [];
  for (let i = 0; i < len; i++) {
    const colSamples = split.map(s => s[i]);
    const first = colSamples[0];
    if (!first && i > 0) { tpl.push(''); continue; }

    const c0 = classifySegment(first);
    if (c0.kind === 'param') {
      counter++;
      const name = counter === 1 ? c0.name : `${c0.name}${counter}`;
      params.push({ name, schema: c0.schema, position: i });
      tpl.push(`{${name}}`);
      continue;
    }

    const distinct = new Set(colSamples);
    // Never slug a column containing a STATIC_HINTS segment: pass 1 refuses
    // to template "v1"/"v2" etc., and pass 2 must agree — otherwise
    // /v1/items and /v2/items would merge into /{slug}/items.
    if (
      distinct.size > 1 &&
      colSamples.every(s => /^[A-Za-z0-9_-]+$/.test(s)) &&
      !colSamples.some(s => STATIC_HINTS.test(s))
    ) {
      counter++;
      const name = counter === 1 ? 'slug' : `slug${counter}`;
      params.push({ name, schema: { type: 'string' }, position: i });
      tpl.push(`{${name}}`);
      continue;
    }

    tpl.push(first);
  }
  return { template: tpl.join('/'), params };
}
// Header names that are always secret, regardless of value.
const HEADER_DENY = new Set([
  'authorization', 'cookie', 'set-cookie', 'x-csrf-token', 'x-xsrf-token',
  'x-api-key', 'proxy-authorization',
]);

// Header-name fragments that also mark a header as secret.
const HEADER_PATTERNS = [/token/i, /secret/i, /signature/i, /session/i];

// Body keys (compared lowercased with _ and - stripped) that are secret.
const KEY_DENY = new Set([
  'password', 'token', 'secret', 'api_key', 'apikey',
  'accesstoken', 'refreshtoken', 'creditcard', 'ssn',
]);

// Value shapes that are redacted wherever they appear.
const JWT_RE = /^eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/;
const EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const PHONE_RE = /^\+?[0-9][0-9\s().-]{6,}[0-9]$/;

/**
 * Build a redactor with an optional list of extra sensitive names (applied to
 * both header names and body keys, lowercased). Returns:
 *   redactHeaders(headers) — copy of headers with secret values blanked,
 *   redactBody(node)       — deep copy with secret keys/values blanked,
 *   counts                 — running totals { headers, bodyKeys, bodyValues }.
 */
export function makeRedactor({ extra = [] } = {}) {
  // One lowercased set serves both header-name and body-key checks, matching
  // the original behavior of adding each extra entry to both deny lists.
  const extraLower = new Set(extra.map((e) => e.toLowerCase()));
  const counts = { headers: 0, bodyKeys: 0, bodyValues: 0 };

  const headerIsSecret = (name) => {
    const key = name.toLowerCase();
    return HEADER_DENY.has(key) || extraLower.has(key) ||
      HEADER_PATTERNS.some((re) => re.test(key));
  };

  const keyIsSecret = (name) => {
    const key = String(name).toLowerCase().replace(/[_-]/g, '');
    return KEY_DENY.has(key) || extraLower.has(key);
  };

  // Only strings of 6+ chars can be value-secrets (JWT / email / phone shapes).
  const valueIsSecret = (v) =>
    typeof v === 'string' && v.length >= 6 &&
    (JWT_RE.test(v) || EMAIL_RE.test(v) || PHONE_RE.test(v));

  function redactHeaders(headers) {
    if (!headers || typeof headers !== 'object') return headers;
    const clean = {};
    for (const [name, value] of Object.entries(headers)) {
      if (headerIsSecret(name)) {
        clean[name] = '';
        counts.headers++;
      } else {
        clean[name] = value;
      }
    }
    return clean;
  }

  function redactBody(node) {
    if (Array.isArray(node)) return node.map(redactBody);
    if (node && typeof node === 'object') {
      const clean = {};
      for (const [key, value] of Object.entries(node)) {
        if (keyIsSecret(key)) {
          clean[key] = '';
          counts.bodyKeys++;
        } else {
          clean[key] = redactBody(value);
        }
      }
      return clean;
    }
    if (valueIsSecret(node)) {
      counts.bodyValues++;
      return '';
    }
    return node;
  }

  return { redactHeaders, redactBody, counts };
}

// --- skills/browser-to-api/scripts/lib/schema-merge.mjs (next file in this patch) ---
// JSON-Schema (draft 2020-12 / OpenAPI 3.1 compatible) inference from sample values.
//
// The merge is associative and idempotent: mergeSchemas(merge(a,b), c) == merge(a, merge(b,c)).
// Required fields are intersected (must be present in every sample). Types are
// unioned. Arrays infer item schemas across all samples. Enum detection runs as
// a final pass once all samples are merged in.

const ENUM_MAX = parseInt(process.env.DISCOVER_ENUM_MAX_DISTINCT || '8', 10);
const ENUM_MIN = parseInt(process.env.DISCOVER_ENUM_MIN_SAMPLES || '5', 10);

const ISO_RE = /^\d{4}-\d{2}-\d{2}([T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:?\d{2})?)?$/;
const URI_RE = /^https?:\/\/\S+$/i;
// Renamed from EMAIL_RE (as in redact.mjs) so the two modules never collide
// when concatenated; same pattern, same behavior.
const EMAIL_FORMAT_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;

// Map a JS value to its JSON type name; numbers are refined to 'integer'
// later, inside ingest().
function jsonType(v) {
  if (v === null) return 'null';
  return Array.isArray(v) ? 'array' : typeof v; // 'string'|'number'|'boolean'|'object'
}

// Best-effort string format sniffing; order matters (uuid before date-time
// before uri before email).
function inferFormat(v) {
  if (typeof v !== 'string') return null;
  if (UUID_RE.test(v)) return 'uuid';
  if (ISO_RE.test(v)) return 'date-time';
  if (URI_RE.test(v)) return 'uri';
  if (EMAIL_FORMAT_RE.test(v)) return 'email';
  return null;
}

// Build a "pre-schema" — captures every observed sample so we can compute
// required/enum/format with global knowledge, then collapse to JSON Schema.
// Fresh accumulator ("proto") for one value position. All observed samples are
// folded in via ingest(); toSchema() later collapses it to JSON Schema.
export function newProto() {
  return {
    types: new Set(),      // observed JSON types ('integer' split out of 'number')
    samples: 0,            // values folded in at this level
    nullCount: 0,          // how many of those were null
    formats: new Map(),    // format name -> count of samples that matched
    values: new Set(),     // distinct primitive values (enum candidates), capped
    valuesCapped: false,   // true once the distinct-value cap was blown
    properties: new Map(), // object key -> child proto
    presence: new Map(),   // object key -> count of samples containing the key
    items: null,           // proto for array items (created lazily)
  };
}

// Beyond this many distinct primitives we stop tracking values entirely —
// the field is clearly not an enum.
const VALUE_CAP = 64;

// Fold one observed value into the accumulator, recursing into objects/arrays.
// Uses jsonType()/inferFormat() declared earlier in this module.
export function ingest(proto, value) {
  proto.samples += 1;
  const kind = jsonType(value);

  if (kind === 'null') {
    proto.types.add('null');
    proto.nullCount += 1;
    return;
  }

  // Numbers are refined: whole numbers count as 'integer'.
  proto.types.add(kind === 'number' && Number.isInteger(value) ? 'integer' : kind);

  switch (kind) {
    case 'string':
    case 'number':
    case 'boolean': {
      if (!proto.valuesCapped) {
        proto.values.add(value);
        if (proto.values.size > VALUE_CAP) {
          proto.values.clear();
          proto.valuesCapped = true;
        }
      }
      if (kind === 'string') {
        const fmt = inferFormat(value);
        if (fmt) proto.formats.set(fmt, (proto.formats.get(fmt) || 0) + 1);
      }
      break;
    }
    case 'object': {
      for (const [key, child] of Object.entries(value)) {
        proto.presence.set(key, (proto.presence.get(key) || 0) + 1);
        let childProto = proto.properties.get(key);
        if (!childProto) {
          childProto = newProto();
          proto.properties.set(key, childProto);
        }
        ingest(childProto, child);
      }
      break;
    }
    case 'array': {
      // The array itself counts as ONE sample at this level (samples++ above);
      // each element is sampled individually on the items proto.
      if (!proto.items) proto.items = newProto();
      for (const element of value) ingest(proto.items, element);
      break;
    }
  }
}

// Fold a batch of samples; returns the proto for chaining.
export function ingestMany(proto, values) {
  for (const v of values) ingest(proto, v);
  return proto;
}

// Convert a proto into a JSON Schema fragment.
// Collapse an accumulator into a JSON Schema fragment. Returns {} for an
// absent/empty proto — with zero samples nothing can be claimed.
export function toSchema(proto) {
  if (!proto || proto.samples === 0) return {};

  const observed = [...proto.types];
  const concrete = observed.filter((t) => t !== 'null');
  const nullable = proto.types.has('null') && concrete.length > 0;
  const withNull = (t) => (nullable ? [t, 'null'] : t);

  // Primitive (possibly nullable) — may carry a format and/or enum.
  if (concrete.length === 1 && concrete[0] !== 'object' && concrete[0] !== 'array') {
    const type = concrete[0];
    const schema = { type: withNull(type) };

    if (type === 'string') {
      // Adopt a format only when it matched >= 80% of the string samples.
      const stringSamples = proto.samples - proto.nullCount;
      if (stringSamples > 0) {
        for (const [fmt, hits] of proto.formats) {
          if (hits / stringSamples >= 0.8) { schema.format = fmt; break; }
        }
      }
    }

    // Enum detection: low cardinality AND meaningful repetition (otherwise
    // every distinct ID across N samples would look like an N-way enum).
    // Keep the short-circuit order: the cap/size checks must run before the
    // ENUM_* thresholds are consulted.
    const valueSamples = proto.samples - proto.nullCount;
    if (!proto.valuesCapped &&
        proto.values.size > 0 &&
        proto.values.size <= ENUM_MAX &&
        valueSamples >= ENUM_MIN &&
        proto.values.size <= Math.max(2, Math.floor(valueSamples / 2))) {
      schema.enum = [...proto.values].sort((a, b) => String(a).localeCompare(String(b)));
    }
    return schema;
  }

  // Object: a key is required only when present in every non-null sample.
  if (concrete.length === 1 && concrete[0] === 'object') {
    const properties = {};
    const required = [];
    const objectSamples = proto.samples - proto.nullCount;
    for (const [key, child] of proto.properties) {
      properties[key] = toSchema(child);
      const seen = proto.presence.get(key) || 0;
      if (seen > 0 && seen === objectSamples) required.push(key);
    }
    const schema = { type: withNull('object') };
    if (Object.keys(properties).length > 0) schema.properties = properties;
    if (required.length > 0) schema.required = required.sort();
    return schema;
  }

  // Array
  if (concrete.length === 1 && concrete[0] === 'array') {
    const schema = { type: withNull('array') };
    if (proto.items) schema.items = toSchema(proto.items);
    return schema;
  }

  // Mixed types — typed union via a "type" array (OpenAPI 3.1 / draft 2020-12 OK).
  return { type: nullable ? [...concrete, 'null'] : concrete };
}

// Convenience: build a schema directly from an array of sample values.
export function inferSchema(samples) {
  return toSchema(ingestMany(newProto(), samples));
}

// Stable structural hash for schema deduplication when hoisting components:
// JSON-like serialization with object keys sorted, so structurally equal
// schemas hash identically regardless of key insertion order.
export function structuralHash(schema) {
  if (!schema || typeof schema !== 'object') return JSON.stringify(schema);
  if (Array.isArray(schema)) {
    return `[${schema.map(structuralHash).join(',')}]`;
  }
  const parts = Object.keys(schema).sort()
    .map((k) => `${JSON.stringify(k)}:${structuralHash(schema[k])}`);
  return `{${parts.join(',')}}`;
}

// --- skills/browser-to-api/scripts/lib/yaml.mjs (next file in this patch) ---
// Minimal YAML emitter for the OpenAPI document we build. Sufficient for the
// shapes we produce (objects, arrays, strings, numbers, booleans, null) without
// pulling in a dep. Strings are conservatively quoted whenever they contain any
// character that would change YAML parsing.
//
// YAML 1.2 reserves certain characters as indicators that cannot start a plain
// scalar (they must be quoted): , [ ] { } # & * ! | > ' " % @ `
// plus ? and : when followed by whitespace, plus - when followed by whitespace.
// We're conservative: if any of those rules might trip, single-quote the string.
const FIRST_CHAR_DENY = /^[,\[\]{}#&*!|>'"%@`]/;
const FIRST_CHAR_AMBIG = /^[-?:]/;
const SAFE_BARE = /^[A-Za-z0-9_./-][A-Za-z0-9 _./@-]*$/;
const RESERVED = new Set([
  'true', 'false', 'null', 'yes', 'no', 'on', 'off', '~',
  'True', 'False', 'Null', 'TRUE', 'FALSE', 'NULL',
]);

// One indentation step. It is deliberately exactly as wide as the '- '
// sequence indicator: that guarantees the continuation lines of a block item
// line up with the item's first key (see the array branch of emit()).
const INDENT = '  ';

// Quote a string for YAML output when leaving it plain would change parsing:
// empty strings, reserved words, number-like strings, or anything tripping
// the indicator/charset rules. Prefers single quotes; falls back to JSON
// (double-quoted) when the value contains ' or a newline.
function quoteScalar(s) {
  if (s === '') return "''";
  if (RESERVED.has(s)) return `'${s}'`;
  if (/^-?\d+(\.\d+)?$/.test(s)) return `'${s}'`;
  if (FIRST_CHAR_DENY.test(s) || FIRST_CHAR_AMBIG.test(s) || !SAFE_BARE.test(s)) {
    if (!s.includes("'") && !s.includes('\n')) return `'${s}'`;
    return JSON.stringify(s); // double-quoted YAML is JSON-compatible
  }
  return s;
}

// Render one scalar. Non-finite numbers (NaN/Infinity) are emitted as quoted
// strings rather than YAML's .nan/.inf forms.
function emitScalar(v) {
  if (v === null || v === undefined) return 'null';
  if (typeof v === 'boolean') return v ? 'true' : 'false';
  if (typeof v === 'number') {
    if (!Number.isFinite(v)) return JSON.stringify(String(v));
    return String(v);
  }
  return quoteScalar(String(v));
}

function isScalar(v) {
  return v === null || v === undefined ||
    typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean';
}

// Recursively emit a node as YAML block style at the given nesting depth.
// Empty collections are emitted in flow style ([] / {}).
//
// FIX: the previous version glued a multi-line item onto its dash with
// `pad + '- ' + inner.trimStart()` (plus a dead `.slice(0)`), which left the
// item's continuation lines at a column not deeper than the dash — with the
// single-space pad that produced continuation keys LEFT of the first key,
// i.e. invalid YAML for any array of multi-key objects (e.g. `parameters:`).
// Now the indent unit is pinned to two spaces and only the first line is
// folded onto the dash, so every line of the item sits at depth+1 — exactly
// two columns past the dash, aligned with the folded first key.
function emit(node, depth) {
  if (isScalar(node)) return emitScalar(node);
  const pad = INDENT.repeat(depth);

  if (Array.isArray(node)) {
    if (node.length === 0) return '[]';
    return node
      .map((item) => {
        if (isScalar(item)) return `${pad}- ${emitScalar(item)}`;
        // Render the item one level deeper, then fold its first line onto
        // the dash line; the remaining lines already carry the depth+1 pad.
        const lines = emit(item, depth + 1).split('\n');
        lines[0] = `${pad}- ${lines[0].trimStart()}`;
        return lines.join('\n');
      })
      .join('\n');
  }

  // Mapping.
  const keys = Object.keys(node);
  if (keys.length === 0) return '{}';
  const out = [];
  for (const k of keys) {
    const v = node[k];
    const keyStr = quoteScalar(k);
    if (isScalar(v)) {
      out.push(`${pad}${keyStr}: ${emitScalar(v)}`);
    } else if (Array.isArray(v) && v.length === 0) {
      out.push(`${pad}${keyStr}: []`);
    } else if (!Array.isArray(v) && Object.keys(v).length === 0) {
      out.push(`${pad}${keyStr}: {}`);
    } else {
      out.push(`${pad}${keyStr}:`);
      out.push(emit(v, depth + 1));
    }
  }
  return out.join('\n');
}

// Serialize an object tree to a YAML document string (trailing newline).
export function toYaml(obj) {
  return emit(obj, 0) + '\n';
}

// --- skills/browser-to-api/scripts/load.mjs (next file in this patch) ---
// (shebang: #!/usr/bin/env node)
// Stage 1 — Load.
//
// Read browser-trace's cdp/network/{requests,responses}.jsonl, pair them by
// requestId, drop preflight + redirects + obvious non-API resource types, and
// write `intermediate/paired.jsonl`.
//
// Optional: a `browse network on` capture directory can be passed via
// `--bodies <dir>` (or stashed under `<run>/cdp/network/bodies/`). Each
// per-request subdir there has request.json + response.json with the actual
// bodies. The browse-network "id" matches the CDP requestId for XHR/Fetch, so
// we join directly on requestId and inject reqBody / respBody into paired rows.

import fs from 'node:fs';
import path from 'node:path';
import { readJsonl, writeJsonl, intermediatePath, ensureDir } from './lib/io.mjs';

// CDP resource types that are presumed API traffic. Anything else must look
// API-ish by URL (see looksApiUrl) to survive the pairing pass.
const KEEP_TYPES = new Set(['XHR', 'Fetch', 'Document']);

// Parse a JSON string, returning the input unchanged when it isn't a string
// or doesn't parse — callers treat "still a string" as "not JSON".
function tryParseJson(s) {
  if (typeof s !== 'string') return s;
  try { return JSON.parse(s); } catch { return s; }
}

// URL heuristic for API-ish endpoints: an /api, /graphql, /rest or /v<N>
// path segment, or a JSON-family file extension.
function looksApiUrl(url) {
  return /\/(api|graphql|rest|v\d+)\b/i.test(url) ||
    /\.(json|jsonl|ndjson)(\?|$)/i.test(url);
}

// Pathname of a URL, or the raw input when it can't be parsed as a URL.
function urlPath(u) {
  try { return new URL(u).pathname; } catch { return u; }
}

// "scheme://host[:port]" of a URL, or null when unparseable.
function urlOrigin(u) {
  try { const x = new URL(u); return `${x.protocol}//${x.host}`; } catch { return null; }
}

// Query params as a flat { name: value } object; {} when the URL is unparseable.
function urlQuery(u) {
  try {
    const x = new URL(u);
    const out = {};
    for (const [k, v] of x.searchParams.entries()) {
      // First value wins for repeats; we record the existence either way.
      // (The guard below keeps the earliest value — the original comment
      // claiming "last value wins" was wrong.)
      if (out[k] === undefined) out[k] = v;
    }
    return out;
  } catch { return {}; }
}

// Walk a `browse network` capture directory and return a Map keyed by the
// CDP requestId, each value `{ reqBody, respBody }`. Bodies that are valid JSON
// are returned parsed; otherwise the raw string is preserved.
function loadBrowseNetworkBodies(bodiesDir) {
  const out = new Map();
  if (!bodiesDir || !fs.existsSync(bodiesDir)) return out;
  const entries = fs.readdirSync(bodiesDir, { withFileTypes: true });
  for (const e of entries) {
    if (!e.isDirectory()) continue;
    const subdir = path.join(bodiesDir, e.name);
    const reqPath = path.join(subdir, 'request.json');
    const respPath = path.join(subdir, 'response.json');
    // request.json is mandatory for a usable entry; response.json is optional.
    if (!fs.existsSync(reqPath)) continue;
    let req, resp;
    try { req = JSON.parse(fs.readFileSync(reqPath, 'utf8')); } catch { continue; }
    try { resp = fs.existsSync(respPath) ? JSON.parse(fs.readFileSync(respPath, 'utf8')) : null; } catch { resp = null; }
    if (!req?.id) continue; // no id — cannot join back to the CDP stream
    const reqBody = req.body != null ? tryParseJson(req.body) : null;
    const respBody = resp?.body != null ? tryParseJson(resp.body) : null;
    out.set(String(req.id), { reqBody, respBody });
  }
  return out;
}

/**
 * Stage-1 entry point: pair CDP request/response events and write
 * intermediate/paired.jsonl under outDir.
 *
 * @param {string} runPath - browser-trace run directory (contains cdp/network/).
 * @param {string} outDir - output directory; intermediate/ is created inside it.
 * @param {{bodies?: string}} [opts] - optional browse-network capture directory.
 * @returns {{count: number, requests: number, responses: number,
 *            bodiesAttached: number, bodiesDir: string|null}} summary stats.
 */
export function load(runPath, outDir, opts = {}) {
  const cdpDir = path.join(runPath, 'cdp', 'network');
  const requests = readJsonl(path.join(cdpDir, 'requests.jsonl'));
  const responses = readJsonl(path.join(cdpDir, 'responses.jsonl'));

  // Body sources: explicit --bodies path > <run>/cdp/network/bodies/ if present
  let bodiesDir = opts.bodies || null;
  if (!bodiesDir) {
    const stashed = path.join(runPath, 'cdp', 'network', 'bodies');
    if (fs.existsSync(stashed)) bodiesDir = stashed;
  }
  const bodyMap = loadBrowseNetworkBodies(bodiesDir);

  // Index responses by requestId; if the trace has duplicates (redirects), the
  // last one wins so the terminal status code is what we keep.
  const respByReq = new Map();
  for (const ev of responses) {
    const rid = ev?.params?.requestId;
    if (rid) respByReq.set(rid, ev);
  }

  const paired = [];
  for (const ev of requests) {
    const p = ev?.params;
    if (!p?.request) continue;

    const method = p.request.method;
    const url = p.request.url;
    if (!url || !method) continue;
    if (method === 'OPTIONS') continue; // CORS preflight — carries no API shape
    if (url.startsWith('data:') || url.startsWith('blob:')) continue;

    // Resource type: prefer p.type (CDP), fall back to URL heuristic.
    const type = p.type || 'Other';
    if (!KEEP_TYPES.has(type) && !looksApiUrl(url)) continue;

    const respEv = respByReq.get(p.requestId);
    const resp = respEv?.params?.response;
    const status = resp?.status ?? null;
    if (status && status >= 300 && status < 400) {
      // Pure redirect. The browser will issue a follow-up request with the
      // same requestId carrying redirectResponse on it; we already record the
      // post-redirect resource via the next requestWillBeSent. Drop the
      // intermediate.
      continue;
    }

    // Case-insensitive content-type lookup (header casing varies by server).
    const contentType = resp?.headers
      ? Object.entries(resp.headers).find(([k]) => k.toLowerCase() === 'content-type')?.[1] ?? null
      : null;

    let reqBody = p.request.postData ? tryParseJson(p.request.postData) : null;
    let respBody = null;

    // Augment with browse-network bodies when present. Match by requestId
    // (the browse-network entry's `id` IS the CDP requestId for XHR/Fetch).
    const captured = bodyMap.get(String(p.requestId));
    if (captured) {
      if (reqBody == null && captured.reqBody != null) reqBody = captured.reqBody;
      if (captured.respBody != null) respBody = captured.respBody;
    }

    paired.push({
      requestId: p.requestId,
      method,
      url,
      origin: urlOrigin(url),
      path: urlPath(url),
      query: urlQuery(url),
      status,
      type,
      contentType,
      reqHeaders: p.request.headers || {},
      reqBody,
      respHeaders: resp?.headers || {},
      respBody,
      // CDP wallTime is seconds (float); store epoch milliseconds.
      ts: typeof p.wallTime === 'number' ? Math.round(p.wallTime * 1000) : null,
    });
  }

  ensureDir(path.join(outDir, 'intermediate'));
  writeJsonl(intermediatePath(outDir, 'paired.jsonl'), paired);
  return {
    count: paired.length,
    requests: requests.length,
    responses: responses.length,
    bodiesAttached: paired.filter(r => r.respBody != null).length,
    bodiesDir,
  };
}

// CLI entry point when executed directly (not imported).
if (import.meta.url === `file://${process.argv[1]}`) {
  const [run, out, bodies] = process.argv.slice(2);
  // NOTE(review): the usage string appears to have lost "<run> <out>"
  // placeholders during extraction — confirm against the repo.
  if (!run || !out) { console.error('usage: load.mjs [bodies-dir]'); process.exit(2); }
  const stats = load(run, out, { bodies });
  console.log(`load: ${stats.count} paired (from ${stats.requests} req / ${stats.responses} resp)${stats.bodiesAttached ? `, ${stats.bodiesAttached} response bodies attached` : ''}`);
}
diff --git a/skills/browser-reverse/scripts/normalize.mjs b/skills/browser-reverse/scripts/normalize.mjs new file mode 100644 index 00000000..e8a7e3ce --- /dev/null +++ b/skills/browser-reverse/scripts/normalize.mjs @@ -0,0 +1,128 @@ +#!/usr/bin/env node +// Stage 3 — Normalize.
//
// Group paired samples by (origin, method, templated path), collect query-param
// schemas, and detect when normalization is collapsing structurally divergent
// endpoints (flagged for the report).

import { readJsonl, writeJsonl, intermediatePath } from './lib/io.mjs';
import { templatize, templatizeWithSlugs } from './lib/path-template.mjs';

// Lightweight type inference for query-string values (always strings on the
// wire, but we can hint). Checks run narrowest-first: integer, then number,
// then boolean, falling back to string.
function inferQueryType(values) {
  if (values.every(v => /^-?\d+$/.test(v))) return { type: 'integer' };
  if (values.every(v => /^-?\d+(\.\d+)?$/.test(v))) return { type: 'number' };
  if (values.every(v => v === 'true' || v === 'false')) return { type: 'boolean' };
  return { type: 'string' };
}

// A coarse "shape signature" used to detect when two raw paths that
// templatize to the same template actually behave differently: the sorted set
// of base content-types joined with the sorted set of status classes (2xx/4xx/…).
function statusSignature(rows) {
  const ct = new Set(rows.map(r => (r.contentType || '').split(';')[0].trim().toLowerCase()).filter(Boolean));
  const status = new Set(rows.map(r => (r.status != null ? Math.floor(r.status / 100) + 'xx' : 'none')));
  return [...ct].sort().join(',') + '|' + [...status].sort().join(',');
}

/**
 * Stage-3 entry point: read intermediate/filtered.jsonl, group rows into
 * endpoint records, and write intermediate/endpoints.jsonl plus an
 * endpoint-samples.jsonl sidecar with the raw sample rows.
 *
 * @param {string} outDir - pipeline output directory containing intermediate/.
 * @returns {{endpoints: number}} count of endpoint records written.
 */
export function normalize(outDir) {
  const filtered = readJsonl(intermediatePath(outDir, 'filtered.jsonl'));

  // Pass 1: bucket by (origin, method, single-pass template).
  const buckets = new Map();
  for (const row of filtered) {
    const t = templatize(row.path);
    const key = `${row.method} ${row.origin}${t.template}`;
    let b = buckets.get(key);
    if (!b) { b = { origin: row.origin, method: row.method, template: t.template, params: t.params, rows: [], rawPaths: new Set() }; buckets.set(key, b); }
    b.rows.push(row);
    b.rawPaths.add(row.path);
  }

  // Pass 2: re-templatize each bucket using its raw-path set so slugs can be
  // detected. This may further collapse buckets that share the same underlying
  // template once slugs are recognized.
  const refined = new Map();
  for (const [, b] of buckets) {
    const rawPaths = [...b.rawPaths];
    // Slug detection needs at least two distinct raw paths to compare.
    const t = rawPaths.length > 1 ? templatizeWithSlugs(rawPaths) : { template: b.template, params: b.params };
    const key = `${b.method} ${b.origin}${t.template}`;
    let r = refined.get(key);
    if (!r) {
      r = { origin: b.origin, method: b.method, template: t.template, params: t.params, rows: [], rawPaths: new Set(), originalKeys: [] };
      refined.set(key, r);
    }
    r.rows.push(...b.rows);
    for (const p of b.rawPaths) r.rawPaths.add(p);
    // Remember each contributing pass-1 template plus its shape signature so
    // divergent collapses can be flagged below.
    r.originalKeys.push({ template: b.template, sig: statusSignature(b.rows) });
  }

  // Build endpoint records.
  const endpoints = [];
  for (const [, e] of refined) {
    const flags = [];

    // Divergent-shape check: if the bucket was collapsed from multiple pass-1
    // templates that had structurally different responses, flag it.
    const sigs = new Set(e.originalKeys.map(k => k.sig));
    if (sigs.size > 1) flags.push('divergent-response-shape');

    if (e.rows.length === 1) flags.push('single-sample');
    const statuses = new Set(e.rows.map(r => r.status).filter(s => s != null));
    if (statuses.size === 1) flags.push('single-status');
    const cts = new Set(e.rows.map(r => (r.contentType || '').split(';')[0].trim()).filter(Boolean));
    if (cts.size > 1) flags.push('mixed-content-types');
    const withBody = e.rows.filter(r => r.reqBody != null).length;
    if (withBody > 0 && withBody < e.rows.length) flags.push('request-body-only-on-some-samples');

    // Query parameter schema: collect names + sample values.
    const qSamples = new Map();
    for (const r of e.rows) {
      for (const k of Object.keys(r.query || {})) {
        if (!qSamples.has(k)) qSamples.set(k, []);
        qSamples.get(k).push(r.query[k]);
      }
    }
    const queryParams = [];
    for (const [name, values] of qSamples.entries()) {
      // "required" only when every sample of the endpoint carried the param.
      const present = e.rows.filter(r => name in (r.query || {})).length;
      queryParams.push({
        name,
        in: 'query',
        required: present === e.rows.length,
        schema: inferQueryType(values),
      });
    }

    endpoints.push({
      endpointKey: `${e.method} ${e.origin}${e.template}`,
      origin: e.origin,
      method: e.method,
      path: e.template,
      pathParams: e.params.map(p => ({ name: p.name, in: 'path', required: true, schema: p.schema })),
      queryParams,
      statusCodes: [...new Set(e.rows.map(r => r.status).filter(s => s != null))].sort((a, b) => a - b),
      sampleRows: e.rows, // kept on the in-memory record; trimmed before write
      sampleCount: e.rows.length,
      rawPaths: [...e.rawPaths],
      normalizationFlags: flags,
    });
  }

  // Drop the heavy in-memory rows from the persisted form; infer.mjs needs
  // them so we keep a parallel sidecar file.
  const persisted = endpoints.map(({ sampleRows, ...rest }) => rest);
  writeJsonl(intermediatePath(outDir, 'endpoints.jsonl'), persisted);

  const sidecar = endpoints.map(e => ({ endpointKey: e.endpointKey, samples: e.sampleRows }));
  writeJsonl(intermediatePath(outDir, 'endpoint-samples.jsonl'), sidecar);

  return { endpoints: endpoints.length };
}

// CLI entry point when executed directly (not imported).
if (import.meta.url === `file://${process.argv[1]}`) {
  const out = process.argv[2];
  // NOTE(review): the usage string appears to have lost its "<outDir>"
  // placeholder during extraction — confirm against the repo.
  if (!out) { console.error('usage: normalize.mjs '); process.exit(2); }
  const stats = normalize(out);
  console.log(`normalize: ${stats.endpoints} endpoints`);
}
From 9446f9136f9d4dced5dce8e4bbe33e88316d9e41 Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Wed, 29 Apr 2026 13:35:50 -0700 Subject: [PATCH 2/6] Address PR #88 review: rename to browser-to-api, drop lift doc, fix bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renaming and doc cleanup (per shrey150): - Rename skill from `browser-reverse` to `browser-to-api`. Updates SKILL.md frontmatter + heading, package.json, REFERENCE.md heading, the OpenAPI doc's `info.description`, and the report.md heading. - Fix the stale `discover-api-spec` reference in SKILL.md's composition diagram (left over from an earlier rename). - Drop `BODY-CAPTURE-LIFT.md` from the PR; it's a separate proposal. - Remove the `exec.sendFile()` reference in SKILL.md (browserbase-internal, not a generic skill primitive). - REFERENCE.md restructured to lead with the script/CLI/file-format reference rather than an architecture intro. Pipeline diagram dropped. Bug fixes (per Cursor Bugbot): - `filter.mjs`: rework precedence so `--include` actually rescues URLs that would be hit by a default exclude, matching the documented contract. User `--exclude` still wins. Added a unit-style test path. - `infer.mjs`: skip response-body samples whose CDP status is null.
Previously they were keyed under `"0"` but `emit.mjs` only iterates `ep.statusCodes` (which excludes nulls), silently discarding the body. - `load.mjs`: fix the comment in `urlQuery()` — code is first-value-wins, not last-value-wins. Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/browser-reverse/BODY-CAPTURE-LIFT.md | 118 ------------------ .../REFERENCE.md | 21 +--- .../SKILL.md | 16 +-- .../package.json | 2 +- .../scripts/discover.mjs | 0 .../scripts/emit.mjs | 4 +- .../scripts/filter.mjs | 16 ++- .../scripts/infer.mjs | 9 +- .../scripts/lib/io.mjs | 0 .../scripts/lib/path-template.mjs | 0 .../scripts/lib/redact.mjs | 0 .../scripts/lib/schema-merge.mjs | 0 .../scripts/lib/yaml.mjs | 0 .../scripts/load.mjs | 4 +- .../scripts/normalize.mjs | 0 15 files changed, 36 insertions(+), 154 deletions(-) delete mode 100644 skills/browser-reverse/BODY-CAPTURE-LIFT.md rename skills/{browser-reverse => browser-to-api}/REFERENCE.md (89%) rename skills/{browser-reverse => browser-to-api}/SKILL.md (87%) rename skills/{browser-reverse => browser-to-api}/package.json (68%) rename skills/{browser-reverse => browser-to-api}/scripts/discover.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/emit.mjs (98%) rename skills/{browser-reverse => browser-to-api}/scripts/filter.mjs (73%) rename skills/{browser-reverse => browser-to-api}/scripts/infer.mjs (93%) rename skills/{browser-reverse => browser-to-api}/scripts/lib/io.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/lib/path-template.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/lib/redact.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/lib/schema-merge.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/lib/yaml.mjs (100%) rename skills/{browser-reverse => browser-to-api}/scripts/load.mjs (96%) rename skills/{browser-reverse => browser-to-api}/scripts/normalize.mjs (100%) diff --git a/skills/browser-reverse/BODY-CAPTURE-LIFT.md 
b/skills/browser-reverse/BODY-CAPTURE-LIFT.md deleted file mode 100644 index 514f686e..00000000 --- a/skills/browser-reverse/BODY-CAPTURE-LIFT.md +++ /dev/null @@ -1,118 +0,0 @@ -# Adding Response Body Capture to `browser-trace` — Lift Estimate - -> Grounded in the real source as of `browserbase/skills@main`. I read `SKILL.md`, -> `REFERENCE.md`, `lib.mjs`, `start-capture.mjs`, `snapshot-loop.mjs`, `bisect-cdp.mjs`, -> `bb-capture.mjs`, `bb-finalize.mjs`, `stop-capture.mjs`. - ---- - -## 1. Why this is harder than it looks - -`browser-trace` today does the simplest possible thing: it shells out to `browse cdp --domain Network --domain Console ...`, which emits one CDP event per line to stdout, and that stream is captured verbatim into `cdp/raw.ndjson`. **No CDP commands are issued back into the session.** The capture is fully one-way and stateless. - -Response bodies break that model. Bodies aren't pushed by CDP — they have to be **pulled** with a `Network.getResponseBody` request, keyed by `requestId`, **before the renderer evicts the resource**. Eviction is non-deterministic but typically happens within seconds of the response completing on a busy page. That means body capture has to be: - -- **Live** — runs concurrently with the trace, can't be done from `raw.ndjson` after the fact. -- **Bidirectional** — issues CDP commands, not just reads events. -- **Fast** — the gap between `Network.loadingFinished` and the `getResponseBody` call must be small. -- **Selective** — fetching every body would 10–100x the disk footprint and add real load on the renderer. - -This is a meaningful expansion of the skill's current architecture, not a tweak. - ---- - -## 2. The lift, by component - -### 2.1 New companion script — `scripts/body-capture.mjs` — **NEW, ~200 lines** - -The `browse cdp` subprocess can't be modified (it's an external binary), so body capture has to be a **second CDP client** running in parallel, attached to the same target. 
Same model as `snapshot-loop.mjs`, but instead of polling screenshots it subscribes to `Network.responseReceived` + `Network.loadingFinished` and issues `Network.getResponseBody` for matching requests. - -Responsibilities: - -- Open its own WebSocket to the CDP target (or use `browse --ws ...` if it supports request/response, which from the snapshot loop it does for one-shot commands — body capture is a long-lived subscription, so likely a raw `ws://` client). -- Maintain an in-memory map of `requestId → { url, method, contentType, status, type }` keyed off `Network.requestWillBeSent` + `Network.responseReceived`. -- On `Network.loadingFinished`: if the request matches the filter (default: `fetch`/`xhr` resourceType, JSON or form content-type, size cap), call `Network.getResponseBody` and write the result to `/cdp/network/bodies/.json`. -- Track failures (eviction races, out-of-process iframes that can't be addressed, sizes over the cap) in a sidecar `bodies/_skipped.jsonl`. -- SIGTERM-clean shutdown so `stop-capture.mjs` doesn't have to know about it specifically (it would just need to also kill `.bodies.pid`). - -**Risk:** `Network.getResponseBody` requires a session-attached target. For OOPIFs (cross-origin iframes), you have to use `Target.attachToTarget` first and route the command on the resulting session. Non-trivial. Realistic v1 punts on iframes and just records the skip reason. - -**Dependencies:** zero — Node stdlib has `ws` via `undici` /`WebSocket` (Node 22+) or you bundle a tiny WS client. The skill is currently zero-dep, so this constraint matters. - -### 2.2 `start-capture.mjs` — **MODIFIED, ~10 lines** - -Add an optional third detached subprocess: if `O11Y_BODIES=1` (or a `--bodies` flag), spawn `body-capture.mjs` the same way `snapshot-loop.mjs` is spawned, write `.bodies.pid`. Default off so existing users see no change. - -### 2.3 `stop-capture.mjs` — **MODIFIED, ~3 lines** - -Already loops over `['.cdp.pid', '.loop.pid']`. 
Add `'.bodies.pid'` to the list. Trivial. - -### 2.4 `bisect-cdp.mjs` — **MODIFIED, ~15 lines** - -Currently the only "network" buckets are CDP **events** (`requestWillBeSent`, `responseReceived`, `loadingFinished`, `loadingFailed`, `webSocket`). Bodies are content, not events, so they don't fit the existing `BUCKETS` predicate model. - -Two sensible places to expose them: - -1. **As-is on disk** — `cdp/network/bodies/.json` already exists from body-capture; bisect doesn't have to do anything. Per-page slicing (`cdp/pages//network/bodies/`) is the only real work: walk `network/responses.jsonl` for each page, find the matching body files, hard-link or copy them into the per-page dir. ~10 lines. -2. **Index** — emit `cdp/network/bodies-index.jsonl` mapping `{requestId, url, method, status, contentType, sizeBytes, bodyPath}` so query/grep tools don't have to walk the dir. ~5 lines. - -### 2.5 `lib.mjs` — **MODIFIED, ~5 lines** - -Add a helper `readBody(runDir, requestId) → { contentType, body, base64? }`. Useful for the new skill's `infer.mjs` and for `query.mjs`. - -### 2.6 `query.mjs` — **MODIFIED, ~20 lines** - -Add a `bodies` subcommand: list captured bodies, filter by URL/status/content-type, dump a body to stdout. Optional but cheap. - -### 2.7 `bb-capture.mjs` / `bb-finalize.mjs` — **NO CHANGES** - -They delegate to `start-capture.mjs` / `stop-capture.mjs`. Inherits body capture for free. - -### 2.8 `SKILL.md` / `REFERENCE.md` — **MODIFIED, ~50 lines** - -Document: -- The new flag/env var. -- New on-disk layout (`cdp/network/bodies/`, `bodies-index.jsonl`). -- Caveats: eviction races, OOPIF gaps, size cap, default-off. -- Filter knobs (`O11Y_BODY_TYPES`, `O11Y_BODY_MAX_KB`, `O11Y_BODY_INCLUDE_PATTERN`). -- Privacy implication: bodies can contain user data. Off by default for a reason. - ---- - -## 3. 
Total lift - -| Component | Type | Lines | Risk | -|---|---|---|---| -| `scripts/body-capture.mjs` | new | ~200 | **medium** — WS client, eviction races, OOPIF | -| `scripts/start-capture.mjs` | modify | ~10 | low | -| `scripts/stop-capture.mjs` | modify | ~3 | low | -| `scripts/bisect-cdp.mjs` | modify | ~15 | low | -| `scripts/lib.mjs` | modify | ~5 | low | -| `scripts/query.mjs` | modify | ~20 | low | -| `SKILL.md` + `REFERENCE.md` | modify | ~50 | low | -| **Total** | | **~300 LOC** | | - -**Calendar estimate for one engineer who knows CDP:** ~2–3 days. -- Day 1: WS client + filter + happy-path body capture against Chromium local. -- Day 2: OOPIF target attachment, size cap, skip-tracking, integration with `start`/`stop`. -- Day 3: bisect integration, query subcommand, docs, end-to-end test against a Browserbase remote session. - -**Calendar estimate without prior CDP fluency:** ~1 week. The eviction race and OOPIF target plumbing are the parts that bite. - ---- - -## 4. Risks worth calling out in the PR - -1. **Privacy.** Bodies can contain bearer tokens, PII, partial PII even when redacted at the header layer. Default-off + an opt-in flag is non-negotiable. The redaction story has to live in the consuming skill (e.g. `discover-api-spec`), not in the capture layer — capture should write what it sees. -2. **Performance.** `Network.getResponseBody` blocks on the renderer. For a page making 200 XHR requests, naive capture serializes every one of them. Mitigations: hard cap on concurrent in-flight `getResponseBody` calls (e.g. 8), aggressive content-type filter, default size cap (256 KB). -3. **Disk.** A 10-minute Browserbase session with body capture on can easily produce 100–500 MB of bodies. The skill should default to JSON-only + 256 KB cap and let users opt into more. -4. **Eviction races.** Some bodies will fail with `-32000 No data found for resource`. This is normal. `bodies/_skipped.jsonl` should record them so consumers know coverage isn't 100%. -5. 
**WebSocket frame data.** `Network.webSocketFrameSent` / `Received` already include the payload inline — no `getResponseBody` needed. v1 should explicitly punt on WebSocket bodies (already in the events bucket) to scope down. - ---- - -## 5. Recommendation - -Building this **into** `browser-trace` is the right call **if** the maintainers are willing to add a (default-off) feature with privacy and disk caveats. Putting it in a sibling skill is also viable but less clean — every consumer skill (api-spec, security audits, etc.) would have to reinvent the WS plumbing. - -The cleanest framing: **bodies are part of the trace, off by default, on with a flag.** Same shape as how Chrome DevTools handles "Preserve log" / "Disable cache" — capture options, not a separate tool. diff --git a/skills/browser-reverse/REFERENCE.md b/skills/browser-to-api/REFERENCE.md similarity index 89% rename from skills/browser-reverse/REFERENCE.md rename to skills/browser-to-api/REFERENCE.md index e8a9ba3c..02871d7b 100644 --- a/skills/browser-reverse/REFERENCE.md +++ b/skills/browser-to-api/REFERENCE.md @@ -1,25 +1,10 @@ -# Browser Reverse — Reference +# Browser to API — Reference -Technical reference for the discovery pipeline, file formats, and configuration. - -## Pipeline - -``` -browser-trace run discover.mjs -.o11y//cdp/network/ ┌─────────┐ ┌────────┐ ┌──────────┐ ┌─────────┐ ┌──────┐ - requests.jsonl ──────────▶ │ load │ ─▶ │ filter │ ─▶ │ normalize│ ─▶ │ infer │ ─▶ │ emit │ - responses.jsonl └─────────┘ └────────┘ └──────────┘ └─────────┘ └──────┘ - paired filtered endpoints endpoints openapi - .jsonl .jsonl .jsonl .with- .yaml - schemas report.md - .jsonl -``` - -Each stage is a discrete script that reads a file and writes a file. `discover.mjs` is the dispatcher; pass `--stage ` to run a single stage for debugging. +Exhaustive reference for every script, flag, file format, and configuration knob the skill exposes. ## Scripts -All scripts are Node ESM (`type: module`). 
They depend only on the Node standard library. +All scripts are Node ESM (`type: module`). They depend only on the Node standard library. `discover.mjs` is the top-level dispatcher; the others are stage scripts the dispatcher calls in order. Run an individual stage with `discover.mjs --stage <stage>` for debugging or partial reruns. ### `discover.mjs --run <run> [flags]` diff --git a/skills/browser-reverse/SKILL.md b/skills/browser-to-api/SKILL.md similarity index 87% rename from skills/browser-reverse/SKILL.md rename to skills/browser-to-api/SKILL.md index 6bb98eac..108f1d5f 100644 --- a/skills/browser-reverse/SKILL.md +++ b/skills/browser-to-api/SKILL.md @@ -1,20 +1,20 @@ --- -name: browser-reverse -description: Reverse-engineer a website's HTTP API into a best-effort OpenAPI 3.1 spec by analyzing a `browser-trace` capture. Use when the user wants to discover/extract API endpoints from a browser session, build an OpenAPI doc from network traffic, or document a third-party site's XHR/fetch surface for client integration. +name: browser-to-api +description: Turn a website's observable HTTP traffic into a best-effort OpenAPI 3.1 spec by analyzing a `browser-trace` capture. Use when the user wants to discover/extract API endpoints from a browser session, build an OpenAPI doc from network traffic, or document a third-party site's XHR/fetch surface for client integration. compatibility: "Requires Node 18+ and a `browser-trace` run directory (`.o11y/<run>/`) produced by the sibling `browser-trace` skill. The scripts use only the Node standard library — no `npm install` step. `jq` is referenced in docs for ad-hoc querying but is not required by the scripts." license: MIT allowed-tools: Bash, Read, Grep --- -# Browser Reverse +# Browser to API -Replay-driven API reverse-engineering. Consume a `browser-trace` capture, pair its CDP request / response events, templatize observed URLs, infer JSON schemas from samples, and emit an **OpenAPI 3.1** document plus a human-readable coverage report.
+Replay-driven API discovery. Consume a `browser-trace` capture, pair its CDP request / response events, templatize observed URLs, infer JSON schemas from samples, and emit an **OpenAPI 3.1** document plus a human-readable coverage report. This skill **does not capture traffic**. It is purely offline post-processing on top of `browser-trace`'s `cdp/network/*.jsonl` buckets. The two skills compose: ``` -browser-trace → .o11y/<run>/cdp/network/{requests,responses}.jsonl -discover-api-spec → .o11y/<run>/api-spec/openapi.yaml + report.md +browser-to-api → .o11y/<run>/api-spec/openapi.yaml + report.md ``` ## When to use @@ -67,7 +67,7 @@ node scripts/discover.mjs --run .o11y/my-site `discover.mjs` auto-detects `<run>/cdp/network/bodies/`. To use a body capture from elsewhere (e.g. didn't snapshot, want the live `browse network` dir), pass `--bodies <dir>` explicitly. -Then deliver the artifacts to the user (`exec.sendFile()` for `openapi.yaml` and `report.md`). +The two primary deliverables are `openapi.yaml` (machine-readable spec) and `report.md` (human-readable coverage summary). ## CLI flags @@ -115,7 +115,7 @@ What changes when bodies are present: - ✅ Request-body schemas — `postData` from CDP is enough; bodies dir is a nice-to-have for non-`postData` cases. - ✅ **Response-body schemas** — fully inferred from real samples. Without bodies you get `{ description, content: }` skeletons. -The report flags every endpoint that has no response-body sample. For a sketch of what it would take to teach `browser-trace` itself to capture response bodies natively (no separate `browse network on` step), see [BODY-CAPTURE-LIFT.md](BODY-CAPTURE-LIFT.md). +The report flags every endpoint that has no response-body sample.
## Limitations diff --git a/skills/browser-reverse/package.json b/skills/browser-to-api/package.json similarity index 68% rename from skills/browser-reverse/package.json rename to skills/browser-to-api/package.json index 86360e3f..58577884 100644 --- a/skills/browser-reverse/package.json +++ b/skills/browser-to-api/package.json @@ -1,5 +1,5 @@ { - "name": "browser-reverse", + "name": "browser-to-api", "version": "0.1.0", "private": true, "type": "module" diff --git a/skills/browser-reverse/scripts/discover.mjs b/skills/browser-to-api/scripts/discover.mjs similarity index 100% rename from skills/browser-reverse/scripts/discover.mjs rename to skills/browser-to-api/scripts/discover.mjs diff --git a/skills/browser-reverse/scripts/emit.mjs b/skills/browser-to-api/scripts/emit.mjs similarity index 98% rename from skills/browser-reverse/scripts/emit.mjs rename to skills/browser-to-api/scripts/emit.mjs index 71f1872c..5ad43272 100644 --- a/skills/browser-reverse/scripts/emit.mjs +++ b/skills/browser-to-api/scripts/emit.mjs @@ -247,7 +247,7 @@ export function emit(outDir, opts = {}) { info: { title, version: '0.1.0-discovered', - description: 'Spec discovered from a browser-trace capture by the browser-reverse skill. Inductive, not contractual — see `report.md` and `x-confidence` extensions for caveats.', + description: 'Spec discovered from a browser-trace capture by the browser-to-api skill. 
Inductive, not contractual — see `report.md` and `x-confidence` extensions for caveats.', }, servers, paths, @@ -290,7 +290,7 @@ export function emit(outDir, opts = {}) { function buildReport({ kept, dropped, servers, redaction, minSamples }) { const lines = []; - lines.push('# Browser-reverse: discovered API\n'); + lines.push('# Discovered API\n'); lines.push('## Servers\n'); for (const s of servers) lines.push(`- ${s.url}`); if (!servers.length) lines.push('_(none)_'); diff --git a/skills/browser-reverse/scripts/filter.mjs b/skills/browser-to-api/scripts/filter.mjs similarity index 73% rename from skills/browser-reverse/scripts/filter.mjs rename to skills/browser-to-api/scripts/filter.mjs index f681c455..9c9bab10 100644 --- a/skills/browser-reverse/scripts/filter.mjs +++ b/skills/browser-to-api/scripts/filter.mjs @@ -35,8 +35,13 @@ const DEFAULT_EXCLUDES = [ export function filter(outDir, opts = {}) { const { include = [], exclude = [], origins = [] } = opts; + // Precedence: + // 1. --origins gates everything; non-matching is dropped. + // 2. User --exclude always wins (explicit user intent). + // 3. Default excludes can be rescued by --include (REFERENCE.md contract). + // 4. When --include is set, anything that doesn't match it is dropped. + const userExcludeRes = exclude.map(s => new RegExp(s)); const includeRes = include.map(s => new RegExp(s)); - const excludeRes = [...DEFAULT_EXCLUDES, ...exclude.map(s => new RegExp(s))]; const originSet = new Set(origins); const paired = readJsonl(intermediatePath(outDir, 'paired.jsonl')); @@ -49,8 +54,13 @@ export function filter(outDir, opts = {}) { const matched = [...originSet].some(o => host === o || host.endsWith('.' 
+ o)); if (!matched) { droppedOrigin++; continue; } } - if (excludeRes.some(re => re.test(row.url))) { droppedExclude++; continue; } - if (includeRes.length && !includeRes.some(re => re.test(row.url))) { droppedInclude++; continue; } + if (userExcludeRes.some(re => re.test(row.url))) { droppedExclude++; continue; } + + const matchesInclude = includeRes.length > 0 && includeRes.some(re => re.test(row.url)); + const matchesDefaultExclude = DEFAULT_EXCLUDES.some(re => re.test(row.url)); + if (matchesDefaultExclude && !matchesInclude) { droppedExclude++; continue; } + if (includeRes.length && !matchesInclude) { droppedInclude++; continue; } + out.push(row); } diff --git a/skills/browser-reverse/scripts/infer.mjs b/skills/browser-to-api/scripts/infer.mjs similarity index 93% rename from skills/browser-reverse/scripts/infer.mjs rename to skills/browser-to-api/scripts/infer.mjs index 87dbf408..33dfed58 100644 --- a/skills/browser-reverse/scripts/infer.mjs +++ b/skills/browser-to-api/scripts/infer.mjs @@ -55,9 +55,12 @@ export function infer(outDir, opts = {}) { if (!pickedReqExample) { pickedReqExample = s.reqBody; pickedReqStatus = s.status; } } if (s.respBody != null && typeof s.respBody === 'object') { - const status = s.status ?? 0; - let p = respProtoByStatus.get(status); - if (!p) { p = newProto(); respProtoByStatus.set(status, p); } + // Skip when we have no status: emit.mjs only renders schemas under + // statuses that appear in ep.statusCodes (which excludes nulls), so + // a body keyed under "0" would be silently discarded. 
+ if (s.status == null) continue; + let p = respProtoByStatus.get(s.status); + if (!p) { p = newProto(); respProtoByStatus.set(s.status, p); } ingest(p, s.respBody); if (s.status >= 200 && s.status < 300 && !pickedRespExample) { pickedRespExample = s.respBody; diff --git a/skills/browser-reverse/scripts/lib/io.mjs b/skills/browser-to-api/scripts/lib/io.mjs similarity index 100% rename from skills/browser-reverse/scripts/lib/io.mjs rename to skills/browser-to-api/scripts/lib/io.mjs diff --git a/skills/browser-reverse/scripts/lib/path-template.mjs b/skills/browser-to-api/scripts/lib/path-template.mjs similarity index 100% rename from skills/browser-reverse/scripts/lib/path-template.mjs rename to skills/browser-to-api/scripts/lib/path-template.mjs diff --git a/skills/browser-reverse/scripts/lib/redact.mjs b/skills/browser-to-api/scripts/lib/redact.mjs similarity index 100% rename from skills/browser-reverse/scripts/lib/redact.mjs rename to skills/browser-to-api/scripts/lib/redact.mjs diff --git a/skills/browser-reverse/scripts/lib/schema-merge.mjs b/skills/browser-to-api/scripts/lib/schema-merge.mjs similarity index 100% rename from skills/browser-reverse/scripts/lib/schema-merge.mjs rename to skills/browser-to-api/scripts/lib/schema-merge.mjs diff --git a/skills/browser-reverse/scripts/lib/yaml.mjs b/skills/browser-to-api/scripts/lib/yaml.mjs similarity index 100% rename from skills/browser-reverse/scripts/lib/yaml.mjs rename to skills/browser-to-api/scripts/lib/yaml.mjs diff --git a/skills/browser-reverse/scripts/load.mjs b/skills/browser-to-api/scripts/load.mjs similarity index 96% rename from skills/browser-reverse/scripts/load.mjs rename to skills/browser-to-api/scripts/load.mjs index 6d4ba292..bfab6275 100644 --- a/skills/browser-reverse/scripts/load.mjs +++ b/skills/browser-to-api/scripts/load.mjs @@ -39,8 +39,10 @@ function urlQuery(u) { try { const x = new URL(u); const out = {}; + // First value wins for repeats. 
The downstream consumer (normalize.mjs) + // only uses parameter names + a representative value for type inference, + // so collapsing repeats to the first observation is fine. for (const [k, v] of x.searchParams.entries()) { - // Last value wins for repeats; we record the existence either way. if (out[k] === undefined) out[k] = v; } return out; diff --git a/skills/browser-reverse/scripts/normalize.mjs b/skills/browser-to-api/scripts/normalize.mjs similarity index 100% rename from skills/browser-reverse/scripts/normalize.mjs rename to skills/browser-to-api/scripts/normalize.mjs From b233aeec314ac91fd77a2643adec8aa1cb24706e Mon Sep 17 00:00:00 2001 From: Shrey Pandya Date: Tue, 12 May 2026 16:53:40 -0400 Subject: [PATCH 3/6] Add Swagger UI preview for browser-to-api --- skills/browser-to-api/REFERENCE.md | 24 ++ skills/browser-to-api/SKILL.md | 22 +- .../scripts/open-swagger-ui.mjs | 205 ++++++++++++++++++ 3 files changed, 246 insertions(+), 5 deletions(-) create mode 100644 skills/browser-to-api/scripts/open-swagger-ui.mjs diff --git a/skills/browser-to-api/REFERENCE.md b/skills/browser-to-api/REFERENCE.md index 02871d7b..77928a86 100644 --- a/skills/browser-to-api/REFERENCE.md +++ b/skills/browser-to-api/REFERENCE.md @@ -10,6 +10,17 @@ All scripts are Node ESM (`type: module`). They depend only on the Node standard Top-level dispatcher. Runs `load → filter → normalize → infer → emit` in order. With `--stage <stage>`, runs only that stage (assumes prior stages already wrote their intermediate file). +### `open-swagger-ui.mjs (--run <run> | --spec <file>) [flags]` + +Preview an emitted OpenAPI spec in a local Swagger UI checkout. The script serves the Swagger UI `dist/` assets and the generated spec from one local HTTP origin, injects a per-run `swagger-initializer.js`, opens the browser by default, and keeps the server alive until interrupted. + +- `--run <run>` loads `<run>/api-spec/openapi.yaml`, falling back to `openapi.json`. +- `--spec <file>` previews an explicit OpenAPI YAML/JSON file.
+- `--swagger-ui ` points at a Swagger UI checkout/package directory. If omitted, the script tries `$SWAGGER_UI_DIR`, `~/Developer/swagger-ui`, and `node_modules/swagger-ui-dist`. +- `--host ` defaults to `127.0.0.1`. +- `--port ` defaults to a random free port. +- `--no-open` prints the URL without opening a browser. + ### `load.mjs [bodies-dir]` - Reads `cdp/network/requests.jsonl` and `cdp/network/responses.jsonl`. @@ -152,6 +163,17 @@ Internals (matched in `lib/io.mjs` + `load.mjs`): | `--min-samples ` | `1` | Drop endpoints below this threshold (still listed in the report) | | `--stage ` | (all) | One of `load`, `filter`, `normalize`, `infer`, `emit` | +## Swagger UI preview flags + +| Flag | Default | Notes | +|---|---|---| +| `--run ` | required unless `--spec` is set | Resolves a browser-trace run and previews `/api-spec/openapi.yaml` or `openapi.json` | +| `--spec ` | required unless `--run` is set | Explicit OpenAPI YAML/JSON path | +| `--swagger-ui ` | auto | Checkout/package dir containing either `dist/index.html` or `index.html` + `swagger-ui-bundle.js` | +| `--host ` | `127.0.0.1` | Preview server bind host | +| `--port ` | random | Preview server bind port | +| `--no-open` | false | Print the URL without launching the browser | + ## Default exclude list URLs matching these patterns are dropped before any analysis (regex, applied to the full URL): @@ -213,6 +235,7 @@ These extensions are stripped from `report.md` (which is human-facing) but prese | `O11Y_ROOT` | `.o11y` | Inherited from `browser-trace`. 
Used only when `--run` is bare run id rather than a full path | | `DISCOVER_ENUM_MAX_DISTINCT` | `8` | Max distinct values to consider a field an enum | | `DISCOVER_ENUM_MIN_SAMPLES` | `5` | Min samples before enum detection runs | +| `SWAGGER_UI_DIR` | auto | Optional Swagger UI checkout/package dir for `open-swagger-ui.mjs` | ## Troubleshooting @@ -223,3 +246,4 @@ These extensions are stripped from `report.md` (which is human-facing) but prese | Path templating collapses too aggressively | numeric IDs being misread as enums, or dictionary words misread as slugs | add `--exclude` for the noisy paths and re-run, or file an issue with the trace | | Schemas show `type: "string"` for everything | request/response bodies aren't valid JSON or weren't captured | check `paired.jsonl` for `reqBody`/`respBody` content — if `null`, bodies weren't in the trace | | Spec validator complains about `info.version` | derived version is `0.1.0-discovered` which some tools dislike | pass `--version 0.1.0` (TODO) or post-edit the file | +| `Swagger UI not found` | no local Swagger UI checkout/package was detected | clone `https://github.com/swagger-api/swagger-ui` to `~/Developer/swagger-ui`, or pass `--swagger-ui ` / set `SWAGGER_UI_DIR` | diff --git a/skills/browser-to-api/SKILL.md b/skills/browser-to-api/SKILL.md index 108f1d5f..e923026d 100644 --- a/skills/browser-to-api/SKILL.md +++ b/skills/browser-to-api/SKILL.md @@ -31,12 +31,12 @@ If the user wants to **capture** traffic, send them to `browser-trace` first. ### 1. 
Capture with `browser-trace` (and optionally bodies via `browse network on`) ```bash -# Local Chrome example (see browser-trace SKILL.md for Browserbase variant) -"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ - --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-spec about:blank & +# Local example (see browser-trace SKILL.md for Browserbase variant) +browse env local +browse open about:blank +TARGET="$(browse status --json | jq -r .wsUrl)" -node ../browser-trace/scripts/start-capture.mjs 9222 my-site -browse env local 9222 +node ../browser-trace/scripts/start-capture.mjs "$TARGET" my-site browse network on # capture request/response bodies browse open https://example.com # ...drive whatever flows you want covered... @@ -69,6 +69,16 @@ node scripts/discover.mjs --run .o11y/my-site The two primary deliverables are `openapi.yaml` (machine-readable spec) and `report.md` (human-readable coverage summary). +### 3. Preview in Swagger UI when available + +If Swagger UI is installed locally, open the generated spec there: + +```bash +node scripts/open-swagger-ui.mjs --run .o11y/my-site +``` + +The helper auto-detects `$SWAGGER_UI_DIR`, `~/Developer/swagger-ui`, or `node_modules/swagger-ui-dist`. If none exists, deliver `openapi.yaml` and `report.md` directly and tell the user Swagger UI was not found. + ## CLI flags | Flag | Required | Meaning | @@ -85,6 +95,8 @@ The two primary deliverables are `openapi.yaml` (machine-readable spec) and `rep | `--min-samples ` | no | Minimum samples per endpoint to include. Default `1` | | `--stage ` | no | Run only one stage: `load`, `filter`, `normalize`, `infer`, `emit` | +`scripts/open-swagger-ui.mjs` accepts `--run ` or `--spec `, plus optional `--swagger-ui `, `--host`, `--port`, and `--no-open`. 
+ ## Output layout ``` diff --git a/skills/browser-to-api/scripts/open-swagger-ui.mjs b/skills/browser-to-api/scripts/open-swagger-ui.mjs new file mode 100644 index 00000000..e2abc459 --- /dev/null +++ b/skills/browser-to-api/scripts/open-swagger-ui.mjs @@ -0,0 +1,205 @@ +#!/usr/bin/env node +// Preview an emitted OpenAPI spec in a local Swagger UI checkout. + +import fs from 'node:fs'; +import http from 'node:http'; +import os from 'node:os'; +import path from 'node:path'; +import { spawn } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +function parseArgs(argv) { + const opts = { + run: null, + spec: null, + swaggerUi: null, + host: '127.0.0.1', + port: 0, + open: true, + }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + const next = () => argv[++i]; + switch (a) { + case '--run': opts.run = next(); break; + case '--spec': opts.spec = next(); break; + case '--swagger-ui': opts.swaggerUi = next(); break; + case '--host': opts.host = next(); break; + case '--port': opts.port = Number(next()); break; + case '--no-open': opts.open = false; break; + case '-h': case '--help': + printHelp(); process.exit(0); + default: + console.error(`unknown arg: ${a}`); + printHelp(); process.exit(2); + } + } + return opts; +} + +function printHelp() { + console.error(`usage: open-swagger-ui.mjs (--run | --spec ) [flags] + + --run browser-trace run dir; uses /api-spec/openapi.yaml + --spec OpenAPI YAML/JSON file to preview + --swagger-ui Swagger UI checkout/package dir. Defaults to + $SWAGGER_UI_DIR, ~/Developer/swagger-ui, or node_modules/swagger-ui-dist + --host Bind host. Default: 127.0.0.1 + --port Bind port. 
Default: random free port + --no-open Print the URL without opening a browser`); +} + +function resolveRun(runArg) { + if (fs.existsSync(runArg) && fs.statSync(runArg).isDirectory()) return path.resolve(runArg); + const root = process.env.O11Y_ROOT || '.o11y'; + const guess = path.join(root, runArg); + if (fs.existsSync(guess) && fs.statSync(guess).isDirectory()) return path.resolve(guess); + throw new Error(`run path not found: ${runArg} (tried ${guess})`); +} + +function resolveSpec(opts) { + if (opts.spec) return path.resolve(opts.spec); + if (!opts.run) throw new Error('expected --run or --spec '); + + const runPath = resolveRun(opts.run); + const candidates = [ + path.join(runPath, 'api-spec', 'openapi.yaml'), + path.join(runPath, 'api-spec', 'openapi.json'), + ]; + const found = candidates.find(p => fs.existsSync(p)); + if (!found) throw new Error(`no OpenAPI spec found under ${path.join(runPath, 'api-spec')}`); + return found; +} + +function swaggerUiCandidates(explicit) { + return [ + explicit, + process.env.SWAGGER_UI_DIR, + path.join(os.homedir(), 'Developer', 'swagger-ui'), + path.resolve(process.cwd(), 'node_modules', 'swagger-ui-dist'), + path.resolve(__dirname, '..', 'node_modules', 'swagger-ui-dist'), + ].filter(Boolean); +} + +function distDirFor(candidate) { + const resolved = path.resolve(candidate); + const directDist = path.join(resolved, 'dist'); + if (fs.existsSync(path.join(directDist, 'index.html'))) return directDist; + if (fs.existsSync(path.join(resolved, 'index.html')) && fs.existsSync(path.join(resolved, 'swagger-ui-bundle.js'))) return resolved; + return null; +} + +function resolveSwaggerUi(explicit) { + for (const candidate of swaggerUiCandidates(explicit)) { + const dist = distDirFor(candidate); + if (dist) return dist; + } + + const searched = swaggerUiCandidates(explicit).map(p => ` - ${path.resolve(p)}`).join('\n'); + throw new Error(`Swagger UI not found. 
Searched:\n${searched}\n\nInstall it locally, then rerun:\n git clone https://github.com/swagger-api/swagger-ui.git ~/Developer/swagger-ui\n cd ~/Developer/swagger-ui && npm ci\n\nOr pass --swagger-ui / set SWAGGER_UI_DIR.`); +} + +function mimeFor(filePath) { + const ext = path.extname(filePath).toLowerCase(); + return { + '.css': 'text/css; charset=utf-8', + '.html': 'text/html; charset=utf-8', + '.js': 'application/javascript; charset=utf-8', + '.json': 'application/json; charset=utf-8', + '.map': 'application/json; charset=utf-8', + '.png': 'image/png', + '.svg': 'image/svg+xml', + '.yaml': 'application/yaml; charset=utf-8', + '.yml': 'application/yaml; charset=utf-8', + }[ext] || 'application/octet-stream'; +} + +function swaggerInitializer(specRoute) { + return `window.onload = function() { + window.ui = SwaggerUIBundle({ + url: ${JSON.stringify(specRoute)}, + dom_id: '#swagger-ui', + deepLinking: true, + presets: [ + SwaggerUIBundle.presets.apis, + SwaggerUIStandalonePreset + ], + plugins: [ + SwaggerUIBundle.plugins.DownloadUrl + ], + layout: 'StandaloneLayout' + }); +}; +`; +} + +function safeStaticPath(distDir, urlPath) { + const decoded = decodeURIComponent(urlPath); + const relative = decoded === '/' ? 'index.html' : decoded.replace(/^\/+/, ''); + const fullPath = path.resolve(distDir, relative); + const root = path.resolve(distDir); + if (fullPath !== root && !fullPath.startsWith(root + path.sep)) return null; + return fullPath; +} + +function openUrl(url) { + const opener = process.platform === 'darwin' + ? ['open', [url]] + : process.platform === 'win32' + ? 
['cmd', ['/c', 'start', '', url]] + : ['xdg-open', [url]]; + const child = spawn(opener[0], opener[1], { detached: true, stdio: 'ignore' }); + child.unref(); +} + +async function main() { + const opts = parseArgs(process.argv.slice(2)); + const specPath = resolveSpec(opts); + if (!fs.existsSync(specPath)) throw new Error(`spec not found: ${specPath}`); + + const distDir = resolveSwaggerUi(opts.swaggerUi); + const specRoute = path.extname(specPath).toLowerCase() === '.json' ? '/openapi.json' : '/openapi.yaml'; + + const server = http.createServer((req, res) => { + const requestPath = new URL(req.url, `http://${opts.host}`).pathname; + if (requestPath === specRoute) { + res.writeHead(200, { 'content-type': mimeFor(specPath), 'cache-control': 'no-store' }); + fs.createReadStream(specPath).pipe(res); + return; + } + if (requestPath === '/swagger-initializer.js') { + res.writeHead(200, { 'content-type': 'application/javascript; charset=utf-8', 'cache-control': 'no-store' }); + res.end(swaggerInitializer(specRoute)); + return; + } + + const staticPath = safeStaticPath(distDir, requestPath); + if (!staticPath || !fs.existsSync(staticPath) || fs.statSync(staticPath).isDirectory()) { + res.writeHead(404, { 'content-type': 'text/plain; charset=utf-8' }); + res.end('not found\n'); + return; + } + res.writeHead(200, { 'content-type': mimeFor(staticPath) }); + fs.createReadStream(staticPath).pipe(res); + }); + + await new Promise((resolve, reject) => { + server.once('error', reject); + server.listen(opts.port, opts.host, resolve); + }); + + const address = server.address(); + const url = `http://${opts.host}:${address.port}/`; + console.log(`swagger_ui=${distDir}`); + console.log(`spec=${specPath}`); + console.log(`url=${url}`); + console.log('Press Ctrl-C to stop the preview server.'); + if (opts.open) openUrl(url); +} + +main().catch(err => { + console.error(err.message); + process.exit(1); +}); From dc07d29d02763b56f49162b72c3d30697edf4a93 Mon Sep 17 00:00:00 2001 From: 
Shrey Pandya Date: Wed, 13 May 2026 16:57:45 -0400 Subject: [PATCH 4/6] Noise classification, GraphQL decomposition, and client SDK generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit normalize.mjs: - Auto-classify endpoints as api/noise/page and drop non-API traffic (tracking, analytics, bot defense, session plumbing, HTML page renders) - Detect multiplexed endpoints (GraphQL operationName, JSON-RPC method, query param dispatch) and decompose into separate logical operations - Typically drops 60-80% of captured traffic as noise emit.mjs: - Generate client.mjs — zero-dependency ES module wrapping each discovered operation as an async function with JSDoc param types - For GraphQL/APQ endpoints, embeds persisted query hashes and wires up the full request shape so callers just pass variables - Extract required headers from trace (CSRF tokens, custom headers) and include them in client defaults - Task-oriented report.md with quick-start import, curl examples, variables tables, and response samples per operation On OpenTable trace: 27 raw endpoints → 9 named operations, zero noise. Generated client with autocomplete(), restaurantsAvailability(), etc. Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/browser-to-api/SKILL.md | 22 +- skills/browser-to-api/scripts/discover.mjs | 2 +- skills/browser-to-api/scripts/emit.mjs | 380 +++++++++++++++++--- skills/browser-to-api/scripts/normalize.mjs | 195 ++++++++-- 4 files changed, 518 insertions(+), 81 deletions(-) diff --git a/skills/browser-to-api/SKILL.md b/skills/browser-to-api/SKILL.md index e923026d..d62d4939 100644 --- a/skills/browser-to-api/SKILL.md +++ b/skills/browser-to-api/SKILL.md @@ -129,6 +129,26 @@ What changes when bodies are present: The report flags every endpoint that has no response-body sample. 
+## Automatic noise filtering + +The normalize stage automatically classifies and drops infrastructure noise: + +- **Tracking / analytics** — paths containing `/track`, `/pixel`, `/beacon`, `/impression`, `/pageview`, `/dag/v*` +- **Bot defense** — Akamai (`/akam/`), fingerprint payloads (`sensor_data`), obfuscated multi-segment paths +- **Session plumbing** — `/session`, `/authenticate/start`, cookie consent, A/B experiment endpoints +- **HTML page renders** — `GET` requests returning `text/html` (the rendered page, not the API) + +This typically drops 60-80% of captured traffic. The `--include` flag can rescue a false positive. + +## GraphQL / multiplexed endpoint decomposition + +When a single endpoint (like `/dapi/fe/gql`) is called with different `operationName` values, the skill automatically splits it into separate logical operations. Each gets its own: +- OpenAPI path entry (e.g. `/dapi/fe/gql [Autocomplete]`) +- Request/response schema inferred from only that operation's samples +- Curl example and variables table in the report + +Detection works on body fields (`operationName`, `method`, `action`) and query params (`opname`, `op`). This covers GraphQL (APQ and inline), JSON-RPC, and similar dispatch patterns. + ## Limitations - **Coverage is bounded by the captured flow.** Endpoints not exercised in the trace will not appear. The skill cannot prove completeness. @@ -141,7 +161,7 @@ The report flags every endpoint that has no response-body sample. 1. **Drive the flows you want documented.** The richer the browser-trace, the richer the spec. 2. **Use `--origins` for noisy sites.** A marketing page hits dozens of analytics hosts; restrict to the API origin you care about. -3. **Inspect `report.md` first.** Low-sample endpoints, single-status endpoints, and missing request bodies are listed there with concrete suggestions. +3. **Inspect `report.md` first.** It has curl-ready examples and response samples for every discovered operation. 4. 
**Bump `--min-samples` to 2+** when you want only confidently-shaped endpoints in the final doc — drop the long tail. 5. **Pair with `browse network on`** when response-body schemas matter. The CDP firehose alone has request bodies but not response bodies. diff --git a/skills/browser-to-api/scripts/discover.mjs b/skills/browser-to-api/scripts/discover.mjs index c349fa87..07023685 100644 --- a/skills/browser-to-api/scripts/discover.mjs +++ b/skills/browser-to-api/scripts/discover.mjs @@ -84,7 +84,7 @@ function main() { } console.log(`\noutput: ${outDir}`); - for (const f of ['openapi.yaml', 'openapi.json', 'report.md', 'confidence.json']) { + for (const f of ['client.mjs', 'openapi.yaml', 'openapi.json', 'report.md', 'confidence.json']) { const p = path.join(outDir, f); if (fs.existsSync(p)) console.log(` ${path.relative(process.cwd(), p)}`); } diff --git a/skills/browser-to-api/scripts/emit.mjs b/skills/browser-to-api/scripts/emit.mjs index 5ad43272..f53d640a 100644 --- a/skills/browser-to-api/scripts/emit.mjs +++ b/skills/browser-to-api/scripts/emit.mjs @@ -126,8 +126,11 @@ function makeOperation(ep, refOrInline) { for (const p of ep.pathParams || []) params.push(p); for (const p of ep.queryParams || []) params.push(p); + const summary = ep.operationName + ? 
`${ep.operationName} (${ep.method} ${ep.parentPath || ep.path})` + : `${ep.method} ${ep.path}`; const op = { - summary: `${ep.method} ${ep.path}`, + summary, operationId: makeOpId(ep), }; if (params.length) op.parameters = params; @@ -189,6 +192,9 @@ function defaultDescriptionFor(status) { } function makeOpId(ep) { + if (ep.operationName) { + return `${ep.method.toLowerCase()}_${ep.operationName.replace(/[^A-Za-z0-9]/g, '_')}`; + } const parts = ep.path.split('/').filter(Boolean).map(s => s.replace(/[{}]/g, '')); const tail = parts.map(p => p.replace(/[^A-Za-z0-9]/g, '_')).join('_'); return `${ep.method.toLowerCase()}_${tail || 'root'}`; @@ -203,6 +209,16 @@ export function emit(outDir, opts = {}) { const kept = endpoints.filter(e => e.sampleCount >= minSamples); const dropped = endpoints.filter(e => e.sampleCount < minSamples); + // Load raw samples for header extraction (client generation needs them) + const samplesByKey = new Map(); + for (const row of readJsonl(intermediatePath(outDir, 'endpoint-samples.jsonl'))) { + samplesByKey.set(row.endpointKey, row.samples); + } + // Attach to kept endpoints temporarily for client gen + for (const ep of kept) { + ep.sampleRows = samplesByKey.get(ep.endpointKey) || []; + } + // Servers: one entry per distinct origin, sorted by frequency. const originCounts = new Map(); for (const e of kept) originCounts.set(e.origin, (originCounts.get(e.origin) || 0) + e.sampleCount); @@ -213,30 +229,31 @@ export function emit(outDir, opts = {}) { const { components, refOrInline } = buildComponents(kept); - // Build paths: one keyed entry per templated path; each method becomes an - // operation. When the same (path, method) is observed on multiple origins - // (common for third-party analytics endpoints fanned across vendors), keep - // the highest-sample-count operation and record the other origins under - // `x-also-served-from` so no data is silently dropped. + // Build paths. Decomposed operations (e.g. 
GraphQL) get a synthetic path + // like /dapi/fe/gql#Autocomplete so each operation is a distinct entry. const paths = {}; - const collisions = {}; // pathKey -> [{origin, samples}] + const collisions = {}; for (const ep of kept) { const m = ep.method.toLowerCase(); - if (!paths[ep.path]) paths[ep.path] = {}; - const existing = paths[ep.path][m]; + // Use the path as-is (includes [OpName] for decomposed endpoints) + const pathKey = ep.path; + if (!paths[pathKey]) paths[pathKey] = {}; + const existing = paths[pathKey][m]; if (!existing) { - paths[ep.path][m] = makeOperation(ep, refOrInline); + paths[pathKey][m] = makeOperation(ep, refOrInline); } else { - const key = `${m} ${ep.path}`; + const key = `${m} ${pathKey}`; if (!collisions[key]) collisions[key] = [{ origin: existing['x-origin'], samples: existing['x-sample-count'] }]; collisions[key].push({ origin: ep.origin, samples: ep.sampleCount }); if (ep.sampleCount > (existing['x-sample-count'] || 0)) { - paths[ep.path][m] = makeOperation(ep, refOrInline); + paths[pathKey][m] = makeOperation(ep, refOrInline); } } } for (const [key, origins] of Object.entries(collisions)) { - const [m, p] = key.split(' '); + const [m, ...rest] = key.split(' '); + const p = rest.join(' '); + if (!paths[p]?.[m]) continue; const op = paths[p][m]; const winner = op['x-origin']; op['x-also-served-from'] = origins.filter(o => o.origin !== winner).map(o => o.origin); @@ -278,73 +295,318 @@ export function emit(outDir, opts = {}) { // report.md const redaction = readJson(intermediatePath(outDir, 'redaction-stats.json'), { headers: 0, bodyKeys: 0, bodyValues: 0 }); - writeText(path.join(outDir, 'report.md'), buildReport({ kept, dropped, servers, redaction, minSamples })); + + // client.mjs — generated SDK wrapping each operation as a callable function + const clientCode = buildClient({ kept, servers }); + if (clientCode) { + writeText(path.join(outDir, 'client.mjs'), clientCode); + } + + writeText(path.join(outDir, 'report.md'), 
buildReport({ kept, dropped, servers, redaction, minSamples, hasClient: !!clientCode })); return { endpoints: kept.length, droppedLowSample: dropped.length, servers: servers.length, components: Object.keys(components).length, + client: !!clientCode, }; } -function buildReport({ kept, dropped, servers, redaction, minSamples }) { - const lines = []; - lines.push('# Discovered API\n'); - lines.push('## Servers\n'); - for (const s of servers) lines.push(`- ${s.url}`); - if (!servers.length) lines.push('_(none)_'); - lines.push(''); +// --------------------------------------------------------------------------- +// Client SDK generation +// --------------------------------------------------------------------------- - lines.push('## Endpoints\n'); - lines.push('| Method | Path | Samples | Statuses | Confidence | Flags |'); - lines.push('|---|---|---|---|---|---|'); - const sorted = [...kept].sort((a, b) => a.path.localeCompare(b.path) || a.method.localeCompare(b.method)); - for (const ep of sorted) { - const flags = ep.normalizationFlags.length ? ep.normalizationFlags.join(', ') : '—'; - lines.push(`| ${ep.method} | \`${ep.path}\` | ${ep.sampleCount} | ${ep.statusCodes.join(', ') || '—'} | ${confidenceBucket(ep)} | ${flags} |`); - } - if (!kept.length) lines.push('| — | — | — | — | — | — |'); - lines.push(''); +function toFnName(name) { + // Autocomplete → autocomplete, RestaurantsAvailability → restaurantsAvailability + return name[0].toLowerCase() + name.slice(1); +} - if (dropped.length) { - lines.push(`## Dropped (below --min-samples=${minSamples})\n`); - for (const ep of dropped) lines.push(`- \`${ep.method} ${ep.path}\` (${ep.sampleCount} sample${ep.sampleCount === 1 ? '' : 's'})`); - lines.push(''); +function extractObservedHeaders(kept) { + // Pull non-standard headers that appeared consistently across requests. + // These are often required (CSRF tokens, custom auth, etc.) 
+ const candidates = new Map(); // headerName -> { values: Set, count } + let totalSamples = 0; + const skip = new Set([ + 'content-type', 'user-agent', 'accept', 'accept-encoding', 'accept-language', + 'referer', 'origin', 'host', 'connection', 'content-length', + 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', + 'sec-fetch-dest', 'sec-fetch-mode', 'sec-fetch-site', + 'cookie', 'authorization', 'x-api-key', + ]); + for (const ep of kept) { + const samples = ep.sampleRows || []; + for (const s of samples) { + totalSamples++; + for (const [k, v] of Object.entries(s.reqHeaders || {})) { + const lk = k.toLowerCase(); + if (skip.has(lk)) continue; + if (!candidates.has(lk)) candidates.set(lk, { name: k, values: new Set(), count: 0 }); + const c = candidates.get(lk); + c.count++; + c.values.add(v); + } + } + } + // Keep headers present in >50% of requests (likely required) + const result = {}; + for (const [, c] of candidates) { + if (c.count <= totalSamples * 0.5) continue; + if (c.values.size <= 5) { + result[c.name] = [...c.values][0]; + } else { + // High cardinality (e.g. CSRF tokens, correlation IDs) — include with a + // representative value. The header is likely required even if the value varies. + result[c.name] = [...c.values][0]; + } } + return result; +} - lines.push('## Coverage caveats\n'); - const noResp = kept.filter(e => !e.responseBodyKnown); - if (noResp.length) { - lines.push(`- **${noResp.length}** endpoint${noResp.length === 1 ? '' : 's'} have no response-body schema. \`browse cdp\` does not embed response bodies; pair with \`browse network on\` to capture them.`); +function buildClient({ kept, servers }) { + const baseUrl = servers[0]?.url || ''; + const operations = kept.filter(e => e.operationName); + const regular = kept.filter(e => !e.operationName); + + if (!operations.length && !regular.length) return null; + + // Detect required headers from the trace (e.g. 
CSRF tokens) + const observedHeaders = extractObservedHeaders(kept); + + const lines = []; + lines.push(`// Auto-generated API client from browser-trace capture.`); + lines.push(`// Usage: import { ${operations.slice(0, 3).map(e => toFnName(e.operationName)).join(', ')}${operations.length > 3 ? ', ...' : ''} } from './client.mjs';\n`); + lines.push(`const BASE = '${baseUrl}';\n`); + + lines.push(`const defaultHeaders = {`); + lines.push(` 'Content-Type': 'application/json',`); + lines.push(` 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',`); + for (const [k, v] of Object.entries(observedHeaders)) { + lines.push(` '${k}': '${v}',`); } - const singleSample = kept.filter(e => e.sampleCount === 1); - if (singleSample.length) { - lines.push(`- **${singleSample.length}** endpoint${singleSample.length === 1 ? '' : 's'} were observed only once. Drive the same flow again to gain confidence.`); + lines.push(`};\n`); + + lines.push(`async function request(path, { method = 'GET', body, query, headers } = {}) {`); + lines.push(` let url = BASE + path;`); + lines.push(` if (query) {`); + lines.push(` const qs = new URLSearchParams(Object.entries(query).filter(([, v]) => v != null));`); + lines.push(` if (qs.toString()) url += '?' + qs;`); + lines.push(` }`); + lines.push(` const res = await fetch(url, {`); + lines.push(` method,`); + lines.push(` headers: { ...defaultHeaders, ...headers },`); + lines.push(` ...(body ? { body: JSON.stringify(body) } : {}),`); + lines.push(` });`); + lines.push(` if (!res.ok) throw new Error(\`\${res.status} \${res.statusText}: \${await res.text()}\`);`); + lines.push(` const ct = res.headers.get('content-type') || '';`); + lines.push(` return ct.includes('json') ? 
res.json() : res.text();`); + lines.push(`}\n`); + + // GraphQL / multiplexed operations + if (operations.length) { + // Group by parent path + discriminator to emit one dispatcher per GQL endpoint + const byParent = new Map(); + for (const op of operations) { + const key = op.parentPath || op.path; + if (!byParent.has(key)) byParent.set(key, []); + byParent.get(key).push(op); + } + + for (const [parentPath, ops] of byParent) { + // Check if it's a persisted-query GraphQL endpoint + const isPersisted = ops.some(op => + op.requestExample?.extensions?.persistedQuery?.sha256Hash); + + if (isPersisted) { + // Build a hash lookup table + lines.push(`// Persisted query hashes for ${parentPath}`); + lines.push(`const HASHES = {`); + for (const op of ops) { + const hash = op.requestExample?.extensions?.persistedQuery?.sha256Hash; + if (hash) lines.push(` ${op.operationName}: '${hash}',`); + } + lines.push(`};\n`); + } + + // Emit a function per operation + for (const op of ops) { + const fnName = toFnName(op.operationName); + const vars = op.requestExample?.variables; + const varKeys = vars && typeof vars === 'object' ? Object.keys(vars) : []; + + // Build JSDoc + lines.push(`/**`); + if (varKeys.length) { + for (const k of varKeys) { + const v = vars[k]; + const t = v === null ? '*' : Array.isArray(v) ? 
'Array' : typeof v; + lines.push(` * @param {${t}} variables.${k}`); + } + } + lines.push(` * @returns {Promise}`); + lines.push(` */`); + + lines.push(`export async function ${fnName}(variables = {}) {`); + if (isPersisted) { + lines.push(` return request('${parentPath}', {`); + lines.push(` method: 'POST',`); + lines.push(` query: { optype: 'query', opname: '${op.operationName}' },`); + lines.push(` body: {`); + lines.push(` operationName: '${op.operationName}',`); + lines.push(` variables,`); + lines.push(` extensions: { persistedQuery: { version: 1, sha256Hash: HASHES.${op.operationName} } },`); + lines.push(` },`); + lines.push(` });`); + } else { + lines.push(` return request('${parentPath}', {`); + lines.push(` method: 'POST',`); + lines.push(` body: { ${op.discriminatorField || 'operationName'}: '${op.operationName}', variables },`); + lines.push(` });`); + } + lines.push(`}\n`); + } + } } - const noBodyOnPost = kept.filter(e => ['POST', 'PUT', 'PATCH'].includes(e.method) && !e.requestBodyKnown); - if (noBodyOnPost.length) { - lines.push(`- **${noBodyOnPost.length}** mutation endpoint${noBodyOnPost.length === 1 ? '' : 's'} have no request body in the trace (form-encoded? non-JSON? not captured?).`); + + // Regular REST endpoints + for (const ep of regular) { + const fnName = makeOpId(ep).replace(/^(get|post|put|patch|delete)_/, (_, m) => m); + const hasBody = ['POST', 'PUT', 'PATCH'].includes(ep.method) && ep.requestBodyKnown; + + lines.push(`export async function ${fnName}(${hasBody ? 
'body, ' : ''}options = {}) {`); + lines.push(` return request('${ep.path}', {`); + lines.push(` method: '${ep.method}',`); + if (hasBody) lines.push(` body,`); + lines.push(` ...options,`); + lines.push(` });`); + lines.push(`}\n`); } - lines.push(''); - lines.push('## Redaction\n'); - lines.push(`- Headers redacted: ${redaction.headers}`); - lines.push(`- Body keys redacted: ${redaction.bodyKeys}`); - lines.push(`- Body values redacted by pattern: ${redaction.bodyValues}`); - lines.push(''); + return lines.join('\n') + '\n'; +} - lines.push('## Suggested follow-up flows\n'); - const status404 = kept.filter(e => e.statusCodes.includes(404)); - if (status404.length) { - lines.push(`- Endpoints that returned 404: ${status404.slice(0, 5).map(e => '`' + e.method + ' ' + e.path + '`').join(', ')}. Re-run with valid IDs to widen the success-path schema.`); +function buildReport({ kept, dropped, servers, redaction, minSamples, hasClient }) { + const lines = []; + const baseUrl = servers[0]?.url || ''; + lines.push('# Discovered API\n'); + lines.push(`**Base URL:** \`${baseUrl || '(unknown)'}\`\n`); + + // Separate decomposed (named operations) from regular endpoints + const operations = kept.filter(e => e.operationName); + const regular = kept.filter(e => !e.operationName); + + // Quick-start with generated client + if (hasClient) { + const allFns = [...operations, ...regular]; + const fnNames = allFns.map(e => e.operationName ? toFnName(e.operationName) : makeOpId(e)); + lines.push('## Quick start\n'); + lines.push('```js'); + lines.push(`import { ${fnNames.join(', ')} } from './client.mjs';`); + lines.push('```\n'); + lines.push(`**${fnNames.length} functions**, zero dependencies. 
See [\`client.mjs\`](./client.mjs) for full signatures.\n`); } - if (singleSample.length) { - lines.push('- Re-exercise the single-sample endpoints listed above to promote them out of `low` confidence.'); + + // --- Named operations (GraphQL / multiplexed) --- + if (operations.length) { + lines.push('## Operations\n'); + lines.push('These are logical operations multiplexed over a single endpoint.\n'); + + const sorted = [...operations].sort((a, b) => b.sampleCount - a.sampleCount); + for (const ep of sorted) { + lines.push(`### ${ep.operationName}\n`); + lines.push(`- **Endpoint:** \`${ep.method} ${ep.parentPath || ep.path}\``); + lines.push(`- **Discriminator:** \`${ep.discriminatorField}: "${ep.operationName}"\``); + lines.push(`- **Samples:** ${ep.sampleCount} | **Statuses:** ${ep.statusCodes.join(', ') || '—'}`); + lines.push(''); + + // Curl example from request body + if (ep.requestExample) { + const body = JSON.stringify(ep.requestExample, null, 2); + const curlPath = ep.parentPath || ep.path; + lines.push('```bash'); + lines.push(`curl -X ${ep.method} '${baseUrl}${curlPath}' \\`); + lines.push(` -H 'Content-Type: application/json' \\`); + lines.push(` -d '${body}'`); + lines.push('```\n'); + } + + // Key variables (for GraphQL, show the variables object shape) + if (ep.requestExample?.variables && typeof ep.requestExample.variables === 'object') { + const vars = ep.requestExample.variables; + const varKeys = Object.keys(vars); + if (varKeys.length) { + lines.push('**Variables:**\n'); + lines.push('| Name | Example | Type |'); + lines.push('|---|---|---|'); + for (const k of varKeys) { + const v = vars[k]; + const t = Array.isArray(v) ? 'array' : typeof v; + const example = JSON.stringify(v); + const truncated = example.length > 60 ? example.slice(0, 57) + '...' 
: example; + lines.push(`| \`${k}\` | \`${truncated}\` | ${t} |`); + } + lines.push(''); + } + } + + // Response shape summary + if (ep.responseExample) { + const respStr = JSON.stringify(ep.responseExample, null, 2); + const truncResp = respStr.length > 1500 ? respStr.slice(0, 1500) + '\n ...\n}' : respStr; + lines.push('
<details><summary>Example response</summary>\n'); + lines.push('```json'); + lines.push(truncResp); + lines.push('```\n</details>
\n'); + } + } } - if (!status404.length && !singleSample.length) { - lines.push('- The captured flow looks reasonably balanced. Add an authenticated session if the unauth view is what was captured.'); + + // --- Regular REST endpoints --- + if (regular.length) { + lines.push('## Endpoints\n'); + lines.push('| Method | Path | Samples | Statuses | Confidence |'); + lines.push('|---|---|---|---|---|'); + const sorted = [...regular].sort((a, b) => b.sampleCount - a.sampleCount); + for (const ep of sorted) { + lines.push(`| ${ep.method} | \`${ep.path}\` | ${ep.sampleCount} | ${ep.statusCodes.join(', ') || '—'} | ${confidenceBucket(ep)} |`); + } + lines.push(''); + + // Curl examples for top regular endpoints + const withExamples = sorted.filter(e => e.requestExample || e.responseExample).slice(0, 5); + for (const ep of withExamples) { + lines.push(`### \`${ep.method} ${ep.path}\`\n`); + if (ep.requestExample) { + const body = JSON.stringify(ep.requestExample, null, 2); + lines.push('```bash'); + lines.push(`curl -X ${ep.method} '${baseUrl}${ep.path}' \\`); + lines.push(` -H 'Content-Type: application/json' \\`); + lines.push(` -d '${body}'`); + lines.push('```\n'); + } + if (ep.responseExample) { + const respStr = JSON.stringify(ep.responseExample, null, 2); + const truncResp = respStr.length > 1000 ? respStr.slice(0, 1000) + '\n ...\n}' : respStr; + lines.push('
<details><summary>Example response</summary>\n'); + lines.push('```json'); + lines.push(truncResp); + lines.push('```\n</details>
\n'); + } + } } + + if (!kept.length) lines.push('No API endpoints discovered.\n'); + + // --- Coverage --- + lines.push('## Coverage\n'); + lines.push(`- **${kept.length}** API endpoints discovered`); + if (dropped.length) lines.push(`- **${dropped.length}** dropped (below --min-samples=${minSamples})`); + const noResp = kept.filter(e => !e.responseBodyKnown); + if (noResp.length) lines.push(`- **${noResp.length}** missing response-body schemas`); + const singleSample = kept.filter(e => e.sampleCount === 1); + if (singleSample.length) lines.push(`- **${singleSample.length}** observed only once`); + lines.push(''); + return lines.join('\n') + '\n'; } diff --git a/skills/browser-to-api/scripts/normalize.mjs b/skills/browser-to-api/scripts/normalize.mjs index e8a7e3ce..4132ec8f 100644 --- a/skills/browser-to-api/scripts/normalize.mjs +++ b/skills/browser-to-api/scripts/normalize.mjs @@ -1,16 +1,14 @@ #!/usr/bin/env node // Stage 3 — Normalize. // -// Group paired samples by (origin, method, templated path), collect query-param -// schemas, and detect when normalization is collapsing structurally divergent -// endpoints (flagged for the report). +// Group paired samples by (origin, method, templated path), classify noise vs +// real API, decompose multiplexed endpoints (GraphQL, JSON-RPC), collect +// query-param schemas, and detect normalization anomalies. import { readJsonl, writeJsonl, intermediatePath } from './lib/io.mjs'; import { templatize, templatizeWithSlugs } from './lib/path-template.mjs'; function inferQueryType(values) { - // Lightweight type inference for query-string values (always strings on the - // wire, but we can hint). 
if (values.every(v => /^-?\d+$/.test(v))) return { type: 'integer' }; if (values.every(v => /^-?\d+(\.\d+)?$/.test(v))) return { type: 'number' }; if (values.every(v => v === 'true' || v === 'false')) return { type: 'boolean' }; @@ -18,13 +16,147 @@ function inferQueryType(values) { } function statusSignature(rows) { - // A coarse "shape signature" used to detect when two raw paths that - // templatize to the same template actually behave differently. const ct = new Set(rows.map(r => (r.contentType || '').split(';')[0].trim().toLowerCase()).filter(Boolean)); const status = new Set(rows.map(r => (r.status != null ? Math.floor(r.status / 100) + 'xx' : 'none'))); return [...ct].sort().join(',') + '|' + [...status].sort().join(','); } +// --------------------------------------------------------------------------- +// Noise classification — tag endpoints that are infrastructure, not user-facing +// --------------------------------------------------------------------------- +const NOISE_PATH_PATTERNS = [ + // Tracking / analytics / telemetry + /\/track(ing)?[\/\b]/i, /\/pixel/i, /\/beacon/i, /\/log[\/\b]/i, + /\/impression/i, /\/pageview/i, /\/click[\/\b]/i, + /\/session[-_]?start/i, /\/batch\/(impression|list)/i, + /\/dag\/v\d+\//i, + /\/trackgoal/i, /\/profileview/i, /\/sessionstart/i, + /\/dinerTrust/i, /\/trackDiner/i, + /\/profile-view$/i, /\/track\/search$/i, + /\/mix$/i, + // Cookie / consent / privacy + /\/cookie[-_]?consent/i, /\/consent\//i, /\/onetrust/i, + // Experimentation + /\/bucket[-_]?experiment/i, /\/experiment[\/\b]/i, /\/feature[-_]?flag/i, + // Bot defense / fingerprinting + /\/akam\//i, /\/akamai\//i, /\/human$/i, + // Session plumbing (not user-facing API) + /\/session$/i, /\/authenticate\/start$/i, +]; + +const NOISE_BODY_SIGNALS = [ + /^sensor_data$/, // Akamai bot fingerprint + /^body$/, // Obfuscated payloads (Akamai, etc.) 
+]; + +function classifyEndpoint(endpoint) { + const p = endpoint.path; + const m = endpoint.method; + + // HTML page renders are not API endpoints + const htmlRows = endpoint.sampleRows.filter(r => + (r.contentType || '').includes('text/html')); + if (htmlRows.length === endpoint.sampleRows.length && m === 'GET') return 'page'; + + // Path-based noise detection + if (NOISE_PATH_PATTERNS.some(re => re.test(p))) return 'noise'; + + // Obfuscated paths (random-looking segments with mixed case, no real structure) + const segs = p.split('/').filter(Boolean); + const obfuscated = segs.filter(s => + /[A-Za-z0-9_-]{8,}/.test(s) && + !/^(v\d+|api|dapi|graphql|rest|fe|gql)$/i.test(s) && + /[A-Z]/.test(s) && /[a-z]/.test(s)); + if (obfuscated.length >= 2) return 'noise'; + + // Body-based: if every sample's request body only has noise-signal keys + if (endpoint.sampleRows.length > 0) { + const allNoise = endpoint.sampleRows.every(r => { + if (!r.reqBody || typeof r.reqBody !== 'object') return false; + const keys = Object.keys(r.reqBody); + return keys.length > 0 && keys.every(k => NOISE_BODY_SIGNALS.some(re => re.test(k))); + }); + if (allNoise) return 'noise'; + } + + return 'api'; +} + +// --------------------------------------------------------------------------- +// GraphQL / multiplexed endpoint decomposition +// --------------------------------------------------------------------------- +function detectDiscriminator(rows) { + // Check if these rows share a URL path but have a body field that acts as + // a discriminator (operationName for GraphQL, method for JSON-RPC, etc.) 
+ const candidates = ['operationName', 'method', 'action', 'type', 'command']; + for (const field of candidates) { + const values = new Set(); + let matchCount = 0; + for (const r of rows) { + if (r.reqBody && typeof r.reqBody === 'object' && typeof r.reqBody[field] === 'string') { + values.add(r.reqBody[field]); + matchCount++; + } + } + if (matchCount >= rows.length * 0.8 && values.size >= 2) { + return { field, values: [...values] }; + } + } + + // Also check query params (OpenTable uses ?opname= for GraphQL) + for (const field of ['opname', 'operationName', 'op', 'action']) { + const values = new Set(); + let matchCount = 0; + for (const r of rows) { + if (r.query && typeof r.query[field] === 'string') { + values.add(r.query[field]); + matchCount++; + } + } + if (matchCount >= rows.length * 0.8 && values.size >= 2) { + return { field, values: [...values], source: 'query' }; + } + } + + return null; +} + +function decomposeMultiplexed(endpoint) { + const disc = detectDiscriminator(endpoint.sampleRows); + if (!disc) return [endpoint]; + + const byOp = new Map(); + for (const row of endpoint.sampleRows) { + let opName; + if (disc.source === 'query') { + opName = row.query?.[disc.field] || '__unknown__'; + } else { + opName = (row.reqBody && typeof row.reqBody === 'object') + ? 
row.reqBody[disc.field] || '__unknown__' + : '__unknown__'; + } + if (!byOp.has(opName)) byOp.set(opName, []); + byOp.get(opName).push(row); + } + + const sub = []; + for (const [opName, rows] of byOp) { + // Build a virtual endpoint per operation + const virtualPath = `${endpoint.path} [${opName}]`; + sub.push({ + ...endpoint, + endpointKey: `${endpoint.method} ${endpoint.origin}${virtualPath}`, + path: virtualPath, + operationName: opName, + discriminatorField: disc.field, + parentPath: endpoint.path, + sampleRows: rows, + sampleCount: rows.length, + }); + } + return sub; +} + export function normalize(outDir) { const filtered = readJsonl(intermediatePath(outDir, 'filtered.jsonl')); @@ -40,8 +172,7 @@ export function normalize(outDir) { } // Pass 2: re-templatize each bucket using its raw-path set so slugs can be - // detected. This may further collapse buckets that share the same underlying - // template once slugs are recognized. + // detected. const refined = new Map(); for (const [, b] of buckets) { const rawPaths = [...b.rawPaths]; @@ -57,16 +188,12 @@ export function normalize(outDir) { r.originalKeys.push({ template: b.template, sig: statusSignature(b.rows) }); } - // Build endpoint records. - const endpoints = []; + // Build endpoint records, classify, and decompose. + const preEndpoints = []; for (const [, e] of refined) { const flags = []; - - // Divergent-shape check: if the bucket was collapsed from multiple pass-1 - // templates that had structurally different responses, flag it. 
const sigs = new Set(e.originalKeys.map(k => k.sig)); if (sigs.size > 1) flags.push('divergent-response-shape'); - if (e.rows.length === 1) flags.push('single-sample'); const statuses = new Set(e.rows.map(r => r.status).filter(s => s != null)); if (statuses.size === 1) flags.push('single-status'); @@ -75,7 +202,6 @@ export function normalize(outDir) { const withBody = e.rows.filter(r => r.reqBody != null).length; if (withBody > 0 && withBody < e.rows.length) flags.push('request-body-only-on-some-samples'); - // Query parameter schema: collect names + sample values. const qSamples = new Map(); for (const r of e.rows) { for (const k of Object.keys(r.query || {})) { @@ -94,7 +220,7 @@ export function normalize(outDir) { }); } - endpoints.push({ + preEndpoints.push({ endpointKey: `${e.method} ${e.origin}${e.template}`, origin: e.origin, method: e.method, @@ -102,13 +228,42 @@ export function normalize(outDir) { pathParams: e.params.map(p => ({ name: p.name, in: 'path', required: true, schema: p.schema })), queryParams, statusCodes: [...new Set(e.rows.map(r => r.status).filter(s => s != null))].sort((a, b) => a - b), - sampleRows: e.rows, // kept on the in-memory record; trimmed before write + sampleRows: e.rows, sampleCount: e.rows.length, rawPaths: [...e.rawPaths], normalizationFlags: flags, }); } + // Pass 3: classify and decompose + const endpoints = []; + let noiseCount = 0, pageCount = 0, decomposedCount = 0; + for (const ep of preEndpoints) { + const category = classifyEndpoint(ep); + if (category === 'noise') { noiseCount++; continue; } + if (category === 'page') { pageCount++; continue; } + + // Try to decompose multiplexed endpoints + const decomposed = decomposeMultiplexed(ep); + if (decomposed.length > 1) { + decomposedCount += decomposed.length; + for (const sub of decomposed) { + sub.normalizationFlags = [...(sub.normalizationFlags || [])]; + const subStatuses = new Set(sub.sampleRows.map(r => r.status).filter(s => s != null)); + sub.statusCodes = 
[...subStatuses].sort((a, b) => a - b); + if (sub.sampleRows.length === 1) { + if (!sub.normalizationFlags.includes('single-sample')) sub.normalizationFlags.push('single-sample'); + } + if (subStatuses.size === 1) { + if (!sub.normalizationFlags.includes('single-status')) sub.normalizationFlags.push('single-status'); + } + endpoints.push(sub); + } + } else { + endpoints.push(ep); + } + } + // Drop the heavy in-memory rows from the persisted form; infer.mjs needs // them so we keep a parallel sidecar file. const persisted = endpoints.map(({ sampleRows, ...rest }) => rest); @@ -117,12 +272,12 @@ export function normalize(outDir) { const sidecar = endpoints.map(e => ({ endpointKey: e.endpointKey, samples: e.sampleRows })); writeJsonl(intermediatePath(outDir, 'endpoint-samples.jsonl'), sidecar); - return { endpoints: endpoints.length }; + return { endpoints: endpoints.length, noise: noiseCount, pages: pageCount, decomposed: decomposedCount }; } if (import.meta.url === `file://${process.argv[1]}`) { const out = process.argv[2]; if (!out) { console.error('usage: normalize.mjs '); process.exit(2); } const stats = normalize(out); - console.log(`normalize: ${stats.endpoints} endpoints`); + console.log(`normalize: ${stats.endpoints} endpoints (${stats.noise} noise, ${stats.pages} pages dropped, ${stats.decomposed} decomposed)`); } From 5eee2ab9d79853cc2dd18df34a89de822356a70a Mon Sep 17 00:00:00 2001 From: Shrey Pandya Date: Wed, 13 May 2026 17:31:44 -0400 Subject: [PATCH 5/6] Add self-contained HTML report replacing Swagger UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generates index.html with: - Summary stats (operations, endpoint, protocol, sample count) - Expandable cards per operation with variables table, client usage, request body, and response example - Full generated client.mjs embedded at the bottom The Swagger UI was a poor fit — 10 identical green POST bars for a single GraphQL endpoint with bracket-syntax paths 
that aren't even valid OpenAPI. The HTML report shows what actually matters. Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/browser-to-api/scripts/discover.mjs | 2 +- skills/browser-to-api/scripts/emit.mjs | 165 +++++++++++++++++++++ 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/skills/browser-to-api/scripts/discover.mjs b/skills/browser-to-api/scripts/discover.mjs index 07023685..5dce10ed 100644 --- a/skills/browser-to-api/scripts/discover.mjs +++ b/skills/browser-to-api/scripts/discover.mjs @@ -84,7 +84,7 @@ function main() { } console.log(`\noutput: ${outDir}`); - for (const f of ['client.mjs', 'openapi.yaml', 'openapi.json', 'report.md', 'confidence.json']) { + for (const f of ['index.html', 'client.mjs', 'report.md', 'openapi.yaml', 'openapi.json', 'confidence.json']) { const p = path.join(outDir, f); if (fs.existsSync(p)) console.log(` ${path.relative(process.cwd(), p)}`); } diff --git a/skills/browser-to-api/scripts/emit.mjs b/skills/browser-to-api/scripts/emit.mjs index f53d640a..11614ed1 100644 --- a/skills/browser-to-api/scripts/emit.mjs +++ b/skills/browser-to-api/scripts/emit.mjs @@ -304,6 +304,9 @@ export function emit(outDir, opts = {}) { writeText(path.join(outDir, 'report.md'), buildReport({ kept, dropped, servers, redaction, minSamples, hasClient: !!clientCode })); + // index.html — self-contained visual report + writeText(path.join(outDir, 'index.html'), buildHtmlReport({ kept, servers, title, clientCode })); + return { endpoints: kept.length, droppedLowSample: dropped.length, @@ -610,6 +613,168 @@ function buildReport({ kept, dropped, servers, redaction, minSamples, hasClient return lines.join('\n') + '\n'; } +// --------------------------------------------------------------------------- +// HTML report +// --------------------------------------------------------------------------- + +function escHtml(s) { + return String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;'); +} + +function buildHtmlReport({ kept,
servers, title, clientCode }) { + const baseUrl = servers[0]?.url || ''; + const operations = kept.filter(e => e.operationName); + const regular = kept.filter(e => !e.operationName); + const all = [...operations.sort((a, b) => b.sampleCount - a.sampleCount), ...regular]; + + const opCards = all.map((ep, i) => { + const name = ep.operationName || `${ep.method} ${ep.path}`; + const fnName = ep.operationName ? toFnName(ep.operationName) : null; + const vars = ep.requestExample?.variables; + const varRows = vars && typeof vars === 'object' + ? Object.entries(vars).map(([k, v]) => { + const t = v === null ? 'null' : Array.isArray(v) ? 'array' : typeof v; + const ex = JSON.stringify(v); + return `${escHtml(k)}${escHtml(t)}${escHtml(ex.length > 50 ? ex.slice(0, 47) + '...' : ex)}`; + }).join('\n') + : ''; + + const reqBody = ep.requestExample ? JSON.stringify(ep.requestExample, null, 2) : null; + const respBody = ep.responseExample ? JSON.stringify(ep.responseExample, null, 2) : null; + const truncResp = respBody && respBody.length > 2000 ? respBody.slice(0, 2000) + '\n ...' : respBody; + + return ` +
+
+
+ POST + ${escHtml(name)} +
+
+ ${ep.sampleCount} sample${ep.sampleCount !== 1 ? 's' : ''} + ${fnName ? `${escHtml(fnName)}()` : ''} +
+
+
+ ${ep.parentPath ? `

Endpoint: ${escHtml(ep.method)} ${escHtml(baseUrl)}${escHtml(ep.parentPath)}

` : ''} + ${ep.discriminatorField ? `

Discriminator: ${escHtml(ep.discriminatorField)}: "${escHtml(ep.operationName)}"

` : ''} + + ${varRows ? ` +

Variables

+ + + ${varRows} +
NameTypeExample
` : ''} + + ${fnName ? ` +

Client usage

+
import { ${escHtml(fnName)} } from './client.mjs';
+
+const result = await ${escHtml(fnName)}(${vars ? JSON.stringify(Object.fromEntries(Object.entries(vars).filter(([,v]) => v !== '').slice(0, 4).map(([k, v]) => {
+          if (Array.isArray(v) && v.length > 2) return [k, v.slice(0, 2)];
+          return [k, v];
+        })), null, 2) : '{}'});
` : ''} + + ${reqBody ? ` +

Request body

+
${escHtml(reqBody)}
` : ''} + + ${truncResp ? ` +

Response

+
${escHtml(truncResp)}
` : ''} +
+
`; + }).join('\n'); + + return ` + + + + +${escHtml(title)} — API Report + + + +
+
+

${escHtml(title)}

+

${escHtml(baseUrl)} · ${all.length} operation${all.length !== 1 ? 's' : ''} discovered from browser trace

+
+ +
+
Operations
${all.length}
+
Endpoint
${escHtml(operations[0]?.parentPath || regular[0]?.path || '—')}
+
Protocol
${operations.length ? 'GraphQL (APQ)' : 'REST'}
+
Total samples
${all.reduce((s, e) => s + e.sampleCount, 0)}
+
+ + ${opCards} + + ${clientCode ? ` +
+

Generated client

+

Copy client.mjs into your project. Zero dependencies — uses native fetch.

+
${escHtml(clientCode)}
+
` : ''} +
+ + +`; +} + if (import.meta.url === `file://${process.argv[1]}`) { const out = process.argv[2]; if (!out) { console.error('usage: emit.mjs '); process.exit(2); } From cf3e72bc6c0e2416b740b03e84fd1379ad962fc4 Mon Sep 17 00:00:00 2001 From: Shrey Pandya Date: Wed, 13 May 2026 17:40:01 -0400 Subject: [PATCH 6/6] Replace Swagger UI with self-contained HTML report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit.mjs already generates index.html as the primary visual output — update SKILL.md to match and remove the dead open-swagger-ui.mjs script. Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/browser-to-api/SKILL.md | 23 +- .../scripts/open-swagger-ui.mjs | 205 ------------------ 2 files changed, 12 insertions(+), 216 deletions(-) delete mode 100644 skills/browser-to-api/scripts/open-swagger-ui.mjs diff --git a/skills/browser-to-api/SKILL.md b/skills/browser-to-api/SKILL.md index d62d4939..f1b10f15 100644 --- a/skills/browser-to-api/SKILL.md +++ b/skills/browser-to-api/SKILL.md @@ -14,7 +14,7 @@ This skill **does not capture traffic**. It is purely offline post-processing on ``` browser-trace → .o11y//cdp/network/{requests,responses}.jsonl -browser-to-api → .o11y//api-spec/openapi.yaml + report.md +browser-to-api → .o11y//api-spec/index.html + openapi.yaml + client.mjs ``` ## When to use @@ -57,7 +57,9 @@ node ../browser-trace/scripts/bisect-cdp.mjs my-site ```bash node scripts/discover.mjs --run .o11y/my-site -# → .o11y/my-site/api-spec/openapi.yaml +# → .o11y/my-site/api-spec/index.html ← open this +# .o11y/my-site/api-spec/client.mjs +# .o11y/my-site/api-spec/openapi.yaml # .o11y/my-site/api-spec/openapi.json # .o11y/my-site/api-spec/report.md # .o11y/my-site/api-spec/confidence.json @@ -67,17 +69,15 @@ node scripts/discover.mjs --run .o11y/my-site `discover.mjs` auto-detects `/cdp/network/bodies/`. To use a body capture from elsewhere (e.g. 
didn't snapshot, want the live `browse network` dir), pass `--bodies <dir>` explicitly. -The two primary deliverables are `openapi.yaml` (machine-readable spec) and `report.md` (human-readable coverage summary). +### 3. Open the HTML report -### 3. Preview in Swagger UI when available - -If Swagger UI is installed locally, open the generated spec there: +After `discover.mjs` finishes, **always open the generated HTML report**: ```bash -node scripts/open-swagger-ui.mjs --run .o11y/my-site +open .o11y/my-site/api-spec/index.html ``` -The helper auto-detects `$SWAGGER_UI_DIR`, `~/Developer/swagger-ui`, or `node_modules/swagger-ui-dist`. If none exists, deliver `openapi.yaml` and `report.md` directly and tell the user Swagger UI was not found. +The report is a self-contained HTML file (no server needed) that shows each discovered operation as an expandable card with variables, client usage, request/response examples, and a generated `client.mjs` snippet at the bottom. This is the primary deliverable — always open it for the user. ## CLI flags @@ -95,15 +95,16 @@ The helper auto-detects `$SWAGGER_UI_DIR`, `~/Developer/swagger-ui`, or `node_mo | `--min-samples <n>` | no | Minimum samples per endpoint to include. Default `1` | | `--stage <name>` | no | Run only one stage: `load`, `filter`, `normalize`, `infer`, `emit` | -`scripts/open-swagger-ui.mjs` accepts `--run <run>` or `--spec <path>`, plus optional `--swagger-ui <dir>`, `--host`, `--port`, and `--no-open`.
## Output layout ``` /api-spec/ -├── openapi.yaml primary deliverable +├── index.html visual report — open this (self-contained, no server) +├── client.mjs zero-dep fetch client with typed functions per operation +├── openapi.yaml machine-readable spec ├── openapi.json mirror -├── report.md human-readable summary + coverage caveats +├── report.md markdown summary + curl examples ├── confidence.json per-endpoint confidence + normalization flags ├── samples/ redacted request/response examples │ └── __.json diff --git a/skills/browser-to-api/scripts/open-swagger-ui.mjs b/skills/browser-to-api/scripts/open-swagger-ui.mjs deleted file mode 100644 index e2abc459..00000000 --- a/skills/browser-to-api/scripts/open-swagger-ui.mjs +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env node -// Preview an emitted OpenAPI spec in a local Swagger UI checkout. - -import fs from 'node:fs'; -import http from 'node:http'; -import os from 'node:os'; -import path from 'node:path'; -import { spawn } from 'node:child_process'; -import { fileURLToPath } from 'node:url'; - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); - -function parseArgs(argv) { - const opts = { - run: null, - spec: null, - swaggerUi: null, - host: '127.0.0.1', - port: 0, - open: true, - }; - for (let i = 0; i < argv.length; i++) { - const a = argv[i]; - const next = () => argv[++i]; - switch (a) { - case '--run': opts.run = next(); break; - case '--spec': opts.spec = next(); break; - case '--swagger-ui': opts.swaggerUi = next(); break; - case '--host': opts.host = next(); break; - case '--port': opts.port = Number(next()); break; - case '--no-open': opts.open = false; break; - case '-h': case '--help': - printHelp(); process.exit(0); - default: - console.error(`unknown arg: ${a}`); - printHelp(); process.exit(2); - } - } - return opts; -} - -function printHelp() { - console.error(`usage: open-swagger-ui.mjs (--run | --spec ) [flags] - - --run browser-trace run dir; uses /api-spec/openapi.yaml - --spec 
OpenAPI YAML/JSON file to preview - --swagger-ui Swagger UI checkout/package dir. Defaults to - $SWAGGER_UI_DIR, ~/Developer/swagger-ui, or node_modules/swagger-ui-dist - --host Bind host. Default: 127.0.0.1 - --port Bind port. Default: random free port - --no-open Print the URL without opening a browser`); -} - -function resolveRun(runArg) { - if (fs.existsSync(runArg) && fs.statSync(runArg).isDirectory()) return path.resolve(runArg); - const root = process.env.O11Y_ROOT || '.o11y'; - const guess = path.join(root, runArg); - if (fs.existsSync(guess) && fs.statSync(guess).isDirectory()) return path.resolve(guess); - throw new Error(`run path not found: ${runArg} (tried ${guess})`); -} - -function resolveSpec(opts) { - if (opts.spec) return path.resolve(opts.spec); - if (!opts.run) throw new Error('expected --run or --spec '); - - const runPath = resolveRun(opts.run); - const candidates = [ - path.join(runPath, 'api-spec', 'openapi.yaml'), - path.join(runPath, 'api-spec', 'openapi.json'), - ]; - const found = candidates.find(p => fs.existsSync(p)); - if (!found) throw new Error(`no OpenAPI spec found under ${path.join(runPath, 'api-spec')}`); - return found; -} - -function swaggerUiCandidates(explicit) { - return [ - explicit, - process.env.SWAGGER_UI_DIR, - path.join(os.homedir(), 'Developer', 'swagger-ui'), - path.resolve(process.cwd(), 'node_modules', 'swagger-ui-dist'), - path.resolve(__dirname, '..', 'node_modules', 'swagger-ui-dist'), - ].filter(Boolean); -} - -function distDirFor(candidate) { - const resolved = path.resolve(candidate); - const directDist = path.join(resolved, 'dist'); - if (fs.existsSync(path.join(directDist, 'index.html'))) return directDist; - if (fs.existsSync(path.join(resolved, 'index.html')) && fs.existsSync(path.join(resolved, 'swagger-ui-bundle.js'))) return resolved; - return null; -} - -function resolveSwaggerUi(explicit) { - for (const candidate of swaggerUiCandidates(explicit)) { - const dist = distDirFor(candidate); - if (dist) 
return dist; - } - - const searched = swaggerUiCandidates(explicit).map(p => ` - ${path.resolve(p)}`).join('\n'); - throw new Error(`Swagger UI not found. Searched:\n${searched}\n\nInstall it locally, then rerun:\n git clone https://github.com/swagger-api/swagger-ui.git ~/Developer/swagger-ui\n cd ~/Developer/swagger-ui && npm ci\n\nOr pass --swagger-ui / set SWAGGER_UI_DIR.`); -} - -function mimeFor(filePath) { - const ext = path.extname(filePath).toLowerCase(); - return { - '.css': 'text/css; charset=utf-8', - '.html': 'text/html; charset=utf-8', - '.js': 'application/javascript; charset=utf-8', - '.json': 'application/json; charset=utf-8', - '.map': 'application/json; charset=utf-8', - '.png': 'image/png', - '.svg': 'image/svg+xml', - '.yaml': 'application/yaml; charset=utf-8', - '.yml': 'application/yaml; charset=utf-8', - }[ext] || 'application/octet-stream'; -} - -function swaggerInitializer(specRoute) { - return `window.onload = function() { - window.ui = SwaggerUIBundle({ - url: ${JSON.stringify(specRoute)}, - dom_id: '#swagger-ui', - deepLinking: true, - presets: [ - SwaggerUIBundle.presets.apis, - SwaggerUIStandalonePreset - ], - plugins: [ - SwaggerUIBundle.plugins.DownloadUrl - ], - layout: 'StandaloneLayout' - }); -}; -`; -} - -function safeStaticPath(distDir, urlPath) { - const decoded = decodeURIComponent(urlPath); - const relative = decoded === '/' ? 'index.html' : decoded.replace(/^\/+/, ''); - const fullPath = path.resolve(distDir, relative); - const root = path.resolve(distDir); - if (fullPath !== root && !fullPath.startsWith(root + path.sep)) return null; - return fullPath; -} - -function openUrl(url) { - const opener = process.platform === 'darwin' - ? ['open', [url]] - : process.platform === 'win32' - ? 
['cmd', ['/c', 'start', '', url]] - : ['xdg-open', [url]]; - const child = spawn(opener[0], opener[1], { detached: true, stdio: 'ignore' }); - child.unref(); -} - -async function main() { - const opts = parseArgs(process.argv.slice(2)); - const specPath = resolveSpec(opts); - if (!fs.existsSync(specPath)) throw new Error(`spec not found: ${specPath}`); - - const distDir = resolveSwaggerUi(opts.swaggerUi); - const specRoute = path.extname(specPath).toLowerCase() === '.json' ? '/openapi.json' : '/openapi.yaml'; - - const server = http.createServer((req, res) => { - const requestPath = new URL(req.url, `http://${opts.host}`).pathname; - if (requestPath === specRoute) { - res.writeHead(200, { 'content-type': mimeFor(specPath), 'cache-control': 'no-store' }); - fs.createReadStream(specPath).pipe(res); - return; - } - if (requestPath === '/swagger-initializer.js') { - res.writeHead(200, { 'content-type': 'application/javascript; charset=utf-8', 'cache-control': 'no-store' }); - res.end(swaggerInitializer(specRoute)); - return; - } - - const staticPath = safeStaticPath(distDir, requestPath); - if (!staticPath || !fs.existsSync(staticPath) || fs.statSync(staticPath).isDirectory()) { - res.writeHead(404, { 'content-type': 'text/plain; charset=utf-8' }); - res.end('not found\n'); - return; - } - res.writeHead(200, { 'content-type': mimeFor(staticPath) }); - fs.createReadStream(staticPath).pipe(res); - }); - - await new Promise((resolve, reject) => { - server.once('error', reject); - server.listen(opts.port, opts.host, resolve); - }); - - const address = server.address(); - const url = `http://${opts.host}:${address.port}/`; - console.log(`swagger_ui=${distDir}`); - console.log(`spec=${specPath}`); - console.log(`url=${url}`); - console.log('Press Ctrl-C to stop the preview server.'); - if (opts.open) openUrl(url); -} - -main().catch(err => { - console.error(err.message); - process.exit(1); -});