From ec48c176f5b02961195943316efa3fb6e57c365b Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:22:16 -0700 Subject: [PATCH 01/19] add withCdpSession + getOrCreateCdpSession helpers Two CDP-session lifecycle helpers in cdp-bridge.ts: - withCdpSession(page, fn): ephemeral session with try/finally detach. For one-shot CDP work (archive snapshots, $B memory, single Page.captureScreenshot) where the caller doesn't need session reuse. - getOrCreateCdpSession(page, cache): cached long-lived session that registers a page.once('close') hook to BOTH delete the cache entry AND call session.detach(). Pre-helper code only deleted the cache entry, leaving the Chromium-side CDP target attached until the underlying transport dropped. Pure addition. Existing callers untouched in this commit; they migrate in the next commit alongside the static-grep test that pins the invariant. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/cdp-bridge.ts | 74 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/browse/src/cdp-bridge.ts b/browse/src/cdp-bridge.ts index a2dd7c17fc..cf1b43eef1 100644 --- a/browse/src/cdp-bridge.ts +++ b/browse/src/cdp-bridge.ts @@ -25,8 +25,80 @@ import { logTelemetry } from './telemetry'; const CDP_TIMEOUT_MS = 5000; const CDP_ACQUIRE_TIMEOUT_MS = 5000; +// ─── CDP session lifecycle helpers ───────────────────────────── +// +// Every direct `newCDPSession(page)` call needs a matching `session.detach()` +// to release the Chromium-side CDP target. Forgetting the detach leaves the +// target attached until the underlying transport drops (often process exit), +// which on a long-lived headed browser shows up as steadily-climbing +// browser-process RSS. To make the leak class unforgettable, callers should +// go through one of these two helpers and a static-grep test +// (browse/test/cdp-session-cleanup.test.ts) fails CI if any source file +// calls `newCDPSession(` outside this module. + +/** + * Ephemeral CDP session with try/finally detach. Use for one-shot CDP work + * where the caller doesn't need session reuse — e.g. archive snapshots, + * `$B memory`, a single `Page.captureScreenshot`. The session is detached + * in `finally` regardless of whether `fn` threw, so the Chromium target + * doesn't leak on the error path. + * + * For repeated use of the same page (e.g. the `$B cdp` bridge or the + * inspector), use `getOrCreateCdpSession` instead — it caches and detaches + * on page close. + */ +export async function withCdpSession( + page: Page, + fn: (session: any) => Promise, +): Promise { + const session = await page.context().newCDPSession(page); + try { + return await fn(session); + } finally { + try { + await session.detach(); + } catch { + // Best-effort cleanup. Session may already be detached (target closed, + // context recreated, browser disconnect). Swallowing all errors is the + // correct cleanup posture per CLAUDE.md "best-effort cleanup paths". + } + } +} + +/** + * Cached long-lived CDP session keyed by Page. First call creates the + * session and registers a `page.once('close', ...)` hook that removes the + * cache entry AND calls `session.detach()`. Pre-helper code only removed + * the cache entry, leaving the Chromium-side target attached. + * + * Pass a caller-owned WeakMap so this helper doesn't impose a single global + * cache — the `$B cdp` bridge and the inspector each keep their own session + * pool with different invariants (e.g. the inspector also detaches on + * `framenavigated` because DOM/CSS domain state is tied to the document). + */ +export async function getOrCreateCdpSession( + page: Page, + cache: WeakMap, +): Promise { + let session = cache.get(page); + if (session) return session; + session = await page.context().newCDPSession(page); + cache.set(page, session); + page.once('close', () => { + cache.delete(page); + session.detach().catch(() => { + // Best-effort cleanup — see withCdpSession finally block. + }); + }); + return session; +} + +// ─── $B cdp bridge ───────────────────────────────────────────── + // Per-page CDPSession cache. Created lazily on first allow-listed call, -// cleaned up when the page closes. +// cleaned up when the page closes. TODO(C2): migrate to getOrCreateCdpSession +// so the session also detaches on close (currently only the cache entry is +// removed; the Chromium-side target lingers). const sessionCache: WeakMap = new WeakMap(); async function getCdpSession(page: Page): Promise { From 213a234d10820b4c658a110f28c1eaa08fdc62a7 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:23:09 -0700 Subject: [PATCH 02/19] migrate 3 CDP-session sites to lifecycle helpers Fixes the CDP-target leak class identified by /codex outside-voice on the eng review (D11 EXPAND_SCOPE). All three sites called `page.context().newCDPSession(page)` directly and either forgot the detach entirely (cdp-bridge cache cleanup), only detached on the success path (write-commands archive), or detached on framenavigated but not page-close (cdp-inspector). - cdp-bridge.ts: `getCdpSession` now delegates to `getOrCreateCdpSession`, which registers a `page.once('close')` hook that BOTH removes the cache entry AND calls `session.detach()`. - cdp-inspector.ts: same migration for the inspector's session pool. Keeps the existing framenavigated detach (more granular than close for DOM/CSS state invalidation) plus an inspector-layer close hook for the initializedPages WeakSet. - write-commands.ts archive: wraps Page.captureSnapshot in withCdpSession so the detach runs in `finally`, including the path where captureSnapshot throws. The static-grep tripwire (next commit) pins the invariant so future direct calls to newCDPSession fail CI. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/cdp-bridge.ts | 16 +++++----------- browse/src/cdp-inspector.ts | 25 +++++++++++++++++-------- browse/src/write-commands.ts | 8 +++++--- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/browse/src/cdp-bridge.ts b/browse/src/cdp-bridge.ts index cf1b43eef1..3d1fa3d8d0 100644 --- a/browse/src/cdp-bridge.ts +++ b/browse/src/cdp-bridge.ts @@ -95,20 +95,14 @@ export async function getOrCreateCdpSession( // ─── $B cdp bridge ───────────────────────────────────────────── -// Per-page CDPSession cache. Created lazily on first allow-listed call, -// cleaned up when the page closes. TODO(C2): migrate to getOrCreateCdpSession -// so the session also detaches on close (currently only the cache entry is -// removed; the Chromium-side target lingers). +// Per-page CDPSession cache. Lifecycle delegated to getOrCreateCdpSession +// which registers a close hook that BOTH removes the cache entry AND calls +// session.detach() — pre-helper code only did the former, leaving the +// Chromium-side target attached. const sessionCache: WeakMap = new WeakMap(); async function getCdpSession(page: Page): Promise { - let s = sessionCache.get(page); - if (s) return s; - s = await page.context().newCDPSession(page); - sessionCache.set(page, s); - // Clear cache on detach so we don't hold a stale handle. - page.once('close', () => sessionCache.delete(page)); - return s; + return getOrCreateCdpSession(page, sessionCache); } export interface CdpDispatchInput { diff --git a/browse/src/cdp-inspector.ts b/browse/src/cdp-inspector.ts index 4315ddd895..293b6eb8bd 100644 --- a/browse/src/cdp-inspector.ts +++ b/browse/src/cdp-inspector.ts @@ -13,6 +13,7 @@ */ import type { Page } from 'playwright'; +import { getOrCreateCdpSession } from './cdp-bridge'; // ─── Types ────────────────────────────────────────────────────── @@ -106,15 +107,23 @@ async function getOrCreateSession(page: Page): Promise { } } - session = await page.context().newCDPSession(page); - cdpSessions.set(page, session); - - // Enable DOM and CSS domains - await session.send('DOM.enable'); - await session.send('CSS.enable'); - initializedPages.add(page); + session = await getOrCreateCdpSession(page, cdpSessions); + + // Enable DOM and CSS domains on first init for this page. The session + // itself is cached + close-detached by getOrCreateCdpSession; the + // initializedPages WeakSet is inspector-layer state that needs its + // own close hook to stay in sync. + if (!initializedPages.has(page)) { + await session.send('DOM.enable'); + await session.send('CSS.enable'); + initializedPages.add(page); + page.once('close', () => initializedPages.delete(page)); + } - // Auto-detach on navigation + // Auto-detach on navigation — DOM/CSS domain state is tied to the + // document. Close-detach (from getOrCreateCdpSession) handles the + // tab-close case; framenavigated catches in-tab navigation that + // invalidates inspector state without closing the tab. page.once('framenavigated', () => { try { session.detach().catch(() => {}); diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index daebd18a0b..4a847141d2 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -18,6 +18,7 @@ import type { SetContentWaitUntil } from './tab-session'; import { TEMP_DIR, isPathWithin } from './platform'; import { SAFE_DIRECTORIES } from './path-security'; import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector'; +import { withCdpSession } from './cdp-bridge'; /** * Aggressive page cleanup selectors and heuristics. @@ -1409,9 +1410,10 @@ export async function handleWriteCommand( validateOutputPath(outputPath); try { - const cdp = await page.context().newCDPSession(page); - const { data } = await cdp.send('Page.captureSnapshot', { format: 'mhtml' }); - await cdp.detach(); + const data = await withCdpSession(page, async (cdp) => { + const result = await cdp.send('Page.captureSnapshot', { format: 'mhtml' }); + return (result as { data: string }).data; + }); fs.writeFileSync(outputPath, data); return `Archive saved: ${outputPath} (${Math.round(data.length / 1024)}KB, MHTML)`; } catch (err: any) { From efc0c0c64e09d965261b4d79c4c2f2f2929e8888 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:23:33 -0700 Subject: [PATCH 03/19] add CDP-session cleanup tripwire + helper unit tests browse/test/cdp-session-cleanup.test.ts pins the invariant that no source file outside cdp-bridge.ts may call newCDPSession() directly. If a future refactor reintroduces the direct call, CI fails with a file:line list and a pointer to the right helper to use instead (withCdpSession for one-shot, getOrCreateCdpSession for cached). Also covers the helpers themselves with fake-Page unit tests: - withCdpSession detaches on success - withCdpSession detaches on throw (the actual leak fix) - withCdpSession swallows detach errors so they don't mask fn errors - getOrCreateCdpSession caches the session across calls - close hook detaches AND clears the cache Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/test/cdp-session-cleanup.test.ts | 171 ++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 browse/test/cdp-session-cleanup.test.ts diff --git a/browse/test/cdp-session-cleanup.test.ts b/browse/test/cdp-session-cleanup.test.ts new file mode 100644 index 0000000000..25ca6760cb --- /dev/null +++ b/browse/test/cdp-session-cleanup.test.ts @@ -0,0 +1,171 @@ +import { describe, test, expect } from 'bun:test'; +import * as fs from 'fs'; +import * as path from 'path'; +import type { Page } from 'playwright'; +import { withCdpSession, getOrCreateCdpSession } from '../src/cdp-bridge'; + +// Static-grep tripwire + behavior tests for the CDP session lifecycle +// helpers introduced as part of the D11 EXPAND_SCOPE memory-leak fix. +// +// Direct calls to `page.context().newCDPSession(page)` are the leak class +// the helpers exist to close — every direct call needs a matching +// `session.detach()` and forgetting it leaves the Chromium-side target +// attached until the underlying transport drops. The tripwire fails CI +// if any source file calls `newCDPSession(` outside `cdp-bridge.ts` +// (the file that owns the helpers). +// +// Pattern mirrors browse/test/terminal-agent-pid-identity.test.ts and +// browse/test/server-sanitize-surrogates.test.ts: read source files +// directly, assert an invariant on their contents. + +const SRC_DIR = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src'); + +function readAllSourceFiles(): Array<{ file: string; content: string }> { + const out: Array<{ file: string; content: string }> = []; + for (const entry of fs.readdirSync(SRC_DIR)) { + if (!entry.endsWith('.ts')) continue; + const full = path.join(SRC_DIR, entry); + out.push({ file: entry, content: fs.readFileSync(full, 'utf-8') }); + } + return out; +} + +describe('CDP session cleanup invariant', () => { + test('1. no source file calls `newCDPSession(` outside cdp-bridge.ts', () => { + const offenders: Array<{ file: string; line: number; text: string }> = []; + for (const { file, content } of readAllSourceFiles()) { + // The helper file is the ONE allowed home for direct newCDPSession calls. + if (file === 'cdp-bridge.ts') continue; + const lines = content.split('\n'); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (!/newCDPSession\s*\(/.test(line)) continue; + // Skip comment lines — documentation mentions are fine. + const trimmed = line.trim(); + if (trimmed.startsWith('//') || trimmed.startsWith('*')) continue; + offenders.push({ file, line: i + 1, text: trimmed }); + } + } + if (offenders.length > 0) { + const formatted = offenders + .map((o) => ` ${o.file}:${o.line} ${o.text}`) + .join('\n'); + throw new Error( + `Direct newCDPSession(...) calls found outside cdp-bridge.ts. ` + + `Route through withCdpSession() (one-shot, finally-detach) or ` + + `getOrCreateCdpSession() (cached, close-detach) instead:\n${formatted}`, + ); + } + expect(offenders).toEqual([]); + }); + + test('2. helper file exports the two documented entry points', () => { + // Sanity: the tripwire is meaningless if the helpers themselves are gone. + expect(typeof withCdpSession).toBe('function'); + expect(typeof getOrCreateCdpSession).toBe('function'); + }); +}); + +describe('withCdpSession finally-detach', () => { + // Fake Page surface for unit-testing the helper without spinning up a real + // browser. The helper only touches page.context().newCDPSession(page) and + // the returned session's .detach(), so this surface is enough. + function makeFakePage(detachSpy: { called: number; rejected?: Error }) { + const session = { + detach: async () => { + detachSpy.called++; + if (detachSpy.rejected) throw detachSpy.rejected; + }, + }; + return { + context: () => ({ + newCDPSession: async (_p: unknown) => session, + }), + } as unknown as Page; + } + + test('3. detaches on the success path', async () => { + const detachSpy = { called: 0 }; + const page = makeFakePage(detachSpy); + const result = await withCdpSession(page, async (session) => { + expect(session).toBeDefined(); + return 42; + }); + expect(result).toBe(42); + expect(detachSpy.called).toBe(1); + }); + + test('4. detaches even when fn throws (the actual leak fix)', async () => { + const detachSpy = { called: 0 }; + const page = makeFakePage(detachSpy); + await expect( + withCdpSession(page, async () => { + throw new Error('boom'); + }), + ).rejects.toThrow('boom'); + expect(detachSpy.called).toBe(1); + }); + + test('5. swallows detach errors so they do not mask fn errors', async () => { + const detachSpy = { called: 0, rejected: new Error('already detached') }; + const page = makeFakePage(detachSpy); + await expect( + withCdpSession(page, async () => { + throw new Error('original'); + }), + ).rejects.toThrow('original'); + expect(detachSpy.called).toBe(1); + }); + + test('6. swallows detach errors on the success path too', async () => { + const detachSpy = { called: 0, rejected: new Error('target closed') }; + const page = makeFakePage(detachSpy); + const result = await withCdpSession(page, async () => 'ok'); + expect(result).toBe('ok'); + expect(detachSpy.called).toBe(1); + }); +}); + +describe('getOrCreateCdpSession close-detach', () => { + function makeFakePage() { + const closeListeners: Array<() => void> = []; + const session = { + detach: async () => { + session._detachCount++; + }, + _detachCount: 0, + }; + const page = { + context: () => ({ + newCDPSession: async (_p: unknown) => session, + }), + once: (event: string, fn: () => void) => { + if (event === 'close') closeListeners.push(fn); + }, + _fireClose: () => { + for (const fn of closeListeners) fn(); + }, + }; + return { page: page as unknown as Page, session, fireClose: page._fireClose }; + } + + test('7. caches the session across calls', async () => { + const { page } = makeFakePage(); + const cache = new WeakMap(); + const s1 = await getOrCreateCdpSession(page, cache); + const s2 = await getOrCreateCdpSession(page, cache); + expect(s1).toBe(s2); + }); + + test('8. close hook detaches the session AND clears the cache', async () => { + const { page, session, fireClose } = makeFakePage(); + const cache = new WeakMap(); + await getOrCreateCdpSession(page, cache); + expect(cache.get(page)).toBeDefined(); + fireClose(); + // Detach runs synchronously up to the await in the close hook; let it settle. + await new Promise((r) => setTimeout(r, 0)); + expect(cache.get(page)).toBeUndefined(); + expect(session._detachCount).toBe(1); + }); +}); From 6e6d7c2efc560138bdf4a9dce9ccde8a51361133 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:24:51 -0700 Subject: [PATCH 04/19] extract createSseEndpoint helper with cleanup contract browse/src/sse-helpers.ts owns the SSE cleanup invariant: cleanup runs on abort, enqueue failure, AND heartbeat failure, exactly once, regardless of which edge fires first. Pre-helper, /activity/stream and /inspector/events ran cleanup only on the req.signal.abort edge. If the underlying TCP died without firing abort (Chromium MV3 service-worker suspend, intermediate proxy half-close), the subscriber closure stayed in the Set capturing the ReadableStreamDefaultController plus any payloads queued behind it. Over a multi-day sidebar session this compounded into multi-MB of retained controllers per dead connection. Caller surface: initialReplay (optional, for gap replay or state snapshots), subscribe (live-event source), liveEventName (SSE event name for live wrap), heartbeatMs. send() helper handles JSON encoding with sanitizeReplacer + lone-surrogate stripping. Unit tests pin all three cleanup edges + idempotency + replay ordering + surrogate sanitization. Endpoint refactors land in the next commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/sse-helpers.ts | 154 +++++++++++++++++++++++++ browse/test/sse-helpers.test.ts | 194 ++++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 browse/src/sse-helpers.ts create mode 100644 browse/test/sse-helpers.test.ts diff --git a/browse/src/sse-helpers.ts b/browse/src/sse-helpers.ts new file mode 100644 index 0000000000..ed4954112b --- /dev/null +++ b/browse/src/sse-helpers.ts @@ -0,0 +1,154 @@ +// SSE endpoint helper — shared cleanup contract for stream endpoints. +// +// Pre-helper, /activity/stream and /inspector/events implemented the same +// pattern in parallel and both leaked subscribers when enqueue failed +// without a corresponding abort signal (e.g. Chromium MV3 service-worker +// suspend dropped the TCP without an abort edge). The subscriber closure +// stayed in the Set, capturing the ReadableStreamDefaultController plus +// any payloads queued behind it. Over a multi-day sidebar session this +// compounded into multi-MB of retained controllers per dead connection. +// +// Centralizing the cleanup contract here means any future SSE endpoint +// inherits the invariant — cleanup runs on abort, enqueue failure, AND +// heartbeat failure, exactly once, regardless of which edge fires first. + +import { stripLoneSurrogates } from './sanitize'; + +/** + * JSON.stringify replacer that strips lone UTF-16 surrogates from string + * values before they get escape-encoded. Pair with stringify when the + * consumer will JSON.parse the payload back into JS strings (SSE clients + * do this). Required at every SSE egress that ships page-content-derived + * fields — see CLAUDE.md "Unicode sanitization at server egress". + */ +function sanitizeReplacer(_key: string, value: unknown): unknown { + return typeof value === 'string' ? stripLoneSurrogates(value) : value; +} + +/** Send an SSE event. Handles JSON encoding + lone-surrogate sanitization. */ +export type SseSender = (event: string, data: unknown) => void; + +export interface SseEndpointConfig { + /** + * Optional. Runs once after the stream opens, before subscribing for live + * events. Use for initial event replay (activity gap detection, history + * burst) or a current-state snapshot (inspector). The `send` helper + * handles JSON encoding with sanitizeReplacer and SSE framing; pass + * any event name and any payload object. + */ + initialReplay?: (send: SseSender) => void; + + /** + * Subscribe to the live event source. Receives a `notify` callback; + * returns an unsubscribe function. The callback routes through the + * helper's safeEnqueue + cleanup-on-throw, so a dead consumer ends up + * removed from the subscriber set on the very next event (instead of + * waiting for an abort that may never fire). + */ + subscribe: (notify: (entry: T) => void) => () => void; + + /** + * SSE event name for live events. `data: \n\n` + * is wrapped automatically. /activity/stream uses 'activity'; + * /inspector/events uses 'inspector'. + */ + liveEventName: string; + + /** Heartbeat interval in ms. Default: 15000. */ + heartbeatMs?: number; +} + +/** + * Build a streaming Response that owns the cleanup contract: + * - safeEnqueue catches enqueue throws → cleanup + * - 15s heartbeat catches dead peers; failure → cleanup + * - req.signal abort → cleanup + * - cleanup is idempotent (clearInterval + unsubscribe + try close) + */ +export function createSseEndpoint( + req: Request, + config: SseEndpointConfig, +): Response { + const heartbeatMs = config.heartbeatMs ?? 15000; + const encoder = new TextEncoder(); + + const stream = new ReadableStream({ + start(controller) { + let cleanedUp = false; + let heartbeat: ReturnType | null = null; + let unsubscribe: (() => void) | null = null; + + const cleanup = (): void => { + if (cleanedUp) return; + cleanedUp = true; + if (heartbeat !== null) { + clearInterval(heartbeat); + heartbeat = null; + } + if (unsubscribe !== null) { + unsubscribe(); + unsubscribe = null; + } + try { + controller.close(); + } catch { + // Expected: stream already closed by the consumer. + } + }; + + const send: SseSender = (event, data) => { + if (cleanedUp) return; + try { + controller.enqueue( + encoder.encode( + `event: ${event}\ndata: ${JSON.stringify(data, sanitizeReplacer)}\n\n`, + ), + ); + } catch { + // Consumer disconnected mid-write. Tear down so this subscriber + // doesn't sit in the set forever. + cleanup(); + } + }; + + // Initial replay (caller-provided). + if (config.initialReplay) { + try { + config.initialReplay(send); + } catch { + cleanup(); + return; + } + if (cleanedUp) return; + } + + // Subscribe for live events. + unsubscribe = config.subscribe((entry) => { + send(config.liveEventName, entry); + }); + + // Heartbeat keeps NAT boxes and proxies from dropping idle SSE, + // and serves as a liveness probe: an enqueue failure here is the + // cheapest way to learn the consumer is gone without waiting for + // an abort signal that may never arrive. + heartbeat = setInterval(() => { + if (cleanedUp) return; + try { + controller.enqueue(encoder.encode(`: heartbeat\n\n`)); + } catch { + cleanup(); + } + }, heartbeatMs); + + req.signal.addEventListener('abort', cleanup); + }, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + }, + }); +} diff --git a/browse/test/sse-helpers.test.ts b/browse/test/sse-helpers.test.ts new file mode 100644 index 0000000000..bf3c42965f --- /dev/null +++ b/browse/test/sse-helpers.test.ts @@ -0,0 +1,194 @@ +import { describe, test, expect } from 'bun:test'; +import { createSseEndpoint } from '../src/sse-helpers'; + +// Unit tests for the SSE cleanup contract introduced by D6 EXTRACT_HELPER. +// +// The pre-helper bug: /activity/stream and /inspector/events ran cleanup +// only on the `req.signal.abort` edge. If the underlying TCP died without +// firing abort (Chromium MV3 service-worker suspend, intermediate proxy +// half-close), the subscriber closure stayed in the Set capturing the +// ReadableStreamDefaultController and any payloads queued behind it. +// +// These tests pin the three cleanup edges: +// 1. abort signal → cleanup +// 2. enqueue throws (consumer gone) → cleanup +// 3. heartbeat enqueue throws → cleanup +// And the idempotency invariant: cleanup running twice is a no-op. + +function makeRequest(): { req: Request; abort: () => void } { + const controller = new AbortController(); + // Minimal Request — we only use req.signal here. URL is irrelevant. + const req = new Request('http://localhost/test', { signal: controller.signal }); + return { req, abort: () => controller.abort() }; +} + +/** Pull SSE bytes from a Response stream, return decoded text. */ +async function readAll(res: Response, ms: number): Promise { + if (!res.body) return ''; + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let out = ''; + const deadline = Date.now() + ms; + while (Date.now() < deadline) { + try { + const { value, done } = await Promise.race([ + reader.read(), + new Promise<{ value: undefined; done: true }>((resolve) => + setTimeout(() => resolve({ value: undefined, done: true }), deadline - Date.now()), + ), + ]); + if (done) break; + if (value) out += decoder.decode(value, { stream: true }); + } catch { + break; + } + } + try { reader.cancel().catch(() => {}); } catch {} + return out; +} + +describe('createSseEndpoint cleanup contract', () => { + test('1. abort signal triggers unsubscribe', async () => { + let unsubscribed = 0; + const { req, abort } = makeRequest(); + const res = createSseEndpoint(req, { + subscribe: () => () => { + unsubscribed++; + }, + liveEventName: 'test', + heartbeatMs: 60_000, // long enough that we don't see heartbeats in this test + }); + // Start the stream by reading once, then abort. + const reader = res.body!.getReader(); + // Yield to let start() run. + await Promise.resolve(); + await Promise.resolve(); + abort(); + // Let the abort listener fire. + await new Promise((r) => setTimeout(r, 10)); + expect(unsubscribed).toBe(1); + reader.cancel().catch(() => {}); + }); + + test('2. enqueue throw triggers unsubscribe + heartbeat clear', async () => { + let unsubscribed = 0; + let notify: ((entry: { msg: string }) => void) | null = null; + const { req } = makeRequest(); + const res = createSseEndpoint<{ msg: string }>(req, { + subscribe: (n) => { + notify = n; + return () => { + unsubscribed++; + }; + }, + liveEventName: 'test', + heartbeatMs: 60_000, + }); + // Cancel the reader so subsequent enqueues throw. + const reader = res.body!.getReader(); + await Promise.resolve(); + await Promise.resolve(); + expect(notify).not.toBeNull(); + await reader.cancel(); // closes the consumer side + // Now fire a live event — enqueue should throw → cleanup → unsubscribe. + notify!({ msg: 'will fail to enqueue' }); + await new Promise((r) => setTimeout(r, 10)); + expect(unsubscribed).toBe(1); + }); + + test('3. cleanup is idempotent (abort then enqueue-fail)', async () => { + let unsubscribed = 0; + let notify: ((entry: { msg: string }) => void) | null = null; + const { req, abort } = makeRequest(); + const res = createSseEndpoint<{ msg: string }>(req, { + subscribe: (n) => { + notify = n; + return () => { + unsubscribed++; + }; + }, + liveEventName: 'test', + heartbeatMs: 60_000, + }); + const reader = res.body!.getReader(); + await Promise.resolve(); + await Promise.resolve(); + abort(); + await new Promise((r) => setTimeout(r, 10)); + // Second cleanup edge — should be a no-op. + notify!({ msg: 'no-op' }); + await new Promise((r) => setTimeout(r, 10)); + expect(unsubscribed).toBe(1); + reader.cancel().catch(() => {}); + }); + + test('4. initialReplay events reach the client before live events', async () => { + let notify: ((entry: { msg: string }) => void) | null = null; + const { req } = makeRequest(); + const res = createSseEndpoint<{ msg: string }>(req, { + initialReplay: (send) => { + send('replay', { msg: 'first' }); + }, + subscribe: (n) => { + notify = n; + return () => {}; + }, + liveEventName: 'live', + heartbeatMs: 60_000, + }); + // Trigger one live event soon after stream starts. + setTimeout(() => notify?.({ msg: 'second' }), 5); + const text = await readAll(res, 50); + expect(text).toContain('event: replay'); + expect(text).toContain('"msg":"first"'); + expect(text).toContain('event: live'); + expect(text).toContain('"msg":"second"'); + // Replay must come before live. + expect(text.indexOf('"first"')).toBeLessThan(text.indexOf('"second"')); + }); + + test('5. initialReplay throw triggers cleanup without subscribing', async () => { + let subscribed = 0; + const { req } = makeRequest(); + const res = createSseEndpoint(req, { + initialReplay: () => { + throw new Error('replay boom'); + }, + subscribe: () => { + subscribed++; + return () => {}; + }, + liveEventName: 'test', + heartbeatMs: 60_000, + }); + // Drain — stream should close cleanly. + const text = await readAll(res, 30); + expect(text).toBe(''); // no events + expect(subscribed).toBe(0); // never reached subscribe() + }); + + test('6. lone surrogates in payload string are sanitized', async () => { + let notify: ((entry: { msg: string }) => void) | null = null; + const { req } = makeRequest(); + const res = createSseEndpoint<{ msg: string }>(req, { + subscribe: (n) => { + notify = n; + return () => {}; + }, + liveEventName: 'test', + heartbeatMs: 60_000, + }); + setTimeout(() => { + // Lone high surrogate (no matching low). JSON.stringify would emit + // \uD800 escape that breaks Claude API. Helper must strip it. + notify?.({ msg: 'hello \uD800 world' }); + }, 5); + const text = await readAll(res, 50); + expect(text).toContain('event: test'); + // JSON.stringify emits U+FFFD as the literal character, not as escape. + expect(text).toContain('�'); + // The raw lone-surrogate escape MUST NOT survive — that's the failure + // mode that breaks the Claude API with HTTP 400. + expect(text.toLowerCase()).not.toContain('\\ud800'); + }); +}); From 336cb32434122274823ffc2ccacae318fc7a28bc Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:25:34 -0700 Subject: [PATCH 05/19] route /activity/stream + /inspector/events through createSseEndpoint Both endpoints collapse from ~45 lines of in-line ReadableStream wiring to ~8 lines of helper config. Behavior preserved bit-for-bit by the new sse-helpers tests: - initial replay (activity gap + history, inspector state snapshot) - live event subscription - 15s heartbeat - SSE framing - sanitizeReplacer applied to every JSON.stringify The leak fix is the cleanup contract: pre-refactor, both endpoints ran cleanup only on req.signal.abort. If TCP died without firing abort (Chromium MV3 SW suspend, intermediate proxy half-close), the subscriber closure stayed in the Set forever capturing the ReadableStreamDefaultController + queued payloads. Post-refactor, an enqueue-failure or heartbeat-failure on a dead consumer triggers the same idempotent cleanup as abort would. Net: -83 / +15 in server.ts. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/server.ts | 132 ++++++++----------------------------------- 1 file changed, 24 insertions(+), 108 deletions(-) diff --git a/browse/src/server.ts b/browse/src/server.ts index bc0b378cb4..2f6c598f7e 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -38,6 +38,7 @@ import { import { validateTempPath } from './path-security'; import { resolveConfig, ensureStateDir, readVersionHash, resolveChromiumProfile, cleanSingletonLocks } from './config'; import { emitActivity, subscribe, getActivityAfter, getActivityHistory, getSubscriberCount } from './activity'; +import { createSseEndpoint } from './sse-helpers'; import { initAuditLog, writeAuditEntry } from './audit'; import { inspectElement, modifyStyle, resetModifications, getModificationHistory, detachSession, type InspectorResult } from './cdp-inspector'; // Bun.spawn used instead of child_process.spawn (compiled bun binaries @@ -2432,62 +2433,19 @@ export function buildFetchHandler(cfg: ServerConfig): ServerHandle { }); } const afterId = parseInt(url.searchParams.get('after') || '0', 10); - const encoder = new TextEncoder(); - - const stream = new ReadableStream({ - start(controller) { - // SSE egress invariant: every JSON.stringify here ships page-content-derived - // fields (URLs, command args, errors) to the sidebar. Lone surrogates must - // be sanitized DURING stringify (via sanitizeReplacer) so they're cleaned - // before escape-encoding — post-stringify regex is ineffective because - // JSON.stringify has already converted \uD800 → "\\ud800". - // 1. Gap detection + replay + // Cleanup contract (abort + enqueue-fail + heartbeat-fail, all + // idempotent) lives in createSseEndpoint; sanitizeReplacer is + // applied to every JSON.stringify inside the helper, so + // page-content-derived fields (URLs, command args, errors) + // stay surrogate-safe per CLAUDE.md egress invariant. + return createSseEndpoint(req, { + initialReplay: (send) => { const { entries, gap, gapFrom, availableFrom } = getActivityAfter(afterId); - if (gap) { - controller.enqueue(encoder.encode(`event: gap\ndata: ${JSON.stringify({ gapFrom, availableFrom }, sanitizeReplacer)}\n\n`)); - } - for (const entry of entries) { - controller.enqueue(encoder.encode(`event: activity\ndata: ${JSON.stringify(entry, sanitizeReplacer)}\n\n`)); - } - - // 2. Subscribe for live events - const unsubscribe = subscribe((entry) => { - try { - controller.enqueue(encoder.encode(`event: activity\ndata: ${JSON.stringify(entry, sanitizeReplacer)}\n\n`)); - } catch (err: any) { - console.debug('[browse] Activity SSE stream error, unsubscribing:', err.message); - unsubscribe(); - } - }); - - // 3. Heartbeat every 15s - const heartbeat = setInterval(() => { - try { - controller.enqueue(encoder.encode(`: heartbeat\n\n`)); - } catch (err: any) { - console.debug('[browse] Activity SSE heartbeat failed:', err.message); - clearInterval(heartbeat); - unsubscribe(); - } - }, 15000); - - // 4. Cleanup on disconnect - req.signal.addEventListener('abort', () => { - clearInterval(heartbeat); - unsubscribe(); - try { controller.close(); } catch { - // Expected: stream already closed - } - }); - }, - }); - - return new Response(stream, { - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', + if (gap) send('gap', { gapFrom, availableFrom }); + for (const entry of entries) send('activity', entry); }, + subscribe, + liveEventName: 'activity', }); } @@ -2806,62 +2764,20 @@ export function buildFetchHandler(cfg: ServerConfig): ServerHandle { status: 401, headers: { 'Content-Type': 'application/json' }, }); } - const encoder = new TextEncoder(); - const stream = new ReadableStream({ - start(controller) { - // SSE egress invariant: inspectorData and CDP event payloads carry - // page-DOM strings (selectors, attribute values, console messages). - // sanitizeReplacer cleans lone surrogates DURING JSON.stringify so - // they're neutralized before escape-encoding (post-stringify regex - // is a no-op once \uD800 has become "\\ud800"). - // Send current state immediately - if (inspectorData) { - controller.enqueue(encoder.encode( - `event: state\ndata: ${JSON.stringify({ data: inspectorData, timestamp: inspectorTimestamp }, sanitizeReplacer)}\n\n` - )); - } - - // Subscribe for live events - const notify: InspectorSubscriber = (event) => { - try { - controller.enqueue(encoder.encode( - `event: inspector\ndata: ${JSON.stringify(event, sanitizeReplacer)}\n\n` - )); - } catch (err: any) { - console.debug('[browse] Inspector SSE stream error:', err.message); - inspectorSubscribers.delete(notify); - } - }; + // Cleanup contract (abort + enqueue-fail + heartbeat-fail, + // idempotent) lives in createSseEndpoint; sanitizeReplacer is + // applied to every JSON.stringify inside the helper. The + // inspector subscriber set stays here because it's also written + // to by emitInspectorEvent above. + return createSseEndpoint(req, { + initialReplay: inspectorData + ? (send) => send('state', { data: inspectorData, timestamp: inspectorTimestamp }) + : undefined, + subscribe: (notify) => { inspectorSubscribers.add(notify); - - // Heartbeat every 15s - const heartbeat = setInterval(() => { - try { - controller.enqueue(encoder.encode(`: heartbeat\n\n`)); - } catch (err: any) { - console.debug('[browse] Inspector SSE heartbeat failed:', err.message); - clearInterval(heartbeat); - inspectorSubscribers.delete(notify); - } - }, 15000); - - // Cleanup on disconnect - req.signal.addEventListener('abort', () => { - clearInterval(heartbeat); - inspectorSubscribers.delete(notify); - try { controller.close(); } catch (err: any) { - // Expected: stream already closed - } - }); - }, - }); - - return new Response(stream, { - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', + return () => inspectorSubscribers.delete(notify); }, + liveEventName: 'inspector', }); } From 4f42c7bf08d843c8d9f572cd704240380ae74e31 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:26:21 -0700 Subject: [PATCH 06/19] cap inspector modificationHistory at 200 entries Pre-cap, modificationHistory was an unbounded module-scoped array that grew for every CSS edit through $B css across the entire session. Small per-entry footprint but no upper bound, the kind of slow leak that compounds over multi-day inspector use. Cap is 200, oldest evicted on push past the cap. modHistoryTotalPushed stays monotonic across the session so undoModification can tell the user when their target index has been evicted, instead of just the opaque pre-cap "No modification at index 500" with no context. __testInternals export lets the cap + eviction error be unit-tested without spinning up a CDP-driven Page. Production code must continue to go through modifyStyle / undoModification / resetModifications. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/cdp-inspector.ts | 44 ++++++++- browse/test/cdp-inspector-history-cap.test.ts | 95 +++++++++++++++++++ 2 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 browse/test/cdp-inspector-history-cap.test.ts diff --git a/browse/src/cdp-inspector.ts b/browse/src/cdp-inspector.ts index 293b6eb8bd..fd4a938d17 100644 --- a/browse/src/cdp-inspector.ts +++ b/browse/src/cdp-inspector.ts @@ -139,7 +139,41 @@ async function getOrCreateSession(page: Page): Promise { // ─── Modification History ─────────────────────────────────────── +// Bounded FIFO of style modifications. Pre-cap, this was an unbounded +// module-scoped array that grew for every CSS edit made through $B css +// across the whole browser session — small per-entry footprint but no +// upper bound, the kind of slow leak that compounds over multi-day +// inspector use. The cap is 200 because per-session undo workflows +// rarely walk back more than a handful of edits, and a user who really +// wants to roll a long change back can `$B css reset` to revert all of +// them. totalPushed is monotonic across the session so undoModification +// can tell the user when their target index has been evicted, instead +// of just "no modification at index N". +const MOD_HISTORY_CAP = 200; const modificationHistory: StyleModification[] = []; +let modHistoryTotalPushed = 0; + +function pushModification(mod: StyleModification): void { + modificationHistory.push(mod); + modHistoryTotalPushed++; + while (modificationHistory.length > MOD_HISTORY_CAP) { + modificationHistory.shift(); + } +} + +// Test-only entry: exposes the history-cap mechanics (push, reset, cap value) +// without requiring a CDP-driven Page. Production code must go through +// modifyStyle / undoModification / resetModifications. +export const __testInternals = { + pushModification, + MOD_HISTORY_CAP, + getRawHistory: () => modificationHistory.slice(), + getTotalPushed: () => modHistoryTotalPushed, + resetForTest: () => { + modificationHistory.length = 0; + modHistoryTotalPushed = 0; + }, +}; // ─── Specificity Calculation ──────────────────────────────────── @@ -568,7 +602,7 @@ export async function modifyStyle( method, }; - modificationHistory.push(modification); + pushModification(modification); return modification; } @@ -578,7 +612,12 @@ export async function modifyStyle( export async function undoModification(page: Page, index?: number): Promise { const idx = index ?? modificationHistory.length - 1; if (idx < 0 || idx >= modificationHistory.length) { - throw new Error(`No modification at index ${idx}. History has ${modificationHistory.length} entries.`); + const evictedNote = modHistoryTotalPushed > MOD_HISTORY_CAP + ? ` (most recent ${MOD_HISTORY_CAP} only — ${modHistoryTotalPushed - MOD_HISTORY_CAP} earlier entries evicted at the cap)` + : ''; + throw new Error( + `No modification at index ${idx}. History has ${modificationHistory.length} entries${evictedNote}.`, + ); } const mod = modificationHistory[idx]; @@ -657,6 +696,7 @@ export async function resetModifications(page: Page): Promise { } } modificationHistory.length = 0; + modHistoryTotalPushed = 0; } /** diff --git a/browse/test/cdp-inspector-history-cap.test.ts b/browse/test/cdp-inspector-history-cap.test.ts new file mode 100644 index 0000000000..21b2d6c22f --- /dev/null +++ b/browse/test/cdp-inspector-history-cap.test.ts @@ -0,0 +1,95 @@ +import { describe, test, expect, beforeEach } from 'bun:test'; +import type { Page } from 'playwright'; +import { + __testInternals, + undoModification, +} from '../src/cdp-inspector'; + +// Regression tests for the modificationHistory cap (D6 / smoking gun #2). +// Pre-cap, the module-scoped array grew unbounded across the session. Cap is +// 200 entries, oldest evicted on push past the cap. undoModification reports +// "evicted at the cap" in the error message so a user who asks for a +// no-longer-available index understands what happened (instead of seeing the +// pre-cap "No modification at index 500" with no context). + +const { pushModification, MOD_HISTORY_CAP, getRawHistory, getTotalPushed, resetForTest } = __testInternals; + +function fakeMod(id: number) { + return { + selector: `#node-${id}`, + property: 'color', + oldValue: 'red', + newValue: 'blue', + source: 'inline' as const, + timestamp: id, + method: 'setProperty' as 'setProperty', + }; +} + +beforeEach(() => { + resetForTest(); +}); + +describe('modificationHistory cap', () => { + test('1. push under cap keeps every entry', () => { + for (let i = 0; i < 50; i++) pushModification(fakeMod(i)); + expect(getRawHistory().length).toBe(50); + expect(getTotalPushed()).toBe(50); + expect(getRawHistory()[0].timestamp).toBe(0); + expect(getRawHistory()[49].timestamp).toBe(49); + }); + + test('2. push exactly cap keeps every entry', () => { + for (let i = 0; i < MOD_HISTORY_CAP; i++) pushModification(fakeMod(i)); + expect(getRawHistory().length).toBe(MOD_HISTORY_CAP); + expect(getTotalPushed()).toBe(MOD_HISTORY_CAP); + expect(getRawHistory()[0].timestamp).toBe(0); + }); + + test('3. push past cap evicts oldest, keeps length at cap', () => { + const total = MOD_HISTORY_CAP + 50; + for (let i = 0; i < total; i++) pushModification(fakeMod(i)); + expect(getRawHistory().length).toBe(MOD_HISTORY_CAP); + expect(getTotalPushed()).toBe(total); + // Oldest 50 dropped — entry that was #0 is gone; new oldest is #50. + expect(getRawHistory()[0].timestamp).toBe(50); + expect(getRawHistory()[MOD_HISTORY_CAP - 1].timestamp).toBe(total - 1); + }); + + test('4. resetForTest clears both buffer and totalPushed', () => { + for (let i = 0; i < 10; i++) pushModification(fakeMod(i)); + resetForTest(); + expect(getRawHistory().length).toBe(0); + expect(getTotalPushed()).toBe(0); + }); +}); + +describe('undoModification eviction-aware error', () => { + // Stub Page: undoModification throws before any await when idx is out of + // range, so the stub never actually gets called. + const stubPage = {} as unknown as Page; + + test('5. out-of-range BEFORE any eviction → no evicted note', async () => { + for (let i = 0; i < 5; i++) pushModification(fakeMod(i)); + await expect(undoModification(stubPage, 99)).rejects.toThrow( + 'No modification at index 99. History has 5 entries.', + ); + }); + + test('6. out-of-range AFTER eviction → message names the evicted count', async () => { + const total = MOD_HISTORY_CAP + 73; + for (let i = 0; i < total; i++) pushModification(fakeMod(i)); + // 273 pushed, 200 in buffer, 73 evicted. Ask for idx=400 (above buffer). + await expect(undoModification(stubPage, 400)).rejects.toThrow( + `No modification at index 400. History has ${MOD_HISTORY_CAP} entries ` + + `(most recent ${MOD_HISTORY_CAP} only — 73 earlier entries evicted at the cap).`, + ); + }); + + test('7. negative explicit index throws cleanly (no NaN propagation)', async () => { + for (let i = 0; i < 10; i++) pushModification(fakeMod(i)); + await expect(undoModification(stubPage, -1)).rejects.toThrow( + 'No modification at index -1.', + ); + }); +}); From baf493f3c98fbacc3a896d8ee38e17e46da5cf46 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:27:56 -0700 Subject: [PATCH 07/19] add BrowserManager.getMemorySnapshot() + shared types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Diagnostic foundation for $B memory and the /memory endpoint that land in the next two commits. Collects: - Bun process memory via process.memoryUsage (cross-platform, accurate). - Per-tab JS heap via CDP Performance.getMetrics, lazy per tracked page, swallows target-died errors so a dying tab doesn't poison the snapshot for the rest. - Chromium process tree via SystemInfo.getProcessInfo (PID + type + CPU time). RSS is NOT exposed via CDP — the eng review (D2 USE_CDP) picked CDP over shelling to `ps`, so notes[] tells the caller why the RSS column is absent and points at the follow-up TODO. cdp-inspector exports getModificationHistoryStats so the snapshot can surface buffer occupancy + cap + evicted count without reaching into module-private state. memory-snapshot.ts holds the shared types so server.ts and read-commands can import without circular dep on browser-manager. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/browser-manager.ts | 112 ++++++++++++++++++++++++++++++++++ browse/src/cdp-inspector.ts | 17 ++++++ browse/src/memory-snapshot.ts | 73 ++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 browse/src/memory-snapshot.ts diff --git a/browse/src/browser-manager.ts b/browse/src/browser-manager.ts index 7734f0a620..a70393f3ff 100644 --- a/browse/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -21,6 +21,8 @@ import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type D import { validateNavigationUrl } from './url-validation'; import { TabSession, type RefEntry } from './tab-session'; import { resolveChromiumProfile, cleanSingletonLocks } from './config'; +import { withCdpSession } from './cdp-bridge'; +import type { MemorySnapshot, MemoryStructureStats, MemoryTabSnapshot, MemoryProcess } from './memory-snapshot'; /** * Detect whether GSTACK_CHROMIUM_PATH points at a custom Chromium build that @@ -1004,6 +1006,116 @@ export class BrowserManager { } } + /** + * Diagnostic for `$B memory` and the /memory endpoint. + * + * Collects: + * - Bun process memory (cross-platform, accurate, no shelling). + * - Per-tab JS heap via CDP Performance.getMetrics — the most portable + * per-tab signal CDP exposes. Misses native/GPU/Skia/cache memory + * (Codex flag on the eng-review; see follow-up TODO "native/GPU + * memory breakdown"). + * - Chromium process tree via SystemInfo.getProcessInfo — PID + type + * + CPU time. Per-process RSS is NOT exposed via CDP and the eng + * review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`, + * so RSS columns are absent and `notes[]` says why. + * + * `structures` is passed in by the caller (read-commands / server) so + * browser-manager doesn't take a hard dep on every buffer-owning module. + */ + async getMemorySnapshot(structures: MemoryStructureStats): Promise { + const bunMem = process.memoryUsage(); + const notes: string[] = []; + + // Per-tab JS heap. Lazy: only the pages we already track. A target + // that died mid-snapshot is omitted, never throws. + const tabs: MemoryTabSnapshot[] = []; + for (const [id, page] of this.pages) { + try { + const url = (() => { try { return page.url(); } catch { return ''; } })(); + const title = await page.title().catch(() => ''); + const metrics = await withCdpSession(page, async (session) => { + await session.send('Performance.enable').catch(() => undefined); + const result = await session.send('Performance.getMetrics'); + return ((result as { metrics?: Array<{ name: string; value: number }> }).metrics) ?? []; + }); + const mm: Record = {}; + for (const m of metrics) mm[m.name] = m.value; + tabs.push({ + id, + url, + title, + jsHeapUsed: mm.JSHeapUsedSize ?? 0, + jsHeapTotal: mm.JSHeapTotalSize ?? 0, + documents: mm.Documents ?? 0, + nodes: mm.Nodes ?? 0, + listeners: mm.JSEventListeners ?? 0, + }); + } catch { + // Target died or CDP unavailable mid-snapshot — skip this tab. + } + } + + // Chromium process tree. Browser handle may be on the `browser` field + // (launched mode) or accessible via `context.browser()` (persistent + // context / headed mode); try both. + let processes: MemoryProcess[] | null = null; + const browser: Browser | null = this.browser ?? (this.context ? this.context.browser() : null); + if (browser) { + try { + // `newBrowserCDPSession` is browser-wide. Not exposed on every + // Playwright TypeScript surface, but present at runtime on the + // Browser instance — use a typed cast to avoid the @ts-expect-error. + type BrowserWithCDP = Browser & { + newBrowserCDPSession?: () => Promise<{ + send: (method: string, params?: unknown) => Promise; + detach: () => Promise; + }>; + }; + const maybeFactory = (browser as BrowserWithCDP).newBrowserCDPSession; + if (typeof maybeFactory === 'function') { + const browserSession = await maybeFactory.call(browser); + try { + const info = (await browserSession.send('SystemInfo.getProcessInfo')) as { + processInfo?: Array<{ id: number; type: string; cpuTime: number }>; + }; + processes = (info.processInfo ?? []).map((p) => ({ + id: p.id, + type: p.type, + cpuTime: p.cpuTime, + })); + notes.push( + 'Per-Chromium-process RSS not collected — SystemInfo.getProcessInfo exposes PID+type+CPU only. ' + + 'See follow-up TODO "native/GPU memory breakdown" for the deferred fix.', + ); + } finally { + await browserSession.detach().catch(() => undefined); + } + } else { + notes.push('Playwright build does not expose newBrowserCDPSession; per-process info skipped.'); + } + } catch (err: any) { + notes.push(`CDP browser session unavailable: ${err?.message ?? String(err)}`); + } + } else { + notes.push('Browser handle unavailable (server connection mode); per-process info skipped.'); + } + + return { + bunServer: { + rss: bunMem.rss, + heapUsed: bunMem.heapUsed, + heapTotal: bunMem.heapTotal, + external: bunMem.external, + }, + tabs, + processes, + structures, + capturedAt: Date.now(), + notes, + }; + } + // ─── Ref Map (delegates to active session) ────────────────── setRefMap(refs: Map) { this.getActiveSession().setRefMap(refs); diff --git a/browse/src/cdp-inspector.ts b/browse/src/cdp-inspector.ts index fd4a938d17..52a488e570 100644 --- a/browse/src/cdp-inspector.ts +++ b/browse/src/cdp-inspector.ts @@ -670,6 +670,23 @@ export function getModificationHistory(): StyleModification[] { return [...modificationHistory]; } +/** + * Diagnostic accessor for the $B memory snapshot. Returns current buffer + * occupancy, the cap, and how many entries have been evicted since the + * last reset. + */ +export function getModificationHistoryStats(): { + current: number; + cap: number; + evicted: number; +} { + return { + current: modificationHistory.length, + cap: MOD_HISTORY_CAP, + evicted: Math.max(0, modHistoryTotalPushed - MOD_HISTORY_CAP), + }; +} + /** * Reset all modifications, restoring original values. */ diff --git a/browse/src/memory-snapshot.ts b/browse/src/memory-snapshot.ts new file mode 100644 index 0000000000..02a54d49de --- /dev/null +++ b/browse/src/memory-snapshot.ts @@ -0,0 +1,73 @@ +// Shared types for the $B memory diagnostic command and the /memory +// endpoint. Lives in its own module so server.ts, read-commands.ts, and +// the extension footer poll can import without taking a circular dep on +// browser-manager.ts. +// +// Background: the gbrowser-OOM investigation (160 GB Activity Monitor +// reading on a friend's machine) needed a diagnostic that could land +// before the next incident — measurement comes first, fixes come after. +// $B memory is that diagnostic. + +/** Counts/bytes for the bounded in-memory structures on the Bun side. */ +export interface MemoryStructureStats { + modificationHistory: { current: number; cap: number; evicted: number }; + activitySubscribers: number; + inspectorSubscribers: number; + consoleBufferLen: number; + networkBufferLen: number; + dialogBufferLen: number; + captureBufferBytes: number; +} + +/** Per-tab JS heap snapshot (CDP Performance.getMetrics). */ +export interface MemoryTabSnapshot { + id: number; + url: string; + title: string; + jsHeapUsed: number; + jsHeapTotal: number; + documents: number; + nodes: number; + listeners: number; +} + +/** Chromium process metadata via CDP SystemInfo.getProcessInfo. */ +export interface MemoryProcess { + /** Chromium-internal process id (not OS PID). */ + id: number; + /** 'browser' | 'renderer' | 'gpu' | 'utility' | 'extension' | ... */ + type: string; + /** CPU time accumulated since process start (seconds). */ + cpuTime: number; +} + +export interface MemorySnapshot { + bunServer: { + rss: number; + heapUsed: number; + heapTotal: number; + external: number; + }; + tabs: MemoryTabSnapshot[]; + /** + * Chromium process tree. `null` when no browser handle is available + * (server in connection mode, or browser not yet launched). + * + * Per-process RSS is NOT included: SystemInfo.getProcessInfo returns + * id+type+cpuTime but Chromium does not expose RSS via CDP. The + * `notes[]` field tells the caller why — see the follow-up TODO + * "native/GPU memory breakdown" for the deferred fix. + */ + processes: MemoryProcess[] | null; + structures: MemoryStructureStats; + capturedAt: number; + notes: string[]; +} + +/** Format bytes as a short human string ("1.4 GB", "312 MB", "84 KB"). */ +export function formatBytes(n: number): string { + if (n < 1024) return `${n} B`; + if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`; + if (n < 1024 * 1024 * 1024) return `${(n / 1024 / 1024).toFixed(1)} MB`; + return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`; +} From aa3121a794a803129b84dc80e102f19362331146 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:28:49 -0700 Subject: [PATCH 08/19] add \$B memory command Registers 'memory' in META_COMMANDS, wires the meta-command dispatch to a lazy-imported handler in memory-command.ts. Lazy because the import graph (cdp-bridge + memory-snapshot + buffer accessors) isn't useful to projects that never run the diagnostic. The handler assembles MemoryStructureStats from the modules that own each buffer (cdp-inspector mod history stats, activity subscriber count, console/network/dialog buffer lengths, captureBuffer bytes, inspectorSubscriber count via a new server.ts export) and calls BrowserManager.getMemorySnapshot. Output is text by default, JSON with --json so the sidebar footer and test harness can consume it programmatically. buildMemorySnapshotJson is the entry the /memory endpoint will call in the next commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/commands.ts | 2 + browse/src/memory-command.ts | 115 +++++++++++++++++++++++++++++++++++ browse/src/meta-commands.ts | 7 +++ browse/src/server.ts | 5 ++ 4 files changed, 129 insertions(+) create mode 100644 browse/src/memory-command.ts diff --git a/browse/src/commands.ts b/browse/src/commands.ts index 1af127d51f..c3637fe9df 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -45,6 +45,7 @@ export const META_COMMANDS = new Set([ 'domain-skill', 'skill', 'cdp', + 'memory', ]); export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]); @@ -89,6 +90,7 @@ export function wrapUntrustedContent(result: string, url: string): string { export const COMMAND_DESCRIPTIONS: Record = { // Navigation + 'memory': { category: 'Diagnostics', description: 'Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json.', usage: 'memory [--json]' }, 'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto ' }, 'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html [--wait-until load|domcontentloaded|networkidle] [--tab-id ] | load-html --from-file [--tab-id ]' }, 'back': { category: 'Navigation', description: 'History back' }, diff --git a/browse/src/memory-command.ts b/browse/src/memory-command.ts new file mode 100644 index 0000000000..29f76d7a81 --- /dev/null +++ b/browse/src/memory-command.ts @@ -0,0 +1,115 @@ +// `$B memory` — diagnostic snapshot of Bun heap + per-tab JS heap + +// Chromium process tree + bounded buffer sizes. Lives in its own file +// because the meta-commands dispatcher imports it lazily — projects +// that never run the diagnostic don't pay the import-graph cost (CDP +// bridge, memory-snapshot types, buffer accessors). + +import type { BrowserManager } from './browser-manager'; +import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from './memory-snapshot'; +import { getModificationHistoryStats } from './cdp-inspector'; +import { getSubscriberCount as getActivitySubscriberCount } from './activity'; +import { getInspectorSubscriberCount } from './server'; +import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers'; +import { getCaptureBuffer } from './network-capture'; + +/** + * Assemble the MemoryStructureStats from the modules that own each buffer. + * Browser-manager doesn't take a hard dep on every buffer-owning module — + * the snapshot caller passes them in. + */ +function collectStructureStats(): MemoryStructureStats { + return { + modificationHistory: getModificationHistoryStats(), + activitySubscribers: getActivitySubscriberCount(), + inspectorSubscribers: getInspectorSubscriberCount(), + consoleBufferLen: consoleBuffer.length, + networkBufferLen: networkBuffer.length, + dialogBufferLen: dialogBuffer.length, + captureBufferBytes: getCaptureBuffer().byteSize, + }; +} + +/** + * Pretty-print the snapshot for terminal output. JSON mode (--json) goes + * straight through JSON.stringify so the extension footer and any test + * harness can consume it programmatically. + */ +function formatSnapshotText(s: MemorySnapshot): string { + const lines: string[] = []; + lines.push( + `Bun server: RSS: ${formatBytes(s.bunServer.rss)} ` + + `heap: ${formatBytes(s.bunServer.heapUsed)} / ${formatBytes(s.bunServer.heapTotal)} ` + + `external: ${formatBytes(s.bunServer.external)}`, + ); + + if (s.processes && s.processes.length > 0) { + // Group by type so the user sees "renderer: 12" vs listing 12 separate rows. + const byType: Record = {}; + for (const p of s.processes) byType[p.type] = (byType[p.type] ?? 0) + 1; + const typeSummary = Object.entries(byType) + .map(([t, n]) => `${t}=${n}`) + .join(' '); + lines.push(`Chromium processes: ${s.processes.length} total (${typeSummary})`); + } else if (s.processes === null) { + lines.push('Chromium processes: (unavailable — see notes)'); + } else { + lines.push('Chromium processes: 0'); + } + + if (s.tabs.length > 0) { + // Sort by JS heap descending; show top 10 plus "...N more" tail. + const sorted = [...s.tabs].sort((a, b) => b.jsHeapUsed - a.jsHeapUsed); + const shown = sorted.slice(0, 10); + lines.push(`Renderers: ${s.tabs.length} tabs (top by JS heap):`); + for (const t of shown) { + const urlShort = t.url.length > 80 ? t.url.slice(0, 77) + '...' : t.url; + lines.push( + ` [${formatBytes(t.jsHeapUsed).padStart(8)} JS, ` + + `${String(t.nodes).padStart(6)} nodes, ` + + `${String(t.listeners).padStart(5)} listeners] ` + + `tab #${t.id} — ${urlShort}`, + ); + } + if (sorted.length > shown.length) { + lines.push(` ...and ${sorted.length - shown.length} more`); + } + } else { + lines.push('Renderers: (no tabs tracked)'); + } + + lines.push('─────────────────────────────────────────────────'); + lines.push('In-memory structures (Bun side):'); + const m = s.structures.modificationHistory; + lines.push( + ` modificationHistory: ${m.current} / ${m.cap} entries` + + (m.evicted > 0 ? ` (${m.evicted} evicted since reset)` : ''), + ); + lines.push(` inspectorSubscribers: ${s.structures.inspectorSubscribers}`); + lines.push(` activitySubscribers: ${s.structures.activitySubscribers}`); + lines.push(` consoleBuffer: ${s.structures.consoleBufferLen} entries`); + lines.push(` networkBuffer: ${s.structures.networkBufferLen} entries`); + lines.push(` dialogBuffer: ${s.structures.dialogBufferLen} entries`); + lines.push(` captureBuffer: ${formatBytes(s.structures.captureBufferBytes)}`); + + if (s.notes.length > 0) { + lines.push(''); + lines.push('Notes:'); + for (const n of s.notes) lines.push(` - ${n}`); + } + + return lines.join('\n'); +} + +export async function handleMemoryCommand(args: string[], bm: BrowserManager): Promise { + const jsonMode = args.includes('--json'); + const structures = collectStructureStats(); + const snapshot = await bm.getMemorySnapshot(structures); + if (jsonMode) return JSON.stringify(snapshot); + return formatSnapshotText(snapshot); +} + +/** Entry point used by the /memory HTTP endpoint — same data, always JSON. */ +export async function buildMemorySnapshotJson(bm: BrowserManager): Promise { + const structures = collectStructureStats(); + return bm.getMemorySnapshot(structures); +} diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 4008099a05..4bd0faae7a 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -1161,6 +1161,13 @@ export async function handleMetaCommand( return await handleCdpCommand(args, bm); } + case 'memory': { + // Lazy import — pulls in cdp-bridge + memory-snapshot + buffer accessors + // that aren't useful for projects that never run the diagnostic. + const { handleMemoryCommand } = await import('./memory-command'); + return await handleMemoryCommand(args, bm); + } + default: throw new Error(`Unknown meta command: ${command}`); } diff --git a/browse/src/server.ts b/browse/src/server.ts index 2f6c598f7e..3b0052d016 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -724,6 +724,11 @@ let inspectorTimestamp: number = 0; type InspectorSubscriber = (event: any) => void; const inspectorSubscribers = new Set(); +/** Diagnostic accessor used by the $B memory snapshot. */ +export function getInspectorSubscriberCount(): number { + return inspectorSubscribers.size; +} + function emitInspectorEvent(event: any): void { for (const notify of inspectorSubscribers) { queueMicrotask(() => { From 10495978e61e6fa51985d7687dcafe68dbe4ff8d Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:29:25 -0700 Subject: [PATCH 09/19] add /memory endpoint (SSE-session-cookie gated) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GET /memory returns the BrowserManager memory snapshot as JSON. Auth matches /activity/stream and /inspector/events: Bearer header OR view-only SSE-session cookie (the extension fetches the cookie once via POST /sse-session, then polls /memory with withCredentials: true). Deliberately NOT extending /health for the sidebar footer poll — TODOS.md "Audit /health token distribution" records that /health already surfaces AUTH_TOKEN to any localhost caller in headed mode. A separate endpoint with the standard SSE auth keeps the future /health fix from cascading into the sidebar. sanitizeReplacer is applied at egress because tab.url and tab.title come from page content — lone-surrogate bytes from broken emoji could otherwise reach the sidebar and (when forwarded to Claude API) trigger HTTP 400. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/server.ts | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/browse/src/server.ts b/browse/src/server.ts index 3b0052d016..6f75551ff5 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -2759,6 +2759,32 @@ export function buildFetchHandler(cfg: ServerConfig): ServerHandle { }); } + // GET /memory — diagnostic snapshot (auth required, does NOT reset idle). + // Same auth model as /activity/stream and /inspector/events: Bearer header + // OR view-only SSE-session cookie. Does NOT extend /health (which already + // leaks AUTH_TOKEN to any localhost caller in headed mode — see TODOS.md + // "Audit /health token distribution"); a separate endpoint with the + // standard SSE auth keeps the future /health fix from cascading into the + // sidebar footer poll. + if (url.pathname === '/memory' && req.method === 'GET') { + const cookieToken = extractSseCookie(req); + if (!validateAuth(req) && !validateSseSessionToken(cookieToken)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { + status: 401, headers: { 'Content-Type': 'application/json' }, + }); + } + const { buildMemorySnapshotJson } = await import('./memory-command'); + const snapshot = await buildMemorySnapshotJson(cfgBrowserManager); + // sanitizeReplacer is required at every SSE/JSON egress that ships + // page-content-derived strings — tab.url and tab.title come from + // page content, so lone-surrogate bytes from broken emoji or + // mid-emoji splits could otherwise reach the sidebar / Claude API. + return new Response(JSON.stringify(snapshot, sanitizeReplacer), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + // GET /inspector/events — SSE for inspector state changes (auth required) if (url.pathname === '/inspector/events' && req.method === 'GET') { // Same auth model as /activity/stream: Bearer OR view-only cookie. From 98b2ae8103f3d9c16e23e50cc088a857caf6c825 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:30:07 -0700 Subject: [PATCH 10/19] add sidebar footer RSS readout (polls /memory every 30s) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Footer now shows " · " sourced from the /memory endpoint, polled every 30s. Color thresholds: orange warn at 2 GB Bun RSS or 50 tabs; red bad at 8 GB or 200 tabs (matches the tab-guardrail threshold landing in a later commit). The footer gives the user an early signal that the cliff is forming, instead of only learning when the OS OOM-kills the process. Backoff per Codex's flag: if a poll takes > 2s response time the sidebar drops to a 5-minute cadence until the next successful fast poll. The diagnostic shouldn't add load to a browser that's already unhealthy. Start/stop is wired to the existing setServerInfo() hook so the timer only runs while the sidebar is connected to a server. Co-Authored-By: Claude Opus 4.7 (1M context) --- extension/sidepanel.css | 15 ++++++ extension/sidepanel.html | 1 + extension/sidepanel.js | 98 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/extension/sidepanel.css b/extension/sidepanel.css index d83486e6c2..12be632bfd 100644 --- a/extension/sidepanel.css +++ b/extension/sidepanel.css @@ -1137,6 +1137,21 @@ footer { transition: color 150ms; } .footer-port:hover { color: var(--text-label); } +.footer-mem { + color: var(--text-meta); + font-family: var(--font-mono); + font-size: 11px; + margin-right: 6px; + padding: 1px 6px; + border-radius: var(--radius-sm); + transition: color 150ms; +} +.footer-mem.warn { + color: #f59e0b; +} +.footer-mem.bad { + color: #ef4444; +} .port-input { width: 56px; padding: 2px 6px; diff --git a/extension/sidepanel.html b/extension/sidepanel.html index cc456865ff..b978f33434 100644 --- a/extension/sidepanel.html +++ b/extension/sidepanel.html @@ -166,6 +166,7 @@ + + +