Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion browse/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,26 @@ import { spawnTerminalAgent } from './terminal-agent-control';

const config = resolveConfig();
const IS_WINDOWS = process.platform === 'win32';
const MAX_START_WAIT = IS_WINDOWS ? 15000 : (process.env.CI ? 30000 : 8000); // Node+Chromium takes longer on Windows

/**
* Startup health-probe budget (ms) for a freshly spawned server. The daemon is
* detached + unref'd, so it keeps booting regardless of how long the CLI is
* willing to poll — this constant only bounds how long `startServer` waits
* before reporting failure.
*
* Overridable via `BROWSE_START_TIMEOUT` (ms) for hosts where even the platform
* ceiling isn't enough — e.g. Windows under heavy load (#1846), where the 15s
* budget can still elapse before a busy box finishes booting Node+Chromium.
* Mirrors the `BROWSE_*` tunable convention used throughout server.ts
* (BROWSE_PORT, BROWSE_IDLE_TIMEOUT, ...). A non-positive or unparseable value
* falls back to the platform default. Pure + exported for tests.
*/
export function resolveStartTimeout(env: NodeJS.ProcessEnv = process.env): number {
const platformDefault = IS_WINDOWS ? 15000 : (env.CI ? 30000 : 8000); // Node+Chromium takes longer on Windows
const override = parseInt(env.BROWSE_START_TIMEOUT || '', 10);
return Number.isFinite(override) && override > 0 ? override : platformDefault;
}
const MAX_START_WAIT = resolveStartTimeout();

export function resolveServerScript(
env: Record<string, string | undefined> = process.env,
Expand Down Expand Up @@ -357,6 +376,17 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
await Bun.sleep(100);
}

// One last check before declaring failure. The daemon is detached + unref'd,
// so on a loaded machine it can become healthy in the gap between the poll
// loop's final tick and now — the probe timed out, the launch did not
// (#1846). Re-checking here turns that false negative into a success, and
// mirrors the post-loop recovery already done in ensureServer(). A genuinely
// failed server is still unhealthy, so this falls through to the error report.
const lateState = readState();
if (lateState && await isServerHealthy(lateState.port)) {
return lateState;
}

// Server didn't start in time — check the on-disk startup error log.
// Both platforms now spawn with stdio: 'ignore', so the server writes
// errors to disk for the CLI to read (see server.ts start().catch).
Expand Down
77 changes: 77 additions & 0 deletions browse/test/cli-start-final-healthcheck.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/**
* Coverage for #1846 — `browse` CLI must not report "Server failed to start
* within Ns" when the detached daemon actually came up healthy a moment later.
*
* The spawned server is `detached: true` + `.unref()`'d, so it keeps booting
* independently of the CLI's poll loop. On a loaded machine (the issue repro is
* Windows under load) the loop's budget can elapse in the gap between its last
* health tick and the daemon becoming ready — the very next `browse status`
* then shows a healthy, listening server. #1732 only widened the budget; the
* throw site itself still fired on timeout regardless of real health.
*
* Two invariants are defended here:
* 1. `startServer` does a final readState()+isServerHealthy() re-check before
* the timeout throw (structural — removes the false negative at any budget).
* 2. The startup budget is env-overridable via BROWSE_START_TIMEOUT, matching
* the BROWSE_* tunable convention (BROWSE_PORT, BROWSE_IDLE_TIMEOUT, ...).
*
* (1) is a static source invariant (live spawn cycles belong in the e2e tier);
* (2) is exercised behaviorally against the exported pure helper.
*/
import { describe, expect, test } from 'bun:test';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { resolveStartTimeout } from '../src/cli';

const CLI = path.join(import.meta.dir, '..', 'src', 'cli.ts');
const read = (): string => fs.readFileSync(CLI, 'utf-8');

describe('#1846 startServer false-negative on a late-healthy detached daemon', () => {
test('a final health re-check sits between the poll loop and the timeout throw', () => {
const src = read();
const throwIdx = src.indexOf('Server failed to start within');
expect(throwIdx).toBeGreaterThan(-1);

// The startServer poll loop ends at its `await Bun.sleep(100)`; the final
// re-check must live AFTER that loop and BEFORE the timeout throw.
const loopEnd = src.lastIndexOf('await Bun.sleep(100)', throwIdx);
expect(loopEnd).toBeGreaterThan(-1);
const between = src.slice(loopEnd, throwIdx);

// It must re-read state and re-probe health, then be able to return — i.e.
// a genuine recovery path, not just a comment.
expect(between).toContain('readState()');
expect(between).toMatch(/isServerHealthy\([^)]*\)/);
expect(between).toMatch(/return\s+\w+;/);
});

test('the re-check returns the recovered state rather than swallowing it', () => {
const src = read();
// Guard against a refactor that probes health but forgets to return the
// state (which would re-introduce the false negative).
expect(src).toMatch(/if\s*\([^)]*await\s+isServerHealthy\([^)]*\)\)\s*\{\s*return\s+\w+;/);
});
});

describe('#1846 BROWSE_START_TIMEOUT env override (resolveStartTimeout)', () => {
const platformDefault = resolveStartTimeout({} as NodeJS.ProcessEnv);

test('platform default is a positive millisecond budget when unset', () => {
expect(platformDefault).toBeGreaterThan(0);
});

test('honors a positive BROWSE_START_TIMEOUT override', () => {
expect(resolveStartTimeout({ BROWSE_START_TIMEOUT: '42000' } as NodeJS.ProcessEnv)).toBe(42000);
});

test('falls back to the platform default for non-positive / unparseable values', () => {
for (const bad of ['0', '-5', 'abc', '', ' ']) {
expect(resolveStartTimeout({ BROWSE_START_TIMEOUT: bad } as NodeJS.ProcessEnv)).toBe(platformDefault);
}
});

test('MAX_START_WAIT is wired through resolveStartTimeout (no stray hardcoded constant)', () => {
const src = read();
expect(src).toMatch(/const\s+MAX_START_WAIT\s*=\s*resolveStartTimeout\(\)/);
});
});