Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/eval/discovery-harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,9 @@ export async function evaluateDiscoveryFixture({

for (const task of fixture.tasks) {
const runner = runners[task.surface];
if (!runner) {
throw new Error(`No runner registered for surface: ${task.surface}`);
}
const payload = await runner(task, rootPath);
results.push(evaluateDiscoveryTask(task, payload));
Comment on lines +430 to +436
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 No guard for undefined surface runner

If task.surface is a value not present in the runners map (e.g., a typo in a fixture file or a newly added DiscoverySurface value without a corresponding runner), runner is undefined and calling await runner(task, rootPath) throws a confusing TypeError: runner is not a function rather than a descriptive error.

Consider adding an explicit check:

for (const task of fixture.tasks) {
  const runner = runners[task.surface];
  if (!runner) {
    throw new Error(`No runner registered for surface: ${task.surface}`);
  }
  const payload = await runner(task, rootPath);
  results.push(evaluateDiscoveryTask(task, payload));
}

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Added an explicit guard before calling the runner:

const runner = runners[task.surface];
if (!runner) {
  throw new Error(`No runner registered for surface: ${task.surface}`);
}

This surfaces fixture typos or unregistered surfaces with a clear message instead of a cryptic TypeError.

}
Expand Down
62 changes: 46 additions & 16 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1748,8 +1748,7 @@ async function main() {
const transport = new StdioServerTransport();
await server.connect(transport);

// Register cleanup before any handler that calls process.exit(), so the
// exit listener is always in place when stdin/onclose/signals fire.
// ── Cleanup guards (normal MCP lifecycle) ──────────────────────────────────
const stopAllWatchers = () => {
for (const project of getAllProjects()) {
project.stopWatcher?.();
Expand All @@ -1770,27 +1769,58 @@ async function main() {
process.exit(0);
});

// Detect stdin pipe closure — the primary signal that the MCP client is gone.
// StdioServerTransport only listens for 'data'/'error', never 'end'.
process.stdin.on('end', () => process.exit(0));
process.stdin.on('close', () => process.exit(0));

// Handle graceful MCP protocol-level disconnect.
// Fires after SDK internal cleanup when transport.close() is called.
server.onclose = () => process.exit(0);

if (process.env.CODEBASE_CONTEXT_DEBUG) console.error('[DEBUG] Server ready');
// ── Zombie process prevention ──────────────────────────────────────────────
// If no MCP client sends an `initialize` message within 30 seconds, this
// process was started incorrectly (e.g. `npx codebase-context <path>` from
// a shell or AI agent without a subcommand). Exit cleanly to avoid a zombie.
const HANDSHAKE_TIMEOUT_MS =
Number.parseInt(process.env.CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS ?? '', 10) || 30_000;
let mcpClientInitialized = false;

const handshakeTimer = setTimeout(() => {
if (!mcpClientInitialized) {
console.error(
'No MCP client connected within ' +
Math.round(HANDSHAKE_TIMEOUT_MS / 1000) +
's - exiting.\n' +
'If you meant to use CLI commands:\n' +
' npx codebase-context memory list\n' +
' npx codebase-context search --query "..."\n' +
' npx codebase-context --help'
);
process.exit(1);
}
}, HANDSHAKE_TIMEOUT_MS);
handshakeTimer.unref();

await refreshKnownRootsFromClient();
// ── Deferred project initialization ────────────────────────────────────────
// Don't start CPU-heavy indexing or file-watchers until the MCP handshake
// completes. A misfire (no real client) consumes near-zero resources during
// the timeout window and exits cleanly.
server.oninitialized = async () => {
mcpClientInitialized = true;
clearTimeout(handshakeTimer);

// Keep the current single-project auto-select behavior when exactly one startup project is known.
const startupRoots = getKnownRootPaths();
if (startupRoots.length === 1) {
await initProject(startupRoots[0], watcherDebounceMs, { enableWatcher: true });
setActiveProject(startupRoots[0]);
}
if (process.env.CODEBASE_CONTEXT_DEBUG) console.error('[DEBUG] Server ready');

try {
await refreshKnownRootsFromClient();

const startupRoots = getKnownRootPaths();
if (startupRoots.length === 1) {
await initProject(startupRoots[0], watcherDebounceMs, { enableWatcher: true });
setActiveProject(startupRoots[0]);
}
} catch (error) {
console.error('[codebase-context] Project initialization failed:', error);
}
};

// Subscribe to root changes
// Subscribe to root changes (lightweight — no project init cost)
server.setNotificationHandler(RootsListChangedNotificationSchema, async () => {
try {
await refreshKnownRootsFromClient();
Expand Down
122 changes: 122 additions & 0 deletions tests/zombie-guard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/**
* Integration tests for zombie process prevention.
*
* These tests verify that the MCP server exits cleanly when no client connects
* (handshake timeout) and that project initialization is deferred until after
* the MCP handshake completes.
*
* The tests spawn real child processes to exercise the actual startup path.
*/

import { describe, it, expect, beforeAll } from 'vitest';
import { spawn } from 'node:child_process';
import { existsSync } from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import { fileURLToPath } from 'node:url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ENTRY_POINT = path.resolve(__dirname, '..', 'dist', 'index.js');
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Tests depend on a pre-built dist/index.js

The test resolves the entry point from dist/index.js, so it will fail with a confusing rejection (from child.on('error', reject)) if the project hasn't been built before running tests. A descriptive beforeAll guard would make the failure more actionable:

import { existsSync } from 'node:fs';

beforeAll(() => {
  if (!existsSync(ENTRY_POINT)) {
    throw new Error(
      `dist/index.js not found — run \`npm run build\` before the zombie-guard tests.`
    );
  }
});

This is especially relevant for contributors running vitest without a prior build step.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Added a beforeAll guard at the top of the describe block:

beforeAll(() => {
  if (!existsSync(ENTRY_POINT)) {
    throw new Error(
      `dist/index.js not found - run \`npm run build\` before the zombie-guard tests.`
    );
  }
});

Throws early with an actionable message rather than letting the test fail with a cryptic spawn rejection.


/**
* Spawn the MCP server as a child process and wait for it to exit.
* Returns { code, stderr, elapsed } where elapsed is in milliseconds.
*/
function spawnServer(
args: string[],
env: Record<string, string> = {},
timeoutMs = 45_000
): Promise<{ code: number | null; signal: string | null; stderr: string; elapsed: number }> {
return new Promise((resolve, reject) => {
const start = Date.now();
let stderr = '';

const child = spawn(process.execPath, [ENTRY_POINT, ...args], {
stdio: ['pipe', 'pipe', 'pipe'],
env: { ...process.env, ...env },
timeout: timeoutMs
});

child.stderr?.on('data', (chunk: Buffer) => {
stderr += chunk.toString();
});

child.on('error', reject);
child.on('close', (code, signal) => {
resolve({ code, signal, stderr, elapsed: Date.now() - start });
});

// Don't write anything to stdin — simulate the zombie scenario
// where no MCP client sends an `initialize` message.
});
}

describe('zombie process prevention', () => {
beforeAll(() => {
if (!existsSync(ENTRY_POINT)) {
throw new Error(
`dist/index.js not found - run \`npm run build\` before the zombie-guard tests.`
);
}
});

it('exits with code 1 when no MCP client connects within timeout', async () => {
// Use a short timeout for the test (2 seconds instead of the default 30).
// Use os.tmpdir() as a real existing directory so path validation passes —
// this tests the realistic scenario where a valid path IS provided but no
// MCP client connects (which is exactly the Codex zombie scenario).
const result = await spawnServer(
[os.tmpdir()],
{ CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000' }
);

expect(result.code).toBe(1);
expect(result.stderr).toContain('No MCP client connected within');
expect(result.stderr).toContain('npx codebase-context --help');
// Should exit roughly around the timeout (2s), not hang forever
expect(result.elapsed).toBeLessThan(10_000);
}, 15_000);

it('exits with code 1 even when invoked with no arguments at all', async () => {
const result = await spawnServer(
[],
{ CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000' }
);

expect(result.code).toBe(1);
expect(result.stderr).toContain('No MCP client connected within');
expect(result.elapsed).toBeLessThan(10_000);
}, 15_000);

it('does not start indexing or file watchers before handshake', async () => {
// With DEBUG on, the server logs "[DEBUG] Server ready" inside oninitialized.
// Since no client ever connects, that log must never appear.
// Use os.tmpdir() so path validation passes before the handshake timer runs.
const result = await spawnServer(
[os.tmpdir()],
{
CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000',
CODEBASE_CONTEXT_DEBUG: '1'
}
);

expect(result.code).toBe(1);
// "[DEBUG] Server ready" is printed inside oninitialized — should NOT appear
// because no client ever sends `initialize`.
expect(result.stderr).not.toContain('[DEBUG] Server ready');
}, 15_000);

it('respects custom timeout via environment variable', async () => {
const start = Date.now();
const result = await spawnServer(
[],
{ CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '1000' }
);
const elapsed = Date.now() - start;

expect(result.code).toBe(1);
// Should exit around 1 second, definitely under 5
expect(elapsed).toBeGreaterThan(800);
expect(elapsed).toBeLessThan(5_000);
}, 10_000);
});
Loading