Skip to content

Commit 1110a0c

Browse files
test(integration): fix cloudflare workers test leaking workerd processes (modelcontextprotocol#2296)
1 parent f2a3320 commit 1110a0c

1 file changed

Lines changed: 205 additions & 79 deletions

File tree

test/integration/test/server/cloudflareWorkers.test.ts

Lines changed: 205 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,152 @@
77

88
import type { ChildProcess } from 'node:child_process';
99
import { execSync, spawn } from 'node:child_process';
10+
import { randomUUID } from 'node:crypto';
1011
import * as fs from 'node:fs';
12+
import { createRequire } from 'node:module';
13+
import * as net from 'node:net';
1114
import * as os from 'node:os';
1215
import path from 'node:path';
1316

1417
import { Client, StreamableHTTPClientTransport } from '@modelcontextprotocol/client';
1518
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
1619

17-
const PORT = 8787;
1820
const READINESS_TIMEOUT_MS = 60_000;
1921
const READINESS_POLL_INTERVAL_MS = 100;
22+
const SHUTDOWN_GRACE_MS = 5000;
23+
24+
/**
25+
* Embedded in the worker's `serverInfo.version` and asserted by the readiness probe, so a
26+
* leftover server from an earlier run can never satisfy the probe for this run.
27+
*/
28+
const SERVER_VERSION_NONCE = randomUUID();
29+
30+
/**
31+
* The workspace's own wrangler installation (pinned by pnpm-lock). Running it directly —
32+
* instead of `npm install`-ing wrangler into the temp project and going through `npx` —
33+
* keeps the wrangler version deterministic, avoids a ~90MB download per test run, and
34+
* removes two shell layers from the spawned process tree. Resolved through the package's
35+
* `bin` field rather than a hardcoded internal file path.
36+
*/
37+
const WRANGLER_BIN = (() => {
38+
const pkgPath = createRequire(import.meta.url).resolve('wrangler/package.json');
39+
const bin = (JSON.parse(fs.readFileSync(pkgPath, 'utf8')) as { bin: Record<string, string> }).bin.wrangler;
40+
return path.resolve(path.dirname(pkgPath), bin);
41+
})();
42+
43+
/** Ask the kernel for a currently-free port instead of hardcoding one. */
44+
async function getFreePort(): Promise<number> {
45+
return new Promise((resolve, reject) => {
46+
const probe = net.createServer();
47+
probe.unref();
48+
probe.on('error', reject);
49+
probe.listen(0, '127.0.0.1', () => {
50+
const { port } = probe.address() as net.AddressInfo;
51+
probe.close(() => resolve(port));
52+
});
53+
});
54+
}
55+
56+
/**
57+
* Kill the whole `wrangler dev` process tree.
58+
*
59+
* `wrangler dev` fans out into several processes (wrangler bin shim → wrangler CLI →
60+
* esbuild service + two workerd instances), and non-interactive wrangler installs no
61+
* SIGTERM handler that would dispose them. Signalling just `proc.pid` kills the top of
62+
* the tree and orphans workerd, which keeps running — and keeps its port bound —
63+
* indefinitely. So the process is spawned `detached` (own process group) and the whole
64+
* group is signalled here, with a SIGKILL sweep afterwards because a wedged workerd can
65+
* ignore SIGTERM. Orphaned workerd is a known recurring wrangler bug class (e.g.
66+
* cloudflare/workers-sdk#9193); Cloudflare's own CI harness likewise tree-kills rather
67+
* than trusting signal propagation.
68+
*
69+
* Caveat: if the test runner itself dies abruptly (`kill -9`, OOM), nothing here runs and
70+
* the detached tree is orphaned; a process 'exit' guard in beforeAll covers ordinary
71+
* fatal exits, and an orphan can't affect later runs (ephemeral port + readiness nonce).
72+
*/
73+
async function killWranglerTree(proc: ChildProcess): Promise<void> {
74+
if (proc.pid === undefined) {
75+
return;
76+
}
77+
if (process.platform === 'win32') {
78+
try {
79+
execSync(`taskkill /pid ${proc.pid} /T /F`, { stdio: 'ignore' });
80+
} catch {
81+
// Tree already gone
82+
}
83+
return;
84+
}
85+
try {
86+
process.kill(-proc.pid, 'SIGTERM');
87+
} catch {
88+
// Group already gone
89+
}
90+
// Wait on 'exit', not 'close': surviving grandchildren inherit the stdio pipes, so
91+
// 'close' can stay pending long after wrangler itself died.
92+
if (proc.exitCode === null && proc.signalCode === null) {
93+
await new Promise<void>(resolve => {
94+
const timer = setTimeout(resolve, SHUTDOWN_GRACE_MS);
95+
proc.once('exit', () => {
96+
clearTimeout(timer);
97+
resolve();
98+
});
99+
});
100+
}
101+
try {
102+
process.kill(-proc.pid, 'SIGKILL');
103+
} catch {
104+
// Group already gone — the expected case after a clean SIGTERM shutdown
105+
}
106+
}
20107

21108
/**
22109
* Wait until the worker can serve a real MCP `initialize` request.
23110
*
24111
* Wrangler's "Ready on …" stdout line is unreliable: miniflare can print it before the user
25112
* worker is actually wired, and subsequent POSTs come back as `500 Network connection lost` or
26113
* `ECONNREFUSED`. The only signal we can trust is "the server returned an MCP-shaped response
27-
* to a protocol request".
114+
* to a protocol request" — and specifically a response carrying this run's
115+
* {@link SERVER_VERSION_NONCE}, so a stale server from a previous run can't pass the probe.
28116
*
29-
* Polls the configured port with an MCP `initialize` POST every {@link READINESS_POLL_INTERVAL_MS}ms
30-
* until either a JSON-RPC result body comes back, the wrangler process exits, or
31-
* {@link READINESS_TIMEOUT_MS} elapses.
117+
* Polls the given port with an MCP `initialize` POST every {@link READINESS_POLL_INTERVAL_MS}ms
118+
* until either a matching JSON-RPC result body comes back, the wrangler process exits, or
119+
* {@link READINESS_TIMEOUT_MS} elapses. Each probe is individually bounded so a wedged server
120+
* that accepts connections but never responds can't stall the loop.
32121
*/
33-
async function waitForMcpReady(proc: ChildProcess): Promise<void> {
122+
async function waitForMcpReady(proc: ChildProcess, port: number): Promise<void> {
34123
let stderrTail = '';
35124
proc.stderr?.on('data', d => {
36125
stderrTail = (stderrTail + d.toString()).slice(-2048);
37126
});
127+
// Keep stdout flowing too: nothing reads it otherwise, and a full pipe buffer would make
128+
// wrangler queue log writes in memory for as long as the server runs.
129+
let stdoutTail = '';
130+
proc.stdout?.on('data', d => {
131+
stdoutTail = (stdoutTail + d.toString()).slice(-2048);
132+
});
38133

39-
let processExitedWithCode: number | null = null;
40-
proc.on('exit', code => {
41-
processExitedWithCode = code ?? -1;
134+
let processFailure: string | null = null;
135+
proc.on('exit', (code, signal) => {
136+
processFailure = signal === null ? `exited with code ${code ?? -1}` : `was killed by ${signal}`;
137+
});
138+
proc.on('error', error => {
139+
processFailure = `failed to spawn: ${error.message}`;
42140
});
43141

44142
const deadline = Date.now() + READINESS_TIMEOUT_MS;
45143
let lastFailure = 'no attempts made';
46144

47145
while (Date.now() < deadline) {
48-
if (processExitedWithCode !== null) {
49-
throw new Error(`wrangler dev exited with code ${processExitedWithCode} before becoming ready.\nstderr tail:\n${stderrTail}`);
146+
if (processFailure !== null) {
147+
throw new Error(
148+
`wrangler dev ${processFailure} before becoming ready.\nstdout tail:\n${stdoutTail}\nstderr tail:\n${stderrTail}`
149+
);
50150
}
51151

52152
try {
53-
const response = await fetch(`http://127.0.0.1:${PORT}/`, {
153+
const response = await fetch(`http://127.0.0.1:${port}/`, {
54154
method: 'POST',
155+
signal: AbortSignal.timeout(2000),
55156
headers: {
56157
'Content-Type': 'application/json',
57158
Accept: 'application/json, text/event-stream'
@@ -68,7 +169,7 @@ async function waitForMcpReady(proc: ChildProcess): Promise<void> {
68169
})
69170
});
70171
const body = await response.text();
71-
if (response.ok && body.includes('"jsonrpc"') && body.includes('"result"')) {
172+
if (response.ok && body.includes('"jsonrpc"') && body.includes('"result"') && body.includes(SERVER_VERSION_NONCE)) {
72173
return;
73174
}
74175
lastFailure = `status=${response.status} body=${body.slice(0, 200)}`;
@@ -80,57 +181,77 @@ async function waitForMcpReady(proc: ChildProcess): Promise<void> {
80181
}
81182

82183
throw new Error(
83-
`Worker did not become ready within ${READINESS_TIMEOUT_MS}ms.\nLast probe: ${lastFailure}\nstderr tail:\n${stderrTail}`
184+
`Worker did not become ready within ${READINESS_TIMEOUT_MS}ms.\nLast probe: ${lastFailure}\nstdout tail:\n${stdoutTail}\nstderr tail:\n${stderrTail}`
84185
);
85186
}
86187

87188
describe('Cloudflare Workers compatibility (no nodejs_compat)', () => {
189+
let port = 0;
88190
let cleanup: (() => Promise<void>) | null = null;
89191

90192
beforeAll(async () => {
91193
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cf-worker-test-'));
194+
let proc: ChildProcess | null = null;
195+
let orphanGuard: (() => void) | null = null;
92196

93-
// Pack server package
94-
const serverPkgPath = path.resolve(__dirname, '../../../../packages/server');
95-
const packOutput = execSync(`pnpm pack --pack-destination ${tempDir}`, {
96-
cwd: serverPkgPath,
97-
encoding: 'utf8'
98-
});
99-
const tarballName = path.basename(packOutput.trim().split('\n').pop()!);
100-
101-
// Write package.json
102-
const pkgJson = {
103-
name: 'cf-worker-test',
104-
private: true,
105-
type: 'module',
106-
dependencies: {
107-
'@modelcontextprotocol/server': `file:./${tarballName}`
108-
},
109-
devDependencies: {
110-
wrangler: '^4.14.4'
197+
// Registered before anything can fail (including a vitest hook timeout, which skips
198+
// the catch below but still runs afterAll): kill the process tree, then the temp dir.
199+
cleanup = async () => {
200+
if (orphanGuard) {
201+
process.removeListener('exit', orphanGuard);
202+
orphanGuard = null;
203+
}
204+
if (proc) {
205+
await killWranglerTree(proc);
206+
}
207+
try {
208+
fs.rmSync(tempDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
209+
} catch {
210+
// Ignore cleanup errors
111211
}
112212
};
113-
fs.writeFileSync(path.join(tempDir, 'package.json'), JSON.stringify(pkgJson, null, 2));
114-
115-
// Write wrangler config
116-
const wranglerConfig = {
117-
$schema: 'node_modules/wrangler/config-schema.json',
118-
name: 'cf-worker-test',
119-
main: 'server.ts',
120-
compatibility_date: '2025-01-01'
121-
};
122-
fs.writeFileSync(path.join(tempDir, 'wrangler.jsonc'), JSON.stringify(wranglerConfig, null, 2));
123213

124-
// Write server source
125-
const serverSource = `
214+
try {
215+
// Pack server package
216+
const serverPkgPath = path.resolve(__dirname, '../../../../packages/server');
217+
const packOutput = execSync(`pnpm pack --pack-destination "${tempDir}"`, {
218+
cwd: serverPkgPath,
219+
encoding: 'utf8',
220+
timeout: 60_000
221+
});
222+
const tarballName = path.basename(packOutput.trim().split('\n').pop()!);
223+
224+
// Write package.json
225+
const pkgJson = {
226+
name: 'cf-worker-test',
227+
private: true,
228+
type: 'module',
229+
dependencies: {
230+
'@modelcontextprotocol/server': `file:./${tarballName}`
231+
}
232+
};
233+
fs.writeFileSync(path.join(tempDir, 'package.json'), JSON.stringify(pkgJson, null, 2));
234+
235+
// Write wrangler config
236+
const wranglerConfig = {
237+
name: 'cf-worker-test',
238+
main: 'server.ts',
239+
compatibility_date: '2025-01-01'
240+
};
241+
fs.writeFileSync(path.join(tempDir, 'wrangler.jsonc'), JSON.stringify(wranglerConfig, null, 2));
242+
243+
// Write server source
244+
const serverSource = `
126245
import { McpServer, WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/server';
246+
import { z } from 'zod';
127247
128-
const server = new McpServer({ name: "test-server", version: "1.0.0" });
248+
const server = new McpServer({ name: "test-server", version: "${SERVER_VERSION_NONCE}" });
129249
130250
server.registerTool("greet", {
131-
description: "Greet someone"
132-
}, async (args) => ({
133-
content: [{ type: "text", text: "Hello, " + (args.name || "World") + "!" }]
251+
description: "Greet someone",
252+
inputSchema: z.object({ name: z.string() })
253+
}, async ({ name }) => ({
254+
content: [{ type: "text", text: "Hello, " + name + "!" }]
134255
}));
135256
136257
const transport = new WebStandardStreamableHTTPServerTransport();
@@ -140,51 +261,56 @@ export default {
140261
fetch: (request) => transport.handleRequest(request)
141262
};
142263
`;
143-
fs.writeFileSync(path.join(tempDir, 'server.ts'), serverSource);
264+
fs.writeFileSync(path.join(tempDir, 'server.ts'), serverSource);
144265

145-
// Install dependencies
146-
execSync('npm install', { cwd: tempDir, stdio: 'pipe', timeout: 60_000 });
266+
// Install dependencies (just the packed server tarball — wrangler comes from the
267+
// workspace, see WRANGLER_BIN)
268+
execSync('npm install --no-audit --no-fund --prefer-offline', { cwd: tempDir, stdio: 'pipe', timeout: 60_000 });
147269

148-
// Start wrangler dev server. Readiness is determined by probing the MCP endpoint, not by
149-
// parsing wrangler's stdout — see waitForMcpReady for the reasoning.
150-
const proc = spawn('npx', ['wrangler', 'dev', '--local', '--port', String(PORT)], {
151-
cwd: tempDir,
152-
shell: true,
153-
stdio: 'pipe'
154-
});
270+
// Start wrangler dev directly from the workspace installation, in its own process
271+
// group so the whole tree can be torn down — see killWranglerTree. Readiness is
272+
// determined by probing the MCP endpoint, not by parsing wrangler's stdout — see
273+
// waitForMcpReady for the reasoning.
274+
port = await getFreePort();
275+
proc = spawn(process.execPath, [WRANGLER_BIN, 'dev', '--local', '--port', String(port)], {
276+
cwd: tempDir,
277+
stdio: 'pipe',
278+
detached: process.platform !== 'win32'
279+
});
155280

156-
try {
157-
await waitForMcpReady(proc);
281+
// Best-effort orphan guard: if the runner dies without running afterAll
282+
// (process.exit, fatal error), take the detached tree down with it. Signal
283+
// deaths (SIGKILL, OOM) can't be intercepted — see killWranglerTree.
284+
if (process.platform !== 'win32' && proc.pid !== undefined) {
285+
const pgid = proc.pid;
286+
orphanGuard = () => {
287+
try {
288+
process.kill(-pgid, 'SIGKILL');
289+
} catch {
290+
// Tree already gone
291+
}
292+
};
293+
process.once('exit', orphanGuard);
294+
}
295+
296+
await waitForMcpReady(proc, port);
158297
} catch (error) {
159-
proc.kill('SIGTERM');
298+
await cleanup();
160299
throw error;
161300
}
162-
163-
cleanup = async () => {
164-
proc.kill('SIGTERM');
165-
await new Promise<void>(resolve => {
166-
proc.on('close', () => resolve());
167-
setTimeout(resolve, 5000);
168-
});
169-
try {
170-
fs.rmSync(tempDir, { recursive: true, force: true });
171-
} catch {
172-
// Ignore cleanup errors
173-
}
174-
};
175-
}, 120_000);
301+
}, 180_000);
176302

177303
afterAll(async () => {
178304
await cleanup?.();
179-
});
305+
}, 30_000);
180306

181307
it('should handle MCP requests', async () => {
182308
const client = new Client({ name: 'test-client', version: '1.0.0' });
183-
const transport = new StreamableHTTPClientTransport(new URL(`http://127.0.0.1:${PORT}/`));
309+
const transport = new StreamableHTTPClientTransport(new URL(`http://127.0.0.1:${port}/`));
184310
await client.connect(transport);
185311

186-
const result = await client.callTool({ name: 'greet', arguments: { name: 'World' } });
187-
expect(result.content).toEqual([{ type: 'text', text: 'Hello, World!' }]);
312+
const result = await client.callTool({ name: 'greet', arguments: { name: 'Workers' } });
313+
expect(result.content).toEqual([{ type: 'text', text: 'Hello, Workers!' }]);
188314

189315
await client.close();
190316
}, 30_000);

0 commit comments

Comments
 (0)