Skip to content

Commit 408c248

Browse files
committed
fix: clean up benchmark MCP sessions
1 parent 51cbcfb commit 408c248

File tree

5 files changed

+317
-108
lines changed

5 files changed

+317
-108
lines changed

scripts/benchmark-comparators.mjs

Lines changed: 95 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
1717
import { execSync, exec } from 'child_process';
1818
import { parseArgs } from 'util';
1919
import { promisify } from 'util';
20+
import { withManagedStdioClientSession } from './lib/managed-mcp-session.mjs';
2021

2122
const execAsync = promisify(exec);
2223

@@ -293,125 +294,111 @@ const COMPARATOR_ADAPTERS = [
293294
// ---------------------------------------------------------------------------
294295

295296
async function runComparatorViaMcp(adapter, rootPath, tasks) {
296-
let Client, StdioClientTransport;
297-
try {
298-
({ Client } = await import('@modelcontextprotocol/sdk/client/index.js'));
299-
({ StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js'));
300-
} catch (err) {
301-
throw new Error(`MCP SDK not available: ${err.message}`);
302-
}
303-
304-
const transport = new StdioClientTransport({
305-
command: adapter.serverCommand,
306-
args: adapter.serverArgs,
307-
env: { ...process.env, ...adapter.serverEnv }
308-
});
297+
return withManagedStdioClientSession(
298+
{
299+
serverCommand: adapter.serverCommand,
300+
serverArgs: adapter.serverArgs,
301+
serverEnv: adapter.serverEnv,
302+
connectTimeoutMs: adapter.connectTimeout ?? 15_000
303+
},
304+
async ({ client }) => {
305+
if (adapter.initTimeout > 0) {
306+
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
307+
}
309308

310-
const client = new Client({ name: 'benchmark-runner', version: '1.0.0' });
309+
let availableTools = [];
310+
try {
311+
const toolsResult = await client.listTools();
312+
availableTools = toolsResult.tools ?? [];
313+
} catch (err) {
314+
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
315+
}
311316

312-
try {
313-
await client.connect(transport);
314-
} catch (err) {
315-
throw new Error(`Failed to connect to ${adapter.name} MCP server: ${err.message}`);
316-
}
317+
const toolNames = availableTools.map((t) => t.name);
318+
let searchToolName = adapter.searchTool;
319+
if (!searchToolName) {
320+
searchToolName =
321+
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
322+
null;
323+
}
317324

318-
// Wait for server to be ready
319-
if (adapter.initTimeout > 0) {
320-
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
321-
}
325+
if (!searchToolName || !toolNames.includes(searchToolName)) {
326+
throw new Error(
327+
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
328+
);
329+
}
322330

323-
// Discover tools
324-
let availableTools = [];
325-
try {
326-
const toolsResult = await client.listTools();
327-
availableTools = toolsResult.tools ?? [];
328-
} catch (err) {
329-
await client.close();
330-
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
331-
}
331+
let totalToolCalls = 0;
332+
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
333+
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
334+
const indexStartMs = Date.now();
335+
try {
336+
await Promise.race([
337+
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
338+
new Promise((_, reject) =>
339+
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
340+
)
341+
]);
342+
totalToolCalls++;
343+
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
344+
} catch (err) {
345+
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
346+
}
347+
}
332348

333-
const toolNames = availableTools.map((t) => t.name);
349+
const taskResults = [];
350+
for (const task of tasks) {
351+
const startMs = Date.now();
352+
let payload = '';
353+
let toolCallCount = totalToolCalls;
354+
355+
try {
356+
const result = await client.callTool({
357+
name: searchToolName,
358+
arguments: adapter.searchArgs(task)
359+
});
360+
toolCallCount++;
361+
payload = adapter.extractPayload(result);
362+
} catch (err) {
363+
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
364+
payload = '';
365+
}
334366

335-
// Determine search tool (may be dynamic for CodeGraphContext)
336-
let searchToolName = adapter.searchTool;
337-
if (!searchToolName) {
338-
// Try to find a search-like tool
339-
searchToolName =
340-
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
341-
null;
342-
}
367+
const elapsedMs = Date.now() - startMs;
368+
const payloadBytes = countUtf8Bytes(payload);
369+
const estimatedTokens = estimateTokens(payloadBytes);
370+
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
371+
payload,
372+
task.expectedSignals,
373+
task.forbiddenSignals
374+
);
343375

344-
if (!searchToolName || !toolNames.includes(searchToolName)) {
345-
await client.close();
346-
throw new Error(
347-
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
348-
);
349-
}
376+
taskResults.push({
377+
taskId: task.id,
378+
job: task.job,
379+
surface: task.surface,
380+
usefulnessScore,
381+
matchedSignals,
382+
missingSignals,
383+
payloadBytes,
384+
estimatedTokens,
385+
toolCallCount,
386+
elapsedMs
387+
});
388+
}
350389

351-
// Index the repo if needed
352-
let totalToolCalls = 0;
353-
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
354-
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
355-
const indexStartMs = Date.now();
356-
try {
357-
await Promise.race([
358-
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
359-
new Promise((_, reject) =>
360-
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
361-
)
362-
]);
363-
totalToolCalls++;
364-
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
365-
} catch (err) {
366-
await client.close();
367-
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
390+
return taskResults;
368391
}
369-
}
370-
371-
// Run each task
372-
const taskResults = [];
373-
for (const task of tasks) {
374-
const startMs = Date.now();
375-
let payload = '';
376-
let toolCallCount = totalToolCalls;
377-
378-
try {
379-
const result = await client.callTool({
380-
name: searchToolName,
381-
arguments: adapter.searchArgs(task)
382-
});
383-
toolCallCount++;
384-
payload = adapter.extractPayload(result);
385-
} catch (err) {
386-
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
387-
payload = '';
392+
).catch((err) => {
393+
if (err.message.startsWith('Failed to connect to')) {
394+
throw err;
388395
}
389-
390-
const elapsedMs = Date.now() - startMs;
391-
const payloadBytes = countUtf8Bytes(payload);
392-
const estimatedTokens = estimateTokens(payloadBytes);
393-
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
394-
payload,
395-
task.expectedSignals,
396-
task.forbiddenSignals
396+
throw new Error(
397+
err.message.includes('timed out')
398+
? `Failed to connect to ${adapter.name} MCP server: ${err.message}`
399+
: err.message
397400
);
398-
399-
taskResults.push({
400-
taskId: task.id,
401-
job: task.job,
402-
surface: task.surface,
403-
usefulnessScore,
404-
matchedSignals,
405-
missingSignals,
406-
payloadBytes,
407-
estimatedTokens,
408-
toolCallCount,
409-
elapsedMs
410-
});
411-
}
412-
413-
await client.close();
414-
return taskResults;
401+
});
415402
}
416403

417404
// ---------------------------------------------------------------------------
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import process from 'node:process';
2+
3+
async function loadSdkClient() {
4+
const [{ Client }, { StdioClientTransport }] = await Promise.all([
5+
import('@modelcontextprotocol/sdk/client/index.js'),
6+
import('@modelcontextprotocol/sdk/client/stdio.js')
7+
]);
8+
9+
return { Client, StdioClientTransport };
10+
}
11+
12+
function createTimeoutError(timeoutMs) {
13+
const seconds = Math.max(1, Math.round(timeoutMs / 1000));
14+
return new Error(`MCP client connect timed out after ${seconds}s`);
15+
}
16+
17+
function withTimeout(promise, timeoutMs) {
18+
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
19+
return promise;
20+
}
21+
22+
let timer;
23+
const timeoutPromise = new Promise((_, reject) => {
24+
timer = setTimeout(() => reject(createTimeoutError(timeoutMs)), timeoutMs);
25+
timer.unref?.();
26+
});
27+
28+
return Promise.race([promise, timeoutPromise]).finally(() => {
29+
if (timer) {
30+
clearTimeout(timer);
31+
}
32+
});
33+
}
34+
35+
async function safeClose(client, transport, connected) {
36+
const closeAttempts = [];
37+
38+
if (connected) {
39+
closeAttempts.push(client.close().catch(() => undefined));
40+
}
41+
42+
closeAttempts.push(transport.close?.().catch(() => undefined));
43+
await Promise.all(closeAttempts);
44+
}
45+
46+
export async function withManagedStdioClientSession(options, callback) {
47+
const {
48+
serverCommand,
49+
serverArgs = [],
50+
serverEnv = {},
51+
cwd,
52+
clientInfo = { name: 'benchmark-runner', version: '1.0.0' },
53+
connectTimeoutMs = 10_000,
54+
onSpawn
55+
} = options;
56+
57+
const { Client, StdioClientTransport } = await loadSdkClient();
58+
const transport = new StdioClientTransport({
59+
command: serverCommand,
60+
args: serverArgs,
61+
env: { ...process.env, ...serverEnv },
62+
cwd
63+
});
64+
const client = new Client(clientInfo);
65+
66+
let connected = false;
67+
let settling = false;
68+
const connectPromise = client.connect(transport);
69+
const spawnNotification = (async () => {
70+
if (typeof onSpawn !== 'function') {
71+
return;
72+
}
73+
74+
while (!settling) {
75+
if (transport.pid !== null) {
76+
onSpawn(transport.pid);
77+
return;
78+
}
79+
await new Promise((resolve) => setTimeout(resolve, 10));
80+
}
81+
})();
82+
83+
try {
84+
await withTimeout(connectPromise, connectTimeoutMs);
85+
connected = true;
86+
return await callback({ client, transport });
87+
} finally {
88+
settling = true;
89+
await safeClose(client, transport, connected);
90+
await spawnNotification.catch(() => undefined);
91+
await connectPromise.catch(() => undefined);
92+
}
93+
}

0 commit comments

Comments
 (0)