Skip to content

Commit ad5db8b

Browse files
authored
Merge pull request #94 from PatrickSys/fix/benchmark-lifecycle-cleanup
fix: clean up benchmark MCP sessions
2 parents 51cbcfb + fcce4b6 commit ad5db8b

File tree

5 files changed

+326
-109
lines changed

5 files changed

+326
-109
lines changed

scripts/benchmark-comparators.mjs

Lines changed: 93 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
1717
import { execSync, exec } from 'child_process';
1818
import { parseArgs } from 'util';
1919
import { promisify } from 'util';
20+
import { withManagedStdioClientSession } from './lib/managed-mcp-session.mjs';
2021

2122
const execAsync = promisify(exec);
2223

@@ -293,125 +294,108 @@ const COMPARATOR_ADAPTERS = [
293294
// ---------------------------------------------------------------------------
294295

295296
async function runComparatorViaMcp(adapter, rootPath, tasks) {
296-
let Client, StdioClientTransport;
297-
try {
298-
({ Client } = await import('@modelcontextprotocol/sdk/client/index.js'));
299-
({ StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js'));
300-
} catch (err) {
301-
throw new Error(`MCP SDK not available: ${err.message}`);
302-
}
303-
304-
const transport = new StdioClientTransport({
305-
command: adapter.serverCommand,
306-
args: adapter.serverArgs,
307-
env: { ...process.env, ...adapter.serverEnv }
308-
});
309-
310-
const client = new Client({ name: 'benchmark-runner', version: '1.0.0' });
311-
312-
try {
313-
await client.connect(transport);
314-
} catch (err) {
315-
throw new Error(`Failed to connect to ${adapter.name} MCP server: ${err.message}`);
316-
}
297+
return withManagedStdioClientSession(
298+
{
299+
serverCommand: adapter.serverCommand,
300+
serverArgs: adapter.serverArgs,
301+
serverEnv: adapter.serverEnv,
302+
connectTimeoutMs: adapter.connectTimeout ?? 15_000
303+
},
304+
async ({ client }) => {
305+
if (adapter.initTimeout > 0) {
306+
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
307+
}
317308

318-
// Wait for server to be ready
319-
if (adapter.initTimeout > 0) {
320-
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
321-
}
309+
let availableTools = [];
310+
try {
311+
const toolsResult = await client.listTools();
312+
availableTools = toolsResult.tools ?? [];
313+
} catch (err) {
314+
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
315+
}
322316

323-
// Discover tools
324-
let availableTools = [];
325-
try {
326-
const toolsResult = await client.listTools();
327-
availableTools = toolsResult.tools ?? [];
328-
} catch (err) {
329-
await client.close();
330-
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
331-
}
317+
const toolNames = availableTools.map((t) => t.name);
318+
let searchToolName = adapter.searchTool;
319+
if (!searchToolName) {
320+
searchToolName =
321+
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
322+
null;
323+
}
332324

333-
const toolNames = availableTools.map((t) => t.name);
325+
if (!searchToolName || !toolNames.includes(searchToolName)) {
326+
throw new Error(
327+
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
328+
);
329+
}
334330

335-
// Determine search tool (may be dynamic for CodeGraphContext)
336-
let searchToolName = adapter.searchTool;
337-
if (!searchToolName) {
338-
// Try to find a search-like tool
339-
searchToolName =
340-
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
341-
null;
342-
}
331+
let totalToolCalls = 0;
332+
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
333+
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
334+
const indexStartMs = Date.now();
335+
try {
336+
await Promise.race([
337+
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
338+
new Promise((_, reject) =>
339+
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
340+
)
341+
]);
342+
totalToolCalls++;
343+
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
344+
} catch (err) {
345+
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
346+
}
347+
}
343348

344-
if (!searchToolName || !toolNames.includes(searchToolName)) {
345-
await client.close();
346-
throw new Error(
347-
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
348-
);
349-
}
349+
const taskResults = [];
350+
for (const task of tasks) {
351+
const startMs = Date.now();
352+
let payload = '';
353+
let toolCallCount = totalToolCalls;
354+
355+
try {
356+
const result = await client.callTool({
357+
name: searchToolName,
358+
arguments: adapter.searchArgs(task)
359+
});
360+
toolCallCount++;
361+
payload = adapter.extractPayload(result);
362+
} catch (err) {
363+
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
364+
payload = '';
365+
}
350366

351-
// Index the repo if needed
352-
let totalToolCalls = 0;
353-
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
354-
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
355-
const indexStartMs = Date.now();
356-
try {
357-
await Promise.race([
358-
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
359-
new Promise((_, reject) =>
360-
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
361-
)
362-
]);
363-
totalToolCalls++;
364-
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
365-
} catch (err) {
366-
await client.close();
367-
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
368-
}
369-
}
367+
const elapsedMs = Date.now() - startMs;
368+
const payloadBytes = countUtf8Bytes(payload);
369+
const estimatedTokens = estimateTokens(payloadBytes);
370+
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
371+
payload,
372+
task.expectedSignals,
373+
task.forbiddenSignals
374+
);
370375

371-
// Run each task
372-
const taskResults = [];
373-
for (const task of tasks) {
374-
const startMs = Date.now();
375-
let payload = '';
376-
let toolCallCount = totalToolCalls;
376+
taskResults.push({
377+
taskId: task.id,
378+
job: task.job,
379+
surface: task.surface,
380+
usefulnessScore,
381+
matchedSignals,
382+
missingSignals,
383+
payloadBytes,
384+
estimatedTokens,
385+
toolCallCount,
386+
elapsedMs
387+
});
388+
}
377389

378-
try {
379-
const result = await client.callTool({
380-
name: searchToolName,
381-
arguments: adapter.searchArgs(task)
382-
});
383-
toolCallCount++;
384-
payload = adapter.extractPayload(result);
385-
} catch (err) {
386-
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
387-
payload = '';
390+
return taskResults;
388391
}
389-
390-
const elapsedMs = Date.now() - startMs;
391-
const payloadBytes = countUtf8Bytes(payload);
392-
const estimatedTokens = estimateTokens(payloadBytes);
393-
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
394-
payload,
395-
task.expectedSignals,
396-
task.forbiddenSignals
392+
).catch((err) => {
393+
throw new Error(
394+
err.message.includes('timed out')
395+
? `Failed to connect to ${adapter.name} MCP server: ${err.message}`
396+
: err.message
397397
);
398-
399-
taskResults.push({
400-
taskId: task.id,
401-
job: task.job,
402-
surface: task.surface,
403-
usefulnessScore,
404-
matchedSignals,
405-
missingSignals,
406-
payloadBytes,
407-
estimatedTokens,
408-
toolCallCount,
409-
elapsedMs
410-
});
411-
}
412-
413-
await client.close();
414-
return taskResults;
398+
});
415399
}
416400

417401
// ---------------------------------------------------------------------------
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import process from 'node:process';
2+
3+
async function loadSdkClient() {
4+
const [{ Client }, { StdioClientTransport }] = await Promise.all([
5+
import('@modelcontextprotocol/sdk/client/index.js'),
6+
import('@modelcontextprotocol/sdk/client/stdio.js')
7+
]);
8+
9+
return { Client, StdioClientTransport };
10+
}
11+
12+
function createTimeoutError(timeoutMs) {
13+
const seconds = Math.max(1, Math.round(timeoutMs / 1000));
14+
return new Error(`MCP client connect timed out after ${seconds}s`);
15+
}
16+
17+
function withTimeout(promise, timeoutMs) {
18+
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
19+
return promise;
20+
}
21+
22+
let timer;
23+
const timeoutPromise = new Promise((_, reject) => {
24+
timer = setTimeout(() => reject(createTimeoutError(timeoutMs)), timeoutMs);
25+
timer.unref?.();
26+
});
27+
28+
return Promise.race([promise, timeoutPromise]).finally(() => {
29+
if (timer) {
30+
clearTimeout(timer);
31+
}
32+
});
33+
}
34+
35+
function delay(timeoutMs) {
36+
return new Promise((resolve) => {
37+
const timer = setTimeout(resolve, timeoutMs);
38+
timer.unref?.();
39+
});
40+
}
41+
42+
async function safeClose(client, transport, connected) {
43+
const closeAttempts = [];
44+
45+
if (connected) {
46+
closeAttempts.push(client.close().catch(() => undefined));
47+
}
48+
49+
closeAttempts.push(transport.close?.().catch(() => undefined));
50+
await Promise.all(closeAttempts);
51+
}
52+
53+
export async function withManagedStdioClientSession(options, callback) {
54+
const {
55+
serverCommand,
56+
serverArgs = [],
57+
serverEnv = {},
58+
cwd,
59+
clientInfo = { name: 'benchmark-runner', version: '1.0.0' },
60+
connectTimeoutMs = 10_000,
61+
onSpawn
62+
} = options;
63+
64+
const { Client, StdioClientTransport } = await loadSdkClient();
65+
const transport = new StdioClientTransport({
66+
command: serverCommand,
67+
args: serverArgs,
68+
env: { ...process.env, ...serverEnv },
69+
cwd
70+
});
71+
const client = new Client(clientInfo);
72+
73+
let connected = false;
74+
let settling = false;
75+
const connectPromise = client.connect(transport);
76+
const spawnNotification = (async () => {
77+
if (typeof onSpawn !== 'function') {
78+
return;
79+
}
80+
81+
while (!settling) {
82+
if (transport.pid !== null) {
83+
onSpawn(transport.pid);
84+
return;
85+
}
86+
await new Promise((resolve) => setTimeout(resolve, 10));
87+
}
88+
89+
if (transport.pid !== null) {
90+
onSpawn(transport.pid);
91+
}
92+
})();
93+
94+
try {
95+
await withTimeout(connectPromise, connectTimeoutMs);
96+
connected = true;
97+
return await callback({ client, transport });
98+
} finally {
99+
settling = true;
100+
await safeClose(client, transport, connected);
101+
await spawnNotification.catch(() => undefined);
102+
await Promise.race([connectPromise, delay(5_000)]).catch(() => undefined);
103+
}
104+
}

0 commit comments

Comments
 (0)