Skip to content

Commit 3bfa481

Browse files
cameroncookecodex
andcommitted
fix(server): Redesign MCP shutdown with bounded Sentry flush
Move MCP shutdown orchestration into a dedicated bounded runner and stop using Sentry.close in MCP shutdown paths. Suppress stdio immediately on transport disconnect, capture one shutdown summary event, seal further capture, then perform bounded Sentry.flush before explicit exit. Add shared bulk cleanup helpers for tracked runtime resources and wire shutdown through those helpers. This keeps shutdown deterministic under parent/disconnect teardown while preserving high-value shutdown telemetry. Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 28bd7c4 commit 3bfa481

22 files changed

+1342
-820
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"hooks:install": "node scripts/install-git-hooks.js",
2323
"generate:version": "npx tsx scripts/generate-version.ts",
2424
"repro:mcp-lifecycle-leak": "npm run build && npx tsx scripts/repro-mcp-lifecycle-leak.ts",
25+
"repro:sentry-mcp-teardown": "cd repros/sentry-mcp-teardown && npm run harness",
2526
"bundle:axe": "scripts/bundle-axe.sh",
2627
"package:macos": "scripts/package-macos-portable.sh",
2728
"package:macos:universal": "scripts/package-macos-portable.sh --universal",

scripts/repro-mcp-lifecycle-leak.ts

Lines changed: 127 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ interface CliOptions {
55
iterations: number;
66
closeDelayMs: number;
77
settleMs: number;
8+
shutdownMode: 'graceful-stdin' | 'parent-hard-exit';
89
}
910

1011
interface PeerProcess {
1112
pid: number;
13+
ppid: number;
1214
ageSeconds: number;
1315
rssKb: number;
1416
command: string;
@@ -19,6 +21,7 @@ function parseArgs(argv: string[]): CliOptions {
1921
iterations: 20,
2022
closeDelayMs: 0,
2123
settleMs: 2000,
24+
shutdownMode: 'parent-hard-exit',
2225
};
2326

2427
for (let index = 0; index < argv.length; index += 1) {
@@ -34,6 +37,12 @@ function parseArgs(argv: string[]): CliOptions {
3437
} else if (arg === '--settle-ms' && value) {
3538
options.settleMs = Number(value);
3639
index += 1;
40+
} else if (arg === '--shutdown-mode' && value) {
41+
if (value !== 'graceful-stdin' && value !== 'parent-hard-exit') {
42+
throw new Error('--shutdown-mode must be graceful-stdin or parent-hard-exit');
43+
}
44+
options.shutdownMode = value;
45+
index += 1;
3746
}
3847
}
3948

@@ -95,7 +104,7 @@ function parseElapsedSeconds(value: string): number | null {
95104

96105
async function sampleMcpProcesses(): Promise<PeerProcess[]> {
97106
return new Promise((resolve, reject) => {
98-
const child = spawn('ps', ['-axo', 'pid=,etime=,rss=,command='], {
107+
const child = spawn('ps', ['-axo', 'pid=,ppid=,etime=,rss=,command='], {
99108
stdio: ['ignore', 'pipe', 'pipe'],
100109
});
101110
let stdout = '';
@@ -119,16 +128,17 @@ async function sampleMcpProcesses(): Promise<PeerProcess[]> {
119128
.map((line) => line.trim())
120129
.filter(Boolean)
121130
.map((line) => {
122-
const match = line.match(/^(\d+)\s+(\S+)\s+(\d+)\s+(.+)$/);
131+
const match = line.match(/^(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(.+)$/);
123132
if (!match) {
124133
return null;
125134
}
126-
const ageSeconds = parseElapsedSeconds(match[2]);
135+
const ageSeconds = parseElapsedSeconds(match[3]);
127136
return {
128137
pid: Number(match[1]),
138+
ppid: Number(match[2]),
129139
ageSeconds,
130-
rssKb: Number(match[3]),
131-
command: match[4],
140+
rssKb: Number(match[4]),
141+
command: match[5],
132142
};
133143
})
134144
.filter((entry): entry is PeerProcess => {
@@ -146,21 +156,33 @@ async function sampleMcpProcesses(): Promise<PeerProcess[]> {
146156
});
147157
}
148158

149-
async function runIteration(closeDelayMs: number): Promise<boolean> {
159+
interface IterationResult {
160+
helperExited: boolean;
161+
childExited: boolean;
162+
childPid: number | null;
163+
}
164+
165+
async function runGracefulStdinIteration(closeDelayMs: number): Promise<IterationResult> {
150166
return new Promise((resolve) => {
151167
const child = spawn(process.execPath, ['build/cli.js', 'mcp'], {
152168
cwd: process.cwd(),
153169
stdio: ['pipe', 'ignore', 'ignore'],
154170
});
155171

156-
let exited = false;
172+
let settled = false;
173+
const finish = (result: IterationResult): void => {
174+
if (settled) {
175+
return;
176+
}
177+
settled = true;
178+
resolve(result);
179+
};
180+
157181
child.once('close', () => {
158-
exited = true;
159-
resolve(true);
182+
finish({ helperExited: true, childExited: true, childPid: child.pid ?? null });
160183
});
161184
child.once('error', () => {
162-
exited = true;
163-
resolve(false);
185+
finish({ helperExited: false, childExited: false, childPid: child.pid ?? null });
164186
});
165187

166188
setTimeout(() => {
@@ -169,54 +191,138 @@ async function runIteration(closeDelayMs: number): Promise<boolean> {
169191

170192
setTimeout(
171193
() => {
172-
if (!exited) {
173-
resolve(false);
174-
}
194+
finish({ helperExited: false, childExited: false, childPid: child.pid ?? null });
175195
},
176196
Math.max(1000, closeDelayMs + 1000),
177197
);
178198
});
179199
}
180200

201+
async function runParentHardExitIteration(closeDelayMs: number): Promise<IterationResult> {
202+
return new Promise((resolve) => {
203+
const helper = spawn(
204+
process.execPath,
205+
[
206+
'scripts/repro-mcp-parent-exit-helper.mjs',
207+
process.execPath,
208+
'build/cli.js',
209+
process.cwd(),
210+
String(closeDelayMs),
211+
],
212+
{
213+
cwd: process.cwd(),
214+
stdio: ['ignore', 'pipe', 'pipe'],
215+
},
216+
);
217+
218+
let childPid: number | null = null;
219+
let settled = false;
220+
let stdout = '';
221+
222+
const finish = (result: IterationResult): void => {
223+
if (settled) {
224+
return;
225+
}
226+
settled = true;
227+
resolve(result);
228+
};
229+
230+
helper.stdout.on('data', (chunk: Buffer) => {
231+
stdout += chunk.toString();
232+
const candidate = stdout.split('\n')[0]?.trim();
233+
if (candidate && /^\d+$/.test(candidate)) {
234+
childPid = Number(candidate);
235+
}
236+
});
237+
238+
helper.once('error', () => {
239+
finish({ helperExited: false, childExited: false, childPid });
240+
});
241+
242+
helper.once('close', (code) => {
243+
finish({ helperExited: code === 0, childExited: false, childPid });
244+
});
245+
246+
setTimeout(
247+
() => {
248+
finish({ helperExited: false, childExited: false, childPid });
249+
},
250+
Math.max(1500, closeDelayMs + 1500),
251+
);
252+
});
253+
}
254+
255+
async function runIteration(options: CliOptions): Promise<IterationResult> {
256+
if (options.shutdownMode === 'parent-hard-exit') {
257+
return runParentHardExitIteration(options.closeDelayMs);
258+
}
259+
260+
return runGracefulStdinIteration(options.closeDelayMs);
261+
}
262+
181263
async function main(): Promise<void> {
182264
const options = parseArgs(process.argv.slice(2));
183265
const before = await sampleMcpProcesses();
184266
const baselinePids = new Set(before.map((entry) => entry.pid));
185267

186-
let exitedCount = 0;
268+
let helperExitedCount = 0;
269+
let childExitedCount = 0;
270+
const spawnedChildPids = new Set<number>();
271+
187272
for (let index = 0; index < options.iterations; index += 1) {
188-
const exited = await runIteration(options.closeDelayMs);
189-
if (exited) {
190-
exitedCount += 1;
273+
const result = await runIteration(options);
274+
if (result.helperExited) {
275+
helperExitedCount += 1;
276+
}
277+
if (result.childExited) {
278+
childExitedCount += 1;
279+
}
280+
if (result.childPid !== null) {
281+
spawnedChildPids.add(result.childPid);
191282
}
192283
}
193284

194285
await delay(options.settleMs);
195286

196287
const after = await sampleMcpProcesses();
197288
const lingering = after.filter((entry) => !baselinePids.has(entry.pid));
289+
const lingeringSpawned = lingering.filter((entry) => spawnedChildPids.has(entry.pid));
198290

199291
console.log(
200292
JSON.stringify(
201293
{
294+
shutdownMode: options.shutdownMode,
202295
iterations: options.iterations,
203-
exitedCount,
296+
helperExitedCount,
297+
childExitedCount,
298+
spawnedChildPidCount: spawnedChildPids.size,
204299
baselineProcessCount: before.length,
205300
finalProcessCount: after.length,
206301
lingeringProcessCount: lingering.length,
207-
lingering: lingering.map(({ pid, ageSeconds, rssKb, command }) => ({
302+
lingeringSpawnedProcessCount: lingeringSpawned.length,
303+
lingeringSpawned: lingeringSpawned.map(({ pid, ppid, ageSeconds, rssKb, command }) => ({
304+
pid,
305+
ppid,
306+
ageSeconds,
307+
rssKb,
308+
command,
309+
})),
310+
lingering: lingering.map(({ pid, ppid, ageSeconds, rssKb, command }) => ({
208311
pid,
312+
ppid,
209313
ageSeconds,
210314
rssKb,
211315
command,
212316
})),
317+
orphanedLingeringCount: lingering.filter((entry) => entry.ppid === 1).length,
318+
maxLingeringRssKb: lingering.reduce((max, entry) => Math.max(max, entry.rssKb), 0),
213319
},
214320
null,
215321
2,
216322
),
217323
);
218324

219-
process.exit(lingering.length === 0 ? 0 : 1);
325+
process.exit(lingeringSpawned.length === 0 ? 0 : 1);
220326
}
221327

222328
void main().catch((error) => {

0 commit comments

Comments
 (0)