Skip to content

Commit 6d637b8

Browse files
authored
fix(bench): dispose WASM worker pool and keep progress off stdout (#1009)
* fix(bench): dispose WASM worker pool and keep progress off stdout v3.9.5 moved WASM tree-sitter parsing into a node:worker_threads Worker (via a shared singleton pool in src/domain/wasm-worker-pool.ts). The benchmark worker scripts never dispose that pool, so the worker thread kept the event loop alive after JSON was written to stdout. The parent fork-engine then SIGKILLed the hung child at 600s and discarded the valid stdout as "Both engines failed." Also fix the embedding benchmark: src/domain/search/models.ts wrote "Embedded N/M\r" progress directly to process.stdout, bypassing the worker's console.log->stderr redirect and corrupting the JSON payload with "Unexpected token 'E'..." errors. - benchmark/query/incremental/resolution scripts: import disposeParsers from the installed package (try/catch for older releases) and call it before cleanup(), then process.exit(0) to guarantee termination. - fork-engine: on SIGKILL, try to salvage captured stdout as JSON before giving up — defence-in-depth against future event-loop-keepalive bugs. - search/models.ts: write progress to stderr so it can't pollute stdout in JSON-consuming benchmark workers. docs check acknowledged — internal benchmark infra fix, no user-facing docs affected. Impact: 7 functions changed, 16 affected * fix(bench): move process.exit(0) outside finally to preserve exception exit codes (#1009) Addresses Greptile P1: placing process.exit(0) inside the finally block would swallow exceptions from the try body (missing fixture, DB open failure, import error) and exit with code 0, masking the failure from CI and the parent forkWorker. Mirror the pattern in benchmark.ts/incremental-benchmark.ts/ query-benchmark.ts: exit only after the success path completes.
1 parent 176e3f0 commit 6d637b8

6 files changed

Lines changed: 50 additions & 2 deletions

File tree

scripts/benchmark.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ const { buildGraph } = await import(srcImport(srcDir, 'domain/graph/builder.js')
8282
const { fnDepsData, fnImpactData, pathData, rolesData, statsData } = await import(
8383
srcImport(srcDir, 'domain/queries.js')
8484
);
85+
// v3.9.5+ parses WASM in a worker_thread that keeps the event loop alive until
86+
// disposed. Older releases don't export disposeParsers — fall back to a no-op.
87+
let disposeParsers = async () => {};
88+
try {
89+
const parser = await import(srcImport(srcDir, 'domain/parser.js'));
90+
if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers;
91+
} catch { /* older release — no worker pool to dispose */ }
8592

8693
const INCREMENTAL_RUNS = 3;
8794
const QUERY_RUNS = 5;
@@ -226,4 +233,6 @@ const workerResult = {
226233

227234
console.log(JSON.stringify(workerResult));
228235

236+
await disposeParsers();
229237
cleanup();
238+
process.exit(0);

scripts/incremental-benchmark.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,13 @@ const { srcDir, cleanup } = await resolveBenchmarkSource();
143143
const dbPath = path.join(root, '.codegraph', 'graph.db');
144144

145145
const { buildGraph } = await import(srcImport(srcDir, 'domain/graph/builder.js'));
146+
// v3.9.5+ parses WASM in a worker_thread that keeps the event loop alive until
147+
// disposed. Older releases don't export disposeParsers — fall back to a no-op.
148+
let disposeParsers = async () => {};
149+
try {
150+
const parser = await import(srcImport(srcDir, 'domain/parser.js'));
151+
if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers;
152+
} catch { /* older release — no worker pool to dispose */ }
146153

147154
// Redirect console.log to stderr so only JSON goes to stdout
148155
const origLog = console.log;
@@ -218,4 +225,6 @@ console.log = origLog;
218225
const workerResult = { fullBuildMs, noopRebuildMs, oneFileRebuildMs, oneFilePhases };
219226
console.log(JSON.stringify(workerResult));
220227

228+
await disposeParsers();
221229
cleanup();
230+
process.exit(0);

scripts/lib/fork-engine.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,18 @@ export function forkWorker(scriptPath, envKey, workerName, argv = [], timeoutMs
9393

9494
if (signal) {
9595
console.error(`[fork] ${workerName} worker killed by signal ${signal}`);
96-
settle(null);
96+
// A worker can finish its measurements and write valid JSON to
97+
// stdout before hanging on a non-daemonized handle (e.g. an
98+
// un-disposed worker_thread pool). SIGKILL from our timeout
99+
// then discards the result. Try to parse captured stdout first
100+
// so we don't lose real data to a lingering event-loop keepalive.
101+
try {
102+
const parsed = JSON.parse(stdout);
103+
console.error(`[fork] ${workerName} worker produced results before being killed — salvaging`);
104+
settle(parsed);
105+
} catch {
106+
settle(null);
107+
}
97108
return;
98109
}
99110

scripts/query-benchmark.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ const { buildGraph } = await import(srcImport(srcDir, 'domain/graph/builder.js')
9494
const { fnDepsData, fnImpactData, diffImpactData } = await import(
9595
srcImport(srcDir, 'domain/queries.js')
9696
);
97+
// v3.9.5+ parses WASM in a worker_thread that keeps the event loop alive until
98+
// disposed. Older releases don't export disposeParsers — fall back to a no-op.
99+
let disposeParsers = async () => {};
100+
try {
101+
const parser = await import(srcImport(srcDir, 'domain/parser.js'));
102+
if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers;
103+
} catch { /* older release — no worker pool to dispose */ }
97104

98105
// Redirect console.log to stderr so only JSON goes to stdout
99106
const origLog = console.log;
@@ -259,4 +266,6 @@ console.log = origLog;
259266
const workerResult = { targets, fnDeps, fnImpact, diffImpact };
260267
console.log(JSON.stringify(workerResult));
261268

269+
await disposeParsers();
262270
cleanup();
271+
process.exit(0);

scripts/resolution-benchmark.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,14 @@ console.log = (...args) => console.error(...args);
219219

220220
const { srcDir, cleanup } = await resolveBenchmarkSource();
221221

222+
// v3.9.5+ parses WASM in a worker_thread that keeps the event loop alive until
223+
// disposed. Older releases don't export disposeParsers — fall back to a no-op.
224+
let disposeParsers = async () => {};
225+
try {
226+
const parser = await import(srcImport(srcDir, 'domain/parser.js'));
227+
if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers;
228+
} catch { /* older release — no worker pool to dispose */ }
229+
222230
try {
223231
const { buildGraph } = await import(srcImport(srcDir, 'domain/graph/builder.js'));
224232
const { openReadonlyOrFail } = await import(srcImport(srcDir, 'db/index.js'));
@@ -296,5 +304,7 @@ try {
296304
console.log(JSON.stringify(results, null, 2));
297305
} finally {
298306
console.log = origLog;
307+
await disposeParsers();
299308
cleanup();
300309
}
310+
process.exit(0);

src/domain/search/models.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ export async function embed(
253253
}
254254

255255
if (texts.length > batchSize) {
256-
process.stdout.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
256+
process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
257257
}
258258
}
259259

0 commit comments

Comments
 (0)