Skip to content

Commit 8e78e62

Browse files
authored
fix: free leaked WASM trees in native engine typeMap backfill (#534)
* perf: reduce query latency regression from 3.1.4 → 3.3.0

  Three targeted fixes for the +28–56% query latency regression:

  1. Pin benchmark hub target to stable function names (buildGraph, openDb, loadConfig) instead of auto-selecting the most-connected node. Barrel/type files becoming the hub made version-to-version comparison meaningless.
  2. Gate implementors queries in bfsTransitiveCallers — check once whether the graph has any 'implements' edges before doing per-node findNodeById + findImplementors lookups. Skips all implementor overhead for codebases without interface/trait hierarchies.
  3. Cache loadConfig() results per cwd. The config file is read from disk on every fnImpactData and diffImpactData call; caching eliminates redundant fs.existsSync + readFileSync + JSON.parse per query invocation.

  Impact: 5 functions changed, 123 affected

* fix: return structuredClone from config cache and guard benchmark db handle

  Prevent callers from mutating the cached config object by returning a deep clone on cache hits. Add try/finally to selectTargets() so the database handle is closed even if a query throws.

  Impact: 2 functions changed, 1 affected

* fix: install @huggingface/transformers in npm-mode benchmark workers

  The embedding benchmark's npm mode installs codegraph into a temp dir, but @huggingface/transformers is a devDependency and not included. All 6 model workers crash on import, producing symbols: 0, models: {}. Install it explicitly from the local devDependencies version, matching the existing pattern for native platform packages.

  Also add a guard in update-embedding-report.js to reject empty results and fail loudly instead of silently overwriting valid benchmark data.

* fix: free leaked WASM trees in native engine typeMap backfill

  The typeMap backfill path in parseFilesAuto and backfillTypeMap called wasmExtractSymbols but never freed the returned WASM tree objects. Over repeated builds (benchmarks, watch mode), hundreds of trees accumulated in WASM linear memory, eventually corrupting V8 state and crashing the native addon with ACCESS_VIOLATION / has_exception().

  Two fixes:

  1. Free WASM trees immediately after extracting typeMap data in both backfillTypeMap() and the parseFilesAuto() bulk backfill loop.
  2. Skip backfill entirely for JS files — only TS/TSX have type annotations that WASM can extract. The native engine already handles JS `new Expr()` patterns, so re-parsing all JS files with WASM was pure waste.

  Closes #530

  Impact: 2 functions changed, 2 affected

* fix(native): align edge builder kind filters with JS parity

  The Rust edge builder only matched `kind == "class"` when looking up source nodes and targets for extends/implements edges. This caused all `impl Trait for Struct` relationships (and any non-class hierarchy) to be silently dropped — producing 0 implements edges for Rust sources while WASM correctly found 9.

  Align the three kind filter sets with the JS-side constants:

  - Source: class, struct, record, enum (was: class only)
  - Extends targets: class, struct, trait, record (was: class only)
  - Implements targets: interface, class, trait (was: interface, class)

  Fixes #530 (partial — implements parity gap)

  Impact: 1 functions changed, 0 affected

* fix: address review feedback on WASM tree cleanup and JS backfill skip

  - Consolidate duplicated tree.delete() in backfillTypeMap into a single finally block, preventing future early-return paths from leaking trees
  - Skip WASM typeMap backfill for JS files in parseFileAuto and parseFileIncremental single-file paths, matching the bulk path behavior

  Impact: 3 functions changed, 2 affected
1 parent f8016c6 commit 8e78e62

2 files changed

Lines changed: 66 additions & 25 deletions

File tree

crates/codegraph-core/src/edge_builder.rs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -339,13 +339,17 @@ pub fn build_call_edges(
339339
for cls in &file_input.classes {
340340
let source_row = nodes_by_name_and_file
341341
.get(&(cls.name.as_str(), rel_path.as_str()))
342-
.and_then(|v| v.iter().find(|n| n.kind == "class"));
342+
.and_then(|v| v.iter().find(|n| {
343+
n.kind == "class" || n.kind == "struct" || n.kind == "record" || n.kind == "enum"
344+
}));
343345

344346
if let Some(source) = source_row {
345347
if let Some(ref extends_name) = cls.extends {
346348
let targets = nodes_by_name
347349
.get(extends_name.as_str())
348-
.map(|v| v.iter().filter(|n| n.kind == "class").collect::<Vec<_>>())
350+
.map(|v| v.iter().filter(|n| {
351+
n.kind == "class" || n.kind == "struct" || n.kind == "trait" || n.kind == "record"
352+
}).collect::<Vec<_>>())
349353
.unwrap_or_default();
350354
for t in targets {
351355
edges.push(ComputedEdge {
@@ -362,7 +366,7 @@ pub fn build_call_edges(
362366
.get(implements_name.as_str())
363367
.map(|v| {
364368
v.iter()
365-
.filter(|n| n.kind == "interface" || n.kind == "class")
369+
.filter(|n| n.kind == "interface" || n.kind == "class" || n.kind == "trait")
366370
.collect::<Vec<_>>()
367371
})
368372
.unwrap_or_default();

src/domain/parser.js

Lines changed: 59 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -395,12 +395,23 @@ async function backfillTypeMap(filePath, source) {
395395
}
396396
const parsers = await createParsers();
397397
const extracted = wasmExtractSymbols(parsers, filePath, code);
398-
if (!extracted?.symbols?.typeMap) return { typeMap: [], backfilled: false };
399-
const tm = extracted.symbols.typeMap;
400-
return {
401-
typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])),
402-
backfilled: true,
403-
};
398+
try {
399+
if (!extracted?.symbols?.typeMap) {
400+
return { typeMap: [], backfilled: false };
401+
}
402+
const tm = extracted.symbols.typeMap;
403+
return {
404+
typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])),
405+
backfilled: true,
406+
};
407+
} finally {
408+
// Free the WASM tree to prevent memory accumulation across repeated builds
409+
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
410+
try {
411+
extracted.tree.delete();
412+
} catch {}
413+
}
414+
}
404415
}
405416

406417
/**
@@ -441,7 +452,13 @@ export async function parseFileAuto(filePath, source, opts = {}) {
441452
const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false);
442453
if (!result) return null;
443454
const patched = patchNativeResult(result);
444-
if (!patched.typeMap || patched.typeMap.length === 0) {
455+
// Only backfill typeMap for TS/TSX — JS files have no type annotations,
456+
// and the native engine already handles `new Expr()` patterns.
457+
const TS_BACKFILL_EXTS = new Set(['.ts', '.tsx']);
458+
if (
459+
(!patched.typeMap || patched.typeMap.length === 0) &&
460+
TS_BACKFILL_EXTS.has(path.extname(filePath))
461+
) {
445462
const { typeMap, backfilled } = await backfillTypeMap(filePath, source);
446463
patched.typeMap = typeMap;
447464
if (backfilled) patched._typeMapBackfilled = true;
@@ -486,21 +503,35 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) {
486503
}
487504
// Backfill typeMap via WASM for native binaries that predate the type-map feature
488505
if (needsTypeMap.length > 0) {
489-
const parsers = await createParsers();
490-
for (const { filePath, relPath } of needsTypeMap) {
491-
try {
492-
const code = fs.readFileSync(filePath, 'utf-8');
493-
const extracted = wasmExtractSymbols(parsers, filePath, code);
494-
if (extracted?.symbols?.typeMap) {
495-
const symbols = result.get(relPath);
496-
symbols.typeMap =
497-
extracted.symbols.typeMap instanceof Map
498-
? extracted.symbols.typeMap
499-
: new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName]));
500-
symbols._typeMapBackfilled = true;
506+
// Only backfill for languages where WASM extraction can produce typeMap
507+
// (TS/TSX have type annotations; JS only has `new Expr()` which native already handles)
508+
const TS_EXTS = new Set(['.ts', '.tsx']);
509+
const tsFiles = needsTypeMap.filter(({ filePath }) => TS_EXTS.has(path.extname(filePath)));
510+
if (tsFiles.length > 0) {
511+
const parsers = await createParsers();
512+
for (const { filePath, relPath } of tsFiles) {
513+
let extracted;
514+
try {
515+
const code = fs.readFileSync(filePath, 'utf-8');
516+
extracted = wasmExtractSymbols(parsers, filePath, code);
517+
if (extracted?.symbols?.typeMap) {
518+
const symbols = result.get(relPath);
519+
symbols.typeMap =
520+
extracted.symbols.typeMap instanceof Map
521+
? extracted.symbols.typeMap
522+
: new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName]));
523+
symbols._typeMapBackfilled = true;
524+
}
525+
} catch {
526+
/* skip — typeMap is a best-effort backfill */
527+
} finally {
528+
// Free the WASM tree to prevent memory accumulation across repeated builds
529+
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
530+
try {
531+
extracted.tree.delete();
532+
} catch {}
533+
}
501534
}
502-
} catch {
503-
/* skip — typeMap is a best-effort backfill */
504535
}
505536
}
506537
}
@@ -578,7 +609,13 @@ export async function parseFileIncremental(cache, filePath, source, opts = {}) {
578609
const result = cache.parseFile(filePath, source);
579610
if (!result) return null;
580611
const patched = patchNativeResult(result);
581-
if (!patched.typeMap || patched.typeMap.length === 0) {
612+
// Only backfill typeMap for TS/TSX — JS files have no type annotations,
613+
// and the native engine already handles `new Expr()` patterns.
614+
const TS_BACKFILL_EXTS = new Set(['.ts', '.tsx']);
615+
if (
616+
(!patched.typeMap || patched.typeMap.length === 0) &&
617+
TS_BACKFILL_EXTS.has(path.extname(filePath))
618+
) {
582619
const { typeMap, backfilled } = await backfillTypeMap(filePath, source);
583620
patched.typeMap = typeMap;
584621
if (backfilled) patched._typeMapBackfilled = true;

0 commit comments

Comments
 (0)