Skip to content

Commit b4b84d2

Browse files
authored
fix(wasm): resolve incremental edge loss, unnecessary reparses, and V8 crash (#938)
* fix(wasm): resolve incremental edge loss, unnecessary reparses, and V8 crash Three WASM-engine bugs fixed: 1. Edge loss (#932): purgeAndAddReverseDeps deleted ALL outgoing edges from reverse-dep files, then relied on reparsing to rebuild them — but reparsed data was incomplete. Now saves edge topology before purge and reconnects to new node IDs after insert, preserving all edges without reparsing. 2. Unnecessary reparses (#933): reverse-dep files were added to parseChanges and fully reparsed even though only their edge targets changed. The new save-and-reconnect approach eliminates all reverse-dep reparsing — only actually-changed files are parsed. 3. V8 crash (#931): WASM tree objects stored in symbols._tree were never cleaned up on error paths, causing V8 to crash during GC of orphaned WASM objects (~40% of builds). Added error-path tree cleanup in the pipeline catch block. Also added a depth guard (MAX_WALK_DEPTH=500) in walkWithVisitors to prevent stack overflow during deep AST recursion. Closes #931, closes #932, closes #933 Impact: 8 functions changed, 28 affected * fix: filter stale source IDs, add depth-limit debug log, free WASM trees on success path (#938) Impact: 4 functions changed, 26 affected * fix: use proximity-based ORDER BY for deterministic edge reconnection (#938) Add ORDER BY ABS(line - ?) to the reconnectReverseDepEdges node lookup so duplicate (name, kind, file) nodes (e.g. TypeScript overloads) pick the closest match by line number instead of relying on non-deterministic LIMIT 1 behavior. Impact: 1 functions changed, 3 affected
1 parent dfa7424 commit b4b84d2

6 files changed

Lines changed: 206 additions & 40 deletions

File tree

src/ast-analysis/visitor.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* so individual visitors don't need to track traversal state themselves.
1212
*/
1313

14+
import { debug } from '../infrastructure/logger.js';
1415
import type {
1516
ScopeEntry,
1617
TreeSitterNode,
@@ -132,6 +133,16 @@ function dispatchExitFunction(
132133
}
133134
}
134135

136+
/**
137+
* Maximum AST depth before walk() bails out. WASM-backed tree-sitter parsers
138+
* have a smaller stack than native V8 (~64-256 KB). Deep recursion in
139+
* generated code, deeply nested object literals, or pathologically large
140+
* switch statements can exhaust that stack, causing a V8 fatal error /
141+
* segfault (#931). 500 levels is well beyond any realistic hand-written
142+
* code while staying safely inside the WASM stack budget.
143+
*/
144+
const MAX_WALK_DEPTH = 500;
145+
135146
/** Collect finish() results from all visitors into a name-keyed map. */
136147
function collectResults(visitors: Visitor[]): WalkResults {
137148
const results: WalkResults = {};
@@ -181,6 +192,10 @@ export function walkWithVisitors(
181192

182193
function walk(node: TreeSitterNode | null, depth: number): void {
183194
if (!node) return;
195+
if (depth > MAX_WALK_DEPTH) {
196+
debug(`walkWithVisitors: AST depth limit (${MAX_WALK_DEPTH}) hit — subtree truncated`);
197+
return;
198+
}
184199

185200
const type = node.type;
186201
const isFuncBoundary = allFuncTypes.has(type);

src/domain/graph/builder/context.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,23 @@ export class PipelineContext {
6363
nodesByName!: Map<string, NodeRow[]>;
6464
nodesByNameAndFile!: Map<string, NodeRow[]>;
6565

66+
// ── Reverse-dep edge reconnection (set by detectChanges) ───────────
67+
/**
68+
* Edges from reverse-dep files to changed files, saved before purge so they
69+
* can be reconnected to new node IDs after insertNodes (#932, #933).
70+
* Eliminates the need to reparse reverse-dep files entirely.
71+
*/
72+
savedReverseDepEdges: Array<{
73+
sourceId: number;
74+
tgtName: string;
75+
tgtKind: string;
76+
tgtFile: string;
77+
tgtLine: number;
78+
edgeKind: string;
79+
confidence: number;
80+
dynamic: number;
81+
}> = [];
82+
6683
// ── Misc state ─────────────────────────────────────────────────────
6784
hasEmbeddings: boolean = false;
6885
lineCountMap!: Map<string, number>;

src/domain/graph/builder/pipeline.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,23 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {
781781

782782
await runAnalyses(ctx);
783783

784+
// Release WASM trees deterministically on the success path — same cleanup
785+
// as the error-path catch block. Without this, trees stay allocated until
786+
// GC collects ctx, holding WASM memory for the rest of the build (#931).
787+
if (ctx.allSymbols?.size > 0) {
788+
for (const [, symbols] of ctx.allSymbols) {
789+
const tree = symbols._tree as { delete?: () => void } | undefined;
790+
if (tree && typeof tree.delete === 'function') {
791+
try {
792+
tree.delete();
793+
} catch {
794+
/* ignore cleanup errors */
795+
}
796+
}
797+
symbols._tree = undefined;
798+
}
799+
}
800+
784801
// Flush Rust WAL writes (AST, complexity, CFG, dataflow) so the JS
785802
// connection and any post-build readers can see them. One TRUNCATE
786803
// here replaces the N per-feature resumeJsDb checkpoints (#checkpoint-opt).
@@ -842,8 +859,25 @@ export async function buildGraph(
842859

843860
await runPipelineStages(ctx);
844861
} catch (err) {
845-
if (!ctx.earlyExit && ctx.db) {
846-
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
862+
if (!ctx.earlyExit) {
863+
// Release WASM trees before closing DB to prevent V8 crash during
864+
// GC cleanup of orphaned WASM objects (#931).
865+
if (ctx.allSymbols?.size > 0) {
866+
for (const [, symbols] of ctx.allSymbols) {
867+
const tree = symbols._tree as { delete?: () => void } | undefined;
868+
if (tree && typeof tree.delete === 'function') {
869+
try {
870+
tree.delete();
871+
} catch {
872+
/* ignore cleanup errors */
873+
}
874+
}
875+
symbols._tree = undefined;
876+
}
877+
}
878+
if (ctx.db) {
879+
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
880+
}
847881
}
848882
throw err;
849883
}

src/domain/graph/builder/stages/build-edges.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,69 @@ function buildClassHierarchyEdges(
699699
}
700700
}
701701

702+
// ── Reverse-dep edge reconnection (#932, #933) ─────────────────────────
703+
704+
/**
705+
* Reconnect edges that were saved before changed-file purge.
706+
*
707+
* Each saved edge records: sourceId (still valid — reverse-dep nodes were not
708+
* purged) and target attributes (name, kind, file, line). The target node was
709+
* deleted and re-inserted with a new ID by insertNodes. We look up the new ID
710+
* by (name, kind, file) and re-create the edge.
711+
*/
712+
function reconnectReverseDepEdges(ctx: PipelineContext): void {
713+
const { db } = ctx;
714+
const findNodeStmt = db.prepare(
715+
'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? ORDER BY ABS(line - ?) LIMIT 1',
716+
);
717+
const reconnectedRows: EdgeRowTuple[] = [];
718+
let dropped = 0;
719+
720+
for (const saved of ctx.savedReverseDepEdges) {
721+
const newTarget = findNodeStmt.get(
722+
saved.tgtName,
723+
saved.tgtKind,
724+
saved.tgtFile,
725+
saved.tgtLine,
726+
) as { id: number } | undefined;
727+
if (newTarget) {
728+
reconnectedRows.push([
729+
saved.sourceId,
730+
newTarget.id,
731+
saved.edgeKind,
732+
saved.confidence,
733+
saved.dynamic,
734+
]);
735+
} else {
736+
// Target was removed or renamed in the changed file — edge is stale
737+
dropped++;
738+
}
739+
}
740+
741+
if (reconnectedRows.length > 0) {
742+
if (ctx.nativeDb?.bulkInsertEdges) {
743+
const nativeEdges = reconnectedRows.map((r) => ({
744+
sourceId: r[0],
745+
targetId: r[1],
746+
kind: r[2],
747+
confidence: r[3],
748+
dynamic: r[4],
749+
}));
750+
const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
751+
if (!ok) {
752+
batchInsertEdges(db, reconnectedRows);
753+
}
754+
} else {
755+
batchInsertEdges(db, reconnectedRows);
756+
}
757+
}
758+
759+
debug(
760+
`Reconnected ${reconnectedRows.length} reverse-dep edges` +
761+
(dropped > 0 ? ` (${dropped} dropped — targets removed/renamed)` : ''),
762+
);
763+
}
764+
702765
// ── Main entry point ────────────────────────────────────────────────────
703766

704767
/**
@@ -860,5 +923,14 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
860923
}
861924
}
862925

926+
// Phase 3: Reconnect saved reverse-dep edges (#932, #933).
927+
// When the WASM/JS path purged changed files, edges FROM reverse-dep files TO
928+
// those files were deleted (target-side). The reverse-dep files were NOT
929+
// reparsed — instead we saved the edge topology before purge and now reconnect
930+
// each edge to the new node IDs created by insertNodes.
931+
if (ctx.savedReverseDepEdges.length > 0) {
932+
reconnectReverseDepEdges(ctx);
933+
}
934+
863935
ctx.timing.edgesMs = performance.now() - t0;
864936
}

src/domain/graph/builder/stages/detect-changes.ts

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -374,24 +374,73 @@ function purgeAndAddReverseDeps(
374374
// Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670)
375375
if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) {
376376
ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined);
377+
// Native path still reparses reverse-deps (works correctly with native edge builder)
378+
for (const relPath of reverseDeps) {
379+
const absPath = path.join(rootDir, relPath);
380+
ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
381+
}
377382
} else {
383+
// WASM/JS path: save edges from reverse-dep files → changed files BEFORE
384+
// purge, then reconnect them to new node IDs after insertNodes (#932, #933).
385+
//
386+
// purgeFilesFromGraph deletes edges in BOTH directions for changed files,
387+
// which already removes the reverse-dep → changed-file edges. The old
388+
// approach then over-deleted ALL outgoing edges from reverse-dep files and
389+
// reparsed them to rebuild everything — expensive (87 extra parses) and
390+
// lossy (442 missing edges due to imperfect resolution on rebuild).
391+
//
392+
// New approach: save the edge topology, let purge handle deletion, then
393+
// reconnect using new node IDs. No reparse needed.
394+
if (hasReverseDeps && hasPurge) {
395+
const changePathSet = new Set(changePaths);
396+
const saveEdgesStmt = db.prepare(`
397+
SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind,
398+
n_tgt.file AS tgt_file, n_tgt.line AS tgt_line,
399+
e.kind AS edge_kind, e.confidence, e.dynamic,
400+
n_src.file AS src_file
401+
FROM edges e
402+
JOIN nodes n_src ON e.source_id = n_src.id
403+
JOIN nodes n_tgt ON e.target_id = n_tgt.id
404+
WHERE n_tgt.file = ? AND n_src.file != n_tgt.file
405+
`);
406+
for (const changedPath of changePaths) {
407+
for (const row of saveEdgesStmt.all(changedPath) as Array<{
408+
source_id: number;
409+
tgt_name: string;
410+
tgt_kind: string;
411+
tgt_file: string;
412+
tgt_line: number;
413+
edge_kind: string;
414+
confidence: number;
415+
dynamic: number;
416+
src_file: string;
417+
}>) {
418+
// Skip edges whose source is also being purged — buildEdges will
419+
// re-create them with correct new IDs.
420+
if (changePathSet.has(row.src_file)) continue;
421+
ctx.savedReverseDepEdges.push({
422+
sourceId: row.source_id,
423+
tgtName: row.tgt_name,
424+
tgtKind: row.tgt_kind,
425+
tgtFile: row.tgt_file,
426+
tgtLine: row.tgt_line,
427+
edgeKind: row.edge_kind,
428+
confidence: row.confidence,
429+
dynamic: row.dynamic,
430+
});
431+
}
432+
}
433+
debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`);
434+
}
435+
378436
if (hasPurge) {
379437
purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false });
380438
}
381-
if (hasReverseDeps) {
382-
const deleteOutgoingEdgesForFile = db.prepare(
383-
'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
384-
);
385-
for (const relPath of reverseDepList) {
386-
deleteOutgoingEdgesForFile.run(relPath);
387-
}
388-
}
439+
// No outgoing-edge deletion for reverse-deps — purge already removed
440+
// edges targeting the changed files, and other outgoing edges are valid.
441+
// No reverse-deps added to parseChanges — no reparse needed.
389442
}
390443
}
391-
for (const relPath of reverseDeps) {
392-
const absPath = path.join(rootDir, relPath);
393-
ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
394-
}
395444
}
396445

397446
function detectHasEmbeddings(db: BetterSqlite3Database, nativeDb?: NativeDatabase): boolean {

src/domain/graph/builder/stages/run-analyses.ts

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,18 @@
22
* Stage: runAnalyses
33
*
44
* Dispatches to the unified AST analysis engine (AST nodes, complexity, CFG, dataflow).
5-
* Filters out reverse-dep files for incremental builds.
5+
* Reverse-dep files are no longer in allSymbols (they are not reparsed since #932/#933),
6+
* so no filtering is needed here.
67
*/
7-
import { debug, warn } from '../../../../infrastructure/logger.js';
8-
import type { ExtractorOutput } from '../../../../types.js';
8+
import { warn } from '../../../../infrastructure/logger.js';
99
import type { PipelineContext } from '../context.js';
1010

1111
export async function runAnalyses(ctx: PipelineContext): Promise<void> {
12-
const { db, allSymbols, rootDir, opts, engineOpts, isFullBuild, filesToParse } = ctx;
13-
14-
// For incremental builds, exclude reverse-dep-only files
15-
let astComplexitySymbols: Map<string, ExtractorOutput> = allSymbols;
16-
if (!isFullBuild) {
17-
const reverseDepFiles = new Set(
18-
filesToParse
19-
.filter((item) => (item as { _reverseDepOnly?: boolean })._reverseDepOnly)
20-
.map((item) => item.relPath),
21-
);
22-
if (reverseDepFiles.size > 0) {
23-
astComplexitySymbols = new Map();
24-
for (const [relPath, symbols] of allSymbols) {
25-
if (!reverseDepFiles.has(relPath)) {
26-
astComplexitySymbols.set(relPath, symbols);
27-
}
28-
}
29-
debug(
30-
`AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`,
31-
);
32-
}
33-
}
12+
const { db, allSymbols, rootDir, opts, engineOpts } = ctx;
3413

3514
const { runAnalyses: runAnalysesFn } = await import('../../../../ast-analysis/engine.js');
3615
try {
37-
const analysisTiming = await runAnalysesFn(db, astComplexitySymbols, rootDir, opts, engineOpts);
16+
const analysisTiming = await runAnalysesFn(db, allSymbols, rootDir, opts, engineOpts);
3817
ctx.timing.astMs = analysisTiming.astMs;
3918
ctx.timing.complexityMs = analysisTiming.complexityMs;
4019
ctx.timing.cfgMs = analysisTiming.cfgMs;

0 commit comments

Comments
 (0)