diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e43fabf8..a1bf7aa9 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -1538,26 +1538,16 @@ export async function tryNativeOrchestrator( // stale native binaries). WASM handles those — backfill via WASM so both // engines process the same file set (#967). // - // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for - // both gating and the backfill itself. On dirty incrementals/full builds - // the orchestrator signals trigger backfill, so the walk happens once - // (instead of redundantly inside backfill). On quiet incrementals we - // still pay the walk so we can detect brand-new files in dropped-language - // extensions — a gap that the orchestrator's `detect_removed_files` - // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap - // because the expensive part (WASM re-parse of the missing set) is - // gated below. - const removedCount = result.removedCount ?? 0; - const changedCount = result.changedCount ?? 0; + // Detect the gap once (fs walk + 2 DB queries) and use it for both gating + // and the backfill itself. On quiet incrementals we still pay the walk so + // we can detect brand-new files in dropped-language extensions — a gap that + // the orchestrator's `detect_removed_files` filter (#1070) leaves open + // (#1083, #1091). The pre-check is cheap because the expensive part (WASM + // re-parse of the missing set) is gated below. 
const gapDetectStart = performance.now(); const gap = detectDroppedLanguageGap(ctx); - if ( - result.isFullBuild || - removedCount > 0 || - changedCount > 0 || - gap.missingAbs.length > 0 || - gap.staleRel.length > 0 - ) { + const backfillHappened = gap.missingAbs.length > 0 || gap.staleRel.length > 0; + if (backfillHappened) { await backfillNativeDroppedFiles(ctx, gap); } const gapDetectMs = performance.now() - gapDetectStart; @@ -1638,19 +1628,27 @@ export async function tryNativeOrchestrator( // Re-count nodes/edges now that all edge-writing post-passes have run: the // Rust orchestrator captured its counts before the JS post-passes added // edges, so both its summary and build_meta under-report (#1452). + // + // Fast path: skip the COUNT(*) scan when no post-pass wrote any edges. + // COUNT(*) on large tables (50K+ edges) is non-trivial, especially via the + // NativeDbProxy napi-rs round-trip. When all post-passes were no-ops, the + // Rust orchestrator's counts are still accurate — no re-count needed. let finalNodeCount = result.nodeCount ?? 0; let finalEdgeCount = result.edgeCount ?? 
0; - try { - const counts = (ctx.db as unknown as BetterSqlite3Database) - .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') - .get() as { n: number; e: number }; - if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { - finalNodeCount = counts.n; - finalEdgeCount = counts.e; - setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + const postPassWroteData = backfillHappened || chaEdgeCount > 0 || thisDispatchTargetIds.size > 0; + if (postPassWroteData) { + try { + const counts = (ctx.db as unknown as BetterSqlite3Database) + .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') + .get() as { n: number; e: number }; + if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { + finalNodeCount = counts.n; + finalEdgeCount = counts.e; + setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + } + } catch (err) { + debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } - } catch (err) { - debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } info( `Native build orchestrator completed: ${finalNodeCount} nodes, ${finalEdgeCount} edges, ${result.fileCount ?? 0} files`, diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 883687cf..119d3108 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1181,12 +1181,25 @@ async function parseFilesWasm( /** * Files at or below this count use the inline parse path (no worker spawn). * - * Sized for typical engine-parity drops: a handful of fixture files in one - * or two languages (the recurring HCL case is 4 files). Above this, the - * worker-pool's IPC + crash-isolation cost (#965) is amortized over enough - * parse work to be worth paying; below it, the ~1–2s cold-start dominates. 
+ * The worker pool exists for crash safety (#965): exotic (non-required) WASM + * grammars can trigger uncatchable V8 fatal errors that would kill the main + * process. Running them in a worker means only the worker dies; the pool + * detects the exit, skips the file, respawns, and continues. + * + * JS/TS/TSX are required-tier grammars — they have never triggered the V8 + * fatal crash class and are safe to run inline. The primary hot caller + * (this/super dispatch post-pass) exclusively handles JS/TS/TSX files and + * measured ~55–64ms/file through the pool vs ~8–10ms/file inline (#1435); + * IPC overhead scales linearly with file count and is not amortised. + * + * The threshold is set high enough to keep typical this-dispatch batches + * (≤ 18 files on the codegraph corpus) on the inline path, while still + * routing truly large exotic-language drops (rare; typical HCL case is 4 + * files) through the pool for crash isolation. Exotic-language drops are + * almost always well under this limit anyway, so they benefit from the + * inline fast path too without a meaningful increase in crash risk. */ -const INLINE_BACKFILL_THRESHOLD = 16; +const INLINE_BACKFILL_THRESHOLD = 32; /** * Inline WASM parse (no worker) for small file batches. @@ -1246,8 +1259,7 @@ async function parseFilesWasmInline( /** * Backfill helper: small batches use the inline (main-thread) path; larger * batches keep the worker-pool isolation against tree-sitter WASM crashes - * (#965). Threshold matches typical engine-parity drop sizes (a few fixture - * files in one or two languages). + * (#965). See INLINE_BACKFILL_THRESHOLD for threshold rationale. * * `opts.symbolsOnly` skips the AST/complexity/CFG/dataflow visitors in the * worker (and their result serialization across the thread boundary) for