Skip to content

Commit 3db5d8c

Browse files
committed
fix(perf): scope runPostNativeCha to changed files on incremental builds
On incremental builds, runPostNativeCha previously scanned all call→qualified-method edges in the DB (~12ms flat, O(graph size)), even for 1-file changes where no hierarchy or RTA evidence changed. Add two cheap indexed gate queries. Gate A checks whether any changed file contains a class/interface/trait/struct/record node (hierarchy may have new implementors reachable from unchanged call sites). Gate B checks whether any changed file contains the source of a `calls` edge targeting a `class` node (RTA set may have grown, enabling previously filtered expansions in unchanged callers). If neither gate fires, restrict the candidate query to src.file IN changedFiles — safe because the hierarchy and instantiated set are unchanged for all other files. Full builds (isFullBuild=true) and cases where either gate fires retain the existing full-scan behaviour. Mirrors the changed-files scoping pattern of runPostNativeThisDispatch. Closes #1441
1 parent d07b358 commit 3db5d8c

1 file changed

Lines changed: 119 additions & 11 deletions

File tree

src/domain/graph/builder/stages/native-orchestrator.ts

Lines changed: 119 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -401,12 +401,26 @@ async function runPostNativeAnalysis(
401401
* Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`,
402402
* which WASM-re-parses JS/TS files to obtain raw call site receiver info.
403403
*
404+
* `changedFiles` controls candidate scoping on incremental builds:
405+
* - null → full build; scan all call→method edges (existing behaviour).
406+
* - array → incremental; two cheap gate queries decide scope:
407+
* Gate A: any class/interface/trait/struct/record nodes in changed files?
408+
* If yes, a new implementor may have appeared — full scan required.
409+
* Gate B: any `calls` edges from changed-file sources targeting `class`
410+
* nodes? If yes, the RTA set may have grown, enabling previously
411+
* filtered expansions in unchanged caller files — full scan required.
412+
* If neither gate fires: scope `callToMethods` to `src.file IN changedFiles`
413+
* (safe because no hierarchy or RTA evidence changed).
414+
*
404415
* Returns the count of newly inserted CHA edges plus the set of files containing
405416
* the new edges' endpoints, so the caller can scope role re-classification to the
406417
* nodes whose fan-in/out actually changed. A zero count means no edges were added
407418
* and role re-classification is unnecessary.
408419
*/
409-
function runPostNativeCha(db: BetterSqlite3Database): {
420+
function runPostNativeCha(
421+
db: BetterSqlite3Database,
422+
changedFiles: string[] | null,
423+
): {
410424
newEdgeCount: number;
411425
affectedFiles: Set<string>;
412426
} {
@@ -474,19 +488,111 @@ function runPostNativeCha(db: BetterSqlite3Database): {
474488
debug('runPostNativeCha: no constructor-call evidence found — proceeding without RTA filter');
475489
}
476490

491+
// ── Incremental candidate scoping ──────────────────────────────────────────
492+
// On incremental builds, two gate queries decide whether to restrict the
493+
// candidate scan to changed-file call sites or run the full graph scan.
494+
//
495+
// Gate A: does any changed file contain a class/interface/trait/struct/record node?
496+
// Such a node may carry a new `extends`/`implements` edge, meaning a previously-untracked implementor
497+
// is now in the hierarchy — unchanged call sites in OTHER files may gain new
498+
// valid expansions, so the full scan is required.
499+
//
500+
// Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)?
501+
// A new `calls` edge to a class-kind target means the instantiated set grew —
502+
// previously RTA-filtered expansions in unchanged caller files become
503+
// admissible, so the full scan is required.
504+
//
505+
// If neither gate fires, the hierarchy and RTA set are unchanged for all files
506+
// outside changedFiles, so restricting to changed-file sources is safe.
507+
let scopeToChangedFiles = false; // true → add WHERE src.file IN changedFiles
508+
if (changedFiles !== null && changedFiles.length > 0) {
509+
// Gate A: class/interface/trait/struct/record nodes in changed files?
510+
const CHUNK_SIZE = 500;
511+
let gateAFired = false;
512+
for (let i = 0; i < changedFiles.length && !gateAFired; i += CHUNK_SIZE) {
513+
const chunk = changedFiles.slice(i, i + CHUNK_SIZE);
514+
const ph = chunk.map(() => '?').join(',');
515+
const row = db
516+
.prepare(
517+
`SELECT 1 FROM nodes
518+
WHERE file IN (${ph})
519+
AND kind IN ('class', 'interface', 'trait', 'struct', 'record')
520+
LIMIT 1`,
521+
)
522+
.get(...chunk);
523+
if (row) gateAFired = true;
524+
}
525+
526+
// Gate B: calls from changed-file sources to class-kind targets?
527+
let gateBFired = false;
528+
if (!gateAFired) {
529+
for (let i = 0; i < changedFiles.length && !gateBFired; i += CHUNK_SIZE) {
530+
const chunk = changedFiles.slice(i, i + CHUNK_SIZE);
531+
const ph = chunk.map(() => '?').join(',');
532+
const row = db
533+
.prepare(
534+
`SELECT 1 FROM edges e
535+
JOIN nodes src ON e.source_id = src.id
536+
JOIN nodes tgt ON e.target_id = tgt.id
537+
WHERE e.kind = 'calls' AND tgt.kind = 'class'
538+
AND src.file IN (${ph})
539+
LIMIT 1`,
540+
)
541+
.get(...chunk);
542+
if (row) gateBFired = true;
543+
}
544+
}
545+
546+
if (!gateAFired && !gateBFired) {
547+
scopeToChangedFiles = true;
548+
debug(
549+
`runPostNativeCha: neither gate fired — scoping candidate scan to ${changedFiles.length} changed file(s)`,
550+
);
551+
} else {
552+
debug(
553+
`runPostNativeCha: ${gateAFired ? 'Gate A (hierarchy)' : 'Gate B (RTA)'} fired — running full scan`,
554+
);
555+
}
556+
}
557+
477558
// Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork').
478559
// Include the caller node's file so confidence can be computed file-pair-aware,
479560
// matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula.
480-
const callToMethods = db
481-
.prepare(`
482-
SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file
483-
FROM edges e
484-
JOIN nodes tgt ON e.target_id = tgt.id
485-
JOIN nodes src ON e.source_id = src.id
486-
WHERE e.kind = 'calls' AND tgt.kind = 'method'
487-
AND INSTR(tgt.name, '.') > 0
488-
`)
489-
.all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>;
561+
// When scopeToChangedFiles is true, restrict to call sites in the changed files
562+
// (safe because no hierarchy or RTA evidence changed outside those files).
563+
let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>;
564+
if (scopeToChangedFiles && changedFiles && changedFiles.length > 0) {
565+
const CHUNK_SIZE = 500;
566+
const rows: Array<{ source_id: number; method_name: string; caller_file: string | null }> = [];
567+
for (let i = 0; i < changedFiles.length; i += CHUNK_SIZE) {
568+
const chunk = changedFiles.slice(i, i + CHUNK_SIZE);
569+
const ph = chunk.map(() => '?').join(',');
570+
const chunkRows = db
571+
.prepare(
572+
`SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file
573+
FROM edges e
574+
JOIN nodes tgt ON e.target_id = tgt.id
575+
JOIN nodes src ON e.source_id = src.id
576+
WHERE e.kind = 'calls' AND tgt.kind = 'method'
577+
AND INSTR(tgt.name, '.') > 0
578+
AND src.file IN (${ph})`,
579+
)
580+
.all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>;
581+
rows.push(...chunkRows);
582+
}
583+
callToMethods = rows;
584+
} else {
585+
callToMethods = db
586+
.prepare(`
587+
SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file
588+
FROM edges e
589+
JOIN nodes tgt ON e.target_id = tgt.id
590+
JOIN nodes src ON e.source_id = src.id
591+
WHERE e.kind = 'calls' AND tgt.kind = 'method'
592+
AND INSTR(tgt.name, '.') > 0
593+
`)
594+
.all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>;
595+
}
490596

491597
// Seed seen-pairs only from the source_ids we'll be expanding — avoids loading every
492598
// call edge in the DB (which would be O(all edges)) for large codebases.
@@ -1427,6 +1533,8 @@ export async function tryNativeOrchestrator(
14271533
// no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise.
14281534
const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha(
14291535
ctx.db as unknown as BetterSqlite3Database,
1536+
// null = full build (scan all call→method edges); array = incremental (gate queries decide scope)
1537+
result.isFullBuild ? null : (result.changedFiles ?? null),
14301538
);
14311539

14321540
// Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites

0 commit comments

Comments
 (0)