From 246bce3159b57cdaea779829c24d7873e91028ea Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 20 May 2026 13:02:00 -0600 Subject: [PATCH 1/2] fix: iterate barrel re-parse discovery to stop dropping chained-barrel edges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1174 Stage 6b's barrel candidate discovery was single-pass: it only looked at the originally changed files' imports. When a hybrid barrel (file with ≥1 reexport + many local defs, e.g. src/domain/parser.ts) was re-parsed, its outgoing edges were wiped — but the barrel-through edges from that barrel to leaf files (via a second-level barrel like src/extractors/ index.ts) could not be re-emitted because the second barrel was never loaded into file_symbols. Result: 32 imports edges (native) / 37 (WASM) silently dropped per incremental rebuild, and they never came back without --no-incremental. The Rust orchestrator and the JS WASM-fallback pipeline now iterate the barrel-candidate discovery until file_symbols is stable. JS-side, three related defects were exposed once parser.ts started being processed correctly: * reparseBarrelFiles was marking every re-parsed file as barrel-only even when it was a hybrid (the isBarrelFile heuristic returns true for reexports >= ownDefs); only mark the actual barrel-only files. * build-edges' lazy fallback queried kind != 'file', broader than the upfront load's specific definition kinds, leaking parameters and properties into call resolution. * resolve-imports' delete-outgoing-edges had no kind filter, wiping contains/parameter_of that insertNodes only emits for changed files — aligned with the Rust orchestrator's filter. Verification on the dogfooded repo: full=1371, incremental=1371 imports edges on both engines (was -32 / -37 on main). Every edge kind is identical between full and incremental on the native engine; only minor imports-type / contains stragglers remain on WASM (separate concerns). Regression test parameterized over both engines: tests/integration/issue-1174-chained-barrel-incremental.test.ts. --- crates/codegraph-core/src/build_pipeline.rs | 175 ++++++++++++------ .../graph/builder/stages/build-edges.ts | 11 +- .../graph/builder/stages/resolve-imports.ts | 51 ++++- .../fixtures/issue-1174-chained-barrel/app.js | 5 + .../extractors/alpha.js | 3 + .../extractors/beta.js | 3 + .../extractors/delta.js | 3 + .../extractors/gamma.js | 3 + .../extractors/index.js | 6 + .../issue-1174-chained-barrel/parser.js | 28 +++ .../issue-1174-chained-barrel/types/index.js | 7 + ...ue-1174-chained-barrel-incremental.test.ts | 140 ++++++++++++++ 12 files changed, 370 insertions(+), 65 deletions(-) create mode 100644 tests/fixtures/issue-1174-chained-barrel/app.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/extractors/alpha.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/extractors/beta.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/extractors/delta.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/extractors/gamma.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/extractors/index.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/parser.js create mode 100644 tests/fixtures/issue-1174-chained-barrel/types/index.js create mode 100644 tests/integration/issue-1174-chained-barrel-incremental.test.ts diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index 72644d4d8..dba6e7f28 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -600,6 +600,16 @@ fn collect_source_files( /// Barrel files (re-export-only index files) may not be in file_symbols because /// they weren't changed or reverse-deps. Without their symbols, barrel resolution /// in Stage 7 can't create transitive import edges. +/// +/// Discovery is iterative: a barrel that imports another barrel (e.g. +/// `parser.ts → extractors/index.ts → extractors/.ts`) needs both +/// loaded so Stage 7 can emit the barrel-through edges from the first barrel +/// to the leaf targets. Without the loop, only the first level of barrels +/// gets merged into `file_symbols`; the deeper chain has no entry in +/// `reexport_map`, so `resolve_barrel_export` returns `None` and the +/// barrel-through edges are silently dropped on every incremental rebuild +/// (#1174). Convergence is guaranteed because `file_symbols` grows +/// monotonically and is bounded by the set of barrel files in the project. fn reparse_barrel_candidates( conn: &Connection, root_dir: &str, @@ -624,67 +634,38 @@ fn reparse_barrel_candidates( rows.into_iter().collect() }; - // Check which barrels are imported by parsed files but not in file_symbols - let mut barrel_paths_to_parse: Vec = Vec::new(); - for (rel_path, symbols) in file_symbols.iter() { - for imp in &symbols.imports { - let abs_file = Path::new(root_dir).join(rel_path); - let fwd = abs_file.to_str().unwrap_or("").replace('\\', "/"); - let key = format!("{}|{}", fwd, imp.source); - if let Some(resolved) = batch_resolved.get(&key) { - if barrel_files_in_db.contains(resolved) && !file_symbols.contains_key(resolved) - { - let abs = Path::new(root_dir).join(resolved); - if abs.exists() { - barrel_paths_to_parse - .push(abs.to_str().unwrap_or("").to_string()); - } - } - } - } - } - - // Also find barrels that re-export FROM changed files - { - let changed_rel: Vec<&str> = file_symbols.keys().map(|s| s.as_str()).collect(); - if let Ok(mut stmt) = conn.prepare( - "SELECT DISTINCT n1.file FROM edges e \ - JOIN nodes n1 ON e.source_id = n1.id \ - JOIN nodes n2 ON e.target_id = n2.id \ - WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?1", - ) { - for changed in &changed_rel { - if let Ok(rows) = stmt.query_map(rusqlite::params![changed], |row| { - row.get::<_, String>(0) - }) { - for row in rows.flatten() { - if !file_symbols.contains_key(&row) { - let abs = Path::new(root_dir).join(&row); - if abs.exists() { - barrel_paths_to_parse - .push(abs.to_str().unwrap_or("").to_string()); - } - } - } - } - } - } - } - - // Re-parse barrel files and merge into file_symbols - if !barrel_paths_to_parse.is_empty() { + // Seed: barrels imported by the initial file_symbols (= changed files), + // plus barrels that re-export FROM any changed file. The reexport-from + // seed only fires on the initial pass — re-parsed barrels haven't + // changed in content, so they can't trigger new reexport-from candidates. + let initial_files: Vec = file_symbols.keys().cloned().collect(); + let mut barrel_paths_to_parse: Vec = collect_imported_barrel_candidates( + root_dir, + &initial_files, + batch_resolved, + &barrel_files_in_db, + file_symbols, + ); + barrel_paths_to_parse.extend(collect_reexport_from_barrels( + conn, + root_dir, + &initial_files, + file_symbols, + )); + + // Iterative re-parse: each pass merges the queued barrels into file_symbols, + // then scans their imports for additional barrel candidates the previous + // pass couldn't see. + while !barrel_paths_to_parse.is_empty() { barrel_paths_to_parse.sort(); barrel_paths_to_parse.dedup(); + let to_parse = std::mem::take(&mut barrel_paths_to_parse); // Re-parse barrel candidates — these may be hybrid barrels (reexports // AND local definitions / call sites, see #979). Dataflow/AST analysis // is skipped because the barrel is not itself a "changed" file; Stage 7 // will reconstruct all outgoing edge kinds from the fresh parse. - let barrel_parsed = parallel::parse_files_parallel( - &barrel_paths_to_parse, - root_dir, - false, - false, - ); + let barrel_parsed = parallel::parse_files_parallel(&to_parse, root_dir, false, false); + let mut newly_added: Vec = Vec::with_capacity(barrel_parsed.len()); for mut sym in barrel_parsed { let rel = relative_path(root_dir, &sym.file); sym.file = rel.clone(); @@ -727,9 +708,91 @@ fn reparse_barrel_candidates( batch_resolved.insert(key, r.resolved_path.clone()); } } - file_symbols.insert(rel, sym); + file_symbols.insert(rel.clone(), sym); + newly_added.push(rel); + } + + // Scan just-merged barrels for further barrel imports (next level of + // the chain). batch_resolved is now up to date for these imports. + barrel_paths_to_parse = collect_imported_barrel_candidates( + root_dir, + &newly_added, + batch_resolved, + &barrel_files_in_db, + file_symbols, + ); + } +} + +/// Walk the imports of `from_files` and return absolute paths of any barrel +/// candidates (files in `barrel_files_in_db` not yet in `file_symbols`) that +/// exist on disk. +fn collect_imported_barrel_candidates( + root_dir: &str, + from_files: &[String], + batch_resolved: &HashMap, + barrel_files_in_db: &HashSet, + file_symbols: &HashMap, +) -> Vec { + let mut out = Vec::new(); + for rel_path in from_files { + let symbols = match file_symbols.get(rel_path) { + Some(s) => s, + None => continue, + }; + let abs_file = Path::new(root_dir).join(rel_path); + let fwd = abs_file.to_str().unwrap_or("").replace('\\', "/"); + for imp in &symbols.imports { + let key = format!("{}|{}", fwd, imp.source); + if let Some(resolved) = batch_resolved.get(&key) { + if barrel_files_in_db.contains(resolved) + && !file_symbols.contains_key(resolved) + { + let abs = Path::new(root_dir).join(resolved); + if abs.exists() { + out.push(abs.to_str().unwrap_or("").to_string()); + } + } + } + } + } + out +} + +/// Find barrels that re-export from any of `changed_files`. Used as a seed +/// for the iterative re-parse so a renamed/removed symbol in a changed file +/// re-emits the affected barrel's outgoing edges. +fn collect_reexport_from_barrels( + conn: &Connection, + root_dir: &str, + changed_files: &[String], + file_symbols: &HashMap, +) -> Vec { + let mut out = Vec::new(); + let mut stmt = match conn.prepare( + "SELECT DISTINCT n1.file FROM edges e \ + JOIN nodes n1 ON e.source_id = n1.id \ + JOIN nodes n2 ON e.target_id = n2.id \ + WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?1", + ) { + Ok(stmt) => stmt, + Err(_) => return out, + }; + for changed in changed_files { + if let Ok(rows) = + stmt.query_map(rusqlite::params![changed], |row| row.get::<_, String>(0)) + { + for row in rows.flatten() { + if !file_symbols.contains_key(&row) { + let abs = Path::new(root_dir).join(&row); + if abs.exists() { + out.push(abs.to_str().unwrap_or("").to_string()); + } + } + } } } + out } /// Stage 9: Finalize build — persist metadata, write journal, return counts. diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 849b9e50f..fc08160b3 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -770,9 +770,11 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void { * their import targets. Falls back to loading ALL nodes for full builds or * larger incremental changes. */ +const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`; + function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } { const { db, fileSymbols, isFullBuild, batchResolved } = ctx; - const nodeKindFilter = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`; + const nodeKindFilter = NODE_KIND_FILTER_SQL; // Gate: only scope for small incremental on large codebases if (!isFullBuild && fileSymbols.size <= ctx.config.build.smallFilesThreshold) { @@ -816,8 +818,13 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void { if (!scopedLoad) return; const { db } = ctx; + // Match the upfront kind filter exactly. Using `kind != 'file'` here lets + // parameters, properties, and other non-definition kinds leak into call + // resolution, producing bogus call edges like `parser.ts → ` (#1174 follow-up). Calls only ever target the + // definition kinds, so the fallback's filter must agree with `loadNodes`. const fallbackStmt = db.prepare( - `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind != 'file'`, + `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND ${NODE_KIND_FILTER_SQL}`, ); const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName); ctx.nodesByName.get = (name: string) => { diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 66b49087b..50836094c 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -95,11 +95,22 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> { .all() as Array<{ file: string }>; } -/** Re-parse barrel files and update fileSymbols/reexportMap with fresh data. */ +/** + * Re-parse barrel files and update fileSymbols/reexportMap with fresh data. + * Returns the relative paths of newly-merged files so the caller can scan + * them for the next level of barrel candidates. + * + * A re-parsed file is marked `barrel-only` only when it really is one (the + * `isBarrelFile` check — reexports >= ownDefs). The previous unconditional + * `.add(relPath)` caused hybrid barrels with many local defs (e.g. a file + * with one `export type ... from` and dozens of internal functions) to drop + * all their non-reexport imports in build-edges, since the barrel-only branch + * skips them (#1174). + */ async function reparseBarrelFiles( ctx: PipelineContext, barrelCandidates: Array<{ file: string }>, -): Promise { +): Promise { const { db, fileSymbols, rootDir, engineOpts } = ctx; const barrelPaths: string[] = []; @@ -109,18 +120,27 @@ async function reparseBarrelFiles( } } - if (barrelPaths.length === 0) return; + if (barrelPaths.length === 0) return []; + // Preserve `contains` and `parameter_of` — those are emitted by insertNodes, + // which only runs on the original (changed + reverse-dep) fileSymbols. Barrel + // candidates are merged here *after* insertNodes, so wiping those kinds + // would permanently drop them (mirrors the Rust orchestrator's Stage 6b + // delete in build_pipeline.rs). const deleteOutgoingEdges = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', + `DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) + AND kind NOT IN ('contains', 'parameter_of')`, ); + const added: string[] = []; try { const barrelSymbols = await parseFilesAuto(barrelPaths, rootDir, engineOpts); for (const [relPath, fileSym] of barrelSymbols) { deleteOutgoingEdges.run(relPath); fileSymbols.set(relPath, fileSym); - ctx.barrelOnlyFiles.add(relPath); + if (isBarrelFile(ctx, relPath)) { + ctx.barrelOnlyFiles.add(relPath); + } const reexports = fileSym.imports.filter((imp: Import) => imp.reexport); if (reexports.length > 0) { ctx.reexportMap.set( @@ -132,10 +152,12 @@ async function reparseBarrelFiles( })), ); } + added.push(relPath); } } catch (e: unknown) { debug(`Barrel re-parse failed (non-fatal): ${(e as Error).message}`); } + return added; } export async function resolveImports(ctx: PipelineContext): Promise { @@ -156,8 +178,23 @@ export async function resolveImports(ctx: PipelineContext): Promise { ctx.barrelOnlyFiles = new Set(); if (!isFullBuild) { - const barrelCandidates = findBarrelCandidates(ctx); - await reparseBarrelFiles(ctx, barrelCandidates); + // Iteratively discover and re-parse barrel chains. A barrel that imports + // another barrel (e.g. `parser.ts → extractors/index.ts → extractors/.ts`) + // needs both loaded so build-edges can emit the barrel-through edges from + // the first barrel to the leaf targets. Without iteration, only the first + // level of barrels gets merged into fileSymbols; the deeper chain has no + // entry in reexportMap and the resolver silently drops the affected edges + // on every incremental rebuild (#1174). + // + // Convergence is guaranteed because fileSymbols grows monotonically and + // is bounded by the set of barrel files in the project — each iteration + // either adds a previously-unseen barrel or terminates. + while (true) { + const before = fileSymbols.size; + const barrelCandidates = findBarrelCandidates(ctx); + await reparseBarrelFiles(ctx, barrelCandidates); + if (fileSymbols.size === before) break; + } } } diff --git a/tests/fixtures/issue-1174-chained-barrel/app.js b/tests/fixtures/issue-1174-chained-barrel/app.js new file mode 100644 index 000000000..af106f915 --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/app.js @@ -0,0 +1,5 @@ +import { runParser } from './parser.js'; + +export function main(input) { + return runParser(input); +} diff --git a/tests/fixtures/issue-1174-chained-barrel/extractors/alpha.js b/tests/fixtures/issue-1174-chained-barrel/extractors/alpha.js new file mode 100644 index 000000000..b652ec439 --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/extractors/alpha.js @@ -0,0 +1,3 @@ +export function extractAlpha(input) { + return `alpha:${input}`; +} diff --git a/tests/fixtures/issue-1174-chained-barrel/extractors/beta.js b/tests/fixtures/issue-1174-chained-barrel/extractors/beta.js new file mode 100644 index 000000000..eef7a404d --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/extractors/beta.js @@ -0,0 +1,3 @@ +export function extractBeta(input) { + return `beta:${input}`; +} diff --git a/tests/fixtures/issue-1174-chained-barrel/extractors/delta.js b/tests/fixtures/issue-1174-chained-barrel/extractors/delta.js new file mode 100644 index 000000000..c42e87add --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/extractors/delta.js @@ -0,0 +1,3 @@ +export function extractDelta(input) { + return `delta:${input}`; +} diff --git a/tests/fixtures/issue-1174-chained-barrel/extractors/gamma.js b/tests/fixtures/issue-1174-chained-barrel/extractors/gamma.js new file mode 100644 index 000000000..9f04a13d9 --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/extractors/gamma.js @@ -0,0 +1,3 @@ +export function extractGamma(input) { + return `gamma:${input}`; +} diff --git a/tests/fixtures/issue-1174-chained-barrel/extractors/index.js b/tests/fixtures/issue-1174-chained-barrel/extractors/index.js new file mode 100644 index 000000000..95806f4b0 --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/extractors/index.js @@ -0,0 +1,6 @@ +// Pure barrel — re-exports only, no local definitions. +// Two-level chain test: parser.js (hybrid barrel) → this barrel → leaf files. +export { extractAlpha } from './alpha.js'; +export { extractBeta } from './beta.js'; +export { extractDelta } from './delta.js'; +export { extractGamma } from './gamma.js'; diff --git a/tests/fixtures/issue-1174-chained-barrel/parser.js b/tests/fixtures/issue-1174-chained-barrel/parser.js new file mode 100644 index 000000000..1e9db89ac --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/parser.js @@ -0,0 +1,28 @@ +// Hybrid barrel: has one re-export AND many local definitions. +// Mirrors src/domain/parser.ts in the dogfooded reproduction of #1174: +// the file is flagged as a barrel candidate by the orchestrator (because +// it has ≥1 reexports edge in the DB) yet is *not* barrel-only because +// its local defs outnumber its reexports. +export { Token } from './types/index.js'; + +import { extractAlpha, extractBeta, extractDelta, extractGamma } from './extractors/index.js'; + +export function runParser(input) { + const alpha = extractAlpha(input); + const beta = extractBeta(input); + const gamma = extractGamma(input); + const delta = extractDelta(input); + return combineResults(alpha, beta, gamma, delta); +} + +export function combineResults(a, b, c, d) { + return [a, b, c, d].join('|'); +} + +export function describeParser() { + return 'chained-barrel parser'; +} + +export function resetParser() { + return null; +} diff --git a/tests/fixtures/issue-1174-chained-barrel/types/index.js b/tests/fixtures/issue-1174-chained-barrel/types/index.js new file mode 100644 index 000000000..1ac8e9a21 --- /dev/null +++ b/tests/fixtures/issue-1174-chained-barrel/types/index.js @@ -0,0 +1,7 @@ +// Leaf re-export source for the hybrid barrel. +export class Token { + constructor(kind, value) { + this.kind = kind; + this.value = value; + } +} diff --git a/tests/integration/issue-1174-chained-barrel-incremental.test.ts b/tests/integration/issue-1174-chained-barrel-incremental.test.ts new file mode 100644 index 000000000..a0611de09 --- /dev/null +++ b/tests/integration/issue-1174-chained-barrel-incremental.test.ts @@ -0,0 +1,140 @@ +/** + * Regression for #1174: incremental rebuild silently drops imports edges + * when an unrelated file in a barrel chain is touched. + * + * Fixture shape (mirrors the dogfooded reproduction): + * + * app.js + * └─ imports `runParser` from parser.js + * + * parser.js (hybrid barrel — 1 reexport + many local defs) + * ├─ `export { Token } from './types/index.js'` + * └─ imports `extractAlpha/Beta/Gamma/Delta` from extractors/index.js + * + * extractors/index.js (pure barrel — re-exports only) + * └─ re-exports each `extract*` symbol from its leaf file + * + * extractors/{alpha,beta,gamma,delta}.js (leaf definitions) + * + * Before the fix, editing `app.js` triggered re-parse of `parser.js` (it has + * one re-export so the orchestrator flags it as a barrel candidate), wiped + * its outgoing edges, and re-emitted them. The barrel-through edges from + * `parser.js` to each `extractors/.js` were silently dropped because + * `extractors/index.js` was never added to `file_symbols` — Stage 6b's + * candidate discovery was single-pass. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { buildGraph } from '../../src/domain/graph/builder.js'; +import type { EngineMode } from '../../src/types.js'; + +const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'issue-1174-chained-barrel'); + +function copyDirSync(src: string, dest: string) { + fs.mkdirSync(dest, { recursive: true }); + for (const entry of fs.readdirSync(src, { withFileTypes: true })) { + const s = path.join(src, entry.name); + const d = path.join(dest, entry.name); + if (entry.isDirectory()) copyDirSync(s, d); + else fs.copyFileSync(s, d); + } +} + +interface EdgeRow { + source_file: string; + source_name: string; + target_file: string; + target_name: string; + kind: string; +} + +function readEdges(dbPath: string): EdgeRow[] { + const db = new Database(dbPath, { readonly: true }); + try { + return db + .prepare( + `SELECT n1.file AS source_file, n1.name AS source_name, + n2.file AS target_file, n2.name AS target_name, e.kind + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + ORDER BY n1.file, n1.name, n2.file, n2.name, e.kind`, + ) + .all() as EdgeRow[]; + } finally { + db.close(); + } +} + +const ENGINES: EngineMode[] = ['wasm', 'native']; + +describe.each(ENGINES)('Issue #1174 chained-barrel incremental parity (%s)', (engine) => { + let fullEdges: EdgeRow[]; + let incrEdges: EdgeRow[]; + let tmpBase: string; + + beforeAll(async () => { + tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), `codegraph-1174-${engine}-`)); + const fullDir = path.join(tmpBase, 'full'); + const incrDir = path.join(tmpBase, 'incr'); + copyDirSync(FIXTURE_DIR, fullDir); + copyDirSync(FIXTURE_DIR, incrDir); + + // Establish baseline on the incremental copy + await buildGraph(incrDir, { incremental: false, skipRegistry: true, engine }); + + // Mutate app.js (the only "changed" file) on both copies + const mutate = (dir: string) => { + fs.appendFileSync(path.join(dir, 'app.js'), '\n// touch\n'); + }; + mutate(fullDir); + mutate(incrDir); + + // Full build on the full copy + await buildGraph(fullDir, { incremental: false, skipRegistry: true, engine }); + // Incremental rebuild on the incr copy + await buildGraph(incrDir, { incremental: true, skipRegistry: true, engine }); + + fullEdges = readEdges(path.join(fullDir, '.codegraph', 'graph.db')); + incrEdges = readEdges(path.join(incrDir, '.codegraph', 'graph.db')); + }, 90_000); + + afterAll(() => { + if (tmpBase) fs.rmSync(tmpBase, { recursive: true, force: true }); + }); + + it('emits the parser.js → extractors/*.js barrel-through edges on full build', () => { + const barrelThrough = fullEdges.filter( + (e) => + e.source_file === 'parser.js' && + e.target_file.startsWith('extractors/') && + e.target_file !== 'extractors/index.js' && + e.kind === 'imports', + ); + // Four leaf extractors: alpha, beta, gamma, delta. + expect(barrelThrough.map((e) => e.target_file).sort()).toEqual([ + 'extractors/alpha.js', + 'extractors/beta.js', + 'extractors/delta.js', + 'extractors/gamma.js', + ]); + }); + + it('imports edge count matches full rebuild', () => { + const fullImports = fullEdges.filter((e) => e.kind === 'imports'); + const incrImports = incrEdges.filter((e) => e.kind === 'imports'); + expect(incrImports.length).toBe(fullImports.length); + }); + + it('every barrel-through edge survives the incremental rebuild', () => { + const key = (e: EdgeRow) => `${e.source_file}|${e.target_file}|${e.kind}`; + const fullKeys = new Set(fullEdges.filter((e) => e.kind === 'imports').map(key)); + const incrKeys = new Set(incrEdges.filter((e) => e.kind === 'imports').map(key)); + const missing = [...fullKeys].filter((k) => !incrKeys.has(k)); + expect(missing).toEqual([]); + }); +}); From b36a09d76cd30ad5596e14598f8d5b504d64b8c0 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 20 May 2026 13:14:58 -0600 Subject: [PATCH 2/2] perf(builder): scope barrel iteration to newly-merged paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iteration 2+ of the JS barrel-discovery loop was re-querying the DB for every key in fileSymbols rather than just barrels newly merged in the previous pass, ballooning the work each iteration. The Rust orchestrator already does this correctly via the `&newly_added` slice. Wire the `string[]` returned by reparseBarrelFiles through the loop so each pass only walks newly-merged paths' imports, and gate the reexport-from DB query behind a `firstPass` flag — re-parsed barrels haven't changed content, so they can't surface new reexport-from candidates anyway. Matches the Rust seed-only `collect_reexport_from_barrels` call. No behavior change beyond perf; the regression test for #1174 (issue-1174-chained-barrel-incremental.test.ts) still passes on both engines and full=1371 / incremental=1371 imports edges on the dogfooded repo. Addresses Greptile feedback on #1179. --- .../graph/builder/stages/resolve-imports.ts | 63 ++++++++++++------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 50836094c..5d1b0456c 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -33,15 +33,23 @@ function buildReexportMap(ctx: PipelineContext): void { } /** - * Find barrel files related to changed files for scoped re-parsing. - * For small incremental builds (<=smallFilesThreshold files), only barrels that re-export from - * or are imported by the changed files. For larger changes, all barrels. + * Find barrel files related to `fromRelPaths` for scoped re-parsing. + * For small frontiers (<=smallFilesThreshold files), only barrels that re-export from + * or are imported by `fromRelPaths`. For larger frontiers, all barrels. + * + * `firstPass` gates the reexport-from DB scan: re-parsed barrels haven't + * changed content, so subsequent passes can't surface new reexport-from + * candidates and only need to follow imports of newly-merged barrels + * (mirrors the Rust orchestrator's seed-only `collect_reexport_from_barrels`). */ -function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> { +function findBarrelCandidates( + ctx: PipelineContext, + fromRelPaths: readonly string[], + firstPass: boolean, +): Array<{ file: string }> { const { db, fileSymbols, rootDir, aliases } = ctx; - const changedRelPaths = new Set(fileSymbols.keys()); - if (changedRelPaths.size <= ctx.config.build.smallFilesThreshold) { + if (fromRelPaths.length <= ctx.config.build.smallFilesThreshold) { const allBarrelFiles = new Set( ( db @@ -56,9 +64,9 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> { const barrels = new Set(); - // Find barrels imported by changed files using parsed import data + // Find barrels imported by `fromRelPaths` using parsed import data // (can't query DB edges -- they were purged for the changed files). - for (const relPath of changedRelPaths) { + for (const relPath of fromRelPaths) { const symbols = fileSymbols.get(relPath); if (!symbols) continue; for (const imp of symbols.imports) { @@ -71,16 +79,17 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> { } } - // Also find barrels that re-export from the changed files - const reexportSourceStmt = db.prepare( - `SELECT DISTINCT n1.file FROM edges e - JOIN nodes n1 ON e.source_id = n1.id - JOIN nodes n2 ON e.target_id = n2.id - WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?`, - ); - for (const relPath of changedRelPaths) { - for (const row of reexportSourceStmt.all(relPath) as Array<{ file: string }>) { - barrels.add(row.file); + if (firstPass) { + const reexportSourceStmt = db.prepare( + `SELECT DISTINCT n1.file FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?`, + ); + for (const relPath of fromRelPaths) { + for (const row of reexportSourceStmt.all(relPath) as Array<{ file: string }>) { + barrels.add(row.file); + } } } return [...barrels].map((file) => ({ file })); @@ -189,11 +198,19 @@ export async function resolveImports(ctx: PipelineContext): Promise { // Convergence is guaranteed because fileSymbols grows monotonically and // is bounded by the set of barrel files in the project — each iteration // either adds a previously-unseen barrel or terminates. - while (true) { - const before = fileSymbols.size; - const barrelCandidates = findBarrelCandidates(ctx); - await reparseBarrelFiles(ctx, barrelCandidates); - if (fileSymbols.size === before) break; + // + // Subsequent passes only walk newly-merged barrels' imports (`frontier` + // = paths returned by reparseBarrelFiles), matching the Rust + // orchestrator's `&newly_added` slice. Without this, every pass would + // re-query the DB for every key in `fileSymbols`. + let frontier: readonly string[] = [...fileSymbols.keys()]; + let firstPass = true; + while (frontier.length > 0) { + const barrelCandidates = findBarrelCandidates(ctx, frontier, firstPass); + const added = await reparseBarrelFiles(ctx, barrelCandidates); + if (added.length === 0) break; + frontier = added; + firstPass = false; } } }