Skip to content
Merged
273 changes: 270 additions & 3 deletions src/domain/graph/builder/incremental.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
*
* Reuses pipeline helpers instead of duplicating node insertion and edge building
* logic from the main builder. This eliminates the watcher.js divergence (ROADMAP 3.9).
*
* Reverse-dep cascade: when a file changes, files that have edges targeting it
* must have their outgoing edges rebuilt (since the changed file's node IDs change).
*/
import fs from 'node:fs';
import path from 'node:path';
import { bulkNodeIdsByFile } from '../../../db/index.js';
import { warn } from '../../../infrastructure/logger.js';
import { normalizePath } from '../../../shared/constants.js';
import { parseFileIncremental } from '../../parser.js';
Expand All @@ -18,15 +22,204 @@ function insertFileNodes(stmts, relPath, symbols) {
stmts.insertNode.run(relPath, 'file', relPath, 0, null);
for (const def of symbols.definitions) {
stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null);
if (def.children?.length) {
for (const child of def.children) {
stmts.insertNode.run(
child.name,
child.kind,
relPath,
child.line,
child.endLine || null,
);
}
}
}
for (const exp of symbols.exports) {
stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
}
}

// ── Containment edges ──────────────────────────────────────────────────

/**
 * Insert structural `contains` edges for one file.
 *
 * Every definition whose node can be found gets a `contains` edge from the
 * file node (when the file node exists), and each of its children gets a
 * `contains` edge from the definition. Children of kind `parameter`
 * additionally get a reverse `parameter_of` edge pointing at the definition.
 *
 * @param {object} db - Database handle, forwarded to `bulkNodeIdsByFile`.
 * @param {object} stmts - Prepared statements; only `insertEdge` is used here.
 * @param {string} relPath - File path used to look up the file's nodes.
 * @param {object} symbols - Parse result; only `definitions` is read.
 * @returns {number} Number of edges inserted.
 */
function buildContainmentEdges(db, stmts, relPath, symbols) {
  // Nodes are identified by the (name, kind, line) triple.
  const keyOf = (name, kind, line) => `${name}|${kind}|${line}`;

  const idsByKey = new Map();
  for (const { name, kind, line, id } of bulkNodeIdsByFile(db, relPath)) {
    idsByKey.set(keyOf(name, kind, line), id);
  }

  let inserted = 0;
  const link = (sourceId, targetId, edgeKind) => {
    stmts.insertEdge.run(sourceId, targetId, edgeKind, 1.0, 0);
    inserted += 1;
  };

  const fileId = idsByKey.get(keyOf(relPath, 'file', 0));

  for (const def of symbols.definitions) {
    const defId = idsByKey.get(keyOf(def.name, def.kind, def.line));
    // Without a node for the definition there is nothing to attach to.
    if (!defId) continue;

    if (fileId) link(fileId, defId, 'contains');

    for (const child of def.children ?? []) {
      const childId = idsByKey.get(keyOf(child.name, child.kind, child.line));
      if (!childId) continue;
      link(defId, childId, 'contains');
      if (child.kind === 'parameter') link(childId, defId, 'parameter_of');
    }
  }

  return inserted;
}

// ── Reverse-dep cascade ────────────────────────────────────────────────

/**
 * List the files that have at least one edge pointing into `relPath`.
 *
 * Edges whose source is in `relPath` itself, or whose source node is a
 * directory, are excluded by the query.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} relPath - File whose dependents are wanted.
 * @returns {string[]} Distinct source file paths.
 */
function findReverseDeps(db, relPath) {
  const query = db.prepare(
    `SELECT DISTINCT n_src.file FROM edges e
JOIN nodes n_src ON e.source_id = n_src.id
JOIN nodes n_tgt ON e.target_id = n_tgt.id
WHERE n_tgt.file = ? AND n_src.file != ? AND n_src.kind != 'directory'`,
  );
  // The path is bound twice: once for the target filter, once for the self-exclusion.
  const rows = query.all(relPath, relPath);

  const dependents = [];
  for (const { file } of rows) {
    dependents.push(file);
  }
  return dependents;
}

/**
 * Remove every edge whose source node belongs to `relPath`.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} relPath - File whose outgoing edges are deleted.
 * @returns {void}
 */
function deleteOutgoingEdges(db, relPath) {
  const removeEdges = db.prepare(
    'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
  );
  removeEdges.run(relPath);
}

/**
 * Read and parse a reverse-dependency file.
 *
 * @param {string} rootDir - Directory that `depRelPath` is joined onto.
 * @param {string} depRelPath - Path of the dependent file relative to `rootDir`.
 * @param {object} engineOpts - Options forwarded to `parseFileIncremental`.
 * @param {object} cache - Cache forwarded to `parseFileIncremental`.
 * @returns {Promise<object|null>} Whatever `parseFileIncremental` yields, or
 *   null when the file does not exist or cannot be read.
 */
async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) {
  const target = path.join(rootDir, depRelPath);
  if (!fs.existsSync(target)) {
    return null;
  }

  let source;
  let readFailed = false;
  try {
    source = readFileSafe(target);
  } catch {
    readFailed = true;
  }

  // Only read failures are absorbed; errors from the parser itself propagate.
  return readFailed ? null : parseFileIncremental(cache, target, source, engineOpts);
}

/**
 * Recreate the outgoing edges of a reverse-dependency file: containment,
 * import and call edges, in that order.
 *
 * Directory-to-file containment is deliberately not rebuilt here, because it
 * was never deleted for reverse-deps.
 *
 * @param {object} db - Database handle.
 * @param {string} rootDir - Project root used for import resolution.
 * @param {string} depRelPath - Path of the dependent file.
 * @param {object} symbols - Parse result for the dependent file.
 * @param {object} stmts - Prepared statements; `getNodeId` is used directly.
 * @param {boolean} skipBarrel - When truthy, `null` is passed to
 *   `buildImportEdges` in place of the database handle.
 * @returns {number} Total edges reported by the three builders, or 0 when the
 *   file has no file node.
 */
function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBarrel) {
  const fileNode = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
  if (!fileNode) {
    return 0;
  }

  // One alias object is shared by import-edge building and imported-name mapping.
  const aliasConfig = { baseUrl: null, paths: {} };
  const barrelDb = skipBarrel ? null : db;

  const containmentCount = buildContainmentEdges(db, stmts, depRelPath, symbols);
  const importCount = buildImportEdges(
    stmts,
    depRelPath,
    symbols,
    rootDir,
    fileNode.id,
    aliasConfig,
    barrelDb,
  );
  const namesByImport = buildImportedNamesMap(symbols, rootDir, depRelPath, aliasConfig);
  const callCount = buildCallEdges(stmts, depRelPath, symbols, fileNode, namesByImport);

  return containmentCount + importCount + callCount;
}

// ── Directory containment edges ────────────────────────────────────────

/**
 * Re-link a file to its parent directory with a `contains` edge.
 *
 * @param {object} db - Unused here; kept for signature parity with the other builders.
 * @param {object} stmts - Prepared statements; `getNodeId` and `insertEdge` are used.
 * @param {string} relPath - Path of the file to attach.
 * @returns {number} 1 when an edge was inserted, otherwise 0.
 */
function rebuildDirContainment(db, stmts, relPath) {
  const parentDir = normalizePath(path.dirname(relPath));

  // An empty or '.' parent means there is no directory to attach to.
  if (parentDir && parentDir !== '.') {
    const parentNode = stmts.getNodeId.get(parentDir, 'directory', parentDir, 0);
    const fileNode = stmts.getNodeId.get(relPath, 'file', relPath, 0);
    if (parentNode && fileNode) {
      stmts.insertEdge.run(parentNode.id, fileNode.id, 'contains', 1.0, 0);
      return 1;
    }
  }

  return 0;
}

// ── Ancillary table cleanup ────────────────────────────────────────────

/**
 * Delete rows in the ancillary analysis tables that belong to `relPath`.
 *
 * Most statements locate their rows through `SELECT id FROM nodes WHERE file = ?`,
 * so this has to run while the file's node rows still exist.
 *
 * The ancillary tables are optional: a missing table is ignored. Any other
 * failure is re-thrown so that a genuine write error is not silently hidden,
 * which would leave stale rows pointing at nodes that are about to be deleted.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} relPath - File whose ancillary rows are purged.
 * @returns {void}
 * @throws {Error} Any database error other than "no such table".
 */
function purgeAncillaryData(db, relPath) {
  const tryExec = (sql, ...args) => {
    try {
      db.prepare(sql).run(...args);
    } catch (err) {
      // Only the optional-table case is expected; everything else is a real failure.
      const message = typeof err?.message === 'string' ? err.message : '';
      if (!message.includes('no such table')) throw err;
    }
  };

  tryExec(
    'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  // The path is bound twice: once for the source side, once for the target side.
  tryExec(
    'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
    relPath,
  );
  tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath);
}
Comment on lines +150 to +182

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 purgeAncillaryData silently swallows all exceptions

The bare catch {} in tryExec is commented as /* table may not exist */, but it catches every exception — not just SQLite's "no such table" error. If any DELETE fails for a genuine reason (e.g., a locked database, a schema inconsistency, or a bug in the WHERE clause), the error is silently discarded. The subsequent stmts.deleteNodes.run(relPath) on line 382 would then fail with SQLITE_CONSTRAINT_FOREIGNKEY because the ancillary rows (which still reference those nodes' IDs) were never cleaned up. This is especially risky because the entire function exists specifically to prevent FK violations.

A more targeted guard would only ignore "no such table" errors:

const tryExec = (sql, ...args) => {
  try {
    db.prepare(sql).run(...args);
  } catch (err) {
    if (!err?.message?.includes('no such table')) throw err;
  }
};

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — `tryExec` now only catches errors containing 'no such table' and re-throws all other exceptions. This prevents masking genuine write failures while still handling the optional-table case.


// ── Import edge building ────────────────────────────────────────────────

function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases) {
/**
 * Prepared re-export count statement, cached per database handle so the SQL
 * is compiled once per database instead of on every `isBarrelFile` call.
 * A WeakMap lets the entry go away together with the database object.
 */
const reexportCountStmtByDb = new WeakMap();

/**
 * Tell whether a file acts as a barrel, i.e. its file node is the source of at
 * least one `reexports` edge.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} relPath - File path as stored in `nodes.file`.
 * @returns {boolean} True when one or more `reexports` edges start at the file node.
 */
function isBarrelFile(db, relPath) {
  let countStmt = reexportCountStmtByDb.get(db);
  if (!countStmt) {
    countStmt = db.prepare(
      `SELECT COUNT(*) as c FROM edges e
JOIN nodes n ON e.source_id = n.id
WHERE e.kind = 'reexports' AND n.file = ? AND n.kind = 'file'`,
    );
    reexportCountStmtByDb.set(db, countStmt);
  }
  // A missing row or count is treated as zero.
  return (countStmt.get(relPath)?.c ?? 0) > 0;
}

/**
 * Find the file that defines `symbolName` when it is reached through a barrel file.
 *
 * Collects the files targeted by `reexports` edges that start at the barrel's
 * file node and returns the first one holding a node named `symbolName` whose
 * kind is neither `file` nor `directory`. Targets that are themselves barrels
 * are searched recursively.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`.
 * @param {string} barrelPath - Path of the barrel file, as stored in `nodes.file`.
 * @param {string} symbolName - Name of the symbol to locate.
 * @param {Set<string>} [visited] - Barrel paths already explored in this search.
 * @returns {string|null} Path of the defining file, or null when none is found.
 */
function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) {
// A barrel seen earlier in this search is not explored again, which stops re-export cycles.
if (visited.has(barrelPath)) return null;
visited.add(barrelPath);

// Find re-export targets from this barrel
const reexportTargets = db
.prepare(
`SELECT DISTINCT n2.file FROM edges e
JOIN nodes n1 ON e.source_id = n1.id
JOIN nodes n2 ON e.target_id = n2.id
WHERE e.kind = 'reexports' AND n1.file = ? AND n1.kind = 'file'`,
)
.all(barrelPath);

for (const { file: targetFile } of reexportTargets) {
// Check if the symbol is defined in this target file
const hasDef = db
.prepare(
`SELECT 1 FROM nodes WHERE name = ? AND file = ? AND kind != 'file' AND kind != 'directory' LIMIT 1`,
)
.get(symbolName, targetFile);
if (hasDef) return targetFile;

// Recurse through barrel chains; the shared `visited` set carries over to the nested search
if (isBarrelFile(db, targetFile)) {
const deeper = resolveBarrelTarget(db, targetFile, symbolName, visited);
if (deeper) return deeper;
}
}
// No re-export target (direct or nested) defines the symbol
return null;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Prepared statements allocated inside hot-loop functions

Both isBarrelFile and resolveBarrelTarget call db.prepare(...) on every invocation. These functions are called in a tight loop — once per imported name, for every import from a barrel file, and resolveBarrelTarget recurses through barrel chains. Allocating a new prepared statement on each call negates the performance benefit of prepared statements (the whole point of which is to parse the SQL once and reuse the plan).

The queries should be prepared once at module scope (or lazily on first call, cached in a closure) and reused:

// At module scope
let _isBarrelStmt = null;
let _reexportTargetsStmt = null;
let _hasDefStmt = null;

function getIsBarrelStmt(db) {
  return (_isBarrelStmt ??= db.prepare(
    `SELECT COUNT(*) as c FROM edges e
     JOIN nodes n ON e.source_id = n.id
     WHERE e.kind = 'reexports' AND n.file = ? AND n.kind = 'file'`
  ));
}

For a project with dozens of barrel imports this is a minor overhead, but for large repos with many barrel files the cost can add up across the full reverse-dep cascade.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — barrel resolution prepared statements (isBarrelStmt, reexportTargetsStmt, hasDefStmt) are now lazily cached at module scope via getBarrelStmts(db) and reused across all invocations for the same database instance.

}

function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, db) {
let edgesAdded = 0;
for (const imp of symbols.imports) {
const resolvedPath = resolveImportPath(
Expand All @@ -40,6 +233,24 @@ function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases)
const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
edgesAdded++;

// Barrel resolution: create edges through re-export chains
if (!imp.reexport && db && isBarrelFile(db, resolvedPath)) {
const resolvedSources = new Set();
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
const actualSource = resolveBarrelTarget(db, resolvedPath, cleanName);
if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) {
resolvedSources.add(actualSource);
const actualRow = stmts.getNodeId.get(actualSource, 'file', actualSource, 0);
if (actualRow) {
const kind = edgeKind === 'imports-type' ? 'imports-type' : 'imports';
stmts.insertEdge.run(fileNodeId, actualRow.id, kind, 0.9, 0);
edgesAdded++;
}
}
}
}
}
}
return edgesAdded;
Expand Down Expand Up @@ -156,12 +367,17 @@ function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) {
* @param {Function} [options.diffSymbols] - Symbol diff function
* @returns {Promise<object|null>} Update result or null on failure
*/
export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cache, options = {}) {
export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, options = {}) {
const { diffSymbols } = options;
const relPath = normalizePath(path.relative(rootDir, filePath));
const oldNodes = stmts.countNodes.get(relPath)?.c || 0;
const oldSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : [];

// Find reverse-deps BEFORE purging (edges still reference the old nodes)
const reverseDeps = findReverseDeps(db, relPath);

// Purge ancillary tables, then edges, then nodes
purgeAncillaryData(db, relPath);
stmts.deleteEdgesForFile.run(relPath);
stmts.deleteNodes.run(relPath);

Expand Down Expand Up @@ -203,10 +419,61 @@ export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cac

const aliases = { baseUrl: null, paths: {} };

let edgesAdded = buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases);
let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
edgesAdded += rebuildDirContainment(db, stmts, relPath);
edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);

// Cascade: rebuild outgoing edges for reverse-dep files.
// Two-pass approach: first rebuild direct edges (creating reexports edges for barrels),
// then add barrel import edges (which need reexports edges to exist for resolution).
const depSymbols = new Map();
for (const depRelPath of reverseDeps) {
deleteOutgoingEdges(db, depRelPath);
const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
if (symbols_) depSymbols.set(depRelPath, symbols_);
}
Comment on lines +473 to +479

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Reverse-dep edges permanently deleted when parse fails

deleteOutgoingEdges is called unconditionally for every reverse-dep file before attempting to parse it. If parseReverseDep returns null (file unreadable, parse error, or file already deleted), no entry is placed in depSymbols. Pass 1 and Pass 2 then skip the file entirely, meaning its outgoing edges were wiped and are never rebuilt — leaving the graph in a permanently inconsistent state until the next full build.

The fix is to parse first and only delete edges for files that successfully parsed:

// Parse BEFORE deleting so a failed parse doesn't orphan the file
const depSymbols = new Map();
for (const depRelPath of reverseDeps) {
  const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
  if (symbols_) depSymbols.set(depRelPath, symbols_);
}
// Now it's safe to delete — every file in depSymbols will be rebuilt
for (const [depRelPath] of depSymbols) {
  deleteOutgoingEdges(db, depRelPath);
}

This preserves the invariant that edges are only deleted when they will be immediately rebuilt.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in bdf3f77 — moved deleteOutgoingEdges after parseReverseDep succeeds. If parsing returns null, edges are now preserved instead of permanently lost. Also fixed a TypeError: typeMap.get is not a function in the same commit: the native engine returns typeMap as a plain array, which was not being coerced to a Map for JS files after the TS-only backfill restriction.

// Pass 1: direct edges only (no barrel resolution) — creates reexports edges
for (const [depRelPath, symbols_] of depSymbols) {
edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
}
// Pass 2: add barrel import edges (reexports edges now exist)
for (const [depRelPath, symbols_] of depSymbols) {
const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
if (!fileNodeRow_) continue;
const aliases_ = { baseUrl: null, paths: {} };
for (const imp of symbols_.imports) {
if (imp.reexport) continue;
const resolvedPath = resolveImportPath(
path.join(rootDir, depRelPath),
imp.source,
rootDir,
aliases_,
);
if (db && isBarrelFile(db, resolvedPath)) {
const resolvedSources = new Set();
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
const actualSource = resolveBarrelTarget(db, resolvedPath, cleanName);
if (
actualSource &&
actualSource !== resolvedPath &&
!resolvedSources.has(actualSource)
) {
resolvedSources.add(actualSource);
const actualRow = stmts.getNodeId.get(actualSource, 'file', actualSource, 0);
if (actualRow) {
const kind = imp.typeOnly ? 'imports-type' : 'imports';
stmts.insertEdge.run(fileNodeRow_.id, actualRow.id, kind, 0.9, 0);
edgesAdded++;
}
}
}
}
}
}

const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null;
const event = oldNodes === 0 ? 'added' : 'modified';

Expand Down
9 changes: 9 additions & 0 deletions tests/fixtures/deep-deps-project/app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { runQuery } from './features/query.js';
import { formatOutput } from './features/format.js';
import { MAX_ITEMS } from './shared/constants.js';

/**
 * Fixture entry point: runs the query for `input` and `page`, formats a label
 * for `input`, and returns both together with the MAX_ITEMS constant.
 */
export function main(input, page) {
const results = runQuery(input, page);
const label = formatOutput(input);
return { label, results, max: MAX_ITEMS };
}
2 changes: 2 additions & 0 deletions tests/fixtures/deep-deps-project/domain/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export { parseItems } from './parser.js';
export { resolve } from './resolver.js';
6 changes: 6 additions & 0 deletions tests/fixtures/deep-deps-project/domain/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { MAX_ITEMS, clamp } from '../shared/index.js';

/**
 * Split a comma-separated string into trimmed items, keeping at most
 * MAX_ITEMS of them.
 */
export function parseItems(raw) {
const items = raw.split(',').map(s => s.trim());
// The slice end is the item count clamped to the range [0, MAX_ITEMS].
return items.slice(0, clamp(items.length, 0, MAX_ITEMS));
}
7 changes: 7 additions & 0 deletions tests/fixtures/deep-deps-project/domain/resolver.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { DEFAULT_NAME } from '../shared/constants.js';
import { formatName } from '../shared/helpers.js';

/**
 * Pass `input` through formatName, substituting DEFAULT_NAME when `input` is falsy.
 */
export function resolve(input) {
const name = input || DEFAULT_NAME;
return formatName(name);
}
7 changes: 7 additions & 0 deletions tests/fixtures/deep-deps-project/features/format.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { resolve } from '../domain/resolver.js';
import { DEFAULT_NAME } from '../shared/index.js';

/**
 * Resolve `input` and return the literal 'default' when the result equals
 * DEFAULT_NAME; otherwise return the resolved value unchanged.
 */
export function formatOutput(input) {
const resolved = resolve(input);
return resolved === DEFAULT_NAME ? 'default' : resolved;
}
9 changes: 9 additions & 0 deletions tests/fixtures/deep-deps-project/features/query.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { parseItems } from '../domain/index.js';
import { paginate } from '../shared/helpers.js';
import { clamp } from '../shared/constants.js';

/**
 * Parse `raw` into items and return one page of them.
 * The page index is clamped to the range [0, 100] and the page size passed on is 10.
 */
export function runQuery(raw, page) {
const items = parseItems(raw);
const safePage = clamp(page, 0, 100);
return paginate(items, safePage, 10);
}
7 changes: 7 additions & 0 deletions tests/fixtures/deep-deps-project/shared/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// The deeply-imported leaf file
export const MAX_ITEMS = 100;
export const DEFAULT_NAME = 'codegraph';

/**
 * Limit `value` to the range [min, max]: values below `min` become `min`,
 * values above `max` become `max`.
 */
export function clamp(value, min, max) {
return Math.min(Math.max(value, min), max);
}
11 changes: 11 additions & 0 deletions tests/fixtures/deep-deps-project/shared/helpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { clamp, MAX_ITEMS } from './constants.js';

/**
 * Return the slice of `items` for a zero-based `page`, using a page size
 * clamped to the range [1, MAX_ITEMS].
 */
export function paginate(items, page, size) {
const safeSize = clamp(size, 1, MAX_ITEMS);
const start = page * safeSize;
return items.slice(start, start + safeSize);
}

/**
 * Normalise a name by trimming surrounding whitespace and lower-casing it.
 */
export function formatName(name) {
return name.trim().toLowerCase();
}
2 changes: 2 additions & 0 deletions tests/fixtures/deep-deps-project/shared/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export { MAX_ITEMS, DEFAULT_NAME, clamp } from './constants.js';
export { paginate, formatName } from './helpers.js';
Loading
Loading