Skip to content
Merged
301 changes: 296 additions & 5 deletions src/domain/graph/builder/incremental.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
*
* Reuses pipeline helpers instead of duplicating node insertion and edge building
* logic from the main builder. This eliminates the watcher.js divergence (ROADMAP 3.9).
*
* Reverse-dep cascade: when a file changes, files that have edges targeting it
* must have their outgoing edges rebuilt (since the changed file's node IDs change).
*/
import fs from 'node:fs';
import path from 'node:path';
import { bulkNodeIdsByFile } from '../../../db/index.js';
import { warn } from '../../../infrastructure/logger.js';
import { normalizePath } from '../../../shared/constants.js';
import { parseFileIncremental } from '../../parser.js';
Expand All @@ -18,15 +22,252 @@ function insertFileNodes(stmts, relPath, symbols) {
stmts.insertNode.run(relPath, 'file', relPath, 0, null);
for (const def of symbols.definitions) {
stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null);
if (def.children?.length) {
for (const child of def.children) {
stmts.insertNode.run(child.name, child.kind, relPath, child.line, child.endLine || null);
}
}
}
for (const exp of symbols.exports) {
stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
}
}

// ── Containment edges ──────────────────────────────────────────────────

function buildContainmentEdges(db, stmts, relPath, symbols) {
const nodeIdMap = new Map();
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
const fileId = nodeIdMap.get(`${relPath}|file|0`);
let edgesAdded = 0;
for (const def of symbols.definitions) {
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
if (fileId && defId) {
stmts.insertEdge.run(fileId, defId, 'contains', 1.0, 0);
edgesAdded++;
}
if (def.children?.length && defId) {
for (const child of def.children) {
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
if (childId) {
stmts.insertEdge.run(defId, childId, 'contains', 1.0, 0);
edgesAdded++;
if (child.kind === 'parameter') {
stmts.insertEdge.run(childId, defId, 'parameter_of', 1.0, 0);
edgesAdded++;
}
}
}
}
}
return edgesAdded;
}

// ── Reverse-dep cascade ────────────────────────────────────────────────

// Lazily-cached prepared statements for reverse-dep operations
let _revDepDb = null;
let _findRevDepsStmt = null;
let _deleteOutEdgesStmt = null;

function getRevDepStmts(db) {
if (_revDepDb !== db) {
_revDepDb = db;
_findRevDepsStmt = db.prepare(
`SELECT DISTINCT n_src.file FROM edges e
JOIN nodes n_src ON e.source_id = n_src.id
JOIN nodes n_tgt ON e.target_id = n_tgt.id
WHERE n_tgt.file = ? AND n_src.file != ? AND n_src.kind != 'directory'`,
);
_deleteOutEdgesStmt = db.prepare(
'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
);
}
return { findRevDepsStmt: _findRevDepsStmt, deleteOutEdgesStmt: _deleteOutEdgesStmt };
}

function findReverseDeps(db, relPath) {
const { findRevDepsStmt } = getRevDepStmts(db);
return findRevDepsStmt.all(relPath, relPath).map((r) => r.file);
}

function deleteOutgoingEdges(db, relPath) {
const { deleteOutEdgesStmt } = getRevDepStmts(db);
deleteOutEdgesStmt.run(relPath);
}

async function parseReverseDep(rootDir, depRelPath, engineOpts, cache) {
const absPath = path.join(rootDir, depRelPath);
if (!fs.existsSync(absPath)) return null;

let code;
try {
code = readFileSafe(absPath);
} catch {
return null;
}

return parseFileIncremental(cache, absPath, code, engineOpts);
}

function rebuildReverseDepEdges(db, rootDir, depRelPath, symbols, stmts, skipBarrel) {
const fileNodeRow = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
if (!fileNodeRow) return 0;

const aliases = { baseUrl: null, paths: {} };
let edgesAdded = buildContainmentEdges(db, stmts, depRelPath, symbols);
// Don't rebuild dir→file containment for reverse-deps (it was never deleted)
edgesAdded += buildImportEdges(
stmts,
depRelPath,
symbols,
rootDir,
fileNodeRow.id,
aliases,
skipBarrel ? null : db,
);
const importedNames = buildImportedNamesMap(symbols, rootDir, depRelPath, aliases);
edgesAdded += buildCallEdges(stmts, depRelPath, symbols, fileNodeRow, importedNames);
return edgesAdded;
}

// ── Directory containment edges ────────────────────────────────────────

function rebuildDirContainment(_db, stmts, relPath) {
const dir = normalizePath(path.dirname(relPath));
if (!dir || dir === '.') return 0;
const dirRow = stmts.getNodeId.get(dir, 'directory', dir, 0);
const fileRow = stmts.getNodeId.get(relPath, 'file', relPath, 0);
if (dirRow && fileRow) {
stmts.insertEdge.run(dirRow.id, fileRow.id, 'contains', 1.0, 0);
return 1;
}
return 0;
}

// ── Ancillary table cleanup ────────────────────────────────────────────

function purgeAncillaryData(db, relPath) {
const tryExec = (sql, ...args) => {
try {
db.prepare(sql).run(...args);
} catch (err) {
if (!err?.message?.includes('no such table')) throw err;
}
};
tryExec(
'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
relPath,
);
tryExec(
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
relPath,
);
tryExec(
'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
relPath,
);
tryExec(
'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
relPath,
);
tryExec(
'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)',
relPath,
relPath,
);
tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath);
}
Comment on lines +150 to +182

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 purgeAncillaryData silently swallows all exceptions

The bare catch {} in tryExec is commented as /* table may not exist */, but it catches every exception — not just SQLite's "no such table" error. If any DELETE fails for a genuine reason (e.g., a locked database, a schema inconsistency, or a bug in the WHERE clause), the error is silently discarded. The subsequent stmts.deleteNodes.run(relPath) on line 382 would then fail with SQLITE_CONSTRAINT_FOREIGNKEY because the ancillary rows (which still reference those nodes' IDs) were never cleaned up. This is especially risky because the entire function exists specifically to prevent FK violations.

A more targeted guard would only ignore "no such table" errors:

const tryExec = (sql, ...args) => {
  try {
    db.prepare(sql).run(...args);
  } catch (err) {
    if (!err?.message?.includes('no such table')) throw err;
  }
};

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — \ now only catches errors containing 'no such table' and re-throws all other exceptions. This prevents masking genuine write failures while still handling the optional-table case.


// ── Import edge building ────────────────────────────────────────────────

function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases) {
// Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops)
let _barrelDb = null;
let _isBarrelStmt = null;
let _reexportTargetsStmt = null;
let _hasDefStmt = null;

function getBarrelStmts(db) {
if (_barrelDb !== db) {
_barrelDb = db;
_isBarrelStmt = db.prepare(
`SELECT COUNT(*) as c FROM edges e
JOIN nodes n ON e.source_id = n.id
WHERE e.kind = 'reexports' AND n.file = ? AND n.kind = 'file'`,
);
_reexportTargetsStmt = db.prepare(
`SELECT DISTINCT n2.file FROM edges e
JOIN nodes n1 ON e.source_id = n1.id
JOIN nodes n2 ON e.target_id = n2.id
WHERE e.kind = 'reexports' AND n1.file = ? AND n1.kind = 'file'`,
);
_hasDefStmt = db.prepare(
`SELECT 1 FROM nodes WHERE name = ? AND file = ? AND kind != 'file' AND kind != 'directory' LIMIT 1`,
);
}
return {
isBarrelStmt: _isBarrelStmt,
reexportTargetsStmt: _reexportTargetsStmt,
hasDefStmt: _hasDefStmt,
};
}

function isBarrelFile(db, relPath) {
const { isBarrelStmt } = getBarrelStmts(db);
const reexportCount = isBarrelStmt.get(relPath)?.c;
return (reexportCount || 0) > 0;
}

function resolveBarrelTarget(db, barrelPath, symbolName, visited = new Set()) {
if (visited.has(barrelPath)) return null;
visited.add(barrelPath);

const { reexportTargetsStmt, hasDefStmt } = getBarrelStmts(db);

// Find re-export targets from this barrel
const reexportTargets = reexportTargetsStmt.all(barrelPath);

for (const { file: targetFile } of reexportTargets) {
// Check if the symbol is defined in this target file
const hasDef = hasDefStmt.get(symbolName, targetFile);
if (hasDef) return targetFile;

// Recurse through barrel chains
if (isBarrelFile(db, targetFile)) {
const deeper = resolveBarrelTarget(db, targetFile, symbolName, visited);
if (deeper) return deeper;
}
}
return null;
}

/**
* Resolve barrel imports for a single import statement and create edges to actual source files.
* Shared by buildImportEdges (primary file) and Pass 2 of the reverse-dep cascade.
*/
function resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp) {
let edgesAdded = 0;
if (!isBarrelFile(db, resolvedPath)) return edgesAdded;
const resolvedSources = new Set();
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
const actualSource = resolveBarrelTarget(db, resolvedPath, cleanName);
if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) {
resolvedSources.add(actualSource);
const actualRow = stmts.getNodeId.get(actualSource, 'file', actualSource, 0);
if (actualRow) {
const kind = imp.typeOnly ? 'imports-type' : 'imports';
stmts.insertEdge.run(fileNodeId, actualRow.id, kind, 0.9, 0);
edgesAdded++;
}
}
}
return edgesAdded;
}

function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases, db) {
let edgesAdded = 0;
for (const imp of symbols.imports) {
const resolvedPath = resolveImportPath(
Expand All @@ -40,6 +281,11 @@ function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases)
const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
edgesAdded++;

// Barrel resolution: create edges through re-export chains
if (!imp.reexport && db) {
edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp);
}
}
}
return edgesAdded;
Expand Down Expand Up @@ -116,7 +362,13 @@ function resolveCallTargets(stmts, call, relPath, importedNames, typeMap) {
}

function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) {
const typeMap = symbols.typeMap || new Map();
const rawTM = symbols.typeMap;
const typeMap =
rawTM instanceof Map
? rawTM
: Array.isArray(rawTM) && rawTM.length > 0
? new Map(rawTM.map((e) => [e.name, e.typeName ?? e.type ?? null]))
: new Map();
let edgesAdded = 0;
for (const call of symbols.calls) {
if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
Expand Down Expand Up @@ -146,7 +398,7 @@ function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) {
/**
* Parse a single file and update the database incrementally.
*
* @param {import('better-sqlite3').Database} _db
* @param {import('better-sqlite3').Database} db
* @param {string} rootDir - Absolute root directory
* @param {string} filePath - Absolute file path
* @param {object} stmts - Prepared DB statements
Expand All @@ -156,12 +408,17 @@ function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) {
* @param {Function} [options.diffSymbols] - Symbol diff function
* @returns {Promise<object|null>} Update result or null on failure
*/
export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cache, options = {}) {
export async function rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, options = {}) {
const { diffSymbols } = options;
const relPath = normalizePath(path.relative(rootDir, filePath));
const oldNodes = stmts.countNodes.get(relPath)?.c || 0;
const oldSymbols = diffSymbols ? stmts.listSymbols.all(relPath) : [];

// Find reverse-deps BEFORE purging (edges still reference the old nodes)
const reverseDeps = findReverseDeps(db, relPath);

// Purge ancillary tables, then edges, then nodes
purgeAncillaryData(db, relPath);
stmts.deleteEdgesForFile.run(relPath);
stmts.deleteNodes.run(relPath);

Expand Down Expand Up @@ -203,10 +460,44 @@ export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cac

const aliases = { baseUrl: null, paths: {} };

let edgesAdded = buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases);
let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
edgesAdded += rebuildDirContainment(db, stmts, relPath);
edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);

// Cascade: rebuild outgoing edges for reverse-dep files.
// Two-pass approach: first rebuild direct edges (creating reexports edges for barrels),
// then add barrel import edges (which need reexports edges to exist for resolution).
const depSymbols = new Map();
for (const depRelPath of reverseDeps) {
const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
if (symbols_) {
deleteOutgoingEdges(db, depRelPath);
depSymbols.set(depRelPath, symbols_);
}
}
Comment on lines +473 to +479

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Reverse-dep edges permanently deleted when parse fails

deleteOutgoingEdges is called unconditionally for every reverse-dep file before attempting to parse it. If parseReverseDep returns null (file unreadable, parse error, or file already deleted), no entry is placed in depSymbols. Pass 1 and Pass 2 then skip the file entirely, meaning its outgoing edges were wiped and are never rebuilt — leaving the graph in a permanently inconsistent state until the next full build.

The fix is to parse first and only delete edges for files that successfully parsed:

// Parse BEFORE deleting so a failed parse doesn't orphan the file
const depSymbols = new Map();
for (const depRelPath of reverseDeps) {
  const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
  if (symbols_) depSymbols.set(depRelPath, symbols_);
}
// Now it's safe to delete — every file in depSymbols will be rebuilt
for (const [depRelPath] of depSymbols) {
  deleteOutgoingEdges(db, depRelPath);
}

This preserves the invariant that edges are only deleted when they will be immediately rebuilt.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in bdf3f77 — moved deleteOutgoingEdges after parseReverseDep succeeds. If parsing returns null, edges are now preserved instead of permanently lost. Also fixed a TypeError: typeMap.get is not a function in the same commit: the native engine returns typeMap as a plain array, which was not being coerced to a Map for JS files after the TS-only backfill restriction.

// Pass 1: direct edges only (no barrel resolution) — creates reexports edges
for (const [depRelPath, symbols_] of depSymbols) {
edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
}
// Pass 2: add barrel import edges (reexports edges now exist)
for (const [depRelPath, symbols_] of depSymbols) {
const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
if (!fileNodeRow_) continue;
const aliases_ = { baseUrl: null, paths: {} };
for (const imp of symbols_.imports) {
if (imp.reexport) continue;
const resolvedPath = resolveImportPath(
path.join(rootDir, depRelPath),
imp.source,
rootDir,
aliases_,
);
edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeRow_.id, resolvedPath, imp);
}
}

const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null;
const event = oldNodes === 0 ? 'added' : 'modified';

Expand Down
2 changes: 1 addition & 1 deletion tests/benchmarks/resolution/fixtures/typescript/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { JsonSerializer } from './serializer';
import { createService, UserService } from './service';
import { createService, type UserService } from './service';

export function main(): void {
const svc = createService();
Expand Down
9 changes: 9 additions & 0 deletions tests/fixtures/deep-deps-project/app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { formatOutput } from './features/format.js';
import { runQuery } from './features/query.js';
import { MAX_ITEMS } from './shared/constants.js';

export function main(input, page) {
const results = runQuery(input, page);
const label = formatOutput(input);
return { label, results, max: MAX_ITEMS };
}
2 changes: 2 additions & 0 deletions tests/fixtures/deep-deps-project/domain/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export { parseItems } from './parser.js';
export { resolve } from './resolver.js';
6 changes: 6 additions & 0 deletions tests/fixtures/deep-deps-project/domain/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { clamp, MAX_ITEMS } from '../shared/index.js';

export function parseItems(raw) {
const items = raw.split(',').map((s) => s.trim());
return items.slice(0, clamp(items.length, 0, MAX_ITEMS));
}
Loading
Loading