Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 6 additions & 41 deletions src/domain/graph/builder/incremental.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/
import fs from 'node:fs';
import path from 'node:path';
import { bulkNodeIdsByFile } from '../../../db/index.js';
import { bulkNodeIdsByFile, purgeFileData } from '../../../db/index.js';
import { debug, warn } from '../../../infrastructure/logger.js';
import { normalizePath } from '../../../shared/constants.js';
import type {
Expand All @@ -29,8 +29,6 @@ export interface IncrementalStmts {
insertNode: { run: (...params: unknown[]) => unknown };
insertEdge: { run: (...params: unknown[]) => unknown };
getNodeId: { get: (...params: unknown[]) => { id: number } | undefined };
deleteEdgesForFile: { run: (...params: unknown[]) => unknown };
deleteNodes: { run: (...params: unknown[]) => unknown };
countNodes: { get: (...params: unknown[]) => { c: number } | undefined };
listSymbols: { all: (...params: unknown[]) => unknown[] };
findNodeInFile: { all: (...params: unknown[]) => unknown[] };
Expand Down Expand Up @@ -208,40 +206,6 @@ function rebuildDirContainment(
return 0;
}

// ── Ancillary table cleanup ────────────────────────────────────────────

/**
 * Delete the rows in the ancillary tables that belong to the nodes of one file.
 *
 * Most statements locate their rows through `SELECT id FROM nodes WHERE file = ?`,
 * so this has to run while the file's rows are still present in `nodes`.
 *
 * `embeddings` is purged as well: `embeddings.node_id` references `nodes.id`, so
 * rows left behind make a later delete of the file's nodes fail with
 * `FOREIGN KEY constraint failed` when foreign keys are enforced.
 *
 * A table that does not exist is skipped: an error whose message contains
 * 'no such table' is swallowed and the remaining statements still run.
 *
 * @param db - Database handle; only `prepare(sql).run(...args)` is used here.
 * @param relPath - Value bound to every `file = ?` placeholder.
 * @throws Any error from `prepare`/`run` whose message does not contain 'no such table'.
 */
function purgeAncillaryData(db: BetterSqlite3Database, relPath: string): void {
  // Run one statement, tolerating only the "table was never created" case.
  const tryExec = (sql: string, ...args: string[]): void => {
    try {
      db.prepare(sql).run(...args);
    } catch (err: unknown) {
      if (!(err as Error | undefined)?.message?.includes('no such table')) throw err;
    }
  };
  // Embeddings hold a foreign key onto nodes.id, so they must be cleared here,
  // before the caller removes the file's nodes.
  tryExec(
    'DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  tryExec(
    'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
  );
  // Two placeholders (source side and target side), hence relPath is bound twice.
  tryExec(
    'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)',
    relPath,
    relPath,
  );
  // ast_nodes is keyed by file path directly, not by node id.
  tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath);
}

// ── Import edge building ────────────────────────────────────────────────

// Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops)
Expand Down Expand Up @@ -547,10 +511,11 @@ export async function rebuildFile(
// Find reverse-deps BEFORE purging (edges still reference the old nodes)
const reverseDeps = findReverseDeps(db, relPath);

// Purge ancillary tables, then edges, then nodes
purgeAncillaryData(db, relPath);
stmts.deleteEdgesForFile.run(relPath);
stmts.deleteNodes.run(relPath);
// Purge ancillary tables (incl. embeddings), edges, and nodes in one pass.
// Embeddings must be purged before nodes — better-sqlite3 enforces foreign
// keys by default, and `embeddings.node_id` references `nodes.id`. Issue #1176.
// `purgeHashes: false` preserves file_hashes for the next incremental build.
purgeFileData(db, relPath, { purgeHashes: false });

if (!fs.existsSync(filePath)) {
if (cache) (cache as { remove(p: string): void }).remove(filePath);
Expand Down
44 changes: 21 additions & 23 deletions src/domain/graph/watcher.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from 'node:fs';
import path from 'node:path';
import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js';
import { debug, info } from '../../infrastructure/logger.js';
import { debug, info, warn } from '../../infrastructure/logger.js';
import { isSupportedFile, normalizePath, shouldIgnore } from '../../shared/constants.js';
import { DbError } from '../../shared/errors.js';
import { createParseTreeCache, getActiveEngine } from '../parser.js';
Expand All @@ -16,23 +16,21 @@ function shouldIgnorePath(filePath: string): boolean {

/** Prepare all SQL statements needed by the watcher's incremental rebuild. */
function prepareWatcherStatements(db: ReturnType<typeof openDb>): IncrementalStmts {
const stmts = {
return {
insertNode: db.prepare(
'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)',
),
getNodeId: {
get: (name: string, kind: string, file: string, line: number) => {
get: (...params: unknown[]) => {
const [name, kind, file, line] = params as [string, string, string, number];
const id = getNodeIdQuery(db, name, kind, file, line);
return id != null ? { id } : undefined;
},
},
insertEdge: db.prepare(
'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
),
deleteNodes: db.prepare('DELETE FROM nodes WHERE file = ?'),
deleteEdgesForFile: null as { run: (f: string) => void } | null,
countNodes: db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?'),
countEdgesForFile: null as { get: (f: string) => { c: number } | undefined } | null,
findNodeInFile: db.prepare(
"SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?",
),
Expand All @@ -41,19 +39,6 @@ function prepareWatcherStatements(db: ReturnType<typeof openDb>): IncrementalStm
),
listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"),
};

const origDeleteEdges = db.prepare(
`DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`,
);
const origCountEdges = db.prepare(
`SELECT COUNT(*) as c FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`,
);
stmts.deleteEdgesForFile = { run: (f: string) => origDeleteEdges.run({ f }) };
stmts.countEdgesForFile = {
get: (f: string) => origCountEdges.get({ f }) as { c: number } | undefined,
};

return stmts as IncrementalStmts;
}

/** Rebuild result shape from rebuildFile. */
Expand All @@ -80,10 +65,23 @@ async function processPendingFiles(
): Promise<void> {
const results: RebuildResult[] = [];
for (const filePath of files) {
const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, {
diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown,
})) as RebuildResult | null;
if (result) results.push(result);
// Per-file try/catch so one bad rebuild doesn't crash the watcher loop.
// The watcher is a long-running session — any SQLite error, parse failure,
// or filesystem race must be reported and skipped, not propagated. Issue #1176.
try {
const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, {
diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown,
})) as RebuildResult | null;
if (result) results.push(result);
} catch (err: unknown) {
const relPath = normalizePath(path.relative(rootDir, filePath));
// Narrow with `instanceof` instead of casting: a non-Error throw (a plain
// string, `null`, or any value a third-party dependency throws) would log
// `(err as Error).message` as `undefined`. See Greptile review on #1182.
const message = err instanceof Error ? err.message : String(err);
warn(`Failed to rebuild ${relPath}: ${message} — skipping`);
debug(err instanceof Error ? (err.stack ?? message) : String(err));
}
}

if (results.length > 0) {
Expand Down
128 changes: 128 additions & 0 deletions tests/integration/watcher-fk-embeddings.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
* Regression test for #1176 — watch-mode rebuildFile must purge `embeddings`
* before deleting nodes, otherwise `FOREIGN KEY constraint failed` crashes the
* watcher (better-sqlite3 enforces FKs by default).
*
* Setup mirrors the user-reported reproduction: full build, write an
* `embeddings` row referencing a node from the file we're about to rebuild,
* then run `rebuildFile` and assert it returns cleanly.
*/

import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import Database from 'better-sqlite3';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { getNodeId as getNodeIdQuery, initSchema, openDb } from '../../src/db/index.js';
import { rebuildFile } from '../../src/domain/graph/builder/incremental.js';
import { buildGraph } from '../../src/domain/graph/builder.js';

const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'deep-deps-project');

/**
 * Recursively copy the directory tree rooted at `src` into `dest`.
 *
 * `dest` (and any missing parents) is created first. Entries that report as
 * directories are descended into; every other entry is copied with
 * `fs.copyFileSync`.
 *
 * @param src - Existing directory to read from.
 * @param dest - Directory to create and fill.
 */
function copyDirSync(src: string, dest: string): void {
  fs.mkdirSync(dest, { recursive: true });
  const children = fs.readdirSync(src, { withFileTypes: true });
  children.forEach((child) => {
    const from = path.join(src, child.name);
    const to = path.join(dest, child.name);
    if (!child.isDirectory()) {
      fs.copyFileSync(from, to);
      return;
    }
    copyDirSync(from, to);
  });
}

/**
 * Build the statement bundle handed to `rebuildFile` as its fourth argument.
 *
 * Every entry except `getNodeId` is a statement prepared on `db`; `getNodeId`
 * wraps `getNodeIdQuery` and reshapes its result into `{ id }` or `undefined`.
 *
 * @param db - Open database connection the statements are prepared on.
 * @returns The bundle, cast to the type of `rebuildFile`'s fourth parameter.
 */
function makeStmts(db: Database.Database): Parameters<typeof rebuildFile>[3] {
  const insertNode = db.prepare(
    'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)',
  );
  const insertEdge = db.prepare(
    'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
  );
  const countNodes = db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?');
  const findNodeInFile = db.prepare(
    "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?",
  );
  const findNodeByName = db.prepare(
    "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant')",
  );
  const listSymbols = db.prepare(
    "SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'",
  );

  // Adapter: look the node up through the shared query helper and wrap a hit as `{ id }`.
  const getNodeId = {
    get: (name: string, kind: string, file: string, line: number) => {
      const found = getNodeIdQuery(db, name, kind, file, line);
      if (found == null) return undefined;
      return { id: found };
    },
  };

  return {
    insertNode,
    getNodeId,
    insertEdge,
    countNodes,
    findNodeInFile,
    findNodeByName,
    listSymbols,
  } as Parameters<typeof rebuildFile>[3];
}

// Suite: rebuilding a file whose nodes are referenced from `embeddings` must
// succeed and must remove the referencing embedding rows.
describe('rebuildFile FK safety with embeddings (#1176)', () => {
  let workDir: string;
  let tmpBase: string;
  let dbPath: string;

  // One-time setup: copy the fixture into a temp dir, run a full build, then
  // seed a single embeddings row pointing at a node of shared/constants.js.
  beforeAll(async () => {
    tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk-1176-'));
    workDir = path.join(tmpBase, 'project');
    copyDirSync(FIXTURE_DIR, workDir);

    await buildGraph(workDir, { incremental: false, skipRegistry: true });

    dbPath = path.join(workDir, '.codegraph', 'graph.db');

    // Simulate `codegraph embed`: create the embeddings table (better-sqlite3
    // creates it lazily in `initEmbeddingsSchema`) and insert a row that
    // references a node belonging to the file we are about to rebuild.
    const seed = new Database(dbPath);
    try {
      seed.exec(`
CREATE TABLE IF NOT EXISTS embeddings (
node_id INTEGER PRIMARY KEY,
vector BLOB NOT NULL,
text_preview TEXT,
FOREIGN KEY(node_id) REFERENCES nodes(id)
);
`);
      const target = seed
        .prepare('SELECT id FROM nodes WHERE file = ? LIMIT 1')
        .get('shared/constants.js') as { id: number } | undefined;
      expect(target, 'fixture should contain a node for shared/constants.js').toBeDefined();
      seed
        .prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)')
        .run(target!.id, Buffer.from([0, 1, 2, 3]), 'seeded');
    } finally {
      seed.close();
    }
  }, 60_000);

  // Best-effort cleanup of the temp tree; failures to remove it are ignored.
  afterAll(() => {
    try {
      if (tmpBase) fs.rmSync(tmpBase, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  it('does not throw FOREIGN KEY constraint failed when rebuilding a file with embeddings', async () => {
    const db = openDb(dbPath);
    // Close the connection in `finally` so a failing assertion does not leave
    // the handle open (same pattern as the seed connection in beforeAll).
    try {
      initSchema(db);
      // Make this connection match the watcher's: better-sqlite3 enables foreign
      // keys by default in v9+. Set explicitly so this test catches a regression
      // even on older builds.
      db.pragma('foreign_keys = ON');
      const stmts = makeStmts(db);
      const leafPath = path.join(workDir, 'shared', 'constants.js');
      fs.appendFileSync(leafPath, '\n// touched\n');

      await expect(
        rebuildFile(db, workDir, leafPath, stmts, { engine: 'auto' }, null),
      ).resolves.not.toBeNull();

      // The seeded embedding row should be gone — embeddings for a rebuilt
      // file are purged alongside the nodes they referenced. Count all rows in
      // `embeddings` directly (exactly one was seeded) so the assertion still
      // fails if the row survives as an orphan with a dangling node_id.
      const remaining = db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number };
      expect(remaining.c).toBe(0);
    } finally {
      db.close();
    }
  }, 60_000);
});
16 changes: 1 addition & 15 deletions tests/integration/watcher-rebuild.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function readGraph(dbPath) {

/** Build the prepared statements object that watcher.js normally provides. */
function makeStmts(db) {
const stmts = {
return {
insertNode: db.prepare(
'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)',
),
Expand All @@ -66,10 +66,7 @@ function makeStmts(db) {
insertEdge: db.prepare(
'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
),
deleteNodes: db.prepare('DELETE FROM nodes WHERE file = ?'),
deleteEdgesForFile: null,
countNodes: db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?'),
countEdgesForFile: null,
findNodeInFile: db.prepare(
"SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?",
),
Expand All @@ -78,17 +75,6 @@ function makeStmts(db) {
),
listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"),
};

const origDeleteEdges = db.prepare(
`DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`,
);
const origCountEdges = db.prepare(
`SELECT COUNT(*) as c FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`,
);
stmts.deleteEdgesForFile = { run: (f) => origDeleteEdges.run({ f }) };
stmts.countEdgesForFile = { get: (f) => origCountEdges.get({ f }) };

return stmts;
}

describe('Watcher rebuildFile parity (#533)', () => {
Expand Down
Loading