From d7e94a9eecb1babe234dc92768f4b43a01b93d60 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 20 May 2026 19:41:18 -0600 Subject: [PATCH 1/3] fix(watch): purge embeddings before nodes to stop FK crash in rebuildFile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watcher's `rebuildFile` had its own `purgeAncillaryData` helper that missed the `embeddings` table. With `codegraph embed` populated and better-sqlite3's default `foreign_keys = ON`, the next `deleteNodes` fired `SQLITE_CONSTRAINT_FOREIGNKEY` (embeddings.node_id → nodes.id) and killed the watcher process. Switch to the shared `purgeFileData` helper used by the main builder — it purges embeddings, cfg_*, dataflow, complexity, node_metrics, ast_nodes, edges, and nodes in the correct order — and drop the now unused stmt prep. Wrap per-file `rebuildFile` calls in try/catch so any future SQLite or parse error logs+skips instead of crashing the long-running watcher. Closes #1176 --- src/domain/graph/builder/incremental.ts | 47 +------ src/domain/graph/watcher.ts | 37 ++--- .../integration/watcher-fk-embeddings.test.ts | 132 ++++++++++++++++++ tests/integration/watcher-rebuild.test.ts | 16 +-- 4 files changed, 154 insertions(+), 78 deletions(-) create mode 100644 tests/integration/watcher-fk-embeddings.test.ts diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts index c35ed51da..66853983e 100644 --- a/src/domain/graph/builder/incremental.ts +++ b/src/domain/graph/builder/incremental.ts @@ -9,7 +9,7 @@ */ import fs from 'node:fs'; import path from 'node:path'; -import { bulkNodeIdsByFile } from '../../../db/index.js'; +import { bulkNodeIdsByFile, purgeFileData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; import { normalizePath } from '../../../shared/constants.js'; import type { @@ -29,8 +29,6 @@ export interface IncrementalStmts { insertNode: { run: (...params: unknown[]) => unknown }; insertEdge: { run: (...params: unknown[]) => unknown }; getNodeId: { get: (...params: unknown[]) => { id: number } | undefined }; - deleteEdgesForFile: { run: (...params: unknown[]) => unknown }; - deleteNodes: { run: (...params: unknown[]) => unknown }; countNodes: { get: (...params: unknown[]) => { c: number } | undefined }; listSymbols: { all: (...params: unknown[]) => unknown[] }; findNodeInFile: { all: (...params: unknown[]) => unknown[] }; @@ -208,40 +206,6 @@ function rebuildDirContainment( return 0; } -// ── Ancillary table cleanup ──────────────────────────────────────────── - -function purgeAncillaryData(db: BetterSqlite3Database, relPath: string): void { - const tryExec = (sql: string, ...args: string[]): void => { - try { - db.prepare(sql).run(...args); - } catch (err: unknown) { - if (!(err as Error | undefined)?.message?.includes('no such table')) throw err; - } - }; - tryExec( - 'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', - relPath, - ); - tryExec( - 'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)', - relPath, - ); - tryExec( - 'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)', - relPath, - ); - tryExec( - 'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)', - relPath, - ); - tryExec( - 'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)', - relPath, - relPath, - ); - tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath); -} - // ── Import edge building ──────────────────────────────────────────────── // Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops) @@ -547,10 +511,11 @@ export async function rebuildFile( // Find reverse-deps BEFORE purging (edges still reference the old nodes) const reverseDeps = findReverseDeps(db, relPath); - // Purge ancillary tables, then edges, then nodes - purgeAncillaryData(db, relPath); - stmts.deleteEdgesForFile.run(relPath); - stmts.deleteNodes.run(relPath); + // Purge ancillary tables (incl. embeddings), edges, and nodes in one pass. + // Embeddings must be purged before nodes — better-sqlite3 enforces foreign + // keys by default, and `embeddings.node_id` references `nodes.id`. Issue #1176. + // `purgeHashes: false` preserves file_hashes for the next incremental build. + purgeFileData(db, relPath, { purgeHashes: false }); if (!fs.existsSync(filePath)) { if (cache) (cache as { remove(p: string): void }).remove(filePath); diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index 0738151af..6a8ce9bbd 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -1,7 +1,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js'; -import { debug, info } from '../../infrastructure/logger.js'; +import { debug, info, warn } from '../../infrastructure/logger.js'; import { isSupportedFile, normalizePath, shouldIgnore } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; @@ -16,7 +16,7 @@ function shouldIgnorePath(filePath: string): boolean { /** Prepare all SQL statements needed by the watcher's incremental rebuild. */ function prepareWatcherStatements(db: ReturnType): IncrementalStmts { - const stmts = { + return { insertNode: db.prepare( 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', ), @@ -29,10 +29,7 @@ function prepareWatcherStatements(db: ReturnType): IncrementalStm insertEdge: db.prepare( 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', ), - deleteNodes: db.prepare('DELETE FROM nodes WHERE file = ?'), - deleteEdgesForFile: null as { run: (f: string) => void } | null, countNodes: db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?'), - countEdgesForFile: null as { get: (f: string) => { c: number } | undefined } | null, findNodeInFile: db.prepare( "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?", ), @@ -41,19 +38,6 @@ function prepareWatcherStatements(db: ReturnType): IncrementalStm ), listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"), }; - - const origDeleteEdges = db.prepare( - `DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`, - ); - const origCountEdges = db.prepare( - `SELECT COUNT(*) as c FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`, - ); - stmts.deleteEdgesForFile = { run: (f: string) => origDeleteEdges.run({ f }) }; - stmts.countEdgesForFile = { - get: (f: string) => origCountEdges.get({ f }) as { c: number } | undefined, - }; - - return stmts as IncrementalStmts; } /** Rebuild result shape from rebuildFile. */ @@ -80,10 +64,19 @@ async function processPendingFiles( ): Promise { const results: RebuildResult[] = []; for (const filePath of files) { - const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, { - diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown, - })) as RebuildResult | null; - if (result) results.push(result); + // Per-file try/catch so one bad rebuild doesn't crash the watcher loop. + // The watcher is a long-running session — any SQLite error, parse failure, + // or filesystem race must be reported and skipped, not propagated. Issue #1176. + try { + const result = (await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, { + diffSymbols: diffSymbols as (old: unknown[], new_: unknown[]) => unknown, + })) as RebuildResult | null; + if (result) results.push(result); + } catch (err: unknown) { + const relPath = normalizePath(path.relative(rootDir, filePath)); + warn(`Failed to rebuild ${relPath}: ${(err as Error).message} — skipping`); + debug((err as Error).stack ?? String(err)); + } } if (results.length > 0) { diff --git a/tests/integration/watcher-fk-embeddings.test.ts b/tests/integration/watcher-fk-embeddings.test.ts new file mode 100644 index 000000000..c60455d67 --- /dev/null +++ b/tests/integration/watcher-fk-embeddings.test.ts @@ -0,0 +1,132 @@ +/** + * Regression test for #1176 — watch-mode rebuildFile must purge `embeddings` + * before deleting nodes, otherwise `FOREIGN KEY constraint failed` crashes the + * watcher (better-sqlite3 enforces FKs by default). + * + * Setup mirrors the user-reported reproduction: full build, write an + * `embeddings` row referencing a node from the file we're about to rebuild, + * then run `rebuildFile` and assert it returns cleanly. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { getNodeId as getNodeIdQuery, initSchema, openDb } from '../../src/db/index.js'; +import { rebuildFile } from '../../src/domain/graph/builder/incremental.js'; +import { buildGraph } from '../../src/domain/graph/builder.js'; + +const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'deep-deps-project'); + +function copyDirSync(src: string, dest: string): void { + fs.mkdirSync(dest, { recursive: true }); + for (const entry of fs.readdirSync(src, { withFileTypes: true })) { + const s = path.join(src, entry.name); + const d = path.join(dest, entry.name); + if (entry.isDirectory()) copyDirSync(s, d); + else fs.copyFileSync(s, d); + } +} + +function makeStmts(db: Database.Database): Parameters[3] { + return { + insertNode: db.prepare( + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ), + getNodeId: { + get: (name: string, kind: string, file: string, line: number) => { + const id = getNodeIdQuery(db, name, kind, file, line); + return id != null ? { id } : undefined; + }, + }, + insertEdge: db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', + ), + countNodes: db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?'), + findNodeInFile: db.prepare( + "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?", + ), + findNodeByName: db.prepare( + "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant')", + ), + listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"), + } as Parameters[3]; +} + +describe('rebuildFile FK safety with embeddings (#1176)', () => { + let workDir: string; + let tmpBase: string; + let dbPath: string; + + beforeAll(async () => { + tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk-1176-')); + workDir = path.join(tmpBase, 'project'); + copyDirSync(FIXTURE_DIR, workDir); + + await buildGraph(workDir, { incremental: false, skipRegistry: true }); + + dbPath = path.join(workDir, '.codegraph', 'graph.db'); + + // Simulate `codegraph embed`: create the embeddings table (better-sqlite3 + // creates it lazily in `initEmbeddingsSchema`) and insert a row that + // references a node belonging to the file we are about to rebuild. + const seed = new Database(dbPath); + try { + seed.exec(` + CREATE TABLE IF NOT EXISTS embeddings ( + node_id INTEGER PRIMARY KEY, + vector BLOB NOT NULL, + text_preview TEXT, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + `); + const target = seed + .prepare('SELECT id FROM nodes WHERE file = ? LIMIT 1') + .get('shared/constants.js') as { id: number } | undefined; + expect(target, 'fixture should contain a node for shared/constants.js').toBeDefined(); + seed + .prepare('INSERT INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)') + .run(target!.id, Buffer.from([0, 1, 2, 3]), 'seeded'); + } finally { + seed.close(); + } + }, 60_000); + + afterAll(() => { + try { + if (tmpBase) fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('does not throw FOREIGN KEY constraint failed when rebuilding a file with embeddings', async () => { + const db = openDb(dbPath); + initSchema(db); + // Make this connection match the watcher's: better-sqlite3 enables foreign + // keys by default in v9+. Set explicitly so this test catches a regression + // even on older builds. + db.pragma('foreign_keys = ON'); + const stmts = makeStmts(db); + const leafPath = path.join(workDir, 'shared', 'constants.js'); + fs.appendFileSync(leafPath, '\n// touched\n'); + + await expect( + rebuildFile(db, workDir, leafPath, stmts, { engine: 'auto' }, null), + ).resolves.not.toBeNull(); + + // The seeded embedding row should be gone — embeddings for a rebuilt + // file are purged alongside the nodes they referenced. + const remaining = db + .prepare( + `SELECT COUNT(*) AS c FROM embeddings e + JOIN nodes n ON e.node_id = n.id + WHERE n.file = ?`, + ) + .get('shared/constants.js') as { c: number }; + expect(remaining.c).toBe(0); + + db.close(); + }, 60_000); +}); diff --git a/tests/integration/watcher-rebuild.test.ts b/tests/integration/watcher-rebuild.test.ts index be394f71f..29c31827d 100644 --- a/tests/integration/watcher-rebuild.test.ts +++ b/tests/integration/watcher-rebuild.test.ts @@ -53,7 +53,7 @@ function readGraph(dbPath) { /** Build the prepared statements object that watcher.js normally provides. */ function makeStmts(db) { - const stmts = { + return { insertNode: db.prepare( 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', ), @@ -66,10 +66,7 @@ function makeStmts(db) { insertEdge: db.prepare( 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', ), - deleteNodes: db.prepare('DELETE FROM nodes WHERE file = ?'), - deleteEdgesForFile: null, countNodes: db.prepare('SELECT COUNT(*) as c FROM nodes WHERE file = ?'), - countEdgesForFile: null, findNodeInFile: db.prepare( "SELECT id, file FROM nodes WHERE name = ? AND kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module', 'constant') AND file = ?", ), @@ -78,17 +75,6 @@ function makeStmts(db) { ), listSymbols: db.prepare("SELECT name, kind, line FROM nodes WHERE file = ? AND kind != 'file'"), }; - - const origDeleteEdges = db.prepare( - `DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`, - ); - const origCountEdges = db.prepare( - `SELECT COUNT(*) as c FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f) OR target_id IN (SELECT id FROM nodes WHERE file = @f)`, - ); - stmts.deleteEdgesForFile = { run: (f) => origDeleteEdges.run({ f }) }; - stmts.countEdgesForFile = { get: (f) => origCountEdges.get({ f }) }; - - return stmts; } describe('Watcher rebuildFile parity (#533)', () => { From 2e786f0b09eb032453ac7a9bd771f65ade249523 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 20 May 2026 20:47:06 -0600 Subject: [PATCH 2/3] fix(watch): widen getNodeId.get to match IncrementalStmts signature (#1182) The watcher's local getNodeId.get used a typed parameter list (name, kind, file, line) but IncrementalStmts declares get: (...params: unknown[]). TS2322 rejected the narrower form, which broke the build job and all downstream CI checks. Switch to (...params: unknown[]) and destructure inside the body so the implementation matches the interface contract. --- src/domain/graph/watcher.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index 6a8ce9bbd..7a72bc735 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -21,7 +21,8 @@ function prepareWatcherStatements(db: ReturnType): IncrementalStm 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', ), getNodeId: { - get: (name: string, kind: string, file: string, line: number) => { + get: (...params: unknown[]) => { + const [name, kind, file, line] = params as [string, string, string, number]; const id = getNodeIdQuery(db, name, kind, file, line); return id != null ? { id } : undefined; }, From d2db31668098268014c0e55f254cc09cae3052db Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 20 May 2026 20:47:31 -0600 Subject: [PATCH 3/3] fix(watch): narrow error type and assert all embeddings purged (#1182) Addresses Greptile P2 review feedback: - src/domain/graph/watcher.ts: replace `(err as Error).message` with `instanceof Error` narrowing so a non-Error throw (plain string, null, or any value from a third-party dependency) logs a usable message instead of "undefined". - tests/integration/watcher-fk-embeddings.test.ts: count all rows in `embeddings` directly instead of joining on `nodes.file`. The JOIN-based check would pass even if the seeded row survived as an orphan with a dangling node_id; counting the table itself catches that regression. --- src/domain/graph/watcher.ts | 8 ++++++-- tests/integration/watcher-fk-embeddings.test.ts | 12 ++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index 7a72bc735..103adb5fb 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -75,8 +75,12 @@ async function processPendingFiles( if (result) results.push(result); } catch (err: unknown) { const relPath = normalizePath(path.relative(rootDir, filePath)); - warn(`Failed to rebuild ${relPath}: ${(err as Error).message} — skipping`); - debug((err as Error).stack ?? String(err)); + // Narrow with `instanceof` instead of casting: a non-Error throw (a plain + // string, `null`, or any value a third-party dependency throws) would log + // `(err as Error).message` as `undefined`. See Greptile review on #1182. + const message = err instanceof Error ? err.message : String(err); + warn(`Failed to rebuild ${relPath}: ${message} — skipping`); + debug(err instanceof Error ? (err.stack ?? message) : String(err)); } } diff --git a/tests/integration/watcher-fk-embeddings.test.ts b/tests/integration/watcher-fk-embeddings.test.ts index c60455d67..133231d07 100644 --- a/tests/integration/watcher-fk-embeddings.test.ts +++ b/tests/integration/watcher-fk-embeddings.test.ts @@ -117,14 +117,10 @@ describe('rebuildFile FK safety with embeddings (#1176)', () => { ).resolves.not.toBeNull(); // The seeded embedding row should be gone — embeddings for a rebuilt - // file are purged alongside the nodes they referenced. - const remaining = db - .prepare( - `SELECT COUNT(*) AS c FROM embeddings e - JOIN nodes n ON e.node_id = n.id - WHERE n.file = ?`, - ) - .get('shared/constants.js') as { c: number }; + // file are purged alongside the nodes they referenced. Count all rows in + // `embeddings` directly (exactly one was seeded) so the assertion still + // fails if the row survives as an orphan with a dangling node_id. + const remaining = db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number }; expect(remaining.c).toBe(0); db.close();