Skip to content

Commit 7c3af0e

Browse files
andreinknvclaude
andcommitted
feat: PR #115 (sql-refs) on top of refactors
Extracts SQL string-literal references to tables (read/write/ddl) into sql_refs and exposes via codegraph_sql MCP tool. Lands as a registered IndexHook (sql-refs). - Migration 007: sql_refs table - src/sql-refs/ (pure module): regex extractor with comment strip + SQL-keyword pre-filter - src/index-hooks/sql-refs.ts (registered hook with full / files scoping; uses replaceAllSqlRefs for atomic indexAll swap) - CodeGraph public methods: getSqlTables, getSqlRefsByTable, getSqlTablesForNode - codegraph_sql MCP tool wired through ToolModule registry - enableSqlRefs flag default true - Removed defensive ensureSqlRefsTable guard + its test (same reason as #113 / #114: bug class is impossible under file-based migrations). Tests: 514/515 pass (1 watcher flake under load). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f8fc536 commit 7c3af0e

19 files changed

Lines changed: 972 additions & 3 deletions

__tests__/foundation.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ describe('Database Connection', () => {
305305

306306
const version = db.getSchemaVersion();
307307
expect(version).not.toBeNull();
308-
expect(version?.version).toBe(6);
308+
expect(version?.version).toBe(7);
309309

310310
db.close();
311311
});

__tests__/mcp-tool-registry.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ describe('MCP tool registry — single source of truth', () => {
4949
'codegraph_impact',
5050
'codegraph_node',
5151
'codegraph_search',
52+
'codegraph_sql',
5253
'codegraph_status',
5354
];
5455
const actual = getToolModules()

__tests__/pr19-improvements.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
299299
describe('Schema v2 Migration', () => {
300300
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
301301
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
302-
expect(CURRENT_SCHEMA_VERSION).toBe(6);
302+
expect(CURRENT_SCHEMA_VERSION).toBe(7);
303303
});
304304

305305
it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {

__tests__/sql-refs.test.ts

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
/**
2+
* SQL call-site tests: parser unit tests + end-to-end through CodeGraph.
3+
*/
4+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
5+
import * as fs from 'fs';
6+
import * as os from 'os';
7+
import * as path from 'path';
8+
import { extractSqlRefs } from '../src/sql-refs';
9+
import CodeGraph from '../src/index';
10+
11+
let testDir: string;
12+
let cg: CodeGraph | null = null;
13+
14+
function write(rel: string, content: string) {
15+
const abs = path.join(testDir, rel);
16+
fs.mkdirSync(path.dirname(abs), { recursive: true });
17+
fs.writeFileSync(abs, content);
18+
}
19+
20+
beforeEach(() => {
21+
testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-sql-'));
22+
});
23+
24+
afterEach(() => {
25+
if (cg) {
26+
cg.destroy();
27+
cg = null;
28+
}
29+
if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
30+
});
31+
32+
// ============================================================================
33+
// Pure parser tests
34+
// ============================================================================
35+
36+
describe('extractSqlRefs', () => {
37+
it('captures FROM <table> as a read', () => {
38+
write('a.ts', `db.prepare('SELECT id FROM users WHERE id = ?');\n`);
39+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
40+
expect(refs).toHaveLength(1);
41+
expect(refs[0]!).toMatchObject({ tableName: 'users', op: 'read' });
42+
});
43+
44+
it('captures INSERT INTO as a write', () => {
45+
write('a.ts', `db.prepare('INSERT INTO logs (msg) VALUES (?)');\n`);
46+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
47+
expect(refs).toHaveLength(1);
48+
expect(refs[0]!).toMatchObject({ tableName: 'logs', op: 'write' });
49+
});
50+
51+
it('captures UPDATE ... SET as a write', () => {
52+
write('a.ts', `db.run('UPDATE users SET name = ? WHERE id = ?', ['x', 1]);\n`);
53+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
54+
expect(refs).toHaveLength(1);
55+
expect(refs[0]!).toMatchObject({ tableName: 'users', op: 'write' });
56+
});
57+
58+
it('captures DELETE FROM as a write (and not as a read)', () => {
59+
write('a.ts', `db.run('DELETE FROM sessions WHERE expired_at < ?');\n`);
60+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
61+
// Both regexes (DELETE FROM as write, FROM as read) hit, so we expect
62+
// two refs for the same table but different ops.
63+
expect(refs.map((r) => r.op).sort()).toEqual(['read', 'write']);
64+
expect(new Set(refs.map((r) => r.tableName))).toEqual(new Set(['sessions']));
65+
});
66+
67+
it('captures CREATE TABLE / ALTER / DROP as ddl', () => {
68+
write(
69+
'a.ts',
70+
[
71+
`db.exec('CREATE TABLE IF NOT EXISTS audit (id INTEGER)');`,
72+
`db.exec('ALTER TABLE audit ADD COLUMN ts INTEGER');`,
73+
`db.exec('DROP TABLE IF EXISTS audit_old');`,
74+
].join('\n')
75+
);
76+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
77+
const ddls = refs.filter((r) => r.op === 'ddl');
78+
expect(new Set(ddls.map((r) => r.tableName))).toEqual(new Set(['audit', 'audit_old']));
79+
});
80+
81+
it('captures JOIN as a read', () => {
82+
write(
83+
'a.ts',
84+
`db.prepare('SELECT u.name, p.title FROM users u JOIN posts p ON p.user_id = u.id');\n`
85+
);
86+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
87+
const tables = new Set(refs.map((r) => r.tableName));
88+
expect(tables).toEqual(new Set(['users', 'posts']));
89+
});
90+
91+
it('handles backtick (MySQL) and double-quoted (Postgres) identifiers', () => {
92+
write(
93+
'a.ts',
94+
[
95+
"db.prepare('SELECT id FROM `mysql_table`');",
96+
`db.prepare('SELECT id FROM "pg_table"');`,
97+
].join('\n')
98+
);
99+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
100+
expect(new Set(refs.map((r) => r.tableName))).toEqual(
101+
new Set(['mysql_table', 'pg_table'])
102+
);
103+
});
104+
105+
it('handles schema-qualified identifiers (drops the schema, keeps the table)', () => {
106+
write('a.ts', `db.prepare('SELECT * FROM public.users');\n`);
107+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
108+
expect(refs[0]!.tableName).toBe('users');
109+
});
110+
111+
it('does NOT match a JS variable named like a SQL keyword', () => {
112+
// Without the FROM/INTO/etc. prefix, a bare identifier `users` is
113+
// not caught — that's the whole point vs. plain grep.
114+
write('a.ts', `const users = await loadUsers();\nfor (const user of users) {}\n`);
115+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
116+
expect(refs).toEqual([]);
117+
});
118+
119+
it('skips unsupported languages (e.g. swift) without error', () => {
120+
write('a.swift', `let q = "SELECT id FROM users"\n`);
121+
const refs = extractSqlRefs(testDir, [{ path: 'a.swift', language: 'swift' }], () => null);
122+
expect(refs).toEqual([]);
123+
});
124+
125+
it('captures the correct 1-indexed line number', () => {
126+
write(
127+
'a.ts',
128+
[`// blah`, `// blah`, `db.prepare('SELECT * FROM line_three');`, `// blah`].join('\n')
129+
);
130+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
131+
expect(refs[0]).toEqual(expect.objectContaining({ tableName: 'line_three', line: 3 }));
132+
});
133+
134+
it('threads the resolveEnclosing closure correctly', () => {
135+
write('a.ts', `db.prepare('SELECT * FROM t');\n`);
136+
const calls: Array<[string, number]> = [];
137+
extractSqlRefs(
138+
testDir,
139+
[{ path: 'a.ts', language: 'typescript' }],
140+
(filePath, line) => {
141+
calls.push([filePath, line]);
142+
return 'fake-id';
143+
}
144+
);
145+
expect(calls).toEqual([['a.ts', 1]]);
146+
});
147+
148+
it('drops reserved-word "table names" (WHERE/ON/AS/SELECT)', () => {
149+
// Common over-match: `JOIN ... ON x = y` would otherwise pick up
150+
// `ON` as the table name. The reserved set blocks that.
151+
write('a.ts', `db.prepare('SELECT * FROM users JOIN posts ON posts.uid = users.id');\n`);
152+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
153+
const names = new Set(refs.map((r) => r.tableName));
154+
expect(names).toEqual(new Set(['users', 'posts']));
155+
});
156+
157+
it('handles multiple SQL operations on a single line', () => {
158+
write(
159+
'a.ts',
160+
`db.exec('CREATE TABLE foo (id INTEGER); INSERT INTO foo VALUES (1)');\n`
161+
);
162+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
163+
const ops = new Set(refs.map((r) => `${r.tableName}|${r.op}`));
164+
expect(ops).toEqual(new Set(['foo|ddl', 'foo|write']));
165+
});
166+
167+
it('survives a missing file (skips, no throw)', () => {
168+
const refs = extractSqlRefs(
169+
testDir,
170+
[{ path: 'missing.ts', language: 'typescript' }],
171+
() => null
172+
);
173+
expect(refs).toEqual([]);
174+
});
175+
176+
it('rejects prose comments containing a quoted SQL example', () => {
177+
// Reviewer-flagged regression: a comment like
178+
// // example: db.prepare('SELECT name FROM the docs')
179+
// used to falsely match `the` as a table because the quote inside
180+
// the comment passed isInsideString(). The comment-stripper now
181+
// removes everything after `//` before the regex sees the line.
182+
write(
183+
'a.ts',
184+
[
185+
`// example: db.prepare('SELECT name FROM the docs')`,
186+
`// "SELECT id FROM the comment"`,
187+
`function ok() {`,
188+
` // sample SELECT FROM users in a comment — should be ignored`,
189+
` return 1;`,
190+
`}`,
191+
].join('\n')
192+
);
193+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
194+
expect(refs).toEqual([]);
195+
});
196+
197+
it('rejects same-line block comments containing a quoted SQL example', () => {
198+
write(
199+
'a.ts',
200+
`/* "SELECT * FROM ghost" */ const x = 1;\n`
201+
);
202+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
203+
expect(refs).toEqual([]);
204+
});
205+
206+
it('still keeps a real SQL call when there is a trailing comment', () => {
207+
write('a.ts', `db.prepare('SELECT * FROM users'); // good doc\n`);
208+
const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
209+
expect(refs.length).toBe(1);
210+
expect(refs[0]!.tableName).toBe('users');
211+
});
212+
213+
it('strips Python `#` comments', () => {
214+
write(
215+
'a.py',
216+
`# example: db.execute('SELECT * FROM the_docs')\nrows = db.execute('SELECT * FROM real_table')\n`
217+
);
218+
const refs = extractSqlRefs(testDir, [{ path: 'a.py', language: 'python' }], () => null);
219+
expect(refs.map((r) => r.tableName)).toEqual(['real_table']);
220+
});
221+
});
222+
223+
// ============================================================================
224+
// End-to-end through CodeGraph
225+
// ============================================================================
226+
227+
describe('CodeGraph SQL refs', () => {
228+
it('persists call sites and resolves enclosing function', async () => {
229+
write(
230+
'src/db.ts',
231+
[
232+
`export function getUser(id: number) {`,
233+
` return db.prepare('SELECT * FROM users WHERE id = ?').get(id);`,
234+
`}`,
235+
``,
236+
`export function logEvent(msg: string) {`,
237+
` db.prepare('INSERT INTO events (msg) VALUES (?)').run(msg);`,
238+
`}`,
239+
].join('\n')
240+
);
241+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
242+
await cg.indexAll();
243+
244+
const tables = cg.getSqlTables();
245+
expect(new Set(tables.map((t) => t.tableName))).toEqual(new Set(['users', 'events']));
246+
247+
const userSites = cg.getSqlRefsByTable('users');
248+
expect(userSites[0]!.sourceName).toBe('getUser');
249+
250+
const eventSites = cg.getSqlRefsByTable('events');
251+
expect(eventSites[0]!.sourceName).toBe('logEvent');
252+
expect(eventSites[0]!.op).toBe('write');
253+
});
254+
255+
it('reverse view: getSqlTablesForNode returns tables touched by a function', async () => {
256+
write(
257+
'src/a.ts',
258+
[
259+
`export function multiTouch() {`,
260+
` db.prepare('SELECT * FROM a').all();`,
261+
` db.prepare('INSERT INTO b VALUES (?)').run(1);`,
262+
`}`,
263+
].join('\n')
264+
);
265+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
266+
await cg.indexAll();
267+
268+
const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'multiTouch')!;
269+
const touched = cg.getSqlTablesForNode(node.id);
270+
const summary = touched.map((r) => `${r.tableName}|${r.op}`).sort();
271+
expect(summary).toEqual(['a|read', 'b|write']);
272+
});
273+
274+
it('case-insensitive table lookup', async () => {
275+
write('src/a.ts', `db.prepare('SELECT * FROM Users');\n`);
276+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
277+
await cg.indexAll();
278+
expect(cg.getSqlRefsByTable('users').length).toBe(1);
279+
expect(cg.getSqlRefsByTable('USERS').length).toBe(1);
280+
});
281+
282+
it('respects enableSqlRefs=false', async () => {
283+
write('src/a.ts', `db.prepare('SELECT * FROM users');\n`);
284+
cg = CodeGraph.initSync(testDir, {
285+
config: { include: ['**/*.ts'], exclude: [], enableSqlRefs: false },
286+
});
287+
await cg.indexAll();
288+
expect(cg.getSqlTables()).toEqual([]);
289+
});
290+
291+
it('incremental sync replaces refs for changed files only', async () => {
292+
write('src/a.ts', `db.prepare('SELECT * FROM old_table');\n`);
293+
write('src/b.ts', `db.prepare('SELECT * FROM stable_table');\n`);
294+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
295+
await cg.indexAll();
296+
expect(new Set(cg.getSqlTables().map((t) => t.tableName))).toEqual(
297+
new Set(['old_table', 'stable_table'])
298+
);
299+
300+
write('src/a.ts', `db.prepare('SELECT * FROM new_table');\n`);
301+
await cg.sync();
302+
303+
const tables = new Set(cg.getSqlTables().map((t) => t.tableName));
304+
expect(tables).toContain('new_table');
305+
expect(tables).toContain('stable_table');
306+
expect(tables).not.toContain('old_table');
307+
});
308+
309+
it('drops refs when a file is edited to remove its last SQL ref', async () => {
310+
// Same regression as PR C — applySqlRefs([]) shouldn't leave
311+
// stale rows. Pre-deleting the changed paths in runSqlRefsPass
312+
// is the fix.
313+
write('src/a.ts', `db.prepare('SELECT * FROM going_away');\n`);
314+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
315+
await cg.indexAll();
316+
expect(cg.getSqlTables().some((t) => t.tableName === 'going_away')).toBe(true);
317+
318+
write('src/a.ts', `// no sql here anymore\nexport const x = 1;\n`);
319+
await cg.sync();
320+
321+
expect(cg.getSqlTables().some((t) => t.tableName === 'going_away')).toBe(false);
322+
});
323+
324+
it('drops refs for files removed between syncs', async () => {
325+
write('src/a.ts', `db.prepare('SELECT * FROM gone_table');\n`);
326+
cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
327+
await cg.indexAll();
328+
expect(cg.getSqlTables().some((t) => t.tableName === 'gone_table')).toBe(true);
329+
330+
fs.unlinkSync(path.join(testDir, 'src/a.ts'));
331+
await cg.sync();
332+
expect(cg.getSqlTables().some((t) => t.tableName === 'gone_table')).toBe(false);
333+
});
334+
335+
// (Removed: a defensive test for the v4-migration-collision bug class.
336+
// With file-based migrations (NNN-name.ts), two PRs claiming the same
337+
// version produces a filesystem-level conflict, so the silent skip the
338+
// defensive guard protected against can no longer happen.)
339+
});

src/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ function mergeConfig(
132132
enableChurn: overrides.enableChurn ?? defaults.enableChurn,
133133
enableIssueHistory: overrides.enableIssueHistory ?? defaults.enableIssueHistory,
134134
enableConfigRefs: overrides.enableConfigRefs ?? defaults.enableConfigRefs,
135+
enableSqlRefs: overrides.enableSqlRefs ?? defaults.enableSqlRefs,
135136
};
136137
}
137138

src/db/migrations/007-sql-refs.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import type { MigrationModule } from './types';
2+
3+
export const MIGRATION: MigrationModule = {
4+
description: 'Add sql_refs table for SQL string-literal references to tables',
5+
up: (db) => {
6+
db.exec(`
7+
CREATE TABLE IF NOT EXISTS sql_refs (
8+
id INTEGER PRIMARY KEY AUTOINCREMENT,
9+
table_name TEXT NOT NULL,
10+
op TEXT NOT NULL CHECK (op IN ('read','write','ddl')),
11+
source_node_id TEXT,
12+
file_path TEXT NOT NULL,
13+
line INTEGER NOT NULL,
14+
FOREIGN KEY (source_node_id) REFERENCES nodes(id) ON DELETE CASCADE
15+
);
16+
CREATE INDEX IF NOT EXISTS idx_sql_refs_table
17+
ON sql_refs(lower(table_name));
18+
CREATE INDEX IF NOT EXISTS idx_sql_refs_node
19+
ON sql_refs(source_node_id);
20+
CREATE INDEX IF NOT EXISTS idx_sql_refs_file
21+
ON sql_refs(file_path);
22+
`);
23+
},
24+
};

0 commit comments

Comments
 (0)