/**
 * Match a file path against a glob pattern. Zero dependencies.
 *
 * Supported wildcards:
 *   - `*`   any run of characters within a single path segment (never `/`)
 *   - `?`   exactly one character within a path segment (never `/`)
 *   - `**`  any run of characters, including `/`
 *   - `**` followed by `/`, when it forms a whole path segment (at the start
 *     of the pattern or right after a `/`), matches zero or more directories,
 *     so `src/**` + `/*.js` matches both `src/a.js` and `src/x/y/a.js`.
 *
 * Every other character — including `[` and `]` — is matched literally;
 * character classes are not supported. Backslashes in both the path and the
 * pattern are treated as path separators, and the match is anchored to the
 * whole path.
 *
 * @param {string} filePath - Path to test (either separator style).
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the entire path matches the pattern.
 */
function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes on both sides.
  const normalizedPath = filePath.replace(/\\/g, '/');
  const normalizedPattern = pattern.replace(/\\/g, '/');

  // Translate in a single pass so that the regex fragments emitted for one
  // token (which themselves contain `*` and `?`) are never re-interpreted as
  // glob characters, and no placeholder character is needed.
  const source = normalizedPattern.replace(
    /\*\*\/|\*\*|\*|\?|[.+^${}()|[\]]/g,
    (token, offset, whole) => {
      switch (token) {
        case '**/': {
          // Only a globstar that is a complete segment may match zero
          // directories; `foo**/bar` keeps requiring the slash.
          const isWholeSegment = offset === 0 || whole[offset - 1] === '/';
          return isWholeSegment ? '(?:.*/)?' : '.*/';
        }
        case '**':
          return '.*';
        case '*':
          return '[^/]*';
        case '?':
          return '[^/]';
        default:
          // Regex metacharacter: match it literally.
          return `\\${token}`;
      }
    },
  );

  // Every metacharacter was either escaped or translated above, so the
  // resulting source is always a valid regular expression.
  return new RegExp(`^${source}$`).test(normalizedPath);
}
haven't been accessed within `ttlDays` days. * Returns an array of `{ name, path, reason }` for each pruned entry. */ -export function pruneRegistry(registryPath = REGISTRY_PATH, ttlDays = DEFAULT_TTL_DAYS) { +export function pruneRegistry( + registryPath = REGISTRY_PATH, + ttlDays = DEFAULT_TTL_DAYS, + excludeNames = [], +) { const registry = loadRegistry(registryPath); const pruned = []; const cutoff = Date.now() - ttlDays * 24 * 60 * 60 * 1000; + const excludeSet = new Set( + excludeNames.filter((n) => typeof n === 'string' && n.trim().length > 0), + ); for (const [name, entry] of Object.entries(registry.repos)) { + if (excludeSet.has(name)) continue; if (!fs.existsSync(entry.path)) { pruned.push({ name, path: entry.path, reason: 'missing' }); delete registry.repos[name]; diff --git a/tests/search/embedder-search.test.js b/tests/search/embedder-search.test.js index 8af53953a..bbe57b674 100644 --- a/tests/search/embedder-search.test.js +++ b/tests/search/embedder-search.test.js @@ -220,6 +220,24 @@ describe('multiSearchData', () => { }); }); +describe('searchData file pattern', () => { + test('glob src/*.js matches only direct children of src/', async () => { + const data = await searchData('auth', dbPath, { minScore: 0.01, filePattern: 'src/*.js' }); + expect(data).not.toBeNull(); + for (const r of data.results) { + expect(r.file).toMatch(/^src\/[^/]+\.js$/); + } + }); + + test('plain substring auth still works (backward compat)', async () => { + const data = await searchData('auth', dbPath, { minScore: 0.01, filePattern: 'auth' }); + expect(data).not.toBeNull(); + for (const r of data.results) { + expect(r.file).toContain('auth'); + } + }); +}); + describe('search (CLI wrapper)', () => { /** Capture console.log calls and return joined output. 
*/ function captureLog(fn) { @@ -253,4 +271,22 @@ describe('search (CLI wrapper)', () => { expect(out).toContain('Semantic search: "auth"'); expect(out).not.toContain('Multi-query'); }); + + test('single query with json: true outputs valid JSON with results array', async () => { + const out = await captureLog(() => search('auth', dbPath, { minScore: 0.2, json: true })); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('similarity'); + expect(parsed.results[0]).toHaveProperty('name'); + }); + + test('multi query with json: true outputs valid JSON with rrf and queryScores', async () => { + const out = await captureLog(() => search('auth ; jwt', dbPath, { minScore: 0.2, json: true })); + const parsed = JSON.parse(out); + expect(parsed.results).toBeInstanceOf(Array); + expect(parsed.results.length).toBeGreaterThan(0); + expect(parsed.results[0]).toHaveProperty('rrf'); + expect(parsed.results[0]).toHaveProperty('queryScores'); + }); }); diff --git a/tests/unit/registry.test.js b/tests/unit/registry.test.js index 7623ba8f2..d70c95a26 100644 --- a/tests/unit/registry.test.js +++ b/tests/unit/registry.test.js @@ -454,6 +454,71 @@ describe('pruneRegistry', () => { const pruned = pruneRegistry(registryPath); expect(pruned).toEqual([]); }); + + it('excluded entry survives missing-dir prune', () => { + const dir1 = path.join(tmpDir, 'keep'); + const dir2 = path.join(tmpDir, 'gone-excluded'); + fs.mkdirSync(dir1, { recursive: true }); + fs.mkdirSync(dir2, { recursive: true }); + + registerRepo(dir1, 'keep', registryPath); + registerRepo(dir2, 'gone-excluded', registryPath); + + // Remove the directory + fs.rmSync(dir2, { recursive: true, force: true }); + + const pruned = pruneRegistry(registryPath, 30, ['gone-excluded']); + expect(pruned).toHaveLength(0); + + const reg = loadRegistry(registryPath); + expect(reg.repos['gone-excluded']).toBeDefined(); + 
}); + + it('excluded entry survives TTL prune', () => { + const dir = path.join(tmpDir, 'protected'); + fs.mkdirSync(dir, { recursive: true }); + + const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString(); + const registry = { + repos: { + protected: { + path: dir, + dbPath: path.join(dir, '.codegraph', 'graph.db'), + addedAt: oldDate, + lastAccessedAt: oldDate, + }, + }, + }; + saveRegistry(registry, registryPath); + + const pruned = pruneRegistry(registryPath, 30, ['protected']); + expect(pruned).toHaveLength(0); + + const reg = loadRegistry(registryPath); + expect(reg.repos.protected).toBeDefined(); + }); + + it('empty exclude array prunes normally (backward compat)', () => { + const dir = path.join(tmpDir, 'stale'); + fs.mkdirSync(dir, { recursive: true }); + + const oldDate = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000).toISOString(); + const registry = { + repos: { + stale: { + path: dir, + dbPath: path.join(dir, '.codegraph', 'graph.db'), + addedAt: oldDate, + lastAccessedAt: oldDate, + }, + }, + }; + saveRegistry(registry, registryPath); + + const pruned = pruneRegistry(registryPath, 30, []); + expect(pruned).toHaveLength(1); + expect(pruned[0].name).toBe('stale'); + }); }); // ─── DEFAULT_TTL_DAYS ────────────────────────────────────────────── diff --git a/vitest.config.js b/vitest.config.js index 97575fa9b..a92ad038c 100644 --- a/vitest.config.js +++ b/vitest.config.js @@ -4,5 +4,6 @@ export default defineConfig({ test: { globals: true, testTimeout: 30000, + exclude: ['**/node_modules/**', '**/.git/**', '.claude/**'], }, });