Skip to content

Commit 5b05092

Browse files
authored
feat(AST indexing): Implement relationship index (#38)
* feat: persist versioned relationship sidecar for incremental indexing
* docs: document index versioning (phase 06) and relationship sidecar (phase 07)
1 parent 512148a commit 5b05092

File tree

9 files changed

+348
-20
lines changed

9 files changed

+348
-20
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
### Added
66

7+
- **Index versioning (Phase 06)**: Index artifacts are versioned via `index-meta.json`. Mixed-version indexes are never served; version mismatches or corruption trigger automatic rebuild.
8+
- **Crash-safe rebuilds (Phase 06)**: Full rebuilds write to `.staging/` and swap atomically only on success. Failed rebuilds don't corrupt the active index.
9+
- **Relationship sidecar (Phase 07)**: New `relationships.json` artifact containing file import graph, reverse imports, and symbol export index. Updated incrementally alongside the main index.
710
- Tree-sitter-backed symbol extraction is now used by the Generic analyzer when available (with safe fallbacks).
811
- Expanded language/extension detection to improve indexing coverage (e.g. `.pyi`, `.php`, `.kt`/`.kts`, `.cc`/`.cxx`, `.cs`, `.swift`, `.scala`, `.toml`, `.xml`).
912
- New tool: `get_symbol_references` for concrete symbol usage evidence (usageCount + top snippets).

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ The retrieval pipeline is designed around one goal: give the agent the right con
207207
- **Import centrality** - files that are imported more often rank higher.
208208
- **Cross-encoder reranking** - a stage-2 reranker triggers only when top scores are ambiguous. CPU-only, bounded to top-K.
209209
- **Incremental indexing** - only re-indexes files that changed since last run (SHA-256 manifest diffing).
210+
- **Version gating** - index artifacts are versioned; mismatches trigger automatic rebuild so mixed-version data is never served.
210211
- **Auto-heal** - if the index corrupts, search triggers a full re-index automatically.
211212

212213
## Language Support
@@ -239,7 +240,9 @@ Structured filters available: `framework`, `language`, `componentType`, `layer`
239240
```
240241
.codebase-context/
241242
memory.json # Team knowledge (should be persisted in git)
243+
index-meta.json # Index metadata and version (generated)
242244
intelligence.json # Pattern analysis (generated)
245+
relationships.json # File/symbol relationships (generated)
243246
index.json # Keyword index (generated)
244247
index/ # Vector database (generated)
245248
```

docs/capabilities.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ Output: `{ ready: boolean, reason?: string }`
8484

8585
- Initial: full scan → chunking (50 lines, 0 overlap) → embedding → vector DB (LanceDB) + keyword index (Fuse.js)
8686
- Incremental: SHA-256 manifest diffing, selective embed/delete, full intelligence regeneration
87+
- Version gating: `index-meta.json` tracks format version; mismatches trigger automatic rebuild
88+
- Crash-safe rebuilds: full rebuilds write to `.staging/` and swap atomically only on success
8789
- Auto-heal: corrupted index triggers automatic full re-index on next search
90+
- Relationships sidecar: `relationships.json` contains the file import graph, reverse imports, and symbol export index
8891
- Storage: `.codebase-context/` directory (memory.json + generated files)
8992

9093
## Analyzers

src/constants/codebase-context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ export const KEYWORD_INDEX_FILENAME = 'index.json' as const;
2424
export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const;
2525
export const VECTOR_DB_DIRNAME = 'index' as const;
2626
export const MANIFEST_FILENAME = 'manifest.json' as const;
27+
export const RELATIONSHIPS_FILENAME = 'relationships.json' as const;

src/core/index-meta.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
INDEX_META_VERSION,
1010
INTELLIGENCE_FILENAME,
1111
KEYWORD_INDEX_FILENAME,
12+
RELATIONSHIPS_FILENAME,
1213
VECTOR_DB_DIRNAME
1314
} from '../constants/codebase-context.js';
1415
import { IndexCorruptedError } from '../errors/index.js';
@@ -34,6 +35,12 @@ const IntelligenceFileSchema = z
3435
})
3536
.passthrough();
3637

38+
const RelationshipsFileSchema = z
39+
.object({
40+
header: ArtifactHeaderSchema
41+
})
42+
.passthrough();
43+
3744
export const IndexMetaSchema = z.object({
3845
metaVersion: z.number().int().positive(),
3946
formatVersion: z.number().int().nonnegative(),
@@ -221,4 +228,34 @@ export async function validateIndexArtifacts(rootDir: string, meta: IndexMeta):
221228
throw asIndexCorrupted('Intelligence corrupted (rebuild required)', error);
222229
}
223230
}
231+
232+
// Optional relationships sidecar: validate if present, but do not require.
233+
const relationshipsPath = path.join(contextDir, RELATIONSHIPS_FILENAME);
234+
if (await pathExists(relationshipsPath)) {
235+
try {
236+
const raw = await fs.readFile(relationshipsPath, 'utf-8');
237+
const json = JSON.parse(raw);
238+
const parsed = RelationshipsFileSchema.safeParse(json);
239+
if (!parsed.success) {
240+
throw new IndexCorruptedError(
241+
`Relationships schema mismatch (rebuild required): ${parsed.error.message}`
242+
);
243+
}
244+
245+
const { buildId, formatVersion } = parsed.data.header;
246+
if (formatVersion !== meta.formatVersion) {
247+
throw new IndexCorruptedError(
248+
`Relationships formatVersion mismatch (rebuild required): meta=${meta.formatVersion}, relationships.json=${formatVersion}`
249+
);
250+
}
251+
if (buildId !== meta.buildId) {
252+
throw new IndexCorruptedError(
253+
`Relationships buildId mismatch (rebuild required): meta=${meta.buildId}, relationships.json=${buildId}`
254+
);
255+
}
256+
} catch (error) {
257+
if (error instanceof IndexCorruptedError) throw error;
258+
throw asIndexCorrupted('Relationships sidecar corrupted (rebuild required)', error);
259+
}
260+
}
224261
}

src/core/indexer.ts

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import {
3939
INTELLIGENCE_FILENAME,
4040
KEYWORD_INDEX_FILENAME,
4141
MANIFEST_FILENAME,
42+
RELATIONSHIPS_FILENAME,
4243
VECTOR_DB_DIRNAME
4344
} from '../constants/codebase-context.js';
4445

@@ -91,13 +92,15 @@ async function atomicSwapStagingToActive(
9192
const activeVectorDir = path.join(contextDir, VECTOR_DB_DIRNAME);
9293
const activeManifestPath = path.join(contextDir, MANIFEST_FILENAME);
9394
const activeStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
95+
const activeRelationshipsPath = path.join(contextDir, RELATIONSHIPS_FILENAME);
9496

9597
const stagingMetaPath = path.join(stagingDir, INDEX_META_FILENAME);
9698
const stagingIndexPath = path.join(stagingDir, KEYWORD_INDEX_FILENAME);
9799
const stagingIntelligencePath = path.join(stagingDir, INTELLIGENCE_FILENAME);
98100
const stagingVectorDir = path.join(stagingDir, VECTOR_DB_DIRNAME);
99101
const stagingManifestPath = path.join(stagingDir, MANIFEST_FILENAME);
100102
const stagingStatsPath = path.join(stagingDir, INDEXING_STATS_FILENAME);
103+
const stagingRelationshipsPath = path.join(stagingDir, RELATIONSHIPS_FILENAME);
101104

102105
// Step 1: Create .previous directory and move current active there
103106
await fs.mkdir(previousDir, { recursive: true });
@@ -134,6 +137,7 @@ async function atomicSwapStagingToActive(
134137
await moveIfExists(activeIntelligencePath, path.join(previousDir, INTELLIGENCE_FILENAME));
135138
await moveIfExists(activeManifestPath, path.join(previousDir, MANIFEST_FILENAME));
136139
await moveIfExists(activeStatsPath, path.join(previousDir, INDEXING_STATS_FILENAME));
140+
await moveIfExists(activeRelationshipsPath, path.join(previousDir, RELATIONSHIPS_FILENAME));
137141
await moveDirIfExists(activeVectorDir, path.join(previousDir, VECTOR_DB_DIRNAME));
138142

139143
// Step 2: Move staging artifacts to active location
@@ -143,6 +147,7 @@ async function atomicSwapStagingToActive(
143147
await moveIfExists(stagingIntelligencePath, activeIntelligencePath);
144148
await moveIfExists(stagingManifestPath, activeManifestPath);
145149
await moveIfExists(stagingStatsPath, activeStatsPath);
150+
await moveIfExists(stagingRelationshipsPath, activeRelationshipsPath);
146151
await moveDirIfExists(stagingVectorDir, activeVectorDir);
147152

148153
// Step 3: Clean up .previous and staging directories
@@ -171,6 +176,7 @@ async function atomicSwapStagingToActive(
171176
await moveIfExists(path.join(previousDir, INTELLIGENCE_FILENAME), activeIntelligencePath);
172177
await moveIfExists(path.join(previousDir, MANIFEST_FILENAME), activeManifestPath);
173178
await moveIfExists(path.join(previousDir, INDEXING_STATS_FILENAME), activeStatsPath);
179+
await moveIfExists(path.join(previousDir, RELATIONSHIPS_FILENAME), activeRelationshipsPath);
174180
await moveDirIfExists(path.join(previousDir, VECTOR_DB_DIRNAME), activeVectorDir);
175181
console.error('Rollback successful');
176182
} catch (rollbackError) {
@@ -796,6 +802,51 @@ export class CodebaseIndexer {
796802
};
797803
await fs.writeFile(intelligencePath, JSON.stringify(intelligence, null, 2));
798804

805+
// Write relationships sidecar (versioned, for fast lookup)
806+
const relationshipsPath = path.join(activeContextDir, RELATIONSHIPS_FILENAME);
807+
const graphData = internalFileGraph.toJSON();
808+
809+
// Build reverse import map (importedBy)
810+
const importedBy: Record<string, string[]> = {};
811+
if (graphData.imports) {
812+
for (const [file, deps] of Object.entries(graphData.imports)) {
813+
for (const dep of deps as string[]) {
814+
if (!importedBy[dep]) importedBy[dep] = [];
815+
importedBy[dep].push(file);
816+
}
817+
}
818+
}
819+
820+
// Build symbol export map (exportedBy)
821+
const exportedBy: Record<string, string[]> = {};
822+
if (graphData.exports) {
823+
for (const [file, exps] of Object.entries(graphData.exports)) {
824+
for (const exp of exps as Array<{ name: string; type: string }>) {
825+
if (exp.name && exp.name !== 'default') {
826+
if (!exportedBy[exp.name]) exportedBy[exp.name] = [];
827+
if (!exportedBy[exp.name].includes(file)) {
828+
exportedBy[exp.name].push(file);
829+
}
830+
}
831+
}
832+
}
833+
}
834+
835+
const relationships = {
836+
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
837+
generatedAt,
838+
graph: {
839+
imports: graphData.imports || {},
840+
importedBy,
841+
exports: graphData.exports || {}
842+
},
843+
symbols: {
844+
exportedBy
845+
},
846+
stats: graphData.stats || internalFileGraph.getStats()
847+
};
848+
await fs.writeFile(relationshipsPath, JSON.stringify(relationships, null, 2));
849+
799850
// Write manifest (both full and incremental)
800851
// For full rebuild, write to staging; for incremental, write to active
801852
const activeManifestPath = path.join(activeContextDir, MANIFEST_FILENAME);
@@ -831,7 +882,8 @@ export class CodebaseIndexer {
831882
vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' },
832883
intelligence: { path: INTELLIGENCE_FILENAME },
833884
manifest: { path: MANIFEST_FILENAME },
834-
indexingStats: { path: INDEXING_STATS_FILENAME }
885+
indexingStats: { path: INDEXING_STATS_FILENAME },
886+
relationships: { path: RELATIONSHIPS_FILENAME }
835887
}
836888
},
837889
null,

src/tools/detect-circular-dependencies.ts

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
22
import { promises as fs } from 'fs';
3+
import path from 'path';
34
import type { ToolContext, ToolResponse } from './types.js';
45
import { InternalFileGraph } from '../utils/usage-tracker.js';
6+
import { RELATIONSHIPS_FILENAME } from '../constants/codebase-context.js';
57

68
export const definition: Tool = {
79
name: 'detect_circular_dependencies',
@@ -27,11 +29,36 @@ export async function handle(
2729
const { scope } = args as { scope?: string };
2830

2931
try {
30-
const intelligencePath = ctx.paths.intelligence;
31-
const content = await fs.readFile(intelligencePath, 'utf-8');
32-
const intelligence = JSON.parse(content);
32+
// Try relationships sidecar first (preferred), then intelligence
33+
let graphDataSource: any = null;
34+
let graphStats: any = null;
3335

34-
if (!intelligence.internalFileGraph) {
36+
const relationshipsPath = path.join(
37+
path.dirname(ctx.paths.intelligence),
38+
RELATIONSHIPS_FILENAME
39+
);
40+
try {
41+
const relationshipsContent = await fs.readFile(relationshipsPath, 'utf-8');
42+
const relationships = JSON.parse(relationshipsContent);
43+
if (relationships?.graph) {
44+
graphDataSource = relationships.graph;
45+
graphStats = relationships.stats;
46+
}
47+
} catch {
48+
// Relationships sidecar not available, try intelligence
49+
}
50+
51+
if (!graphDataSource) {
52+
const intelligencePath = ctx.paths.intelligence;
53+
const content = await fs.readFile(intelligencePath, 'utf-8');
54+
const intelligence = JSON.parse(content);
55+
if (intelligence.internalFileGraph) {
56+
graphDataSource = intelligence.internalFileGraph;
57+
graphStats = intelligence.internalFileGraph.stats;
58+
}
59+
}
60+
61+
if (!graphDataSource) {
3562
return {
3663
content: [
3764
{
@@ -51,9 +78,9 @@ export async function handle(
5178
}
5279

5380
// Reconstruct the graph from stored data
54-
const graph = InternalFileGraph.fromJSON(intelligence.internalFileGraph, ctx.rootPath);
81+
const graph = InternalFileGraph.fromJSON(graphDataSource, ctx.rootPath);
5582
const cycles = graph.findCycles(scope);
56-
const graphStats = intelligence.internalFileGraph.stats || graph.getStats();
83+
const stats = graphStats || graph.getStats();
5784

5885
if (cycles.length === 0) {
5986
return {
@@ -67,7 +94,7 @@ export async function handle(
6794
? `No circular dependencies detected in scope: ${scope}`
6895
: 'No circular dependencies detected in the codebase.',
6996
scope,
70-
graphStats
97+
graphStats: stats
7198
},
7299
null,
73100
2
@@ -92,7 +119,7 @@ export async function handle(
92119
severity: c.length === 2 ? 'high' : c.length <= 3 ? 'medium' : 'low'
93120
})),
94121
count: cycles.length,
95-
graphStats,
122+
graphStats: stats,
96123
advice:
97124
'Shorter cycles (length 2-3) are typically more problematic. Consider breaking the cycle by extracting shared dependencies.'
98125
},

src/tools/search-codebase.ts

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import { assessSearchQuality } from '../core/search-quality.js';
1616
import { IndexCorruptedError } from '../errors/index.js';
1717
import { readMemoriesFile, withConfidence } from '../memory/store.js';
1818
import { InternalFileGraph } from '../utils/usage-tracker.js';
19+
import { RELATIONSHIPS_FILENAME } from '../constants/codebase-context.js';
1920

2021
export const definition: Tool = {
2122
name: 'search_codebase',
@@ -229,6 +230,30 @@ export async function handle(
229230
/* graceful degradation — intelligence file may not exist yet */
230231
}
231232

233+
// Load relationships sidecar (preferred over intelligence.internalFileGraph)
234+
let relationships: any = null;
235+
try {
236+
const relationshipsPath = path.join(
237+
path.dirname(ctx.paths.intelligence),
238+
RELATIONSHIPS_FILENAME
239+
);
240+
const relationshipsContent = await fs.readFile(relationshipsPath, 'utf-8');
241+
relationships = JSON.parse(relationshipsContent);
242+
} catch {
243+
/* graceful degradation — relationships sidecar may not exist yet */
244+
}
245+
246+
// Helper to get imports graph from relationships sidecar (preferred) or intelligence
247+
function getImportsGraph(): Record<string, string[]> | null {
248+
if (relationships?.graph?.imports) {
249+
return relationships.graph.imports as Record<string, string[]>;
250+
}
251+
if (intelligence?.internalFileGraph?.imports) {
252+
return intelligence.internalFileGraph.imports as Record<string, string[]>;
253+
}
254+
return null;
255+
}
256+
232257
function computeIndexConfidence(): 'fresh' | 'aging' | 'stale' {
233258
let confidence: 'fresh' | 'aging' | 'stale' = 'stale';
234259
if (intelligence?.generatedAt) {
@@ -246,8 +271,8 @@ export async function handle(
246271
// Cheap impact breadth estimate from the import graph (used for risk assessment).
247272
function computeImpactCandidates(resultPaths: string[]): string[] {
248273
const impactCandidates: string[] = [];
249-
if (!intelligence?.internalFileGraph?.imports) return impactCandidates;
250-
const allImports = intelligence.internalFileGraph.imports as Record<string, string[]>;
274+
const allImports = getImportsGraph();
275+
if (!allImports) return impactCandidates;
251276
for (const [file, deps] of Object.entries(allImports)) {
252277
if (
253278
deps.some((dep: string) => resultPaths.some((rp) => dep.endsWith(rp) || rp.endsWith(dep)))
@@ -260,10 +285,11 @@ export async function handle(
260285
return impactCandidates;
261286
}
262287

263-
// Build reverse import map from intelligence graph
288+
// Build reverse import map from relationships sidecar (preferred) or intelligence graph
264289
const reverseImports = new Map<string, string[]>();
265-
if (intelligence?.internalFileGraph?.imports) {
266-
for (const [file, deps] of Object.entries<string[]>(intelligence.internalFileGraph.imports)) {
290+
const importsGraph = getImportsGraph();
291+
if (importsGraph) {
292+
for (const [file, deps] of Object.entries<string[]>(importsGraph)) {
267293
for (const dep of deps) {
268294
if (!reverseImports.has(dep)) reverseImports.set(dep, []);
269295
reverseImports.get(dep)!.push(file);
@@ -285,8 +311,8 @@ export async function handle(
285311

286312
// imports: files this result depends on (forward lookup)
287313
const imports: string[] = [];
288-
if (intelligence?.internalFileGraph?.imports) {
289-
for (const [file, deps] of Object.entries<string[]>(intelligence.internalFileGraph.imports)) {
314+
if (importsGraph) {
315+
for (const [file, deps] of Object.entries<string[]>(importsGraph)) {
290316
if (file.endsWith(rPath) || rPath.endsWith(file)) {
291317
imports.push(...deps);
292318
}
@@ -296,8 +322,8 @@ export async function handle(
296322
// testedIn: heuristic — same basename with .spec/.test extension
297323
const testedIn: string[] = [];
298324
const baseName = path.basename(rPath).replace(/\.[^.]+$/, '');
299-
if (intelligence?.internalFileGraph?.imports) {
300-
for (const file of Object.keys(intelligence.internalFileGraph.imports)) {
325+
if (importsGraph) {
326+
for (const file of Object.keys(importsGraph)) {
301327
const fileBase = path.basename(file);
302328
if (
303329
(fileBase.includes('.spec.') || fileBase.includes('.test.')) &&
@@ -416,9 +442,10 @@ export async function handle(
416442
// --- Risk level (based on circular deps + impact breadth) ---
417443
let riskLevel: 'low' | 'medium' | 'high' = 'low';
418444
let cycleCount = 0;
419-
if (intelligence.internalFileGraph) {
445+
const graphDataSource = relationships?.graph || intelligence?.internalFileGraph;
446+
if (graphDataSource) {
420447
try {
421-
const graph = InternalFileGraph.fromJSON(intelligence.internalFileGraph, ctx.rootPath);
448+
const graph = InternalFileGraph.fromJSON(graphDataSource, ctx.rootPath);
422449
// Use directory prefixes as scope (not full file paths)
423450
// findCycles(scope) filters files by startsWith, so a full path would only match itself
424451
const scopes = new Set(

0 commit comments

Comments
 (0)