Skip to content

Commit fee8bdf

Browse files
prosdev and claude committed
feat(core,mcp): cached dependency graph for scale
Build dependency graph at index time and save as JSON. Load cached graph in dev_map and dev_refs instead of fetching all docs via getAll. Incremental graph updates via file watcher. Falls back to current approach if cache is missing or corrupted. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7853fa5 commit fee8bdf

10 files changed

Lines changed: 388 additions & 19 deletions

File tree

packages/core/src/indexer/index.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ import * as fs from 'node:fs/promises';
1010
import * as path from 'node:path';
1111
import type { Logger } from '@prosdevlab/kero';
1212
import type { EventBus } from '../events/types.js';
13+
import { buildDependencyGraph, serializeGraph } from '../map/graph';
1314
import { scanRepository } from '../scanner';
15+
import { getStorageFilePaths } from '../storage/path';
1416
import type { EmbeddingDocument, LinearMergeResult, SearchOptions, SearchResult } from '../vector';
1517
import { VectorStorage } from '../vector';
1618
import { StatsAggregator } from './stats-aggregator';
@@ -183,6 +185,23 @@ export class RepositoryIndexer {
183185
`Linear Merge complete: ${mergeResult.upserted} upserted, ${mergeResult.skipped} unchanged, ${mergeResult.deleted} removed`
184186
);
185187

188+
// Build and cache dependency graph
189+
try {
190+
const graphDocs = embeddingDocuments.map((d) => ({
191+
id: d.id,
192+
score: 0,
193+
metadata: d.metadata,
194+
}));
195+
const graph = buildDependencyGraph(graphDocs);
196+
const storagePath = path.dirname(this.config.vectorStorePath);
197+
const graphPath = getStorageFilePaths(storagePath).dependencyGraph;
198+
await fs.writeFile(graphPath, serializeGraph(graph), 'utf-8');
199+
logger?.info({ nodes: graph.size }, 'Dependency graph cached');
200+
} catch (graphError) {
201+
// Non-fatal — graph is a performance optimization, not required
202+
logger?.warn({ error: graphError }, 'Failed to cache dependency graph');
203+
}
204+
186205
// Phase 4: Complete
187206
const endTime = new Date();
188207
onProgress?.({

packages/core/src/map/__tests__/graph.test.ts

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ import { describe, expect, it } from 'vitest';
99
import {
1010
buildDependencyGraph,
1111
connectedComponents,
12+
deserializeGraph,
13+
loadOrBuildGraph,
1214
pageRank,
15+
serializeGraph,
1316
shortestPath,
17+
updateGraphIncremental,
1418
type WeightedEdge,
1519
} from '../graph';
1620

@@ -333,3 +337,178 @@ describe('shortestPath', () => {
333337
expect(shortestPath(new Map(), 'X', 'Y')).toBeNull();
334338
});
335339
});
340+
341+
// ============================================================================
342+
// Serialization
343+
// ============================================================================
344+
345+
describe('serializeGraph / deserializeGraph', () => {
  // Serialize a graph and immediately parse it back.
  const roundTrip = (input: Map<string, WeightedEdge[]>) => deserializeGraph(serializeGraph(input));

  it('should round-trip correctly', () => {
    const original = new Map<string, WeightedEdge[]>([
      ['src/a.ts', [edge('src/b.ts', 1.414), edge('src/c.ts', 1)]],
      ['src/b.ts', [edge('src/c.ts', 2)]],
    ]);

    const restored = roundTrip(original);

    expect(restored).not.toBeNull();
    expect(restored!.size).toBe(2);
    expect(restored!.get('src/a.ts')).toEqual([
      { target: 'src/b.ts', weight: 1.414 },
      { target: 'src/c.ts', weight: 1 },
    ]);
    expect(restored!.get('src/b.ts')).toEqual([{ target: 'src/c.ts', weight: 2 }]);
  });

  it('should include metadata in serialized JSON', () => {
    const singleNode = new Map<string, WeightedEdge[]>([['a', [edge('b')]]]);

    // Inspect the raw envelope rather than the deserialized Map.
    const envelope = JSON.parse(serializeGraph(singleNode));

    expect(envelope.version).toBe(1);
    expect(envelope.nodeCount).toBe(1);
    expect(envelope.edgeCount).toBe(1);
    expect(envelope.generatedAt).toBeTruthy();
  });

  it('should return null for invalid JSON', () => {
    const result = deserializeGraph('not json');
    expect(result).toBeNull();
  });

  it('should return null for wrong version', () => {
    const wrongVersion = JSON.stringify({ version: 99, graph: {} });
    expect(deserializeGraph(wrongVersion)).toBeNull();
  });

  it('should return null for missing graph field', () => {
    const noGraph = JSON.stringify({ version: 1 });
    expect(deserializeGraph(noGraph)).toBeNull();
  });

  it('should handle empty graph', () => {
    const restored = roundTrip(new Map<string, WeightedEdge[]>());

    expect(restored).not.toBeNull();
    expect(restored!.size).toBe(0);
  });
});
396+
397+
// ============================================================================
398+
// loadOrBuildGraph
399+
// ============================================================================
400+
401+
describe('loadOrBuildGraph', () => {
  // Minimal search-result stub: one doc at `filePath` with a single callee in `calleeFile`.
  const docCalling = (filePath: string, calleeName: string, calleeFile: string) => ({
    id: '1',
    score: 0,
    metadata: {
      path: filePath,
      callees: [{ name: calleeName, file: calleeFile, line: 1 }],
    },
  });

  /**
   * Run `body` with the path of a cache file inside a fresh temp directory,
   * removing the directory afterwards even if the body throws.
   * Node built-ins are imported locally so this block does not depend on the
   * file's top-level import list.
   */
  const withTempGraphPath = async (body: (graphPath: string) => Promise<void>): Promise<void> => {
    const fs = await import('node:fs/promises');
    const os = await import('node:os');
    const path = await import('node:path');
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'graph-cache-'));
    try {
      await body(path.join(dir, 'dependency-graph.json'));
    } finally {
      await fs.rm(dir, { recursive: true, force: true });
    }
  };

  it('should call fallback when graphPath is undefined', async () => {
    const fallbackDocs = [docCalling('src/a.ts', 'foo', 'src/b.ts')];

    const graph = await loadOrBuildGraph(undefined, async () => fallbackDocs);
    expect(graph.get('src/a.ts')).toBeDefined();
  });

  it('should call fallback when graphPath file does not exist', async () => {
    const fallbackDocs = [docCalling('src/x.ts', 'bar', 'src/y.ts')];

    const graph = await loadOrBuildGraph('/nonexistent/path.json', async () => fallbackDocs);
    expect(graph.get('src/x.ts')).toBeDefined();
  });

  it('should load the cached graph without calling fallback', async () => {
    await withTempGraphPath(async (graphPath) => {
      const fs = await import('node:fs/promises');
      const cached = new Map<string, WeightedEdge[]>([['src/a.ts', [edge('src/b.ts', 2)]]]);
      await fs.writeFile(graphPath, serializeGraph(cached), 'utf-8');

      let fallbackCalls = 0;
      const graph = await loadOrBuildGraph(graphPath, async () => {
        fallbackCalls += 1;
        return [];
      });

      // The cache hit must short-circuit the (expensive) fallback entirely.
      expect(fallbackCalls).toBe(0);
      expect(graph.get('src/a.ts')).toEqual([{ target: 'src/b.ts', weight: 2 }]);
    });
  });

  it('should call fallback when the cached file is corrupted', async () => {
    await withTempGraphPath(async (graphPath) => {
      const fs = await import('node:fs/promises');
      await fs.writeFile(graphPath, '{ not valid json', 'utf-8');

      const fallbackDocs = [docCalling('src/x.ts', 'bar', 'src/y.ts')];
      const graph = await loadOrBuildGraph(graphPath, async () => fallbackDocs);

      expect(graph.get('src/x.ts')).toBeDefined();
    });
  });
});
434+
435+
// ============================================================================
436+
// updateGraphIncremental
437+
// ============================================================================
438+
439+
describe('updateGraphIncremental', () => {
  // Minimal search-result stub: one doc at `filePath` calling `foo` in `calleeFile`.
  const docCallingFoo = (filePath: string, calleeFile: string) => ({
    id: '1',
    score: 0,
    metadata: {
      path: filePath,
      callees: [{ name: 'foo', file: calleeFile, line: 1 }],
    },
  });

  // Shorthand for building the pre-existing graph from [file, edges] pairs.
  const graphOf = (...entries: Array<[string, WeightedEdge[]]>) =>
    new Map<string, WeightedEdge[]>(entries);

  it('should add edges for new files', () => {
    const before = graphOf(['src/a.ts', [edge('src/b.ts')]]);

    const after = updateGraphIncremental(before, [docCallingFoo('src/c.ts', 'src/d.ts')], []);

    expect(after.get('src/a.ts')).toBeDefined(); // Kept
    expect(after.get('src/c.ts')).toBeDefined(); // Added
  });

  it('should remove edges for deleted files', () => {
    const before = graphOf(['src/a.ts', [edge('src/b.ts')]], ['src/b.ts', [edge('src/c.ts')]]);

    const after = updateGraphIncremental(before, [], ['src/a.ts']);

    expect(after.has('src/a.ts')).toBe(false); // Removed
    expect(after.get('src/b.ts')).toBeDefined(); // Kept
  });

  it('should replace edges for changed files', () => {
    const before = graphOf(['src/a.ts', [edge('src/old.ts')]]);

    const after = updateGraphIncremental(before, [docCallingFoo('src/a.ts', 'src/new.ts')], []);

    const outgoing = after.get('src/a.ts')!;
    expect(outgoing.length).toBe(1);
    expect(outgoing[0].target).toBe('src/new.ts'); // Replaced
  });

  it('should not mutate the existing graph', () => {
    const before = graphOf(['src/a.ts', [edge('src/b.ts')]]);

    updateGraphIncremental(before, [], ['src/a.ts']);

    expect(before.has('src/a.ts')).toBe(true); // Original unchanged
  });

  it('should handle empty existing graph', () => {
    const after = updateGraphIncremental(new Map(), [docCallingFoo('src/a.ts', 'src/b.ts')], []);

    expect(after.get('src/a.ts')).toBeDefined();
  });
});

packages/core/src/map/graph.ts

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* which uses NetworkX PageRank over a weighted dependency graph.
1212
*/
1313

14+
import * as fs from 'node:fs/promises';
1415
import type { SearchResult } from '../vector/types';
1516

1617
// ============================================================================
@@ -22,6 +23,14 @@ export interface WeightedEdge {
2223
weight: number;
2324
}
2425

26+
/**
 * JSON envelope in which a dependency graph is cached on disk.
 */
export interface CachedGraph {
  /** Cache format version. */
  version: 1;
  /** Timestamp string recorded when the envelope was produced. */
  generatedAt: string;
  /** Number of source nodes, i.e. top-level keys of `graph`. */
  nodeCount: number;
  /** Total number of edges summed over every node. */
  edgeCount: number;
  /** Adjacency list: source key mapped to its outgoing weighted edges. */
  graph: { [source: string]: WeightedEdge[] };
}
33+
2534
// ============================================================================
2635
// Graph Builder
2736
// ============================================================================
@@ -258,3 +267,106 @@ export function shortestPath(
258267

259268
return null;
260269
}
270+
271+
// ============================================================================
272+
// Serialization
273+
// ============================================================================
274+
275+
/** Cache format version written by `serializeGraph`. */
const GRAPH_VERSION = 1;

/**
 * Serialize a dependency graph to a JSON string.
 *
 * The output is a `CachedGraph` envelope: the adjacency list plus the format
 * version, a generation timestamp, and node/edge counts.
 *
 * @param graph - Adjacency list mapping each source key to its outgoing edges.
 * @returns The JSON text of the envelope.
 */
export function serializeGraph(graph: Map<string, WeightedEdge[]>): string {
  let edgeCount = 0;
  for (const edges of graph.values()) {
    edgeCount += edges.length;
  }

  const cached: CachedGraph = {
    version: GRAPH_VERSION,
    generatedAt: new Date().toISOString(),
    nodeCount: graph.size,
    edgeCount,
    // Object.fromEntries defines every key as an own data property, so a node
    // keyed "__proto__" is serialized like any other. Plain `obj[key] = edges`
    // would instead swap the object's prototype and silently drop that node.
    graph: Object.fromEntries(graph),
  };
  return JSON.stringify(cached);
}
296+
297+
/**
 * Runtime check that a parsed JSON value has the shape of a `WeightedEdge`:
 * a string `target` and a finite numeric `weight`.
 */
function isWeightedEdge(value: unknown): value is WeightedEdge {
  if (typeof value !== 'object' || value === null) return false;
  const { target, weight } = value as { target?: unknown; weight?: unknown };
  return typeof target === 'string' && typeof weight === 'number' && Number.isFinite(weight);
}

/**
 * Deserialize a JSON string to a dependency graph.
 *
 * The payload is validated structurally rather than trusted: a cache file
 * that parses as JSON but has the wrong shape is treated as corrupted, so
 * callers can fall back to rebuilding instead of consuming a broken graph.
 *
 * @param json - Text previously produced by `serializeGraph`.
 * @returns The adjacency list, or `null` when the text is not valid JSON, the
 *   version does not match, `graph` is missing or not a plain object, or any
 *   node's value is not an array of well-formed edges.
 */
export function deserializeGraph(json: string): Map<string, WeightedEdge[]> | null {
  let data: unknown;
  try {
    data = JSON.parse(json);
  } catch {
    return null;
  }

  // JSON.parse may legitimately return primitives or null (e.g. "null", "42").
  if (typeof data !== 'object' || data === null) return null;

  const { version, graph: rawGraph } = data as { version?: unknown; graph?: unknown };
  if (version !== GRAPH_VERSION) return null;
  // Arrays are objects too, but are never a valid adjacency list.
  if (typeof rawGraph !== 'object' || rawGraph === null || Array.isArray(rawGraph)) return null;

  const graph = new Map<string, WeightedEdge[]>();
  for (const [file, edges] of Object.entries(rawGraph)) {
    if (!Array.isArray(edges) || !edges.every(isWeightedEdge)) return null;
    graph.set(file, edges);
  }
  return graph;
}
316+
317+
// ============================================================================
318+
// Load / Build
319+
// ============================================================================
320+
321+
/**
 * Return the dependency graph, preferring the on-disk cache.
 *
 * When `graphPath` is set, the file is read and deserialized; a usable result
 * is returned directly. Otherwise (no path, unreadable file, or a cache that
 * fails to deserialize) the graph is built from the documents supplied by
 * `fallbackDocs`, which is only invoked in that case.
 *
 * @param graphPath - Location of the cached graph JSON, if any.
 * @param fallbackDocs - Lazy supplier of documents used to build the graph.
 * @returns The cached or freshly built adjacency list.
 */
export async function loadOrBuildGraph(
  graphPath: string | undefined,
  fallbackDocs: () => Promise<SearchResult[]>
): Promise<Map<string, WeightedEdge[]>> {
  let cachedGraph: Map<string, WeightedEdge[]> | null = null;

  if (graphPath) {
    try {
      const cachedJson = await fs.readFile(graphPath, 'utf-8');
      cachedGraph = deserializeGraph(cachedJson);
    } catch {
      // Missing or unreadable cache file: not an error, just build below.
      cachedGraph = null;
    }
  }

  if (cachedGraph) {
    return cachedGraph;
  }

  return buildDependencyGraph(await fallbackDocs());
}
341+
342+
// ============================================================================
343+
// Incremental Update
344+
// ============================================================================
345+
346+
/**
 * Update a dependency graph incrementally.
 *
 * - Deleted files: their node (and thus their outgoing edges) is removed.
 *   Edges in other files that point at a deleted file are left untouched.
 * - Changed/new files: their old outgoing edges are removed, then replaced by
 *   the edges built from `changedDocs`. A changed file that no longer yields
 *   any edges therefore does not keep its stale ones.
 *
 * @param existing - Current adjacency list; never mutated.
 * @param changedDocs - Documents of files that were added or modified.
 * @param deletedFiles - Graph keys of files that were removed.
 * @returns A new graph; edge arrays of untouched files are shared with `existing`.
 */
export function updateGraphIncremental(
  existing: Map<string, WeightedEdge[]>,
  changedDocs: SearchResult[],
  deletedFiles: string[]
): Map<string, WeightedEdge[]> {
  const updated = new Map(existing);

  // Remove edges for deleted files
  for (const file of deletedFiles) {
    updated.delete(file);
  }

  // Drop the old edges of every changed file up front. Merging alone is not
  // enough: if the rebuilt sub-graph has no entry for a file (e.g. it no longer
  // has any callees), its previous edges would otherwise survive.
  // NOTE(review): assumes graph keys equal `metadata.path`, as the tests
  // exercise — confirm against buildDependencyGraph. If keys differ, this
  // loop is a harmless no-op.
  for (const doc of changedDocs) {
    const metadata: unknown = doc.metadata;
    if (typeof metadata !== 'object' || metadata === null) continue;
    const filePath = (metadata as { path?: unknown }).path;
    if (typeof filePath === 'string') {
      updated.delete(filePath);
    }
  }

  // Build graph from changed docs, then merge
  for (const [file, edges] of buildDependencyGraph(changedDocs)) {
    updated.set(file, edges);
  }

  return updated;
}

packages/core/src/map/index.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { stripFocusPrefix } from '../indexer/utils/change-frequency.js';
1010
import { getFileIcon } from '../utils/icons';
1111
import type { SearchResult } from '../vector/types';
1212
import type { LocalGitExtractor } from './git-extractor';
13-
import { buildDependencyGraph, connectedComponents, pageRank } from './graph';
13+
import { connectedComponents, loadOrBuildGraph, pageRank } from './graph';
1414
import type {
1515
ChangeFrequency,
1616
CodebaseMap,
@@ -45,6 +45,8 @@ export interface MapGenerationContext {
4545
indexer: RepositoryIndexer;
4646
gitExtractor?: LocalGitExtractor;
4747
logger?: Logger;
48+
/** Path to cached dependency-graph.json — avoids rebuilding from getAll */
49+
graphPath?: string;
4850
}
4951

5052
/**
@@ -120,9 +122,9 @@ export async function generateCodebaseMap(
120122
'Counted components'
121123
);
122124

123-
// Build dependency graph once, share between hot paths and components
125+
// Load cached dependency graph or build from docs as fallback
124126
const t7 = Date.now();
125-
const graph = buildDependencyGraph(allDocs);
127+
const graph = await loadOrBuildGraph(context.graphPath, async () => allDocs);
126128
const hotPaths = opts.includeHotPaths ? computeHotPaths(allDocs, graph, opts.maxHotPaths) : [];
127129
const rawComponents = connectedComponents(graph);
128130
const components = rawComponents

packages/core/src/storage/__tests__/path.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ describe('Storage Path Utilities', () => {
188188
const paths = getStorageFilePaths(storagePath);
189189

190190
expect(paths.vectors).toBe(path.join(storagePath, 'vectors'));
191+
expect(paths.dependencyGraph).toBe(path.join(storagePath, 'dependency-graph.json'));
191192
expect(paths.githubState).toBe(path.join(storagePath, 'github-state.json'));
192193
expect(paths.metadata).toBe(path.join(storagePath, 'metadata.json'));
193194
expect(paths.indexerState).toBe(path.join(storagePath, 'indexer-state.json'));

0 commit comments

Comments
 (0)