Skip to content

Commit 37c86b7

Browse files
prosdevclaude
andcommitted
feat(core,mcp): use Antfly index for pattern analysis (10-30x faster)
Add getDocsByFilePath to VectorStorage and analyzeFileFromIndex to PatternAnalysisService. When vectorStorage is configured, comparePatterns reads signatures from the Antfly index instead of re-scanning with ts-morph; when it is not configured (tests/offline), it falls back to the scanner. Wire VectorStorage through InspectAdapter so dev_patterns uses the fast path in production. Add a scratchpad that tracks the 5k doc cap as a known limitation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 86a8cc1 commit 37c86b7

7 files changed

Lines changed: 283 additions & 33 deletions

File tree

.claude/scratchpad.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Scratchpad
2+
3+
## Known Limitations
4+
5+
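- **`getDocsByFilePath` fetches all docs client-side (capped at 5k).** Uses `getAll({ limit: 5000 })` + an exact path filter. Fine for single repos (dev-agent has ~2,200 docs). Once an index holds more than 5,000 docs, files beyond the cap are silently missing from the result (only a warning is logged), so this won't scale to monorepos with 50k+ files. Future fix: server-side path filter in Antfly SDK.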
6+
7+
## Open Questions
8+
9+
- Can Antfly SDK support server-side path filtering? Would eliminate the 5k doc cap in `getDocsByFilePath`. Worth raising with Antfly team after MCP Phase 1 ships.
10+
11+
## Future Work
12+
13+
- Antfly SDK: server-side path filter for `getDocsByFilePath` (eliminates 5k cap)
14+
- `dev_patterns format: "json"` for token-efficient agent output (MCP Phase 1, Part 1.4)
15+
- ast-grep as optional dep for pattern analysis (MCP Phase 1, Part 1.5)
16+
- PageRank for `dev_map` hot paths (MCP Phase 1, Part 1.6)
17+
- E2E tests in CI — blocked on Antfly memory requirements vs GitHub runner limits (7GB)
18+
19+
## Notes
20+
21+
- Both pattern analysis paths (index vs scan) must use the same pure extractors from MCP Phase 1, Part 1.1 to avoid drift. Test this explicitly.
22+
- Log which path is used (index vs scanner) at debug level so we can verify the fast path fires in production.

packages/core/src/services/__tests__/pattern-analysis-service.test.ts

Lines changed: 148 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import * as fs from 'node:fs/promises';
88
import * as os from 'node:os';
99
import * as path from 'node:path';
10-
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
10+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
1111
import {
1212
extractErrorHandlingFromContent,
1313
extractImportStyleFromContent,
@@ -110,6 +110,153 @@ describe('Pure Pattern Extractors', () => {
110110
});
111111
});
112112

113+
// ========================================================================
114+
// analyzeFileFromIndex (index-based, no ts-morph)
115+
// ========================================================================
116+
117+
describe('analyzeFileFromIndex', () => {
  // Every test runs against its own throwaway repository root on disk.
  let repoRoot: string;
  let patternService: PatternAnalysisService;

  beforeEach(async () => {
    const prefix = path.join(os.tmpdir(), 'analyze-from-index-');
    repoRoot = await fs.mkdtemp(prefix);
    patternService = new PatternAnalysisService({ repositoryPath: repoRoot });
  });

  afterEach(async () => {
    await fs.rm(repoRoot, { recursive: true, force: true });
  });

  it('should extract patterns from indexed metadata', async () => {
    // File content supplies import style, error handling and line count ...
    await fs.mkdir(path.join(repoRoot, 'src'), { recursive: true });
    await fs.writeFile(
      path.join(repoRoot, 'src/test.ts'),
      'import { foo } from "./bar";\n\nexport function test(): string {\n throw new Error("oops");\n return "hello";\n}\n'
    );

    // ... while the indexed signature supplies type-annotation coverage.
    const functionDoc = {
      id: 'src/test.ts:test:3',
      score: 0.9,
      metadata: {
        path: 'src/test.ts',
        type: 'function',
        name: 'test',
        signature: 'function test(): string',
        exported: true,
      },
    };

    const patterns = await patternService.analyzeFileFromIndex('src/test.ts', [functionDoc]);
    const { importStyle, typeAnnotations, fileSize, errorHandling } = patterns;

    expect(importStyle.style).toBe('esm');
    expect(typeAnnotations.coverage).toBe('full');
    expect(fileSize.lines).toBe(7);
    expect(errorHandling.style).toBe('throw');
  });

  it('should handle files with no indexed docs', async () => {
    await fs.writeFile(path.join(repoRoot, 'empty.ts'), 'const x = 1;\n');

    const { typeAnnotations } = await patternService.analyzeFileFromIndex('empty.ts', []);

    expect(typeAnnotations.coverage).toBe('none');
    expect(typeAnnotations.totalCount).toBe(0);
  });

  it('should handle deleted files gracefully (ENOENT)', async () => {
    // Nothing is written to disk: the path does not exist under repoRoot.
    const patterns = await patternService.analyzeFileFromIndex('nonexistent.ts', []);

    expect(patterns.fileSize.lines).toBe(0);
    expect(patterns.fileSize.bytes).toBe(0);
    expect(patterns.importStyle.style).toBe('unknown');
    expect(patterns.errorHandling.style).toBe('unknown');
  });
});
173+
174+
// ========================================================================
175+
// comparePatterns with vectorStorage (fast path)
176+
// ========================================================================
177+
178+
describe('comparePatterns with vectorStorage', () => {
  // Throwaway repository root, recreated for every test.
  let repoRoot: string;

  beforeEach(async () => {
    const prefix = path.join(os.tmpdir(), 'compare-index-');
    repoRoot = await fs.mkdtemp(prefix);
  });

  afterEach(async () => {
    await fs.rm(repoRoot, { recursive: true, force: true });
  });

  it('should use index path when vectorStorage is provided', async () => {
    await fs.mkdir(path.join(repoRoot, 'src'), { recursive: true });
    await fs.writeFile(
      path.join(repoRoot, 'src/target.ts'),
      'import { x } from "./y";\nexport function target(): string { throw new Error("oops"); }\n'
    );
    await fs.writeFile(
      path.join(repoRoot, 'src/similar.ts'),
      'import { a } from "./b";\nexport function similar(): number { throw new Error("bad"); }\n'
    );

    // One indexed function document per file, as the index would return them.
    const targetDoc = {
      id: 'src/target.ts:target:2',
      score: 0.9,
      metadata: {
        path: 'src/target.ts',
        type: 'function',
        signature: 'function target(): string',
      },
    };
    const similarDoc = {
      id: 'src/similar.ts:similar:2',
      score: 0.9,
      metadata: {
        path: 'src/similar.ts',
        type: 'function',
        signature: 'function similar(): number',
      },
    };

    const docsByFile = new Map<string, Array<typeof targetDoc>>();
    docsByFile.set('src/target.ts', [targetDoc]);
    docsByFile.set('src/similar.ts', [similarDoc]);

    // Only getDocsByFilePath is stubbed; that is all the fast path calls.
    const lookupDocs = vi.fn().mockResolvedValue(docsByFile);
    const storageStub = { getDocsByFilePath: lookupDocs } as any;
    const indexedService = new PatternAnalysisService({
      repositoryPath: repoRoot,
      vectorStorage: storageStub,
    });

    const comparison = await indexedService.comparePatterns('src/target.ts', ['src/similar.ts']);

    expect(lookupDocs).toHaveBeenCalledWith(['src/target.ts', 'src/similar.ts']);
    expect(comparison.importStyle.yourFile).toBe('esm');
    expect(comparison.typeAnnotations.yourFile).toBe('full');
  });

  it('should handle empty results from index gracefully', async () => {
    await fs.writeFile(path.join(repoRoot, 'solo.ts'), 'const x = 1;\n');

    // The index knows nothing about the file; analysis must still succeed.
    const lookupDocs = vi.fn().mockResolvedValue(new Map());
    const storageStub = { getDocsByFilePath: lookupDocs } as any;
    const indexedService = new PatternAnalysisService({
      repositoryPath: repoRoot,
      vectorStorage: storageStub,
    });

    const comparison = await indexedService.comparePatterns('solo.ts', []);

    expect(comparison.fileSize.yourFile).toBeGreaterThan(0);
  });
});
259+
113260
// ========================================================================
114261
// PatternAnalysisService (integration — uses file I/O)
115262
// ========================================================================

packages/core/src/services/pattern-analysis-service.ts

Lines changed: 76 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import * as path from 'node:path';
1010
import { scanRepository } from '../scanner';
1111
import type { Document } from '../scanner/types';
1212
import { findTestFile, isTestFile } from '../utils/test-utils';
13+
import type { SearchResult } from '../vector/types';
1314
import type {
1415
ErrorHandlingComparison,
1516
ErrorHandlingPattern,
@@ -129,45 +130,88 @@ export class PatternAnalysisService {
129130
}
130131

131132
/**
132-
* Compare patterns between target file and similar files
133+
* Analyze file patterns using indexed metadata (fast — no ts-morph).
133134
*
134-
* OPTIMIZED: Batch scans all files in one pass to avoid repeated ts-morph initialization
135+
* Reads signatures from the Antfly index, content from disk (for line count
136+
* and error handling regex). Falls back gracefully on ENOENT (deleted file).
137+
*/
138+
async analyzeFileFromIndex(filePath: string, indexedDocs: SearchResult[]): Promise<FilePatterns> {
  const absolutePath = path.join(this.config.repositoryPath, filePath);

  // Defaults describe a file that no longer exists: no content, zero size.
  let source = '';
  const fileSize = { lines: 0, bytes: 0 };

  try {
    const [text, info] = await Promise.all([
      fs.readFile(absolutePath, 'utf-8'),
      fs.stat(absolutePath),
    ]);
    source = text;
    fileSize.bytes = info.size;
    fileSize.lines = text.split('\n').length;
  } catch (error) {
    // Only a missing file is tolerated (deleted between index and analysis);
    // every other I/O failure propagates to the caller.
    const code = (error as NodeJS.ErrnoException).code;
    if (code !== 'ENOENT') {
      throw error;
    }
  }

  const testing = await this.analyzeTesting(filePath);

  // Collect the non-empty signatures of indexed functions and methods.
  const signatures: string[] = [];
  for (const { metadata } of indexedDocs) {
    const isCallable = metadata.type === 'function' || metadata.type === 'method';
    if (!isCallable) continue;

    const signature = (metadata.signature as string) || '';
    if (signature) {
      signatures.push(signature);
    }
  }

  const importStyle = extractImportStyleFromContent(source);
  const errorHandling = extractErrorHandlingFromContent(source);
  const typeAnnotations = extractTypeCoverageFromSignatures(signatures);

  return { fileSize, testing, importStyle, errorHandling, typeAnnotations };
}
171+
172+
/**
173+
* Compare patterns between target file and similar files
135174
*
136-
* @param targetFile - Target file to analyze
137-
* @param similarFiles - Array of similar file paths
138-
* @returns Pattern comparison results
175+
* Uses Antfly index when vectorStorage is available (fast path, ~100ms).
176+
* Falls back to ts-morph scanning when not (tests, offline).
139177
*/
140178
async comparePatterns(targetFile: string, similarFiles: string[]): Promise<PatternComparison> {
141-
// OPTIMIZATION: Batch scan all files at once (5-10x faster than individual scans)
142179
const allFiles = [targetFile, ...similarFiles];
143-
const batchResult = await scanRepository({
144-
repoRoot: this.config.repositoryPath,
145-
include: allFiles,
146-
});
147-
148-
// Group documents by file for fast lookup
149-
const docsByFile = new Map<string, Document[]>();
150-
for (const doc of batchResult.documents) {
151-
const file = doc.metadata.file;
152-
if (!docsByFile.has(file)) {
153-
docsByFile.set(file, []);
154-
}
155-
const docs = docsByFile.get(file);
156-
if (docs) {
157-
docs.push(doc);
180+
let targetPatterns: FilePatterns;
181+
let similarPatterns: FilePatterns[];
182+
183+
if (this.config.vectorStorage) {
184+
// FAST PATH: read from Antfly index
185+
// Index-based analysis takes ~100ms vs 1-3s for a ts-morph scan
186+
const docsByFile = await this.config.vectorStorage.getDocsByFilePath(allFiles);
187+
188+
targetPatterns = await this.analyzeFileFromIndex(
189+
targetFile,
190+
docsByFile.get(targetFile) || []
191+
);
192+
similarPatterns = await Promise.all(
193+
similarFiles.map((f) => this.analyzeFileFromIndex(f, docsByFile.get(f) || []))
194+
);
195+
} else {
196+
// FALLBACK: scan files with ts-morph
197+
// Used when no vectorStorage is configured (tests/offline)
198+
const batchResult = await scanRepository({
199+
repoRoot: this.config.repositoryPath,
200+
include: allFiles,
201+
});
202+
203+
const docsByFile = new Map<string, Document[]>();
204+
for (const doc of batchResult.documents) {
205+
const file = doc.metadata.file;
206+
if (!docsByFile.has(file)) docsByFile.set(file, []);
207+
docsByFile.get(file)!.push(doc);
158208
}
159-
}
160209

161-
// Analyze target file with cached documents
162-
const targetPatterns = await this.analyzeFileWithDocs(
163-
targetFile,
164-
docsByFile.get(targetFile) || []
165-
);
166-
167-
// Analyze similar files in parallel with cached documents
168-
const similarPatterns = await Promise.all(
169-
similarFiles.map((f) => this.analyzeFileWithDocs(f, docsByFile.get(f) || []))
170-
);
210+
targetPatterns = await this.analyzeFileWithDocs(targetFile, docsByFile.get(targetFile) || []);
211+
similarPatterns = await Promise.all(
212+
similarFiles.map((f) => this.analyzeFileWithDocs(f, docsByFile.get(f) || []))
213+
);
214+
}
171215

172216
return {
173217
fileSize: this.compareFileSize(

packages/core/src/services/pattern-analysis-types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,4 +122,5 @@ export interface PatternComparison {
122122
*/
123123
export interface PatternAnalysisConfig {
124124
/** Repository root; analyzed file paths are resolved relative to this directory. */
repositoryPath: string;
125+
/**
 * Optional index-backed storage. When set, comparePatterns reads per-file
 * documents through getDocsByFilePath instead of scanning the files with
 * scanRepository; when omitted, the scanner is used.
 */
vectorStorage?: import('../vector/index.js').VectorStorage;
125126
}

packages/core/src/vector/index.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,38 @@ export class VectorStorage {
110110
return this.store.getAll(options);
111111
}
112112

113+
/**
 * Get indexed documents grouped by file path.
 *
 * Fetches up to 5,000 documents with `getAll` and keeps the ones whose
 * `metadata.path` exactly equals one of the requested paths. An exact
 * client-side match is used because it is more reliable than BM25 search,
 * which tokenizes paths unpredictably.
 *
 * Limitation: when the fetch returns 5,000 documents the result may be
 * truncated, so documents for some requested files can be missing; a warning
 * is written to stderr in that case. Fine for single repos, won't scale to
 * monorepos with 50k+ files. See .claude/scratchpad.md.
 *
 * @param filePaths - Paths to look up; compared verbatim against `metadata.path`.
 * @returns Map from file path to its indexed documents. Paths with no matching
 *   document have no entry, so callers should treat a missing key as "no docs".
 */
async getDocsByFilePath(filePaths: string[]): Promise<Map<string, SearchResult[]>> {
  this.assertReady();

  const byFile = new Map<string, SearchResult[]>();

  // Nothing requested: skip the (up to 5,000 document) fetch entirely.
  if (filePaths.length === 0) {
    return byFile;
  }

  const DOC_LIMIT = 5000;
  const pathSet = new Set(filePaths);
  const allDocs = await this.getAll({ limit: DOC_LIMIT });

  // Reaching the cap means the index may hold more documents than were fetched.
  if (allDocs.length >= DOC_LIMIT) {
    console.error(
      `[dev-agent] Warning: getDocsByFilePath hit ${DOC_LIMIT} doc limit. Some files may be missing.`
    );
  }

  for (const doc of allDocs) {
    // Metadata values are not guaranteed to be strings; narrow instead of casting.
    const docPath: unknown = doc.metadata.path;
    if (typeof docPath !== 'string' || !pathSet.has(docPath)) continue;

    const bucket = byFile.get(docPath);
    if (bucket) {
      bucket.push(doc);
    } else {
      byFile.set(docPath, [doc]);
    }
  }

  return byFile;
}
144+
113145
/**
114146
* Get a document by ID
115147
*/

packages/mcp-server/bin/dev-agent-mcp.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ async function main() {
268268
const inspectAdapter = new InspectAdapter({
269269
repositoryPath,
270270
searchService,
271+
vectorStorage: indexer.getVectorStorage(),
271272
defaultLimit: 10,
272273
defaultThreshold: 0.7,
273274
defaultFormat: 'compact',

packages/mcp-server/src/adapters/built-in/inspect-adapter.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
PatternAnalysisService,
1010
type PatternComparison,
1111
type SearchService,
12+
type VectorStorage,
1213
} from '@prosdevlab/dev-agent-core';
1314
import { InspectArgsSchema } from '../../schemas/index.js';
1415
import { ToolAdapter } from '../tool-adapter.js';
@@ -18,6 +19,7 @@ import { validateArgs } from '../validation.js';
1819
export interface InspectAdapterConfig {
1920
repositoryPath: string;
2021
searchService: SearchService;
22+
vectorStorage?: VectorStorage;
2123
defaultLimit?: number;
2224
defaultThreshold?: number;
2325
defaultFormat?: 'compact' | 'verbose';
@@ -49,6 +51,7 @@ export class InspectAdapter extends ToolAdapter {
4951
this.searchService = config.searchService;
5052
this.patternService = new PatternAnalysisService({
5153
repositoryPath: config.repositoryPath,
54+
vectorStorage: config.vectorStorage,
5255
});
5356
this.defaultLimit = config.defaultLimit ?? 10;
5457
this.defaultThreshold = config.defaultThreshold ?? 0.7;

0 commit comments

Comments
 (0)