Skip to content

Commit fd5e617

Browse files
committed
fix: restore accurate stats for no-op incremental indexing
1 parent 833e759 commit fd5e617

File tree

3 files changed

+143 -43 lines changed

3 files changed

+143 -43 lines changed

src/constants/codebase-context.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,6 @@ export const CODEBASE_CONTEXT_DIRNAME = '.codebase-context' as const;
66
export const MEMORY_FILENAME = 'memory.json' as const;
77
export const INTELLIGENCE_FILENAME = 'intelligence.json' as const;
88
export const KEYWORD_INDEX_FILENAME = 'index.json' as const;
9+
export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const;
910
export const VECTOR_DB_DIRNAME = 'index' as const;
1011
export const MANIFEST_FILENAME = 'manifest.json' as const;

src/core/indexer.ts

Lines changed: 60 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ import {
3030
import { getFileCommitDates } from '../utils/git-dates.js';
3131
import {
3232
CODEBASE_CONTEXT_DIRNAME,
33+
INDEXING_STATS_FILENAME,
3334
INTELLIGENCE_FILENAME,
3435
KEYWORD_INDEX_FILENAME,
3536
MANIFEST_FILENAME,
@@ -51,6 +52,13 @@ export interface IndexerOptions {
5152
incrementalOnly?: boolean;
5253
}
5354

55+
interface PersistedIndexingStats {
56+
indexedFiles: number;
57+
totalChunks: number;
58+
totalFiles: number;
59+
generatedAt: string;
60+
}
61+
5462
export class CodebaseIndexer {
5563
private rootPath: string;
5664
private config: CodebaseConfig;
@@ -181,16 +189,18 @@ export class CodebaseIndexer {
181189
// Phase 1b: Incremental diff (if incremental mode)
182190
const contextDir = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME);
183191
const manifestPath = path.join(contextDir, MANIFEST_FILENAME);
192+
const indexingStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
184193
let diff: ManifestDiff | null = null;
185194
let currentHashes: Record<string, string> | null = null;
195+
let previousManifest: FileManifest | null = null;
186196

187197
if (this.incrementalOnly) {
188198
this.updateProgress('scanning', 10);
189199
console.error('Computing file hashes for incremental diff...');
190200
currentHashes = await computeFileHashes(files, this.rootPath);
191201

192-
const oldManifest = await readManifest(manifestPath);
193-
diff = diffManifest(oldManifest, currentHashes);
202+
previousManifest = await readManifest(manifestPath);
203+
diff = diffManifest(previousManifest, currentHashes);
194204

195205
console.error(
196206
`Incremental diff: ${diff.added.length} added, ${diff.changed.length} changed, ` +
@@ -211,21 +221,51 @@ export class CodebaseIndexer {
211221
stats.duration = Date.now() - startTime;
212222
stats.completedAt = new Date();
213223

214-
// Preserve accurate counts from the existing index (nothing changed, index is intact)
224+
let restoredFromPersistedStats = false;
225+
215226
try {
216-
const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
217-
const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
218-
if (Array.isArray(existingChunks)) {
219-
stats.totalChunks = existingChunks.length;
220-
const uniqueFiles = new Set(
221-
existingChunks.map((c: { filePath?: string }) => c.filePath)
222-
);
223-
stats.indexedFiles = uniqueFiles.size;
227+
const persisted = JSON.parse(
228+
await fs.readFile(indexingStatsPath, 'utf-8')
229+
) as Partial<PersistedIndexingStats>;
230+
231+
if (
232+
typeof persisted.indexedFiles === 'number' &&
233+
typeof persisted.totalChunks === 'number' &&
234+
typeof persisted.totalFiles === 'number'
235+
) {
236+
stats.indexedFiles = persisted.indexedFiles;
237+
stats.totalChunks = persisted.totalChunks;
238+
stats.totalFiles = persisted.totalFiles;
239+
restoredFromPersistedStats = true;
224240
}
225241
} catch {
226-
// Keyword index doesn't exist yet — keep counts as 0
242+
// No persisted stats yet — fall back below
227243
}
228244

245+
if (!restoredFromPersistedStats) {
246+
if (previousManifest) {
247+
stats.indexedFiles = Object.keys(previousManifest.files).length;
248+
}
249+
250+
try {
251+
const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
252+
const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
253+
if (Array.isArray(existingChunks)) {
254+
stats.totalChunks = existingChunks.length;
255+
if (stats.indexedFiles === 0) {
256+
const uniqueFiles = new Set(
257+
existingChunks.map((c: { filePath?: string }) => c.filePath)
258+
);
259+
stats.indexedFiles = uniqueFiles.size;
260+
}
261+
}
262+
} catch {
263+
// Keyword index doesn't exist yet — keep best-known counts
264+
}
265+
}
266+
267+
stats.totalFiles = files.length;
268+
229269
return stats;
230270
}
231271
}
@@ -575,6 +615,14 @@ export class CodebaseIndexer {
575615
};
576616
await writeManifest(manifestPath, manifest);
577617

618+
const persistedStats: PersistedIndexingStats = {
619+
indexedFiles: stats.indexedFiles,
620+
totalChunks: stats.totalChunks,
621+
totalFiles: stats.totalFiles,
622+
generatedAt: new Date().toISOString()
623+
};
624+
await fs.writeFile(indexingStatsPath, JSON.stringify(persistedStats, null, 2));
625+
578626
// Phase 5: Complete
579627
this.updateProgress('complete', 100);
580628

tests/incremental-indexing.test.ts

Lines changed: 82 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,12 @@ import path from 'path';
44
import os from 'os';
55
import { CodebaseIndexer } from '../src/core/indexer.js';
66
import { readManifest } from '../src/core/manifest.js';
7-
import { CODEBASE_CONTEXT_DIRNAME, MANIFEST_FILENAME, KEYWORD_INDEX_FILENAME } from '../src/constants/codebase-context.js';
7+
import {
8+
CODEBASE_CONTEXT_DIRNAME,
9+
MANIFEST_FILENAME,
10+
KEYWORD_INDEX_FILENAME,
11+
INDEXING_STATS_FILENAME
12+
} from '../src/constants/codebase-context.js';
813

914
describe('Incremental Indexing', () => {
1015
let tempDir: string;
@@ -68,35 +73,41 @@ describe('Incremental Indexing', () => {
6873

6974
it('should preserve indexedFiles and totalChunks in short-circuit (nothing changed)', async () => {
7075
// Use files substantial enough to produce chunks
71-
await fs.writeFile(path.join(tempDir, 'service.ts'), [
72-
'import { Injectable } from "@angular/core";',
73-
'',
74-
'@Injectable({ providedIn: "root" })',
75-
'export class UserService {',
76-
' private users: string[] = [];',
77-
'',
78-
' getUsers(): string[] {',
79-
' return this.users;',
80-
' }',
81-
'',
82-
' addUser(name: string): void {',
83-
' this.users.push(name);',
84-
' }',
85-
'}'
86-
].join('\n'));
87-
await fs.writeFile(path.join(tempDir, 'utils.ts'), [
88-
'export function formatDate(date: Date): string {',
89-
' return date.toISOString().split("T")[0];',
90-
'}',
91-
'',
92-
'export function capitalize(str: string): string {',
93-
' return str.charAt(0).toUpperCase() + str.slice(1);',
94-
'}',
95-
'',
96-
'export function range(n: number): number[] {',
97-
' return Array.from({ length: n }, (_, i) => i);',
98-
'}'
99-
].join('\n'));
76+
await fs.writeFile(
77+
path.join(tempDir, 'service.ts'),
78+
[
79+
'import { Injectable } from "@angular/core";',
80+
'',
81+
'@Injectable({ providedIn: "root" })',
82+
'export class UserService {',
83+
' private users: string[] = [];',
84+
'',
85+
' getUsers(): string[] {',
86+
' return this.users;',
87+
' }',
88+
'',
89+
' addUser(name: string): void {',
90+
' this.users.push(name);',
91+
' }',
92+
'}'
93+
].join('\n')
94+
);
95+
await fs.writeFile(
96+
path.join(tempDir, 'utils.ts'),
97+
[
98+
'export function formatDate(date: Date): string {',
99+
' return date.toISOString().split("T")[0];',
100+
'}',
101+
'',
102+
'export function capitalize(str: string): string {',
103+
' return str.charAt(0).toUpperCase() + str.slice(1);',
104+
'}',
105+
'',
106+
'export function range(n: number): number[] {',
107+
' return Array.from({ length: n }, (_, i) => i);',
108+
'}'
109+
].join('\n')
110+
);
100111

101112
// Full index first
102113
const indexer1 = new CodebaseIndexer({
@@ -119,6 +130,43 @@ describe('Incremental Indexing', () => {
119130
expect(incStats.totalFiles).toBe(fullStats.totalFiles);
120131
});
121132

133+
it('should prefer persisted stats over keyword index in no-op incremental runs', async () => {
134+
await fs.writeFile(path.join(tempDir, 'index.ts'), 'export const x = 1;');
135+
136+
const fullIndexer = new CodebaseIndexer({
137+
rootPath: tempDir,
138+
config: { skipEmbedding: true }
139+
});
140+
await fullIndexer.index();
141+
142+
const contextDir = path.join(tempDir, CODEBASE_CONTEXT_DIRNAME);
143+
await fs.writeFile(
144+
path.join(contextDir, INDEXING_STATS_FILENAME),
145+
JSON.stringify(
146+
{
147+
indexedFiles: 77,
148+
totalChunks: 1234,
149+
totalFiles: 88,
150+
generatedAt: new Date().toISOString()
151+
},
152+
null,
153+
2
154+
)
155+
);
156+
await fs.writeFile(path.join(contextDir, KEYWORD_INDEX_FILENAME), JSON.stringify([]));
157+
158+
const incIndexer = new CodebaseIndexer({
159+
rootPath: tempDir,
160+
config: { skipEmbedding: true },
161+
incrementalOnly: true
162+
});
163+
const stats = await incIndexer.index();
164+
165+
expect(stats.indexedFiles).toBe(77);
166+
expect(stats.totalChunks).toBe(1234);
167+
expect(stats.totalFiles).toBe(1);
168+
});
169+
122170
it('should detect changed files in incremental mode', async () => {
123171
await fs.writeFile(path.join(tempDir, 'index.ts'), 'export const x = 1;');
124172

@@ -155,7 +203,10 @@ describe('Incremental Indexing', () => {
155203
await indexer1.index();
156204

157205
// Add a new file
158-
await fs.writeFile(path.join(tempDir, 'utils.ts'), 'export function add(a: number, b: number) { return a + b; }');
206+
await fs.writeFile(
207+
path.join(tempDir, 'utils.ts'),
208+
'export function add(a: number, b: number) { return a + b; }'
209+
);
159210

160211
// Incremental index
161212
const indexer2 = new CodebaseIndexer({

0 commit comments

Comments
 (0)