Skip to content

Commit 6718caa

Browse files
committed
release: v1.3.1 - Auto-Heal for Silent Semantic Search Failure
1 parent bb5dcb1 commit 6718caa

File tree

11 files changed

+407
-19
lines changed

11 files changed

+407
-19
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Changelog
22

33

4+
## [1.3.1] - 2026-01-05
5+
6+
### Fixed
7+
- **Auto-Heal Semantic Search**: Detects LanceDB schema corruption (missing `vector` column), triggers re-indexing, and retries search instead of silently falling back to keyword-only results.
8+
49
## [1.3.0] - 2026-01-01
510

611
### Added

internal-docs

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "codebase-context",
3-
"version": "1.3.0",
3+
"version": "1.3.1",
44
"description": "MCP server that helps AI agents understand your codebase - patterns, libraries, architecture, monorepo support",
55
"type": "module",
66
"main": "./dist/lib.js",
@@ -120,4 +120,4 @@
120120
"sharp"
121121
]
122122
}
123-
}
123+
}

src/core/indexer.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ export class CodebaseIndexer {
8686
},
8787
embedding: {
8888
provider: 'transformers',
89-
model: 'Xenova/bge-base-en-v1.5',
89+
model: 'Xenova/bge-small-en-v1.5',
9090
batchSize: 100
9191
},
9292
skipEmbedding: false,
@@ -376,8 +376,7 @@ export class CodebaseIndexer {
376376

377377
if ((i + batchSize) % 100 === 0 || i + batchSize >= chunksToEmbed.length) {
378378
console.error(
379-
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${
380-
chunksToEmbed.length
379+
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${chunksToEmbed.length
381380
} chunks`
382381
);
383382
}

src/core/search.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { CodeChunk, SearchResult, SearchFilters } from '../types/index.js';
99
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
1010
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
1111
import { analyzerRegistry } from './analyzer-registry.js';
12+
import { IndexCorruptedError } from '../errors/index.js';
1213

1314
export interface SearchOptions {
1415
useSemanticSearch?: boolean;
@@ -62,6 +63,9 @@ export class CodebaseSearcher {
6263

6364
this.initialized = true;
6465
} catch (error) {
66+
if (error instanceof IndexCorruptedError) {
67+
throw error; // Propagate to handler for auto-heal
68+
}
6569
console.warn('Partial initialization (keyword search only):', error);
6670
this.initialized = true;
6771
}
@@ -217,6 +221,9 @@ export class CodebaseSearcher {
217221
}
218222
});
219223
} catch (error) {
224+
if (error instanceof IndexCorruptedError) {
225+
throw error; // Propagate to handler for auto-heal
226+
}
220227
console.warn('Semantic search failed:', error);
221228
}
222229
}
@@ -324,9 +331,8 @@ export class CodebaseSearcher {
324331
const name = componentName || (classMatch ? classMatch[1] : null);
325332

326333
if (name && componentType) {
327-
return `${
328-
componentType.charAt(0).toUpperCase() + componentType.slice(1)
329-
} '${name}' in ${fileName}.`;
334+
return `${componentType.charAt(0).toUpperCase() + componentType.slice(1)
335+
} '${name}' in ${fileName}.`;
330336
} else if (name) {
331337
return `'${name}' defined in ${fileName}.`;
332338
} else if (componentType) {
@@ -368,12 +374,16 @@ export class CodebaseSearcher {
368374

369375
const queryVector = await this.embeddingProvider.embed(query);
370376

371-
const results = await this.storageProvider.search(queryVector, limit, filters);
377+
try {
378+
const results = await this.storageProvider.search(queryVector, limit, filters);
372379

373-
return results.map((r) => ({
374-
chunk: r.chunk,
375-
score: r.score
376-
}));
380+
return results.map((r) => ({
381+
chunk: r.chunk,
382+
score: r.score
383+
}));
384+
} catch (error) {
385+
throw error;
386+
}
377387
}
378388

379389
private async keywordSearch(

src/errors/index.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/**
2+
* Thrown when the LanceDB index is corrupted or has a schema mismatch.
3+
* This error signals that re-indexing is required for semantic search to work.
4+
*/
5+
export class IndexCorruptedError extends Error {
6+
constructor(message: string) {
7+
super(message);
8+
this.name = 'IndexCorruptedError';
9+
}
10+
}
11+

src/index.ts

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ import {
2020
Resource
2121
} from '@modelcontextprotocol/sdk/types.js';
2222
import { CodebaseIndexer } from './core/indexer.js';
23-
import { IndexingStats } from './types/index.js';
23+
import { IndexingStats, SearchResult } from './types/index.js';
2424
import { CodebaseSearcher } from './core/search.js';
2525
import { analyzerRegistry } from './core/analyzer-registry.js';
2626
import { AngularAnalyzer } from './analyzers/angular/index.js';
2727
import { GenericAnalyzer } from './analyzers/generic/index.js';
2828
import { InternalFileGraph } from './utils/usage-tracker.js';
29+
import { IndexCorruptedError } from './errors/index.js';
2930

3031
analyzerRegistry.register(new AngularAnalyzer());
3132
analyzerRegistry.register(new GenericAnalyzer());
@@ -66,7 +67,7 @@ const indexState: IndexState = {
6667
const server: Server = new Server(
6768
{
6869
name: 'codebase-context',
69-
version: '1.3.0'
70+
version: '1.3.1'
7071
},
7172
{
7273
capabilities: {
@@ -492,7 +493,62 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
492493
}
493494

494495
const searcher = new CodebaseSearcher(ROOT_PATH);
495-
const results = await searcher.search(query, limit || 5, filters);
496+
let results: SearchResult[];
497+
498+
try {
499+
results = await searcher.search(query, limit || 5, filters);
500+
} catch (error) {
501+
if (error instanceof IndexCorruptedError) {
502+
console.error('[Auto-Heal] Index corrupted. Triggering full re-index...');
503+
504+
await performIndexing();
505+
506+
if (indexState.status === 'ready') {
507+
console.error('[Auto-Heal] Success. Retrying search...');
508+
const freshSearcher = new CodebaseSearcher(ROOT_PATH);
509+
try {
510+
results = await freshSearcher.search(query, limit || 5, filters);
511+
} catch (retryError) {
512+
return {
513+
content: [
514+
{
515+
type: 'text',
516+
text: JSON.stringify(
517+
{
518+
status: 'error',
519+
message: `Auto-heal retry failed: ${
520+
retryError instanceof Error ? retryError.message : String(retryError)
521+
}`
522+
},
523+
null,
524+
2
525+
)
526+
}
527+
]
528+
};
529+
}
530+
} else {
531+
return {
532+
content: [
533+
{
534+
type: 'text',
535+
text: JSON.stringify(
536+
{
537+
status: 'error',
538+
message: `Auto-heal failed: Indexing ended with status '${indexState.status}'`,
539+
error: indexState.error
540+
},
541+
null,
542+
2
543+
)
544+
}
545+
]
546+
};
547+
}
548+
} else {
549+
throw error; // Propagate unexpected errors
550+
}
551+
}
496552

497553
return {
498554
content: [

src/storage/lancedb.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import { promises as fs } from 'fs';
77
import { VectorStorageProvider, CodeChunkWithEmbedding, VectorSearchResult } from './types.js';
88
import { CodeChunk, SearchFilters } from '../types/index.js';
9+
import { IndexCorruptedError } from '../errors/index.js';
910

1011
export class LanceDBStorageProvider implements VectorStorageProvider {
1112
readonly name = 'lancedb';
@@ -44,20 +45,31 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
4445
console.error('Stale index detected (missing vector column). Rebuilding...');
4546
await this.db.dropTable('code_chunks');
4647
this.table = null;
48+
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
4749
} else {
4850
console.error('Opened existing LanceDB table');
4951
}
50-
} catch (_schemaError) {
52+
} catch (schemaError) {
53+
if (schemaError instanceof IndexCorruptedError) {
54+
throw schemaError;
55+
}
5156
// If schema check fails, table is likely corrupted - drop and rebuild
5257
console.error('Failed to validate table schema, rebuilding index...');
5358
await this.db.dropTable('code_chunks');
5459
this.table = null;
60+
throw new IndexCorruptedError('LanceDB index corrupted: schema validation failed');
5561
}
62+
} else {
63+
// Table missing entirely - not necessarily an error during initialization
64+
this.table = null;
5665
}
5766

5867
this.initialized = true;
5968
console.error(`LanceDB initialized at: ${storagePath}`);
6069
} catch (error) {
70+
if (error instanceof IndexCorruptedError) {
71+
throw error;
72+
}
6173
console.error('Failed to initialize LanceDB:', error);
6274
throw error;
6375
}
@@ -115,7 +127,8 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
115127
filters?: SearchFilters
116128
): Promise<VectorSearchResult[]> {
117129
if (!this.initialized || !this.table) {
118-
return [];
130+
// If the provider is uninitialized or the table is missing, throw so auto-heal can rebuild the index
131+
throw new IndexCorruptedError('LanceDB index corrupted: no table available for search');
119132
}
120133

121134
try {
@@ -170,7 +183,15 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
170183
distance: result._distance || 0
171184
}));
172185
} catch (error) {
186+
if (error instanceof Error && error.message.includes('No vector column')) {
187+
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
188+
}
173189
console.error('Failed to search:', error);
190+
// Other errors trigger auto-heal only when the message mentions LanceDB or Arrow,
191+
// i.e. when it looks like a database issue; anything else falls back to an empty result
192+
if (error instanceof Error && (error.message.includes('LanceDB') || error.message.includes('Arrow'))) {
193+
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
194+
}
174195
return [];
175196
}
176197
}

tests/lancedb-corruption.test.ts

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2+
import { promises as fs } from 'fs';
3+
import os from 'os';
4+
import path from 'path';
5+
import { IndexCorruptedError } from '../src/errors/index.js';
6+
7+
const lancedb = vi.hoisted(() => ({
8+
connect: vi.fn()
9+
}));
10+
11+
vi.mock('@lancedb/lancedb', () => ({
12+
connect: lancedb.connect
13+
}));
14+
15+
describe('LanceDBStorageProvider corruption detection', () => {
16+
let tempDir: string;
17+
let consoleErrorSpy: ReturnType<typeof vi.spyOn>;
18+
19+
beforeEach(async () => {
20+
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'lancedb-test-'));
21+
lancedb.connect.mockReset();
22+
consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
23+
});
24+
25+
afterEach(async () => {
26+
consoleErrorSpy.mockRestore();
27+
await fs.rm(tempDir, { recursive: true, force: true });
28+
});
29+
30+
it('throws IndexCorruptedError when vector column missing during initialize()', async () => {
31+
const dropTable = vi.fn(async () => {});
32+
const db = {
33+
tableNames: vi.fn(async () => ['code_chunks']),
34+
openTable: vi.fn(async () => ({
35+
schema: vi.fn(async () => ({ fields: [{ name: 'id' }] }))
36+
})),
37+
dropTable
38+
};
39+
40+
lancedb.connect.mockResolvedValue(db);
41+
42+
const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
43+
const provider = new LanceDBStorageProvider();
44+
45+
await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
46+
expect(dropTable).toHaveBeenCalledWith('code_chunks');
47+
});
48+
49+
it('throws IndexCorruptedError when schema validation fails during initialize()', async () => {
50+
const dropTable = vi.fn(async () => {});
51+
const db = {
52+
tableNames: vi.fn(async () => ['code_chunks']),
53+
openTable: vi.fn(async () => ({
54+
schema: vi.fn(async () => {
55+
throw new Error('schema error');
56+
})
57+
})),
58+
dropTable
59+
};
60+
61+
lancedb.connect.mockResolvedValue(db);
62+
63+
const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
64+
const provider = new LanceDBStorageProvider();
65+
66+
await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
67+
expect(dropTable).toHaveBeenCalledWith('code_chunks');
68+
});
69+
70+
it('throws IndexCorruptedError when vector search fails with "No vector column"', async () => {
71+
const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
72+
const provider = new LanceDBStorageProvider() as any;
73+
74+
const query = {
75+
limit: vi.fn(() => query),
76+
where: vi.fn(() => query),
77+
toArray: vi.fn(async () => {
78+
throw new Error('Schema Error: No vector column found to create index');
79+
})
80+
};
81+
82+
provider.initialized = true;
83+
provider.table = {
84+
vectorSearch: vi.fn(() => query)
85+
};
86+
87+
await expect(provider.search([0.1, 0.2], 5)).rejects.toBeInstanceOf(IndexCorruptedError);
88+
});
89+
});
90+

0 commit comments

Comments
 (0)