Skip to content

Commit 9fde6c0

Browse files
authored
Merge pull request #8 from PatrickSys/feat/auto-heal-semantic-search
feat: Auto-heal for silent semantic search failure
2 parents bb5dcb1 + 9edbfc0 commit 9fde6c0

File tree

11 files changed

+419
-45
lines changed

11 files changed

+419
-45
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Changelog
22

33

4+
## [1.3.1] - 2026-01-05
5+
6+
### Fixed
7+
- **Auto-Heal Semantic Search**: Detects LanceDB schema corruption (missing `vector` column), triggers re-indexing, and retries search instead of silently falling back to keyword-only results.
8+
49
## [1.3.0] - 2026-01-01
510

611
### Added

internal-docs

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "codebase-context",
3-
"version": "1.3.0",
3+
"version": "1.3.1",
44
"description": "MCP server that helps AI agents understand your codebase - patterns, libraries, architecture, monorepo support",
55
"type": "module",
66
"main": "./dist/lib.js",
@@ -120,4 +120,4 @@
120120
"sharp"
121121
]
122122
}
123-
}
123+
}

src/core/indexer.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ export class CodebaseIndexer {
8686
},
8787
embedding: {
8888
provider: 'transformers',
89-
model: 'Xenova/bge-base-en-v1.5',
89+
model: 'Xenova/bge-small-en-v1.5',
9090
batchSize: 100
9191
},
9292
skipEmbedding: false,

src/core/search.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { CodeChunk, SearchResult, SearchFilters } from '../types/index.js';
99
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
1010
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
1111
import { analyzerRegistry } from './analyzer-registry.js';
12+
import { IndexCorruptedError } from '../errors/index.js';
1213

1314
export interface SearchOptions {
1415
useSemanticSearch?: boolean;
@@ -62,6 +63,9 @@ export class CodebaseSearcher {
6263

6364
this.initialized = true;
6465
} catch (error) {
66+
if (error instanceof IndexCorruptedError) {
67+
throw error; // Propagate to handler for auto-heal
68+
}
6569
console.warn('Partial initialization (keyword search only):', error);
6670
this.initialized = true;
6771
}
@@ -217,6 +221,9 @@ export class CodebaseSearcher {
217221
}
218222
});
219223
} catch (error) {
224+
if (error instanceof IndexCorruptedError) {
225+
throw error; // Propagate to handler for auto-heal
226+
}
220227
console.warn('Semantic search failed:', error);
221228
}
222229
}

src/errors/index.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/**
2+
* Thrown when the LanceDB index is corrupted or has a schema mismatch, e.g. the table has no `vector` column or no table is available to search.
3+
* This error signals that re-indexing is required for semantic search to work; callers are expected to catch it, re-index, and retry instead of silently degrading to keyword-only results.
4+
*/
5+
export class IndexCorruptedError extends Error {
6+
constructor(message: string) {
7+
super(message);
8+
this.name = 'IndexCorruptedError';
9+
}
10+
}

src/index.ts

Lines changed: 69 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ import {
2020
Resource
2121
} from '@modelcontextprotocol/sdk/types.js';
2222
import { CodebaseIndexer } from './core/indexer.js';
23-
import { IndexingStats } from './types/index.js';
23+
import { IndexingStats, SearchResult } from './types/index.js';
2424
import { CodebaseSearcher } from './core/search.js';
2525
import { analyzerRegistry } from './core/analyzer-registry.js';
2626
import { AngularAnalyzer } from './analyzers/angular/index.js';
2727
import { GenericAnalyzer } from './analyzers/generic/index.js';
2828
import { InternalFileGraph } from './utils/usage-tracker.js';
29+
import { IndexCorruptedError } from './errors/index.js';
2930

3031
analyzerRegistry.register(new AngularAnalyzer());
3132
analyzerRegistry.register(new GenericAnalyzer());
@@ -62,11 +63,10 @@ const indexState: IndexState = {
6263
status: 'idle'
6364
};
6465

65-
6666
const server: Server = new Server(
6767
{
6868
name: 'codebase-context',
69-
version: '1.3.0'
69+
version: '1.3.1'
7070
},
7171
{
7272
capabilities: {
@@ -492,7 +492,62 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
492492
}
493493

494494
const searcher = new CodebaseSearcher(ROOT_PATH);
495-
const results = await searcher.search(query, limit || 5, filters);
495+
let results: SearchResult[];
496+
497+
try {
498+
results = await searcher.search(query, limit || 5, filters);
499+
} catch (error) {
500+
if (error instanceof IndexCorruptedError) {
501+
console.error('[Auto-Heal] Index corrupted. Triggering full re-index...');
502+
503+
await performIndexing();
504+
505+
if (indexState.status === 'ready') {
506+
console.error('[Auto-Heal] Success. Retrying search...');
507+
const freshSearcher = new CodebaseSearcher(ROOT_PATH);
508+
try {
509+
results = await freshSearcher.search(query, limit || 5, filters);
510+
} catch (retryError) {
511+
return {
512+
content: [
513+
{
514+
type: 'text',
515+
text: JSON.stringify(
516+
{
517+
status: 'error',
518+
message: `Auto-heal retry failed: ${
519+
retryError instanceof Error ? retryError.message : String(retryError)
520+
}`
521+
},
522+
null,
523+
2
524+
)
525+
}
526+
]
527+
};
528+
}
529+
} else {
530+
return {
531+
content: [
532+
{
533+
type: 'text',
534+
text: JSON.stringify(
535+
{
536+
status: 'error',
537+
message: `Auto-heal failed: Indexing ended with status '${indexState.status}'`,
538+
error: indexState.error
539+
},
540+
null,
541+
2
542+
)
543+
}
544+
]
545+
};
546+
}
547+
} else {
548+
throw error; // Propagate unexpected errors
549+
}
550+
}
496551

497552
return {
498553
content: [
@@ -538,19 +593,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
538593
lastIndexed: indexState.lastIndexed?.toISOString(),
539594
stats: indexState.stats
540595
? {
541-
totalFiles: indexState.stats.totalFiles,
542-
indexedFiles: indexState.stats.indexedFiles,
543-
totalChunks: indexState.stats.totalChunks,
544-
duration: `${(indexState.stats.duration / 1000).toFixed(2)}s`
545-
}
596+
totalFiles: indexState.stats.totalFiles,
597+
indexedFiles: indexState.stats.indexedFiles,
598+
totalChunks: indexState.stats.totalChunks,
599+
duration: `${(indexState.stats.duration / 1000).toFixed(2)}s`
600+
}
546601
: undefined,
547602
progress: progress
548603
? {
549-
phase: progress.phase,
550-
percentage: progress.percentage,
551-
filesProcessed: progress.filesProcessed,
552-
totalFiles: progress.totalFiles
553-
}
604+
phase: progress.phase,
605+
percentage: progress.percentage,
606+
filesProcessed: progress.filesProcessed,
607+
totalFiles: progress.totalFiles
608+
}
554609
: undefined,
555610
error: indexState.error,
556611
hint: 'Use refresh_index to manually trigger re-indexing when needed.'

src/storage/lancedb.ts

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import { promises as fs } from 'fs';
77
import { VectorStorageProvider, CodeChunkWithEmbedding, VectorSearchResult } from './types.js';
88
import { CodeChunk, SearchFilters } from '../types/index.js';
9+
import { IndexCorruptedError } from '../errors/index.js';
910

1011
export class LanceDBStorageProvider implements VectorStorageProvider {
1112
readonly name = 'lancedb';
@@ -20,44 +21,33 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
2021

2122
try {
2223
this.storagePath = storagePath;
23-
24-
// Ensure directory exists
2524
await fs.mkdir(storagePath, { recursive: true });
2625

27-
// Dynamic import to avoid issues at require time
2826
const lancedb = await import('@lancedb/lancedb');
29-
30-
// Connect to database
3127
this.db = await lancedb.connect(storagePath);
3228

33-
// Check if table exists and has valid schema
29+
// Check if table exists and validate schema
3430
const tableNames = await this.db.tableNames();
3531
if (tableNames.includes('code_chunks')) {
3632
this.table = await this.db.openTable('code_chunks');
3733

38-
// Validate schema has vector column (required for semantic search)
39-
try {
40-
const schema = await this.table.schema();
41-
const hasVectorColumn = schema.fields.some((f: any) => f.name === 'vector');
42-
43-
if (!hasVectorColumn) {
44-
console.error('Stale index detected (missing vector column). Rebuilding...');
45-
await this.db.dropTable('code_chunks');
46-
this.table = null;
47-
} else {
48-
console.error('Opened existing LanceDB table');
49-
}
50-
} catch (_schemaError) {
51-
// If schema check fails, table is likely corrupted - drop and rebuild
52-
console.error('Failed to validate table schema, rebuilding index...');
53-
await this.db.dropTable('code_chunks');
54-
this.table = null;
34+
const schema = await this.table.schema();
35+
const hasVectorColumn = schema.fields.some((f: any) => f.name === 'vector');
36+
37+
if (!hasVectorColumn) {
38+
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
5539
}
40+
console.error('Opened existing LanceDB table');
41+
} else {
42+
this.table = null;
5643
}
5744

5845
this.initialized = true;
5946
console.error(`LanceDB initialized at: ${storagePath}`);
6047
} catch (error) {
48+
if (error instanceof IndexCorruptedError) {
49+
throw error;
50+
}
6151
console.error('Failed to initialize LanceDB:', error);
6252
throw error;
6353
}
@@ -115,7 +105,7 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
115105
filters?: SearchFilters
116106
): Promise<VectorSearchResult[]> {
117107
if (!this.initialized || !this.table) {
118-
return [];
108+
throw new IndexCorruptedError('LanceDB index corrupted: no table available for search');
119109
}
120110

121111
try {
@@ -170,7 +160,13 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
170160
distance: result._distance || 0
171161
}));
172162
} catch (error) {
173-
console.error('Failed to search:', error);
163+
// Only trigger auto-heal for verified corruption patterns
164+
if (error instanceof Error && error.message.toLowerCase().includes('no vector column')) {
165+
throw new IndexCorruptedError(`LanceDB index corrupted: ${error.message}`);
166+
}
167+
168+
// Transient errors - log and gracefully degrade
169+
console.error('[LanceDB] Search error:', error instanceof Error ? error.message : error);
174170
return [];
175171
}
176172
}
@@ -199,8 +195,7 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
199195
}
200196

201197
try {
202-
const result = await this.table.countRows();
203-
return result;
198+
return await this.table.countRows();
204199
} catch (error) {
205200
console.error('Failed to count rows:', error);
206201
return 0;

0 commit comments

Comments
 (0)