Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog


## [1.3.1] - 2026-01-05

### Fixed
- **Auto-Heal Semantic Search**: Detects LanceDB schema corruption (missing `vector` column), triggers re-indexing, and retries search instead of silently falling back to keyword-only results.

## [1.3.0] - 2026-01-01

### Added
Expand Down
2 changes: 1 addition & 1 deletion internal-docs
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codebase-context",
"version": "1.3.0",
"version": "1.3.1",
"description": "MCP server that helps AI agents understand your codebase - patterns, libraries, architecture, monorepo support",
"type": "module",
"main": "./dist/lib.js",
Expand Down Expand Up @@ -120,4 +120,4 @@
"sharp"
]
}
}
}
2 changes: 1 addition & 1 deletion src/core/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ export class CodebaseIndexer {
},
embedding: {
provider: 'transformers',
model: 'Xenova/bge-base-en-v1.5',
model: 'Xenova/bge-small-en-v1.5',
batchSize: 100
},
skipEmbedding: false,
Expand Down
7 changes: 7 additions & 0 deletions src/core/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { CodeChunk, SearchResult, SearchFilters } from '../types/index.js';
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
import { analyzerRegistry } from './analyzer-registry.js';
import { IndexCorruptedError } from '../errors/index.js';

export interface SearchOptions {
useSemanticSearch?: boolean;
Expand Down Expand Up @@ -62,6 +63,9 @@ export class CodebaseSearcher {

this.initialized = true;
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Partial initialization (keyword search only):', error);
this.initialized = true;
}
Expand Down Expand Up @@ -217,6 +221,9 @@ export class CodebaseSearcher {
}
});
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Semantic search failed:', error);
}
}
Expand Down
10 changes: 10 additions & 0 deletions src/errors/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Error raised when the LanceDB index is corrupted or its schema does not match.
 * Callers can treat it as the signal that the codebase has to be re-indexed
 * before semantic search can work again.
 */
export class IndexCorruptedError extends Error {
  /** Error name reported for every instance of this class. */
  override name = 'IndexCorruptedError';

  // The explicit constructor is kept so that `message` stays a required argument.
  constructor(message: string) {
    super(message);
  }
}
83 changes: 69 additions & 14 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ import {
Resource
} from '@modelcontextprotocol/sdk/types.js';
import { CodebaseIndexer } from './core/indexer.js';
import { IndexingStats } from './types/index.js';
import { IndexingStats, SearchResult } from './types/index.js';
import { CodebaseSearcher } from './core/search.js';
import { analyzerRegistry } from './core/analyzer-registry.js';
import { AngularAnalyzer } from './analyzers/angular/index.js';
import { GenericAnalyzer } from './analyzers/generic/index.js';
import { InternalFileGraph } from './utils/usage-tracker.js';
import { IndexCorruptedError } from './errors/index.js';

analyzerRegistry.register(new AngularAnalyzer());
analyzerRegistry.register(new GenericAnalyzer());
Expand Down Expand Up @@ -62,11 +63,10 @@ const indexState: IndexState = {
status: 'idle'
};


const server: Server = new Server(
{
name: 'codebase-context',
version: '1.3.0'
version: '1.3.1'
},
{
capabilities: {
Expand Down Expand Up @@ -492,7 +492,62 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
}

const searcher = new CodebaseSearcher(ROOT_PATH);
const results = await searcher.search(query, limit || 5, filters);
let results: SearchResult[];

try {
results = await searcher.search(query, limit || 5, filters);
} catch (error) {
if (error instanceof IndexCorruptedError) {
console.error('[Auto-Heal] Index corrupted. Triggering full re-index...');

await performIndexing();

if (indexState.status === 'ready') {
console.error('[Auto-Heal] Success. Retrying search...');
const freshSearcher = new CodebaseSearcher(ROOT_PATH);
try {
results = await freshSearcher.search(query, limit || 5, filters);
} catch (retryError) {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal retry failed: ${
retryError instanceof Error ? retryError.message : String(retryError)
}`
},
null,
2
)
}
]
};
}
} else {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal failed: Indexing ended with status '${indexState.status}'`,
error: indexState.error
},
null,
2
)
}
]
};
}
} else {
throw error; // Propagate unexpected errors
}
}

return {
content: [
Expand Down Expand Up @@ -538,19 +593,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
lastIndexed: indexState.lastIndexed?.toISOString(),
stats: indexState.stats
? {
totalFiles: indexState.stats.totalFiles,
indexedFiles: indexState.stats.indexedFiles,
totalChunks: indexState.stats.totalChunks,
duration: `${(indexState.stats.duration / 1000).toFixed(2)}s`
}
totalFiles: indexState.stats.totalFiles,
indexedFiles: indexState.stats.indexedFiles,
totalChunks: indexState.stats.totalChunks,
duration: `${(indexState.stats.duration / 1000).toFixed(2)}s`
}
: undefined,
progress: progress
? {
phase: progress.phase,
percentage: progress.percentage,
filesProcessed: progress.filesProcessed,
totalFiles: progress.totalFiles
}
phase: progress.phase,
percentage: progress.percentage,
filesProcessed: progress.filesProcessed,
totalFiles: progress.totalFiles
}
: undefined,
error: indexState.error,
hint: 'Use refresh_index to manually trigger re-indexing when needed.'
Expand Down
28 changes: 26 additions & 2 deletions src/storage/lancedb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import { promises as fs } from 'fs';
import { VectorStorageProvider, CodeChunkWithEmbedding, VectorSearchResult } from './types.js';
import { CodeChunk, SearchFilters } from '../types/index.js';
import { IndexCorruptedError } from '../errors/index.js';

export class LanceDBStorageProvider implements VectorStorageProvider {
readonly name = 'lancedb';
Expand Down Expand Up @@ -44,20 +45,31 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
console.error('Stale index detected (missing vector column). Rebuilding...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
} else {
console.error('Opened existing LanceDB table');
}
} catch (_schemaError) {
} catch (schemaError) {
if (schemaError instanceof IndexCorruptedError) {
throw schemaError;
}
// If schema check fails, table is likely corrupted - drop and rebuild
console.error('Failed to validate table schema, rebuilding index...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: schema validation failed');
}
} else {
// Table missing entirely - not necessarily an error during initialization
this.table = null;
}

this.initialized = true;
console.error(`LanceDB initialized at: ${storagePath}`);
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error;
}
console.error('Failed to initialize LanceDB:', error);
throw error;
}
Expand Down Expand Up @@ -115,7 +127,8 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
filters?: SearchFilters
): Promise<VectorSearchResult[]> {
if (!this.initialized || !this.table) {
return [];
// If the provider is uninitialized or the table is missing, throw so auto-heal can rebuild the index
throw new IndexCorruptedError('LanceDB index corrupted: no table available for search');
}

try {
Expand Down Expand Up @@ -170,7 +183,18 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
distance: result._distance || 0
}));
} catch (error) {
if (error instanceof Error && error.message.includes('No vector column')) {
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
}
console.error('Failed to search:', error);
// Other errors trigger auto-heal (via IndexCorruptedError) only when the message
// looks like a LanceDB/Arrow-level failure; anything else yields empty results
if (
error instanceof Error &&
(error.message.includes('LanceDB') || error.message.includes('Arrow'))
) {
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
Comment thread
PatrickSys marked this conversation as resolved.
Outdated
}
return [];
}
}
Expand Down
90 changes: 90 additions & 0 deletions tests/lancedb-corruption.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { promises as fs } from 'fs';
import os from 'os';
import path from 'path';
import { IndexCorruptedError } from '../src/errors/index.js';

// Shared handle to the mocked `connect` function. It is created through
// `vi.hoisted` so that it already exists when the `vi.mock` factory below runs
// (vitest hoists `vi.mock` calls to the top of the file).
const lancedb = vi.hoisted(() => ({
connect: vi.fn()
}));

// Replace the `@lancedb/lancedb` module with a stub whose `connect` is the mock
// above, so each test decides what `connect()` resolves to.
vi.mock('@lancedb/lancedb', () => ({
connect: lancedb.connect
}));

describe('LanceDBStorageProvider corruption detection', () => {
  let tempDir: string;
  let consoleErrorSpy: ReturnType<typeof vi.spyOn>;

  /**
   * Points the mocked `connect()` at a fake database that lists a single
   * `code_chunks` table whose `schema()` call is served by `readSchema`.
   * Returns the `dropTable` mock so a test can check that the table was dropped.
   */
  const connectFakeDb = (readSchema: () => Promise<unknown>) => {
    const dropTable = vi.fn(async () => {});
    lancedb.connect.mockResolvedValue({
      tableNames: vi.fn(async () => ['code_chunks']),
      openTable: vi.fn(async () => ({ schema: vi.fn(readSchema) })),
      dropTable
    });
    return dropTable;
  };

  /** Imports the storage module inside the test and returns a fresh provider. */
  const createProvider = async () => {
    const storageModule = await import('../src/storage/lancedb.js');
    return new storageModule.LanceDBStorageProvider();
  };

  beforeEach(async () => {
    // Every test gets its own scratch directory, a reset `connect` mock and a
    // silenced `console.error`.
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'lancedb-test-'));
    lancedb.connect.mockReset();
    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
  });

  afterEach(async () => {
    consoleErrorSpy.mockRestore();
    await fs.rm(tempDir, { recursive: true, force: true });
  });

  // Both initialize() scenarios share the same expectations and differ only in
  // how the fake table answers `schema()`.
  const initializeFailures: Array<[string, () => Promise<unknown>]> = [
    [
      'throws IndexCorruptedError when vector column missing during initialize()',
      async () => ({ fields: [{ name: 'id' }] })
    ],
    [
      'throws IndexCorruptedError when schema validation fails during initialize()',
      async () => {
        throw new Error('schema error');
      }
    ]
  ];

  for (const [title, readSchema] of initializeFailures) {
    it(title, async () => {
      const dropTable = connectFakeDb(readSchema);
      const provider = await createProvider();

      await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
      expect(dropTable).toHaveBeenCalledWith('code_chunks');
    });
  }

  it('throws IndexCorruptedError when vector search fails with "No vector column"', async () => {
    const provider = await createProvider();

    // Chainable query stub: `limit` and `where` hand back the same object and
    // `toArray` rejects with the error message under test.
    const failingQuery: Record<string, unknown> = {};
    failingQuery.limit = vi.fn(() => failingQuery);
    failingQuery.where = vi.fn(() => failingQuery);
    failingQuery.toArray = vi.fn(async () => {
      throw new Error('Schema Error: No vector column found to create index');
    });

    // Put the provider into an "initialized with a table" state without
    // calling initialize().
    Object.assign(provider, {
      initialized: true,
      table: { vectorSearch: vi.fn(() => failingQuery) }
    });

    await expect(provider.search([0.1, 0.2], 5)).rejects.toBeInstanceOf(IndexCorruptedError);
  });
});

Loading
Loading