Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog


## [1.3.1] - 2026-01-05

### Fixed
- **Auto-Heal Semantic Search**: Detects LanceDB schema corruption (missing `vector` column), triggers re-indexing, and retries search instead of silently falling back to keyword-only results.

## [1.3.0] - 2026-01-01

### Added
Expand Down
2 changes: 1 addition & 1 deletion internal-docs
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codebase-context",
"version": "1.3.0",
"version": "1.3.1",
"description": "MCP server that helps AI agents understand your codebase - patterns, libraries, architecture, monorepo support",
"type": "module",
"main": "./dist/lib.js",
Expand Down Expand Up @@ -120,4 +120,4 @@
"sharp"
]
}
}
}
5 changes: 2 additions & 3 deletions src/core/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ export class CodebaseIndexer {
},
embedding: {
provider: 'transformers',
model: 'Xenova/bge-base-en-v1.5',
model: 'Xenova/bge-small-en-v1.5',
batchSize: 100
},
skipEmbedding: false,
Expand Down Expand Up @@ -376,8 +376,7 @@ export class CodebaseIndexer {

if ((i + batchSize) % 100 === 0 || i + batchSize >= chunksToEmbed.length) {
console.error(
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${
chunksToEmbed.length
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${chunksToEmbed.length
} chunks`
);
}
Expand Down
26 changes: 18 additions & 8 deletions src/core/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
import { analyzerRegistry } from './analyzer-registry.js';
import { IndexCorruptedError } from '../errors/index.js';

export interface SearchOptions {
useSemanticSearch?: boolean;
Expand Down Expand Up @@ -62,6 +63,9 @@

this.initialized = true;
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Partial initialization (keyword search only):', error);
this.initialized = true;
}
Expand Down Expand Up @@ -217,6 +221,9 @@
}
});
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Semantic search failed:', error);
}
}
Expand Down Expand Up @@ -324,9 +331,8 @@
const name = componentName || (classMatch ? classMatch[1] : null);

if (name && componentType) {
return `${
componentType.charAt(0).toUpperCase() + componentType.slice(1)
} '${name}' in ${fileName}.`;
return `${componentType.charAt(0).toUpperCase() + componentType.slice(1)
} '${name}' in ${fileName}.`;
} else if (name) {
return `'${name}' defined in ${fileName}.`;
} else if (componentType) {
Expand Down Expand Up @@ -368,12 +374,16 @@

const queryVector = await this.embeddingProvider.embed(query);

const results = await this.storageProvider.search(queryVector, limit, filters);
try {

Check failure on line 377 in src/core/search.ts

View workflow job for this annotation

GitHub Actions / Quality Checks

Unnecessary try/catch wrapper
const results = await this.storageProvider.search(queryVector, limit, filters);

return results.map((r) => ({
chunk: r.chunk,
score: r.score
}));
return results.map((r) => ({
chunk: r.chunk,
score: r.score
}));
} catch (error) {
throw error;
}
}

private async keywordSearch(
Expand Down
11 changes: 11 additions & 0 deletions src/errors/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Thrown when the LanceDB index is corrupted or has a schema mismatch.
 * This error signals that re-indexing is required for semantic search to work.
 *
 * Consumers are expected to detect it with `instanceof IndexCorruptedError`,
 * so the constructor pins the prototype explicitly to keep that check reliable
 * even when the class is compiled down to a target where subclasses of the
 * built-in `Error` would otherwise lose their prototype chain.
 */
export class IndexCorruptedError extends Error {
  /**
   * @param message - Human-readable description of the detected corruption.
   */
  constructor(message: string) {
    super(message);
    // Restore the prototype chain: with down-levelled (ES5) output, `super(...)`
    // returns a plain Error and `instanceof IndexCorruptedError` would be false.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'IndexCorruptedError';
  }
}

62 changes: 59 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ import {
Resource
} from '@modelcontextprotocol/sdk/types.js';
import { CodebaseIndexer } from './core/indexer.js';
import { IndexingStats } from './types/index.js';
import { IndexingStats, SearchResult } from './types/index.js';
import { CodebaseSearcher } from './core/search.js';
import { analyzerRegistry } from './core/analyzer-registry.js';
import { AngularAnalyzer } from './analyzers/angular/index.js';
import { GenericAnalyzer } from './analyzers/generic/index.js';
import { InternalFileGraph } from './utils/usage-tracker.js';
import { IndexCorruptedError } from './errors/index.js';

analyzerRegistry.register(new AngularAnalyzer());
analyzerRegistry.register(new GenericAnalyzer());
Expand Down Expand Up @@ -66,7 +67,7 @@ const indexState: IndexState = {
const server: Server = new Server(
{
name: 'codebase-context',
version: '1.3.0'
version: '1.3.1'
},
{
capabilities: {
Expand Down Expand Up @@ -492,7 +493,62 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
}

const searcher = new CodebaseSearcher(ROOT_PATH);
const results = await searcher.search(query, limit || 5, filters);
let results: SearchResult[];

try {
results = await searcher.search(query, limit || 5, filters);
} catch (error) {
if (error instanceof IndexCorruptedError) {
console.error('[Auto-Heal] Index corrupted. Triggering full re-index...');

await performIndexing();

if (indexState.status === 'ready') {
console.error('[Auto-Heal] Success. Retrying search...');
const freshSearcher = new CodebaseSearcher(ROOT_PATH);
try {
results = await freshSearcher.search(query, limit || 5, filters);
} catch (retryError) {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal retry failed: ${
retryError instanceof Error ? retryError.message : String(retryError)
}`
},
null,
2
)
}
]
};
}
} else {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal failed: Indexing ended with status '${indexState.status}'`,
error: indexState.error
},
null,
2
)
}
]
};
}
} else {
throw error; // Propagate unexpected errors
}
}

return {
content: [
Expand Down
25 changes: 23 additions & 2 deletions src/storage/lancedb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import { promises as fs } from 'fs';
import { VectorStorageProvider, CodeChunkWithEmbedding, VectorSearchResult } from './types.js';
import { CodeChunk, SearchFilters } from '../types/index.js';
import { IndexCorruptedError } from '../errors/index.js';

export class LanceDBStorageProvider implements VectorStorageProvider {
readonly name = 'lancedb';
Expand Down Expand Up @@ -44,20 +45,31 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
console.error('Stale index detected (missing vector column). Rebuilding...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
} else {
console.error('Opened existing LanceDB table');
}
} catch (_schemaError) {
} catch (schemaError) {
if (schemaError instanceof IndexCorruptedError) {
throw schemaError;
}
// If schema check fails, table is likely corrupted - drop and rebuild
console.error('Failed to validate table schema, rebuilding index...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: schema validation failed');
}
} else {
// Table missing entirely - not necessarily an error during initialization
this.table = null;
}

this.initialized = true;
console.error(`LanceDB initialized at: ${storagePath}`);
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error;
}
console.error('Failed to initialize LanceDB:', error);
throw error;
}
Expand Down Expand Up @@ -115,7 +127,8 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
filters?: SearchFilters
): Promise<VectorSearchResult[]> {
if (!this.initialized || !this.table) {
return [];
// If table is missing, throw so auto-heal can fix it
throw new IndexCorruptedError('LanceDB index corrupted: no table available for search');
}

try {
Expand Down Expand Up @@ -170,7 +183,15 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
distance: result._distance || 0
}));
} catch (error) {
if (error instanceof Error && error.message.includes('No vector column')) {
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
}
console.error('Failed to search:', error);
// For other errors, we throw IndexCorruptedError to be safe and trigger auto-heal
// if it looks like a database issue
if (error instanceof Error && (error.message.includes('LanceDB') || error.message.includes('Arrow'))) {
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
Comment on lines +192 to +193
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: Overly broad error catching - any error matching "LanceDB" or "Arrow" triggers auto-heal

This catch-all may cause unnecessary re-indexing for transient network issues, OOM errors, or other non-corruption problems. Consider narrowing to specific error types or adding additional validation before throwing IndexCorruptedError.

Suggested change
if (error instanceof Error && (error.message.includes('LanceDB') || error.message.includes('Arrow'))) {
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
if (error instanceof Error && error.message.includes('No vector column')) {
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
}
Prompt To Fix With AI
This is a comment left during a code review.
Path: src/storage/lancedb.ts
Line: 192:193

Comment:
**logic:** Overly broad error catching - any error matching "LanceDB" or "Arrow" triggers auto-heal

This catch-all may cause unnecessary re-indexing for transient network issues, OOM errors, or other non-corruption problems. Consider narrowing to specific error types or adding additional validation before throwing `IndexCorruptedError`.

```suggestion
      if (error instanceof Error && error.message.includes('No vector column')) {
        throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
      }
```

How can I resolve this? If you propose a fix, please make it concise.

}
return [];
}
}
Expand Down
90 changes: 90 additions & 0 deletions tests/lancedb-corruption.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { promises as fs } from 'fs';
import os from 'os';
import path from 'path';
import { IndexCorruptedError } from '../src/errors/index.js';

// Create the mock `connect` function through vi.hoisted so that the vi.mock
// factory below can reference it.
const lancedb = vi.hoisted(() => ({
connect: vi.fn()
}));

// Replace the '@lancedb/lancedb' module with a stub that exposes only `connect`,
// backed by the mock function defined above.
vi.mock('@lancedb/lancedb', () => ({
connect: lancedb.connect
}));

// Verifies that LanceDBStorageProvider reports a broken index by throwing
// IndexCorruptedError, both from initialize() and from search().
describe('LanceDBStorageProvider corruption detection', () => {
let tempDir: string;
let consoleErrorSpy: ReturnType<typeof vi.spyOn>;

// Each test gets a fresh temp directory, a reset `connect` mock, and a
// silenced console.error (replaced with a no-op implementation).
beforeEach(async () => {
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'lancedb-test-'));
lancedb.connect.mockReset();
consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
});

// Restore console.error and remove the temp directory created in beforeEach.
afterEach(async () => {
consoleErrorSpy.mockRestore();
await fs.rm(tempDir, { recursive: true, force: true });
});

it('throws IndexCorruptedError when vector column missing during initialize()', async () => {
const dropTable = vi.fn(async () => {});
// Fake database: the 'code_chunks' table exists, but its schema lists only
// an `id` field, i.e. there is no `vector` column.
const db = {
tableNames: vi.fn(async () => ['code_chunks']),
openTable: vi.fn(async () => ({
schema: vi.fn(async () => ({ fields: [{ name: 'id' }] }))
})),
dropTable
};

lancedb.connect.mockResolvedValue(db);

// Import the provider inside the test, after the mock database is wired up.
const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
const provider = new LanceDBStorageProvider();

// initialize() must reject with IndexCorruptedError and drop the table.
await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
expect(dropTable).toHaveBeenCalledWith('code_chunks');
});

it('throws IndexCorruptedError when schema validation fails during initialize()', async () => {
const dropTable = vi.fn(async () => {});
// Fake database: the 'code_chunks' table exists, but reading its schema throws.
const db = {
tableNames: vi.fn(async () => ['code_chunks']),
openTable: vi.fn(async () => ({
schema: vi.fn(async () => {
throw new Error('schema error');
})
})),
dropTable
};

lancedb.connect.mockResolvedValue(db);

const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
const provider = new LanceDBStorageProvider();

// The generic schema error must surface as IndexCorruptedError, and the
// table must be dropped.
await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
expect(dropTable).toHaveBeenCalledWith('code_chunks');
});

it('throws IndexCorruptedError when vector search fails with "No vector column"', async () => {
const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
// Cast to `any` so the test can assign `initialized` and `table` directly
// instead of going through initialize().
const provider = new LanceDBStorageProvider() as any;

// Chainable query stub: limit() and where() return the same object, and
// toArray() rejects with a message containing "No vector column".
const query = {
limit: vi.fn(() => query),
where: vi.fn(() => query),
toArray: vi.fn(async () => {
throw new Error('Schema Error: No vector column found to create index');
})
};

provider.initialized = true;
provider.table = {
vectorSearch: vi.fn(() => query)
};

// search() must convert the failure into IndexCorruptedError.
await expect(provider.search([0.1, 0.2], 5)).rejects.toBeInstanceOf(IndexCorruptedError);
});
});

Loading
Loading