Skip to content

Commit d682a00

Browse files
prosdev and claude committed
refactor(core): update VectorStorage facade to use AntflyVectorStore
Part 1.3 of antfly migration:
- VectorStorage now wraps AntflyVectorStore (not LanceDB + TransformersEmbedder)
- search() calls store.searchText() — hybrid search via Antfly
- addDocuments() delegates to store.add() — Antfly auto-embeds
- getStats() reads model info from AntflyVectorStore.getModelInfo()
- skipEmbedder option preserved for API compat but is a no-op
- deriveTableName() maps storePath to antfly table names
- Barrel exports updated: antfly-store + types (removed embedder/store)

Note: Tests that create real VectorStorage/RepositoryIndexer will fail until Part 1.4 updates mocks and removes old LanceDB deps.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 31f987e commit d682a00

1 file changed

Lines changed: 78 additions & 122 deletions

File tree

packages/core/src/vector/index.ts

Lines changed: 78 additions & 122 deletions
Original file line number | Diff line number | Diff line change
@@ -1,207 +1,157 @@
11
/**
2-
* Vector storage and embedding system
2+
* Vector storage system
3+
*
4+
* Backed by Antfly — handles embedding generation, vector storage,
5+
* and hybrid search (BM25 + vector + RRF) internally.
36
*/
47

5-
export * from './embedder';
6-
export * from './store';
7-
export * from './types';
8+
export * from './antfly-store.js';
9+
export * from './types.js';
810

9-
import * as fs from 'node:fs/promises';
10-
import { TransformersEmbedder } from './embedder';
11-
import { LanceDBVectorStore } from './store';
11+
import { type AntflyStoreConfig, AntflyVectorStore } from './antfly-store.js';
1212
import type {
1313
EmbeddingDocument,
1414
SearchOptions,
1515
SearchResult,
1616
VectorStats,
1717
VectorStorageConfig,
18-
} from './types';
18+
} from './types.js';
1919

2020
/**
21-
* Convenience class that combines embedder and vector store
22-
* Provides a simple API for storing and searching documents
21+
* Derives an antfly table name from a storePath.
22+
*
23+
* storePath examples:
24+
* ~/.dev-agent/indexes/my-project/vectors → dev-agent-my-project-code
25+
* ~/.dev-agent/indexes/my-project/vectors-git → dev-agent-my-project-git
26+
* ~/.dev-agent/indexes/my-project/vectors-github → dev-agent-my-project-github
27+
*/
28+
/**
 * Derives an antfly table name from a storePath.
 *
 * The last path segment selects the table suffix (`vectors` → `code`,
 * `vectors-git` → `git`, `vectors-github` → `github`; anything else is used
 * as-is after sanitizing), and the segment before it names the project.
 *
 * Both `/` and `\` are accepted as separators, and empty segments (leading,
 * trailing or doubled separators) are ignored, so `a/b/vectors`,
 * `a/b/vectors//` and `a\b\vectors` all resolve to the same table.
 *
 * @param storePath Filesystem-style path that identifies the index.
 * @returns A lowercase table name of the form `dev-agent-<project>-<suffix>`
 *   containing only alphanumerics and hyphens. Falls back to project
 *   `default` and suffix `code` when the path has too few segments.
 */
function deriveTableName(storePath: string): string {
  // Split on either separator and drop empty segments; String#split never
  // returns an empty array, so without the filter the fallbacks below would
  // be unreachable for inputs such as '' or '/vectors'.
  const segments = storePath.split(/[\\/]+/).filter((segment) => segment.length > 0);
  const last = segments[segments.length - 1] ?? 'code';
  const projectDir = segments[segments.length - 2] ?? 'default';

  // Sanitize for antfly table names (alphanumeric + hyphens, lowercase).
  // Applied to both parts so the resulting name has consistent casing.
  const sanitize = (value: string): string => value.replace(/[^a-zA-Z0-9-]/g, '-').toLowerCase();
  const project = sanitize(projectDir);

  if (last === 'vectors') return `dev-agent-${project}-code`;
  if (last === 'vectors-git') return `dev-agent-${project}-git`;
  if (last === 'vectors-github') return `dev-agent-${project}-github`;
  return `dev-agent-${project}-${sanitize(last)}`;
}
41+
42+
/**
43+
* High-level vector storage API.
44+
*
45+
* Wraps AntflyVectorStore and preserves the same public interface that
46+
* all consumers (indexers, services, CLI, MCP) depend on.
47+
*
48+
* With Antfly, there is no separate embedding step — documents are
49+
* embedded automatically on insert and queries use hybrid search.
2350
*/
2451
export class VectorStorage {
25-
private readonly embedder: TransformersEmbedder;
26-
private readonly store: LanceDBVectorStore;
52+
private readonly store: AntflyVectorStore;
2753
private initialized = false;
2854

2955
constructor(config: VectorStorageConfig) {
30-
const { storePath, embeddingModel = 'Xenova/all-MiniLM-L6-v2', dimension = 384 } = config;
56+
const antflyConfig: AntflyStoreConfig = {
57+
table: deriveTableName(config.storePath),
58+
model: config.embeddingModel,
59+
};
3160

32-
this.embedder = new TransformersEmbedder(embeddingModel, dimension);
33-
this.store = new LanceDBVectorStore(storePath, dimension);
61+
this.store = new AntflyVectorStore(antflyConfig);
3462
}
3563

3664
/**
37-
* Initialize both embedder and store
38-
* @param options Optional initialization options
39-
* @param options.skipEmbedder Skip embedder initialization (useful for read-only operations)
65+
* Initialize the storage.
66+
*
67+
* The skipEmbedder option is accepted for backward compatibility but
68+
* has no effect — Antfly handles embeddings internally.
4069
*/
41-
async initialize(options?: { skipEmbedder?: boolean }): Promise<void> {
42-
if (this.initialized) {
43-
return;
44-
}
45-
46-
const { skipEmbedder = false } = options || {};
47-
48-
if (skipEmbedder) {
49-
// Only initialize store, skip embedder (much faster for read-only operations)
50-
await this.store.initialize();
51-
} else {
52-
// Initialize both embedder and store
53-
await Promise.all([this.embedder.initialize(), this.store.initialize()]);
54-
}
55-
70+
async initialize(_options?: { skipEmbedder?: boolean }): Promise<void> {
71+
if (this.initialized) return;
72+
await this.store.initialize();
5673
this.initialized = true;
5774
}
5875

5976
/**
60-
* Ensure embedder is initialized (lazy initialization for search operations)
61-
*/
62-
private async ensureEmbedder(): Promise<void> {
63-
if (!this.embedder) {
64-
throw new Error('Embedder not available');
65-
}
66-
// Initialize embedder if not already done
67-
await this.embedder.initialize();
68-
}
69-
70-
/**
71-
* Add documents to the store (automatically generates embeddings)
77+
* Add documents (Antfly generates embeddings automatically via Termite)
7278
*/
7379
async addDocuments(documents: EmbeddingDocument[]): Promise<void> {
74-
if (!this.initialized) {
75-
throw new Error('VectorStorage not initialized. Call initialize() first.');
76-
}
77-
78-
if (documents.length === 0) {
79-
return;
80-
}
81-
82-
// Generate embeddings
83-
const texts = documents.map((doc) => doc.text);
84-
const embeddings = await this.embedder.embedBatch(texts);
85-
86-
// Store documents with embeddings
87-
await this.store.add(documents, embeddings);
80+
this.assertReady();
81+
if (documents.length === 0) return;
82+
await this.store.add(documents);
8883
}
8984

9085
/**
91-
* Search for similar documents using natural language query
86+
* Search using hybrid search (BM25 + vector + RRF)
9287
*/
9388
async search(query: string, options?: SearchOptions): Promise<SearchResult[]> {
94-
if (!this.initialized) {
95-
throw new Error('VectorStorage not initialized. Call initialize() first.');
96-
}
97-
98-
// Ensure embedder is initialized (lazy load if needed)
99-
await this.ensureEmbedder();
100-
101-
// Generate query embedding
102-
const queryEmbedding = await this.embedder.embed(query);
103-
104-
// Search vector store
105-
return this.store.search(queryEmbedding, options);
89+
this.assertReady();
90+
return this.store.searchText(query, options);
10691
}
10792

10893
/**
109-
* Find similar documents to a given document by ID
110-
* More efficient than search() as it reuses the document's existing embedding
94+
* Find documents similar to a given document by ID
11195
*/
11296
async searchByDocumentId(documentId: string, options?: SearchOptions): Promise<SearchResult[]> {
113-
if (!this.initialized) {
114-
throw new Error('VectorStorage not initialized. Call initialize() first.');
115-
}
116-
97+
this.assertReady();
11798
return this.store.searchByDocumentId(documentId, options);
11899
}
119100

120101
/**
121-
* Get all documents without semantic search (fast scan)
122-
* Use this when you need all documents and don't need relevance ranking
123-
* This is 10-20x faster than search() as it skips embedding generation
102+
* Get all documents without semantic search (full scan)
124103
*/
125104
async getAll(options?: { limit?: number }): Promise<SearchResult[]> {
126-
if (!this.initialized) {
127-
throw new Error('VectorStorage not initialized. Call initialize() first.');
128-
}
129-
105+
this.assertReady();
130106
return this.store.getAll(options);
131107
}
132108

133109
/**
134110
* Get a document by ID
135111
*/
136112
async getDocument(id: string): Promise<EmbeddingDocument | null> {
137-
if (!this.initialized) {
138-
throw new Error('VectorStorage not initialized. Call initialize() first.');
139-
}
140-
113+
this.assertReady();
141114
return this.store.get(id);
142115
}
143116

144117
/**
145118
* Delete documents by ID
146119
*/
147120
async deleteDocuments(ids: string[]): Promise<void> {
148-
if (!this.initialized) {
149-
throw new Error('VectorStorage not initialized. Call initialize() first.');
150-
}
151-
121+
this.assertReady();
152122
await this.store.delete(ids);
153123
}
154124

155125
/**
156-
* Clear all documents from the store (destructive operation)
157-
* Used for force re-indexing
126+
* Clear all documents (destructive — used for force re-indexing)
158127
*/
159128
async clear(): Promise<void> {
160-
if (!this.initialized) {
161-
throw new Error('VectorStorage not initialized. Call initialize() first.');
162-
}
163-
129+
this.assertReady();
164130
await this.store.clear();
165131
}
166132

167133
/**
168134
* Get statistics about the vector store
169135
*/
170136
async getStats(): Promise<VectorStats> {
171-
if (!this.initialized) {
172-
throw new Error('VectorStorage not initialized. Call initialize() first.');
173-
}
174-
137+
this.assertReady();
138+
const modelInfo = this.store.getModelInfo();
175139
const totalDocuments = await this.store.count();
176-
177-
// Get storage size
178-
let storageSize = 0;
179-
try {
180-
const storePath = this.store.path;
181-
const stats = await fs.stat(storePath);
182-
storageSize = stats.size;
183-
} catch {
184-
// Directory might not exist yet
185-
storageSize = 0;
186-
}
140+
const storageSize = await this.store.getStorageSize();
187141

188142
return {
189143
totalDocuments,
190144
storageSize,
191-
dimension: this.embedder.dimension,
192-
modelName: this.embedder.modelName,
145+
dimension: modelInfo.dimension,
146+
modelName: modelInfo.modelName,
193147
};
194148
}
195149

196150
/**
197-
* Optimize the vector store (compact fragments, update indices)
198-
* Call this after bulk indexing operations for better performance
151+
* Optimize the store (no-op for Antfly — manages compaction internally)
199152
*/
200153
async optimize(): Promise<void> {
201-
if (!this.initialized) {
202-
throw new Error('VectorStorage not initialized. Call initialize() first.');
203-
}
204-
154+
this.assertReady();
205155
await this.store.optimize();
206156
}
207157

@@ -212,4 +162,10 @@ export class VectorStorage {
212162
await this.store.close();
213163
this.initialized = false;
214164
}
165+
166+
private assertReady(): void {
167+
if (!this.initialized) {
168+
throw new Error('VectorStorage not initialized. Call initialize() first.');
169+
}
170+
}
215171
}

0 commit comments

Comments
 (0)