11/**
2- * Vector storage and embedding system
2+ * Vector storage system
3+ *
4+ * Backed by Antfly — handles embedding generation, vector storage,
5+ * and hybrid search (BM25 + vector + RRF) internally.
36 */
47
5- export * from './embedder' ;
6- export * from './store' ;
7- export * from './types' ;
8+ export * from './antfly-store.js' ;
9+ export * from './types.js' ;
810
9- import * as fs from 'node:fs/promises' ;
10- import { TransformersEmbedder } from './embedder' ;
11- import { LanceDBVectorStore } from './store' ;
11+ import { type AntflyStoreConfig , AntflyVectorStore } from './antfly-store.js' ;
1212import type {
1313 EmbeddingDocument ,
1414 SearchOptions ,
1515 SearchResult ,
1616 VectorStats ,
1717 VectorStorageConfig ,
18- } from './types' ;
18+ } from './types.js ' ;
1919
/**
 * Derives an Antfly table name from a storePath.
 *
 * The last path segment selects the table suffix and the segment before it
 * is used as the project name. Both are sanitized to lowercase
 * alphanumerics and hyphens.
 *
 * storePath examples:
 *   ~/.dev-agent/indexes/my-project/vectors        → dev-agent-my-project-code
 *   ~/.dev-agent/indexes/my-project/vectors-git    → dev-agent-my-project-git
 *   ~/.dev-agent/indexes/my-project/vectors-github → dev-agent-my-project-github
 *
 * @param storePath Store path; `/` and `\` are both accepted as separators.
 * @returns A table name of the form `dev-agent-<project>-<suffix>`.
 */
function deriveTableName(storePath: string): string {
  // Sanitize for antfly table names (alphanumeric + hyphens), always lowercased
  // so the project and suffix segments follow the same rule.
  const sanitize = (segment: string): string =>
    segment.replace(/[^a-zA-Z0-9-]/g, '-').toLowerCase();

  // Split on either separator and drop empty segments so leading, doubled,
  // or trailing slashes never produce an empty project or suffix.
  const parts = storePath.split(/[\\\/]+/).filter((segment) => segment.length > 0);
  const last = parts[parts.length - 1] ?? 'code';
  const projectDir = parts[parts.length - 2] ?? 'default';
  const project = sanitize(projectDir);

  switch (last) {
    case 'vectors':
      return `dev-agent-${project}-code`;
    case 'vectors-git':
      return `dev-agent-${project}-git`;
    case 'vectors-github':
      return `dev-agent-${project}-github`;
    default:
      return `dev-agent-${project}-${sanitize(last)}`;
  }
}
41+
42+ /**
43+ * High-level vector storage API.
44+ *
45+ * Wraps AntflyVectorStore and preserves the same public interface that
46+ * all consumers (indexers, services, CLI, MCP) depend on.
47+ *
48+ * With Antfly, there is no separate embedding step — documents are
49+ * embedded automatically on insert and queries use hybrid search.
2350 */
2451export class VectorStorage {
25- private readonly embedder : TransformersEmbedder ;
26- private readonly store : LanceDBVectorStore ;
52+ private readonly store : AntflyVectorStore ;
2753 private initialized = false ;
2854
/**
 * Builds the underlying AntflyVectorStore.
 *
 * The table name is derived from `config.storePath`; `config.embeddingModel`
 * is passed through as the store's model.
 */
constructor(config: VectorStorageConfig) {
  const { storePath, embeddingModel } = config;
  const antflyConfig: AntflyStoreConfig = {
    table: deriveTableName(storePath),
    model: embeddingModel,
  };
  this.store = new AntflyVectorStore(antflyConfig);
}
3563
/**
 * Initialize the storage. Calling it again once initialized is a no-op.
 *
 * @param _options Accepted for backward compatibility only; `skipEmbedder`
 *   is ignored because this class no longer owns an embedder.
 */
async initialize(_options?: { skipEmbedder?: boolean }): Promise<void> {
  if (!this.initialized) {
    await this.store.initialize();
    // Only flagged as ready after the store initialized successfully.
    this.initialized = true;
  }
}
5875
/**
 * Add documents to the store. No embeddings are computed here; the
 * documents are handed to the store as-is.
 *
 * @param documents Documents to insert; an empty array is a no-op.
 * @throws Error if the storage has not been initialized.
 */
async addDocuments(documents: EmbeddingDocument[]): Promise<void> {
  this.assertReady();
  if (documents.length > 0) {
    await this.store.add(documents);
  }
}
8984
/**
 * Search with a natural-language query by delegating to the store's text
 * search (described in this module as hybrid BM25 + vector + RRF).
 *
 * @param query Query text.
 * @param options Optional search options, forwarded unchanged.
 * @returns The results produced by the store.
 * @throws Error if the storage has not been initialized.
 */
async search(query: string, options?: SearchOptions): Promise<SearchResult[]> {
  this.assertReady();
  const results = await this.store.searchText(query, options);
  return results;
}
10792
/**
 * Find documents similar to the document with the given ID.
 *
 * @param documentId ID of the reference document.
 * @param options Optional search options, forwarded unchanged.
 * @returns The results produced by the store.
 * @throws Error if the storage has not been initialized.
 */
async searchByDocumentId(documentId: string, options?: SearchOptions): Promise<SearchResult[]> {
  this.assertReady();
  const similar = await this.store.searchByDocumentId(documentId, options);
  return similar;
}
119100
/**
 * Get all documents without semantic search (full scan).
 *
 * @param options Optional settings; `limit` is forwarded to the store.
 * @returns The documents returned by the store.
 * @throws Error if the storage has not been initialized.
 */
async getAll(options?: { limit?: number }): Promise<SearchResult[]> {
  this.assertReady();
  const everything = await this.store.getAll(options);
  return everything;
}
132108
/**
 * Get a document by ID.
 *
 * @param id Document ID to look up.
 * @returns Whatever the store's lookup yields: a document or `null`.
 * @throws Error if the storage has not been initialized.
 */
async getDocument(id: string): Promise<EmbeddingDocument | null> {
  this.assertReady();
  const found = await this.store.get(id);
  return found;
}
143116
/**
 * Delete documents by ID.
 *
 * @param ids IDs of the documents to remove; forwarded to the store as-is.
 * @throws Error if the storage has not been initialized.
 */
async deleteDocuments(ids: string[]): Promise<void> {
  this.assertReady();

  const { store } = this;
  await store.delete(ids);
}
154124
/**
 * Clear all documents from the store.
 *
 * Destructive: intended for force re-indexing.
 *
 * @throws Error if the storage has not been initialized.
 */
async clear(): Promise<void> {
  this.assertReady();

  const { store } = this;
  await store.clear();
}
166132
/**
 * Get statistics about the vector store.
 *
 * @returns Document count and storage size as reported by the store, plus
 *   the dimension and model name from the store's model info.
 * @throws Error if the storage has not been initialized.
 */
async getStats(): Promise<VectorStats> {
  this.assertReady();
  const { dimension, modelName } = this.store.getModelInfo();

  // The count and the storage size are independent lookups, so fetch them
  // concurrently instead of awaiting one after the other.
  const [totalDocuments, storageSize] = await Promise.all([
    this.store.count(),
    this.store.getStorageSize(),
  ]);

  return {
    totalDocuments,
    storageSize,
    dimension,
    modelName,
  };
}
195149
/**
 * Optimize the store by delegating to the store's own `optimize()`.
 *
 * NOTE(review): the original comment calls this a no-op for Antfly, which
 * is said to manage compaction internally — confirm against the store.
 *
 * @throws Error if the storage has not been initialized.
 */
async optimize(): Promise<void> {
  this.assertReady();

  const { store } = this;
  await store.optimize();
}
207157
@@ -212,4 +162,10 @@ export class VectorStorage {
212162 await this . store . close ( ) ;
213163 this . initialized = false ;
214164 }
165+
/**
 * Guard used by the public methods.
 *
 * @throws Error if `initialize()` has not completed, or the storage has
 *   been closed since.
 */
private assertReady(): void {
  if (this.initialized) return;
  throw new Error('VectorStorage not initialized. Call initialize() first.');
}
215171}
0 commit comments