diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index f4839439..8498bb1e 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -6,7 +6,8 @@ import { import { Embedding, EmbeddingVector, - OpenAIEmbedding + OpenAIEmbedding, + EmbeddingCache } from './embedding'; import { VectorDatabase, @@ -108,6 +109,7 @@ export class Context { private collectionNameOverride?: string; private warnedOverrideSanitization = new Set(); private synchronizers = new Map(); + private embeddingCache: EmbeddingCache | null = null; constructor(config: ContextConfig = {}) { // Initialize services @@ -158,6 +160,25 @@ export class Context { if (envCustomIgnorePatterns.length > 0) { console.log(`[Context] ๐Ÿšซ Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`); } + + // Initialize embedding cache + this.initEmbeddingCache(); + } + + /** + * (Re)create the embedding cache keyed by current provider + dimension. + * Called on construction and again whenever the embedding instance changes + * via updateEmbedding(), so cached vectors from a previous model never bleed + * into the new one. + */ + private initEmbeddingCache(): void { + const dimension = this.embedding.getDimension(); + const cacheModel = `${this.embedding.getProvider()}_${dimension}`; + this.embeddingCache = new EmbeddingCache(cacheModel, undefined, dimension); + if (this.embeddingCache.isEnabled()) { + console.log(`[Context] ๐Ÿ’พ Embedding cache enabled for model: ${cacheModel}`); + this.embeddingCache.cleanup().catch(() => {}); + } } /** @@ -569,6 +590,61 @@ export class Context { } } + /** + * Embed batch with disk cache. Only calls API for uncached chunks. + * Also dedupes duplicate strings within the same batch โ€” without this, two + * identical chunks in one batch would each hit the API since neither is + * cached at the start of the call. Common in monorepos with re-exports or + * generated boilerplate. 
+ */ + private async cachedEmbedBatch(contents: string[]): Promise<EmbeddingVector[]> { + if (!this.embeddingCache || !this.embeddingCache.isEnabled()) { + return this.embedding.embedBatch(contents); + } + + const { results, uncachedIndices } = this.embeddingCache.getBatch(contents); + + if (uncachedIndices.length === 0) { + console.log(`[Cache] ✅ All ${contents.length} embeddings from cache`); + return results as EmbeddingVector[]; + } + + // Dedupe uncached texts: send each unique string once and fan results + // back out to every original index pointing at that string. + const uniqueTexts: string[] = []; + const textToUniqueIndex = new Map<string, number>(); + const indicesByUnique: number[][] = []; + for (const i of uncachedIndices) { + const text = contents[i]; + let uniq = textToUniqueIndex.get(text); + if (uniq === undefined) { + uniq = uniqueTexts.length; + textToUniqueIndex.set(text, uniq); + uniqueTexts.push(text); + indicesByUnique.push([]); + } + indicesByUnique[uniq].push(i); + } + + const newEmbeddings = await this.embedding.embedBatch(uniqueTexts); + + for (let u = 0; u < uniqueTexts.length; u++) { + const embedding = newEmbeddings[u]; + this.embeddingCache.set(uniqueTexts[u], embedding); + for (const i of indicesByUnique[u]) { + results[i] = embedding; + } + } + + const hitRate = ((contents.length - uncachedIndices.length) / contents.length * 100).toFixed(0); + const dedupNote = uniqueTexts.length < uncachedIndices.length + ? 
` (deduped ${uncachedIndices.length} → ${uniqueTexts.length} API calls)` + : ''; + console.log(`[Cache] ${hitRate}% hit (${contents.length - uncachedIndices.length}/${contents.length} cached, ${uniqueTexts.length} embedded)${dedupNote}`); + + return results as EmbeddingVector[]; + } + /** * Check if index exists for codebase * @param codebasePath Codebase path to check @@ -653,6 +729,9 @@ export class Context { updateEmbedding(embedding: Embedding): void { this.embedding = embedding; console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`); + // Cache key is `${provider}_${dimension}`; re-key so we don't return + // vectors from the previous model on the next embed call. + this.initEmbeddingCache(); } /** @@ -865,9 +944,9 @@ export class Context { private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> { const isHybrid = this.getIsHybrid(); - // Generate embedding vectors + // Generate embedding vectors (with cache) const chunkContents = chunks.map(chunk => chunk.content); - const embeddings = await this.embedding.embedBatch(chunkContents); + const embeddings = await this.cachedEmbedBatch(chunkContents); if (isHybrid === true) { // Create hybrid vector documents diff --git a/packages/core/src/embedding/embedding-cache.ts b/packages/core/src/embedding/embedding-cache.ts new file mode 100644 index 00000000..021fefb6 --- /dev/null +++ b/packages/core/src/embedding/embedding-cache.ts @@ -0,0 +1,168 @@ +import * as fs from 'fs'; +import * as fsp from 'fs/promises'; +import * as path from 'path'; +import * as os from 'os'; +import * as crypto from 'crypto'; +import { EmbeddingVector } from './base-embedding'; +import { envManager } from '../utils/env-manager'; + +export class EmbeddingCache { + private cacheDir: string; + private enabled: boolean; + private expectedDimension: number | null; + + constructor(model: string, cacheDir?: string, expectedDimension?: number) { + this.enabled = 
(envManager.get('EMBEDDING_CACHE') || 'true').toLowerCase() !== 'false'; + this.expectedDimension = expectedDimension ?? null; + + const baseDir = cacheDir + || envManager.get('EMBEDDING_CACHE_DIR') + || path.join(os.homedir(), '.context', 'embedding-cache'); + + // Sanitize model name for filesystem + const safeModel = model.replace(/[^a-zA-Z0-9_-]/g, '_'); + this.cacheDir = path.join(baseDir, safeModel); + + if (this.enabled) { + try { + fs.mkdirSync(this.cacheDir, { recursive: true }); + } catch { + console.warn(`[Cache] โš ๏ธ Could not create cache dir: ${this.cacheDir}`); + this.enabled = false; + } + } + } + + private hash(content: string): string { + return crypto.createHash('sha256').update(content).digest('hex'); + } + + /** + * Use the FULL sha256 (64 hex chars) as the filename โ€” truncating to 12 chars + * gave a birthday-collision probability of ~50% at ~78k entries, which is + * trivially reachable for a real codebase. Full hash makes collisions + * practically impossible. + */ + private getCachePath(contentHash: string): string { + const prefix = contentHash.slice(0, 2); + return path.join(this.cacheDir, prefix, contentHash + '.json'); + } + + get(content: string): EmbeddingVector | null { + if (!this.enabled) return null; + + try { + const h = this.hash(content); + const cachePath = this.getCachePath(h); + + if (!fs.existsSync(cachePath)) return null; + + const raw = fs.readFileSync(cachePath, 'utf-8'); + const data = JSON.parse(raw); + + // Shape validation โ€” partial writes / future format changes / bit rot + // shouldn't return garbage to the caller. Treat anything unexpected as a miss. 
+ if (!data || !Array.isArray(data.v) || typeof data.d !== 'number') return null; + if (data.v.length !== data.d) return null; + if (this.expectedDimension !== null && data.d !== this.expectedDimension) return null; + + return { vector: data.v as number[], dimension: data.d }; + } catch { + return null; + } + } + + set(content: string, embedding: EmbeddingVector): void { + if (!this.enabled) return; + + try { + const h = this.hash(content); + const cachePath = this.getCachePath(h); + + fs.mkdirSync(path.dirname(cachePath), { recursive: true }); + fs.writeFileSync(cachePath, JSON.stringify({ v: embedding.vector, d: embedding.dimension })); + } catch { + // Silently fail — cache is best-effort + } + } + + getBatch(contents: string[]): { results: (EmbeddingVector | null)[]; uncachedIndices: number[] } { + const results: (EmbeddingVector | null)[] = new Array(contents.length).fill(null); + const uncachedIndices: number[] = []; + + for (let i = 0; i < contents.length; i++) { + const cached = this.get(contents[i]); + if (cached) { + results[i] = cached; + } else { + uncachedIndices.push(i); + } + } + + return { results, uncachedIndices }; + } + + isEnabled(): boolean { + return this.enabled; + } + + /** + * Delete cache files not modified in the last maxAgeDays days. + * Truly async (uses fs.promises) so startup cleanup never blocks the event loop. + * Best-effort — errors are silently ignored. + * + * `maxAgeDays <= 0` (or non-finite) disables cleanup. Documented escape hatch + * for users who want the cache to persist indefinitely. + */ + async cleanup(maxAgeDays?: number): Promise<void> { + if (!this.enabled) return; + + const days = maxAgeDays ?? 
parseInt(envManager.get('EMBEDDING_CACHE_MAX_AGE_DAYS') || '30', 10); + if (!Number.isFinite(days) || days <= 0) return; + + const cutoff = Date.now() - days * 24 * 60 * 60 * 1000; + let deleted = 0; + + try { + const prefixDirs = await fsp.readdir(this.cacheDir); + for (const prefix of prefixDirs) { + const prefixPath = path.join(this.cacheDir, prefix); + let prefixStat; + try { + prefixStat = await fsp.stat(prefixPath); + } catch { + continue; + } + if (!prefixStat.isDirectory()) continue; + + const files = await fsp.readdir(prefixPath); + for (const file of files) { + const filePath = path.join(prefixPath, file); + try { + const stat = await fsp.stat(filePath); + if (stat.mtimeMs < cutoff) { + await fsp.unlink(filePath); + deleted++; + } + } catch { + // file vanished mid-scan, fine + } + } + + // Remove empty prefix dirs + try { + const remaining = await fsp.readdir(prefixPath); + if (remaining.length === 0) await fsp.rmdir(prefixPath); + } catch { + // best-effort + } + } + + if (deleted > 0) { + console.log(`[Cache] ๐Ÿงน Cleaned up ${deleted} stale cache files (>${days} days old)`); + } + } catch { + // Best-effort cleanup + } + } +} diff --git a/packages/core/src/embedding/index.ts b/packages/core/src/embedding/index.ts index e6110941..3f517749 100644 --- a/packages/core/src/embedding/index.ts +++ b/packages/core/src/embedding/index.ts @@ -5,4 +5,5 @@ export * from './base-embedding'; export * from './openai-embedding'; export * from './voyageai-embedding'; export * from './ollama-embedding'; -export * from './gemini-embedding'; \ No newline at end of file +export * from './gemini-embedding'; +export * from './embedding-cache';