From c81ad2ef90e88b7abdf79459c3546cba342e8320 Mon Sep 17 00:00:00 2001 From: Nawapat Buakoet Date: Sat, 25 Apr 2026 00:07:52 +0700 Subject: [PATCH 1/3] feat: add disk-based embedding cache to skip redundant API calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cache embedding vectors to ~/.context/embedding-cache/ keyed by SHA256(content) per model. On re-index, only uncached chunks hit the API — cached chunks load from disk instantly. Logs cache hit rate per batch. Disable with EMBEDDING_CACHE=false. --- packages/core/src/context.ts | 44 ++++++++- .../core/src/embedding/embedding-cache.ts | 91 +++++++++++++++++++ packages/core/src/embedding/index.ts | 3 +- 3 files changed, 134 insertions(+), 4 deletions(-) create mode 100644 packages/core/src/embedding/embedding-cache.ts diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index f4839439..0dc2759e 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -6,7 +6,8 @@ import { import { Embedding, EmbeddingVector, - OpenAIEmbedding + OpenAIEmbedding, + EmbeddingCache } from './embedding'; import { VectorDatabase, @@ -108,6 +109,7 @@ export class Context { private collectionNameOverride?: string; private warnedOverrideSanitization = new Set(); private synchronizers = new Map(); + private embeddingCache: EmbeddingCache | null = null; constructor(config: ContextConfig = {}) { // Initialize services @@ -158,6 +160,13 @@ export class Context { if (envCustomIgnorePatterns.length > 0) { console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`); } + + // Initialize embedding cache + const cacheModel = `${this.embedding.getProvider()}_${this.embedding.getDimension()}`; + this.embeddingCache = new EmbeddingCache(cacheModel); + if (this.embeddingCache.isEnabled()) { + console.log(`[Context] ๐Ÿ’พ Embedding cache enabled for model: ${cacheModel}`); + 
} } /** @@ -569,6 +578,35 @@ export class Context { } } + /** + * Embed batch with disk cache. Only calls API for uncached chunks. + */ + private async cachedEmbedBatch(contents: string[]): Promise { + if (!this.embeddingCache || !this.embeddingCache.isEnabled()) { + return this.embedding.embedBatch(contents); + } + + const { results, uncachedIndices } = this.embeddingCache.getBatch(contents); + + if (uncachedIndices.length === 0) { + console.log(`[Cache] โœ… All ${contents.length} embeddings from cache`); + return results as EmbeddingVector[]; + } + + const uncachedTexts = uncachedIndices.map(i => contents[i]); + const newEmbeddings = await this.embedding.embedBatch(uncachedTexts); + + for (let j = 0; j < uncachedIndices.length; j++) { + results[uncachedIndices[j]] = newEmbeddings[j]; + this.embeddingCache.set(contents[uncachedIndices[j]], newEmbeddings[j]); + } + + const hitRate = ((contents.length - uncachedIndices.length) / contents.length * 100).toFixed(0); + console.log(`[Cache] ${hitRate}% hit (${contents.length - uncachedIndices.length}/${contents.length} cached, ${uncachedIndices.length} embedded)`); + + return results as EmbeddingVector[]; + } + /** * Check if index exists for codebase * @param codebasePath Codebase path to check @@ -865,9 +903,9 @@ export class Context { private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise { const isHybrid = this.getIsHybrid(); - // Generate embedding vectors + // Generate embedding vectors (with cache) const chunkContents = chunks.map(chunk => chunk.content); - const embeddings = await this.embedding.embedBatch(chunkContents); + const embeddings = await this.cachedEmbedBatch(chunkContents); if (isHybrid === true) { // Create hybrid vector documents diff --git a/packages/core/src/embedding/embedding-cache.ts b/packages/core/src/embedding/embedding-cache.ts new file mode 100644 index 00000000..f7792b31 --- /dev/null +++ b/packages/core/src/embedding/embedding-cache.ts @@ -0,0 +1,91 @@ 
+import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import * as crypto from 'crypto'; +import { EmbeddingVector } from './base-embedding'; +import { envManager } from '../utils/env-manager'; + +export class EmbeddingCache { + private cacheDir: string; + private enabled: boolean; + + constructor(model: string, cacheDir?: string) { + this.enabled = (envManager.get('EMBEDDING_CACHE') || 'true').toLowerCase() !== 'false'; + + const baseDir = cacheDir + || envManager.get('EMBEDDING_CACHE_DIR') + || path.join(os.homedir(), '.context', 'embedding-cache'); + + // Sanitize model name for filesystem + const safeModel = model.replace(/[^a-zA-Z0-9_-]/g, '_'); + this.cacheDir = path.join(baseDir, safeModel); + + if (this.enabled) { + try { + fs.mkdirSync(this.cacheDir, { recursive: true }); + } catch { + console.warn(`[Cache] ⚠️ Could not create cache dir: ${this.cacheDir}`); + this.enabled = false; + } + } + } + + private hash(content: string): string { + return crypto.createHash('sha256').update(content).digest('hex'); + } + + private getCachePath(contentHash: string): string { + const prefix = contentHash.slice(0, 2); + return path.join(this.cacheDir, prefix, contentHash.slice(0, 12) + '.json'); + } + + get(content: string): EmbeddingVector | null { + if (!this.enabled) return null; + + try { + const h = this.hash(content); + const cachePath = this.getCachePath(h); + + if (!fs.existsSync(cachePath)) return null; + + const data = JSON.parse(fs.readFileSync(cachePath, 'utf-8')); + return { vector: data.v, dimension: data.d }; + } catch { + return null; + } + } + + set(content: string, embedding: EmbeddingVector): void { + if (!this.enabled) return; + + try { + const h = this.hash(content); + const cachePath = this.getCachePath(h); + + fs.mkdirSync(path.dirname(cachePath), { recursive: true }); + fs.writeFileSync(cachePath, JSON.stringify({ v: embedding.vector, d: embedding.dimension })); + } catch { + // Silently fail — cache is 
best-effort + } + } + + getBatch(contents: string[]): { results: (EmbeddingVector | null)[]; uncachedIndices: number[] } { + const results: (EmbeddingVector | null)[] = new Array(contents.length).fill(null); + const uncachedIndices: number[] = []; + + for (let i = 0; i < contents.length; i++) { + const cached = this.get(contents[i]); + if (cached) { + results[i] = cached; + } else { + uncachedIndices.push(i); + } + } + + return { results, uncachedIndices }; + } + + isEnabled(): boolean { + return this.enabled; + } +} diff --git a/packages/core/src/embedding/index.ts b/packages/core/src/embedding/index.ts index e6110941..3f517749 100644 --- a/packages/core/src/embedding/index.ts +++ b/packages/core/src/embedding/index.ts @@ -5,4 +5,5 @@ export * from './base-embedding'; export * from './openai-embedding'; export * from './voyageai-embedding'; export * from './ollama-embedding'; -export * from './gemini-embedding'; \ No newline at end of file +export * from './gemini-embedding'; +export * from './embedding-cache'; From b37fb63f015ebf0af54704fbc5504e7d6214e2e0 Mon Sep 17 00:00:00 2001 From: Nawapat Buakoet Date: Sat, 25 Apr 2026 00:19:16 +0700 Subject: [PATCH 2/3] feat: auto-cleanup stale embedding cache files on startup Delete cached embeddings not modified in 30 days (configurable via EMBEDDING_CACHE_MAX_AGE_DAYS). Runs async on startup, non-blocking. Removes empty prefix directories after cleanup. 
--- packages/core/src/context.ts | 1 + .../core/src/embedding/embedding-cache.ts | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 0dc2759e..916bdf49 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -166,6 +166,7 @@ export class Context { this.embeddingCache = new EmbeddingCache(cacheModel); if (this.embeddingCache.isEnabled()) { console.log(`[Context] ๐Ÿ’พ Embedding cache enabled for model: ${cacheModel}`); + this.embeddingCache.cleanup().catch(() => {}); } } diff --git a/packages/core/src/embedding/embedding-cache.ts b/packages/core/src/embedding/embedding-cache.ts index f7792b31..734d46a0 100644 --- a/packages/core/src/embedding/embedding-cache.ts +++ b/packages/core/src/embedding/embedding-cache.ts @@ -88,4 +88,45 @@ export class EmbeddingCache { isEnabled(): boolean { return this.enabled; } + + /** + * Delete cache files not modified in the last maxAgeDays days. + * Runs async, best-effort โ€” errors are silently ignored. + */ + async cleanup(maxAgeDays?: number): Promise { + if (!this.enabled) return; + + const days = maxAgeDays ?? 
parseInt(envManager.get('EMBEDDING_CACHE_MAX_AGE_DAYS') || '30', 10); + const cutoff = Date.now() - days * 24 * 60 * 60 * 1000; + let deleted = 0; + + try { + const prefixDirs = fs.readdirSync(this.cacheDir); + for (const prefix of prefixDirs) { + const prefixPath = path.join(this.cacheDir, prefix); + if (!fs.statSync(prefixPath).isDirectory()) continue; + + const files = fs.readdirSync(prefixPath); + for (const file of files) { + const filePath = path.join(prefixPath, file); + const stat = fs.statSync(filePath); + if (stat.mtimeMs < cutoff) { + fs.unlinkSync(filePath); + deleted++; + } + } + + // Remove empty prefix dirs + if (fs.readdirSync(prefixPath).length === 0) { + fs.rmdirSync(prefixPath); + } + } + + if (deleted > 0) { + console.log(`[Cache] ๐Ÿงน Cleaned up ${deleted} stale cache files (>${days} days old)`); + } + } catch { + // Best-effort cleanup + } + } } From ea99ede6f486e3402609f6710fefb55544a59f9d Mon Sep 17 00:00:00 2001 From: Nawapat Buakoet Date: Sat, 25 Apr 2026 17:24:09 +0700 Subject: [PATCH 3/3] fix: address Copilot review feedback on embedding cache - Use full SHA256 (64 chars) in filename, not 12-char prefix (collision risk) - Validate JSON shape in get() (Array.isArray, dimension match) and pass expectedDimension to constructor for stricter cross-model isolation - cleanup() now uses fs.promises (truly async, no event-loop block) - cleanup() guards maxAgeDays <= 0 / non-finite (prevents purge-everything) - updateEmbedding() now reinitializes the cache so model switches don't serve stale vectors from the previous model - cachedEmbedBatch() dedupes duplicate strings within a single batch so identical chunks don't each hit the API --- packages/core/src/context.ts | 56 +++++++++++++--- .../core/src/embedding/embedding-cache.ts | 64 +++++++++++++++---- 2 files changed, 98 insertions(+), 22 deletions(-) diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 916bdf49..8498bb1e 100644 --- a/packages/core/src/context.ts +++ 
b/packages/core/src/context.ts @@ -162,8 +162,19 @@ export class Context { } // Initialize embedding cache - const cacheModel = `${this.embedding.getProvider()}_${this.embedding.getDimension()}`; - this.embeddingCache = new EmbeddingCache(cacheModel); + this.initEmbeddingCache(); + } + + /** + * (Re)create the embedding cache keyed by current provider + dimension. + * Called on construction and again whenever the embedding instance changes + * via updateEmbedding(), so cached vectors from a previous model never bleed + * into the new one. + */ + private initEmbeddingCache(): void { + const dimension = this.embedding.getDimension(); + const cacheModel = `${this.embedding.getProvider()}_${dimension}`; + this.embeddingCache = new EmbeddingCache(cacheModel, undefined, dimension); if (this.embeddingCache.isEnabled()) { console.log(`[Context] ๐Ÿ’พ Embedding cache enabled for model: ${cacheModel}`); this.embeddingCache.cleanup().catch(() => {}); @@ -581,6 +592,10 @@ export class Context { /** * Embed batch with disk cache. Only calls API for uncached chunks. + * Also dedupes duplicate strings within the same batch โ€” without this, two + * identical chunks in one batch would each hit the API since neither is + * cached at the start of the call. Common in monorepos with re-exports or + * generated boilerplate. */ private async cachedEmbedBatch(contents: string[]): Promise { if (!this.embeddingCache || !this.embeddingCache.isEnabled()) { @@ -594,16 +609,38 @@ export class Context { return results as EmbeddingVector[]; } - const uncachedTexts = uncachedIndices.map(i => contents[i]); - const newEmbeddings = await this.embedding.embedBatch(uncachedTexts); + // Dedupe uncached texts: send each unique string once and fan results + // back out to every original index pointing at that string. 
+ const uniqueTexts: string[] = []; + const textToUniqueIndex = new Map(); + const indicesByUnique: number[][] = []; + for (const i of uncachedIndices) { + const text = contents[i]; + let uniq = textToUniqueIndex.get(text); + if (uniq === undefined) { + uniq = uniqueTexts.length; + textToUniqueIndex.set(text, uniq); + uniqueTexts.push(text); + indicesByUnique.push([]); + } + indicesByUnique[uniq].push(i); + } - for (let j = 0; j < uncachedIndices.length; j++) { - results[uncachedIndices[j]] = newEmbeddings[j]; - this.embeddingCache.set(contents[uncachedIndices[j]], newEmbeddings[j]); + const newEmbeddings = await this.embedding.embedBatch(uniqueTexts); + + for (let u = 0; u < uniqueTexts.length; u++) { + const embedding = newEmbeddings[u]; + this.embeddingCache.set(uniqueTexts[u], embedding); + for (const i of indicesByUnique[u]) { + results[i] = embedding; + } } const hitRate = ((contents.length - uncachedIndices.length) / contents.length * 100).toFixed(0); - console.log(`[Cache] ${hitRate}% hit (${contents.length - uncachedIndices.length}/${contents.length} cached, ${uncachedIndices.length} embedded)`); + const dedupNote = uniqueTexts.length < uncachedIndices.length + ? ` (deduped ${uncachedIndices.length} โ†’ ${uniqueTexts.length} API calls)` + : ''; + console.log(`[Cache] ${hitRate}% hit (${contents.length - uncachedIndices.length}/${contents.length} cached, ${uniqueTexts.length} embedded)${dedupNote}`); return results as EmbeddingVector[]; } @@ -692,6 +729,9 @@ export class Context { updateEmbedding(embedding: Embedding): void { this.embedding = embedding; console.log(`[Context] ๐Ÿ”„ Updated embedding provider: ${embedding.getProvider()}`); + // Cache key is `${provider}_${dimension}`; re-key so we don't return + // vectors from the previous model on the next embed call. 
+ this.initEmbeddingCache(); } /** diff --git a/packages/core/src/embedding/embedding-cache.ts b/packages/core/src/embedding/embedding-cache.ts index 734d46a0..021fefb6 100644 --- a/packages/core/src/embedding/embedding-cache.ts +++ b/packages/core/src/embedding/embedding-cache.ts @@ -1,4 +1,5 @@ import * as fs from 'fs'; +import * as fsp from 'fs/promises'; import * as path from 'path'; import * as os from 'os'; import * as crypto from 'crypto'; @@ -8,9 +9,11 @@ import { envManager } from '../utils/env-manager'; export class EmbeddingCache { private cacheDir: string; private enabled: boolean; + private expectedDimension: number | null; - constructor(model: string, cacheDir?: string) { + constructor(model: string, cacheDir?: string, expectedDimension?: number) { this.enabled = (envManager.get('EMBEDDING_CACHE') || 'true').toLowerCase() !== 'false'; + this.expectedDimension = expectedDimension ?? null; const baseDir = cacheDir || envManager.get('EMBEDDING_CACHE_DIR') @@ -34,9 +37,15 @@ export class EmbeddingCache { return crypto.createHash('sha256').update(content).digest('hex'); } + /** + * Use the FULL sha256 (64 hex chars) as the filename — truncating to 12 chars + (48 bits) still carries birthday-collision risk (~50% odds near ~20M entries, + and a silent collision serves the wrong vector). Full hash makes collisions + practically impossible.
+ */ private getCachePath(contentHash: string): string { const prefix = contentHash.slice(0, 2); - return path.join(this.cacheDir, prefix, contentHash.slice(0, 12) + '.json'); + return path.join(this.cacheDir, prefix, contentHash + '.json'); } get(content: string): EmbeddingVector | null { @@ -48,8 +57,16 @@ export class EmbeddingCache { if (!fs.existsSync(cachePath)) return null; - const data = JSON.parse(fs.readFileSync(cachePath, 'utf-8')); - return { vector: data.v, dimension: data.d }; + const raw = fs.readFileSync(cachePath, 'utf-8'); + const data = JSON.parse(raw); + + // Shape validation โ€” partial writes / future format changes / bit rot + // shouldn't return garbage to the caller. Treat anything unexpected as a miss. + if (!data || !Array.isArray(data.v) || typeof data.d !== 'number') return null; + if (data.v.length !== data.d) return null; + if (this.expectedDimension !== null && data.d !== this.expectedDimension) return null; + + return { vector: data.v as number[], dimension: data.d }; } catch { return null; } @@ -91,34 +108,53 @@ export class EmbeddingCache { /** * Delete cache files not modified in the last maxAgeDays days. - * Runs async, best-effort โ€” errors are silently ignored. + * Truly async (uses fs.promises) so startup cleanup never blocks the event loop. + * Best-effort โ€” errors are silently ignored. + * + * `maxAgeDays <= 0` (or non-finite) disables cleanup. Documented escape hatch + * for users who want the cache to persist indefinitely. */ async cleanup(maxAgeDays?: number): Promise { if (!this.enabled) return; const days = maxAgeDays ?? 
parseInt(envManager.get('EMBEDDING_CACHE_MAX_AGE_DAYS') || '30', 10); + if (!Number.isFinite(days) || days <= 0) return; + const cutoff = Date.now() - days * 24 * 60 * 60 * 1000; let deleted = 0; try { - const prefixDirs = fs.readdirSync(this.cacheDir); + const prefixDirs = await fsp.readdir(this.cacheDir); for (const prefix of prefixDirs) { const prefixPath = path.join(this.cacheDir, prefix); - if (!fs.statSync(prefixPath).isDirectory()) continue; + let prefixStat; + try { + prefixStat = await fsp.stat(prefixPath); + } catch { + continue; + } + if (!prefixStat.isDirectory()) continue; - const files = fs.readdirSync(prefixPath); + const files = await fsp.readdir(prefixPath); for (const file of files) { const filePath = path.join(prefixPath, file); - const stat = fs.statSync(filePath); - if (stat.mtimeMs < cutoff) { - fs.unlinkSync(filePath); - deleted++; + try { + const stat = await fsp.stat(filePath); + if (stat.mtimeMs < cutoff) { + await fsp.unlink(filePath); + deleted++; + } + } catch { + // file vanished mid-scan, fine } } // Remove empty prefix dirs - if (fs.readdirSync(prefixPath).length === 0) { - fs.rmdirSync(prefixPath); + try { + const remaining = await fsp.readdir(prefixPath); + if (remaining.length === 0) await fsp.rmdir(prefixPath); + } catch { + // best-effort } }