Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 82 additions & 3 deletions packages/core/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import {
import {
Embedding,
EmbeddingVector,
OpenAIEmbedding
OpenAIEmbedding,
EmbeddingCache
} from './embedding';
import {
VectorDatabase,
Expand Down Expand Up @@ -108,6 +109,7 @@ export class Context {
private collectionNameOverride?: string;
private warnedOverrideSanitization = new Set<string>();
private synchronizers = new Map<string, FileSynchronizer>();
private embeddingCache: EmbeddingCache | null = null;

constructor(config: ContextConfig = {}) {
// Initialize services
Expand Down Expand Up @@ -158,6 +160,25 @@ export class Context {
if (envCustomIgnorePatterns.length > 0) {
console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
}

// Initialize embedding cache
this.initEmbeddingCache();
}

/**
* (Re)create the embedding cache keyed by current provider + dimension.
* Called on construction and again whenever the embedding instance changes
* via updateEmbedding(), so cached vectors from a previous model never bleed
* into the new one.
*/
private initEmbeddingCache(): void {
const dimension = this.embedding.getDimension();
const cacheModel = `${this.embedding.getProvider()}_${dimension}`;
this.embeddingCache = new EmbeddingCache(cacheModel, undefined, dimension);
if (this.embeddingCache.isEnabled()) {
console.log(`[Context] 💾 Embedding cache enabled for model: ${cacheModel}`);
this.embeddingCache.cleanup().catch(() => {});
}
}

/**
Expand Down Expand Up @@ -569,6 +590,61 @@ export class Context {
}
}

/**
 * Embed a batch of texts, serving from the disk cache where possible and
 * calling the embedding API only for chunks not yet cached.
 *
 * Duplicate strings within the same batch are deduplicated before the API
 * call — without this, two identical chunks in one batch would each hit the
 * API, since neither is cached when the call starts. Common in monorepos
 * with re-exports or generated boilerplate.
 *
 * @param contents Chunk texts to embed, in caller order.
 * @returns One embedding per input, aligned with `contents`.
 * @throws Error if the embedding provider returns a result count that does
 *         not match the number of texts sent; a misaligned response must
 *         not be cached or handed to callers as `undefined` vectors.
 */
private async cachedEmbedBatch(contents: string[]): Promise<EmbeddingVector[]> {
    if (!this.embeddingCache || !this.embeddingCache.isEnabled()) {
        return this.embedding.embedBatch(contents);
    }

    const { results, uncachedIndices } = this.embeddingCache.getBatch(contents);

    if (uncachedIndices.length === 0) {
        console.log(`[Cache] ✅ All ${contents.length} embeddings from cache`);
        return results as EmbeddingVector[];
    }

    // Dedupe uncached texts: send each unique string once and fan results
    // back out to every original index pointing at that string.
    const uniqueTexts: string[] = [];
    const textToUniqueIndex = new Map<string, number>();
    const indicesByUnique: number[][] = [];
    for (const i of uncachedIndices) {
        const text = contents[i];
        let uniq = textToUniqueIndex.get(text);
        if (uniq === undefined) {
            uniq = uniqueTexts.length;
            textToUniqueIndex.set(text, uniq);
            uniqueTexts.push(text);
            indicesByUnique.push([]);
        }
        indicesByUnique[uniq].push(i);
    }

    const newEmbeddings = await this.embedding.embedBatch(uniqueTexts);

    // Guard against a misbehaving provider: without this check, a short
    // response would cache `undefined` entries and leak them into `results`.
    if (newEmbeddings.length !== uniqueTexts.length) {
        throw new Error(
            `[Cache] Embedding provider returned ${newEmbeddings.length} results for ${uniqueTexts.length} texts`
        );
    }

    for (let u = 0; u < uniqueTexts.length; u++) {
        const embedding = newEmbeddings[u];
        this.embeddingCache.set(uniqueTexts[u], embedding);
        for (const i of indicesByUnique[u]) {
            results[i] = embedding;
        }
    }

    const hitRate = ((contents.length - uncachedIndices.length) / contents.length * 100).toFixed(0);
    const dedupNote = uniqueTexts.length < uncachedIndices.length
        ? ` (deduped ${uncachedIndices.length} → ${uniqueTexts.length} API calls)`
        : '';
    console.log(`[Cache] ${hitRate}% hit (${contents.length - uncachedIndices.length}/${contents.length} cached, ${uniqueTexts.length} embedded)${dedupNote}`);

    return results as EmbeddingVector[];
}

/**
* Check if index exists for codebase
* @param codebasePath Codebase path to check
Expand Down Expand Up @@ -653,6 +729,9 @@ export class Context {
/**
 * Swap in a new embedding provider at runtime.
 *
 * The embedding cache is re-keyed immediately (cache keys are
 * `${provider}_${dimension}`), so vectors produced by the previous model
 * cannot be returned on the next embed call.
 *
 * @param embedding The embedding implementation to use from now on.
 */
updateEmbedding(embedding: Embedding): void {
    this.embedding = embedding;
    const provider = embedding.getProvider();
    console.log(`[Context] 🔄 Updated embedding provider: ${provider}`);
    this.initEmbeddingCache();
}

/**
Expand Down Expand Up @@ -865,9 +944,9 @@ export class Context {
private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> {
const isHybrid = this.getIsHybrid();

// Generate embedding vectors
// Generate embedding vectors (with cache)
const chunkContents = chunks.map(chunk => chunk.content);
const embeddings = await this.embedding.embedBatch(chunkContents);
const embeddings = await this.cachedEmbedBatch(chunkContents);

if (isHybrid === true) {
// Create hybrid vector documents
Expand Down
168 changes: 168 additions & 0 deletions packages/core/src/embedding/embedding-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import * as fs from 'fs';
import * as fsp from 'fs/promises';
import * as path from 'path';
import * as os from 'os';
import * as crypto from 'crypto';
import { EmbeddingVector } from './base-embedding';
import { envManager } from '../utils/env-manager';

/**
 * Best-effort, on-disk cache of embedding vectors keyed by the sha256 of the
 * chunk text. Each entry is a small JSON file ({ v: number[], d: number })
 * stored under a two-hex-char prefix directory to keep directory fan-out low.
 *
 * Controlled by environment variables:
 *   - EMBEDDING_CACHE:              set to "false" to disable (default: enabled)
 *   - EMBEDDING_CACHE_DIR:          base directory (default: ~/.context/embedding-cache)
 *   - EMBEDDING_CACHE_MAX_AGE_DAYS: cleanup() age threshold in days (default: 30)
 *
 * Every operation is best-effort: filesystem or parse errors degrade to a
 * cache miss (get) or a no-op (set/cleanup); nothing here ever throws to the
 * caller.
 */
export class EmbeddingCache {
    private cacheDir: string;
    private enabled: boolean;
    // When set, cached entries whose dimension differs are treated as misses,
    // protecting against provider/model changes that alter vector size.
    private expectedDimension: number | null;

    /**
     * @param model              Cache namespace, typically `${provider}_${dimension}`.
     * @param cacheDir           Optional base directory override (wins over env).
     * @param expectedDimension  If given, entries with any other dimension are misses.
     */
    constructor(model: string, cacheDir?: string, expectedDimension?: number) {
        this.enabled = (envManager.get('EMBEDDING_CACHE') || 'true').toLowerCase() !== 'false';
        this.expectedDimension = expectedDimension ?? null;

        const baseDir = cacheDir
            || envManager.get('EMBEDDING_CACHE_DIR')
            || path.join(os.homedir(), '.context', 'embedding-cache');

        // Sanitize model name for filesystem use.
        const safeModel = model.replace(/[^a-zA-Z0-9_-]/g, '_');
        this.cacheDir = path.join(baseDir, safeModel);

        if (this.enabled) {
            try {
                fs.mkdirSync(this.cacheDir, { recursive: true });
            } catch {
                console.warn(`[Cache] ⚠️ Could not create cache dir: ${this.cacheDir}`);
                this.enabled = false;
            }
        }
    }

    private hash(content: string): string {
        return crypto.createHash('sha256').update(content).digest('hex');
    }

    /**
     * Use the FULL sha256 (64 hex chars) as the filename — truncating to 12 chars
     * gave a birthday-collision probability of ~50% at ~78k entries, which is
     * trivially reachable for a real codebase. Full hash makes collisions
     * practically impossible.
     */
    private getCachePath(contentHash: string): string {
        const prefix = contentHash.slice(0, 2);
        return path.join(this.cacheDir, prefix, contentHash + '.json');
    }

    /**
     * Look up one text. Returns the cached vector, or null on any miss,
     * validation failure, or I/O error.
     */
    get(content: string): EmbeddingVector | null {
        if (!this.enabled) return null;

        try {
            const h = this.hash(content);
            const cachePath = this.getCachePath(h);

            if (!fs.existsSync(cachePath)) return null;

            const raw = fs.readFileSync(cachePath, 'utf-8');
            const data = JSON.parse(raw);

            // Shape validation — partial writes / future format changes / bit rot
            // shouldn't return garbage to the caller. Treat anything unexpected as a miss.
            if (!data || !Array.isArray(data.v) || typeof data.d !== 'number') return null;
            if (data.v.length !== data.d) return null;
            if (this.expectedDimension !== null && data.d !== this.expectedDimension) return null;

            return { vector: data.v as number[], dimension: data.d };
        } catch {
            return null;
        }
    }

    /**
     * Store one text's vector. Best-effort: failures are swallowed.
     */
    set(content: string, embedding: EmbeddingVector): void {
        if (!this.enabled) return;

        try {
            const h = this.hash(content);
            const cachePath = this.getCachePath(h);

            fs.mkdirSync(path.dirname(cachePath), { recursive: true });

            // Write-then-rename so a concurrent reader (or a crash mid-write)
            // never observes a truncated JSON file. rename() atomically replaces
            // the target on POSIX same-filesystem moves; on platforms where the
            // rename fails (e.g. Windows with an existing target), fall through
            // and discard the temp file — the cache stays consistent either way.
            const tmpPath = `${cachePath}.${process.pid}.${Date.now()}.tmp`;
            fs.writeFileSync(tmpPath, JSON.stringify({ v: embedding.vector, d: embedding.dimension }));
            try {
                fs.renameSync(tmpPath, cachePath);
            } catch {
                try { fs.unlinkSync(tmpPath); } catch { /* already gone — fine */ }
            }
        } catch {
            // Silently fail — cache is best-effort
        }
    }

    /**
     * Look up many texts at once.
     * @returns `results` aligned with `contents` (null at every miss) and the
     *          indices of the misses in `uncachedIndices`.
     */
    getBatch(contents: string[]): { results: (EmbeddingVector | null)[]; uncachedIndices: number[] } {
        const results: (EmbeddingVector | null)[] = new Array(contents.length).fill(null);
        const uncachedIndices: number[] = [];

        for (let i = 0; i < contents.length; i++) {
            const cached = this.get(contents[i]);
            if (cached) {
                results[i] = cached;
            } else {
                uncachedIndices.push(i);
            }
        }

        return { results, uncachedIndices };
    }

    isEnabled(): boolean {
        return this.enabled;
    }

    /**
     * Delete cache files not modified in the last maxAgeDays days.
     * Truly async (uses fs.promises) so startup cleanup never blocks the event loop.
     * Best-effort — errors are silently ignored.
     *
     * An explicit `maxAgeDays <= 0` disables cleanup — the documented escape
     * hatch for users who want the cache to persist indefinitely. A malformed
     * EMBEDDING_CACHE_MAX_AGE_DAYS env value, however, falls back to the
     * 30-day default rather than silently disabling cleanup.
     */
    async cleanup(maxAgeDays?: number): Promise<void> {
        if (!this.enabled) return;

        let days: number;
        if (maxAgeDays !== undefined) {
            days = maxAgeDays;
        } else {
            const fromEnv = parseInt(envManager.get('EMBEDDING_CACHE_MAX_AGE_DAYS') || '30', 10);
            days = Number.isFinite(fromEnv) ? fromEnv : 30;
        }
        if (!Number.isFinite(days) || days <= 0) return;

        const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
        let deleted = 0;

        try {
            const prefixDirs = await fsp.readdir(this.cacheDir);
            for (const prefix of prefixDirs) {
                const prefixPath = path.join(this.cacheDir, prefix);
                let prefixStat;
                try {
                    prefixStat = await fsp.stat(prefixPath);
                } catch {
                    continue;
                }
                if (!prefixStat.isDirectory()) continue;

                const files = await fsp.readdir(prefixPath);
                for (const file of files) {
                    const filePath = path.join(prefixPath, file);
                    try {
                        const stat = await fsp.stat(filePath);
                        if (stat.mtimeMs < cutoff) {
                            await fsp.unlink(filePath);
                            deleted++;
                        }
                    } catch {
                        // file vanished mid-scan, fine
                    }
                }

                // Remove empty prefix dirs
                try {
                    const remaining = await fsp.readdir(prefixPath);
                    if (remaining.length === 0) await fsp.rmdir(prefixPath);
                } catch {
                    // best-effort
                }
            }

            if (deleted > 0) {
                console.log(`[Cache] 🧹 Cleaned up ${deleted} stale cache files (>${days} days old)`);
            }
        } catch {
            // Best-effort cleanup
        }
    }
}
3 changes: 2 additions & 1 deletion packages/core/src/embedding/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ export * from './base-embedding';
export * from './openai-embedding';
export * from './voyageai-embedding';
export * from './ollama-embedding';
export * from './gemini-embedding';
export * from './gemini-embedding';
export * from './embedding-cache';
Loading