Skip to content

Commit 88c18d4

Browse files
committed
refactor(embeddings): centralize EMBEDDING_PROVIDER parsing
1 parent c9f15a4 commit 88c18d4

3 files changed

Lines changed: 23 additions & 11 deletions

File tree

src/core/indexer.ts

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,12 @@ import {
2121
} from '../types/index.js';
2222
import { analyzerRegistry } from './analyzer-registry.js';
2323
import { isCodeFile, isBinaryFile } from '../utils/language-detection.js';
24-
import { getEmbeddingProvider, getConfiguredDimensions, DEFAULT_MODEL } from '../embeddings/index.js';
24+
import {
25+
getEmbeddingProvider,
26+
getConfiguredDimensions,
27+
DEFAULT_MODEL,
28+
parseEmbeddingProviderName
29+
} from '../embeddings/index.js';
2530
import { getStorageProvider, CodeChunkWithEmbedding } from '../storage/index.js';
2631
import {
2732
LibraryUsageTracker,
@@ -240,14 +245,8 @@ export class CodebaseIndexer {
240245
}
241246

242247
private mergeConfig(userConfig?: Partial<CodebaseConfig>): CodebaseConfig {
243-
const envEmbeddingProvider = process.env.EMBEDDING_PROVIDER;
244248
const defaultEmbeddingProvider =
245-
envEmbeddingProvider === 'openai' ||
246-
envEmbeddingProvider === 'transformers' ||
247-
envEmbeddingProvider === 'ollama' ||
248-
envEmbeddingProvider === 'custom'
249-
? envEmbeddingProvider
250-
: 'transformers';
249+
parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
251250

252251
const defaultConfig: CodebaseConfig = {
253252
analyzers: {

src/embeddings/index.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
export * from './types.js';
22
export * from './transformers.js';
33

4-
import { EmbeddingProvider, EmbeddingConfig, DEFAULT_EMBEDDING_CONFIG, DEFAULT_MODEL } from './types.js';
4+
import {
5+
EmbeddingProvider,
6+
EmbeddingConfig,
7+
DEFAULT_EMBEDDING_CONFIG,
8+
DEFAULT_MODEL,
9+
parseEmbeddingProviderName
10+
} from './types.js';
511
import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js';
612

713
/**
@@ -12,7 +18,7 @@ import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js'
1218
* implementation) so new models are automatically handled without updating this function.
1319
*/
1420
export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number {
15-
const provider = config.provider ?? (process.env.EMBEDDING_PROVIDER as string) ?? 'transformers';
21+
const provider = config.provider ?? parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
1622
const model = config.model ?? process.env.EMBEDDING_MODEL ?? DEFAULT_MODEL;
1723
if (provider === 'openai') return 1536; // text-embedding-3-small / ada-002
1824
// Look up from the same MODEL_CONFIGS the provider uses — avoids stale hardcoded guesses

src/embeddings/types.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,20 @@ export interface EmbeddingConfig {
1818
apiEndpoint?: string;
1919
}
2020

21+
/**
 * Validates an arbitrary value as an embedding provider name.
 *
 * The value is accepted only when it is strictly equal (`===`) to one of the
 * literals 'transformers', 'ollama', 'openai' or 'custom'. The comparison is
 * exact: no trimming or case folding is applied, so values such as 'OpenAI'
 * or ' openai' are rejected.
 *
 * Callers visible in this change pass `process.env.EMBEDDING_PROVIDER` and
 * fall back to 'transformers' with `??` when the result is `undefined`.
 *
 * @param value - Untrusted input of any type; non-strings never match.
 * @returns The same value narrowed to `EmbeddingConfig['provider']` when it
 *   is a recognised name, otherwise `undefined`.
 */
export function parseEmbeddingProviderName(value: unknown): EmbeddingConfig['provider'] | undefined {
22+
if (value === 'transformers' || value === 'ollama' || value === 'openai' || value === 'custom') {
23+
return value;
24+
}
25+
return undefined;
26+
}
27+
2128
// Default: bge-small (fast, ~2min indexing, consumer-hardware safe)
2229
// Opt-in: set EMBEDDING_MODEL=onnx-community/granite-embedding-small-english-r2-ONNX for
2330
// better conceptual search at the cost of 5-10x slower indexing and higher RAM usage
2431
// Uses `||` rather than `??`, so an empty EMBEDDING_MODEL string also falls back to the default.
export const DEFAULT_MODEL = process.env.EMBEDDING_MODEL || 'Xenova/bge-small-en-v1.5';
2532

2633
export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = {
27-
provider: (process.env.EMBEDDING_PROVIDER as EmbeddingConfig['provider']) || 'transformers',
34+
provider: parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers',
2835
model: DEFAULT_MODEL,
2936
batchSize: 32,
3037
maxRetries: 3,

0 commit comments

Comments
 (0)