Skip to content

Commit 0375021

Browse files
PatrickSys and claude
committed
fix(embeddings): model-aware OpenAI dimensions + safe default model
- text-embedding-3-large returns 3072 dims, not 1536; use a getter on OpenAIEmbeddingProvider so dimensions resolve after modelName is set
- getConfiguredDimensions checks model name for 'large' before returning the OpenAI dimension value
- mergeConfig now defaults to text-embedding-3-small when EMBEDDING_PROVIDER=openai and EMBEDDING_MODEL is unset, avoiding a 400 from the OpenAI API caused by sending 'Xenova/bge-small-en-v1.5'
- add text-embedding-3-large test case; fix stale test description

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 88c18d4 commit 0375021

4 files changed

Lines changed: 18 additions & 4 deletions

File tree

src/core/indexer.ts

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,14 @@ export class CodebaseIndexer {
248248
const defaultEmbeddingProvider =
249249
parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
250250

251+
// When provider=openai and EMBEDDING_MODEL is not set, DEFAULT_MODEL resolves to the
252+
// transformers fallback (Xenova/bge-small-en-v1.5), which the OpenAI API rejects.
253+
// Use a sane OpenAI default instead.
254+
const defaultModel =
255+
defaultEmbeddingProvider === 'openai' && !process.env.EMBEDDING_MODEL
256+
? 'text-embedding-3-small'
257+
: DEFAULT_MODEL;
258+
251259
const defaultConfig: CodebaseConfig = {
252260
analyzers: {
253261
angular: { enabled: true, priority: 100 },
@@ -287,7 +295,7 @@ export class CodebaseIndexer {
287295
},
288296
embedding: {
289297
provider: defaultEmbeddingProvider,
290-
model: DEFAULT_MODEL,
298+
model: defaultModel,
291299
batchSize: 32
292300
},
293301
skipEmbedding: false,

src/embeddings/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js'
2020
export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number {
2121
const provider = config.provider ?? parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
2222
const model = config.model ?? process.env.EMBEDDING_MODEL ?? DEFAULT_MODEL;
23-
if (provider === 'openai') return 1536; // text-embedding-3-small / ada-002
23+
if (provider === 'openai') return model.includes('large') ? 3072 : 1536; // text-embedding-3-large: 3072, all others: 1536
2424
// Look up from the same MODEL_CONFIGS the provider uses — avoids stale hardcoded guesses
2525
return MODEL_CONFIGS[model]?.dimensions ?? 384;
2626
}

src/embeddings/openai.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@ interface OpenAIEmbeddingResponse {
1111
*/
1212
export class OpenAIEmbeddingProvider implements EmbeddingProvider {
1313
readonly name = 'openai';
14-
readonly dimensions = 1536; // Default for text-embedding-3-small
14+
get dimensions(): number {
15+
return this.modelName.includes('large') ? 3072 : 1536;
16+
}
1517

1618
constructor(
1719
readonly modelName: string = 'text-embedding-3-small',

tests/embedding-mismatch.test.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,14 @@ describe('getConfiguredDimensions', () => {
8484
).toBe(384);
8585
});
8686

87-
it('returns 1536 for openai provider regardless of model', () => {
87+
it('returns 1536 for text-embedding-3-small', () => {
8888
expect(getConfiguredDimensions({ provider: 'openai', model: 'text-embedding-3-small' })).toBe(1536);
8989
});
9090

91+
it('returns 3072 for text-embedding-3-large', () => {
92+
expect(getConfiguredDimensions({ provider: 'openai', model: 'text-embedding-3-large' })).toBe(3072);
93+
});
94+
9195
it('returns 384 as fallback for unknown transformers model', () => {
9296
expect(getConfiguredDimensions({ provider: 'transformers', model: 'some/unknown-model' })).toBe(384);
9397
});

0 commit comments

Comments (0)