Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/getting-started/prerequisites.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Before setting up Claude Context, ensure you have the following requirements met

#### Option 3: Gemini
- **API Key**: Get from [Google AI Studio](https://aistudio.google.com/)
- **Models**: `gemini-embedding-001`
- **Models**: `gemini-embedding-001`, `gemini-embedding-2`
- **Quota**: Check current quotas and limits

#### Option 4: Ollama (Local)
Expand Down
4 changes: 2 additions & 2 deletions packages/core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ results.forEach(result => {

- **OpenAI Embeddings** (`text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`)
- **VoyageAI Embeddings** - High-quality embeddings optimized for code (`voyage-code-3`, `voyage-3.5`, etc.)
- **Gemini Embeddings** - Google's embedding models (`gemini-embedding-001`)
- **Gemini Embeddings** - Google's embedding models (`gemini-embedding-001`, `gemini-embedding-2`)
- **Ollama Embeddings** - Local embedding models via Ollama

## Vector Database Support
Expand Down Expand Up @@ -282,4 +282,4 @@ This package is part of the Claude Context monorepo. Please see:

## License

MIT - See [LICENSE](../../LICENSE) for details
MIT - See [LICENSE](../../LICENSE) for details
122 changes: 122 additions & 0 deletions packages/core/src/embedding/gemini-embedding.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import { GoogleGenAI } from '@google/genai';
import { GeminiEmbedding } from './gemini-embedding';

// Shared stub standing in for `models.embedContent`; the suite resets it before
// every test, and individual tests set its resolved value and inspect its calls.
// NOTE(review): the `mock` name prefix appears to be what lets the hoisted
// `jest.mock` factory below reference this variable — confirm against the Jest
// transform in use before renaming it.
const mockEmbedContent = jest.fn();

// Replace the `@google/genai` module so that constructing `GoogleGenAI` yields an
// object exposing only `models.embedContent`, wired to the shared stub above.
// No real SDK client is created by code that imports `GoogleGenAI` from this module.
jest.mock('@google/genai', () => ({
GoogleGenAI: jest.fn().mockImplementation(() => ({
models: {
embedContent: mockEmbedContent
}
}))
}));

describe('GeminiEmbedding', () => {
    /** Builds a fresh embedding client for the given model with the dummy test key. */
    const buildEmbedding = (model: string): GeminiEmbedding =>
        new GeminiEmbedding({ apiKey: 'test-api-key', model });

    /** Returns a new copy of the two-chunk input so input and expectation never share a reference. */
    const twoChunks = (): string[] => ['first chunk', 'second chunk'];

    /** Makes the mocked `embedContent` resolve with one `{ values }` entry per given vector. */
    const stubEmbeddings = (...vectors: number[][]): void => {
        mockEmbedContent.mockResolvedValue({
            embeddings: vectors.map((values) => ({ values })),
        });
    };

    // Start every test with a clean stub and cleared constructor call history.
    beforeEach(() => {
        mockEmbedContent.mockReset();
        (GoogleGenAI as unknown as jest.Mock).mockClear();
    });

    it('exposes Gemini Embedding 2 model metadata', () => {
        const catalog = GeminiEmbedding.getSupportedModels();

        expect(catalog['gemini-embedding-2']).toMatchObject({
            dimension: 3072,
            contextLength: 8192,
        });

        const client = buildEmbedding('gemini-embedding-2');

        expect(client.getDimension()).toBe(3072);
        expect(client.getSupportedDimensions()).toContain(3072);
        expect(client.getSupportedDimensions()).toContain(768);
    });

    it('keeps batched request behavior for Gemini Embedding 2', async () => {
        stubEmbeddings([1, 0, 0], [0, 1, 0]);

        const client = buildEmbedding('gemini-embedding-2');
        const vectors = await client.embedBatch(twoChunks());

        expect(vectors).toEqual([
            { vector: [1, 0, 0], dimension: 3 },
            { vector: [0, 1, 0], dimension: 3 },
        ]);
        // Both inputs must travel in a single API request.
        expect(mockEmbedContent).toHaveBeenCalledTimes(1);
        expect(mockEmbedContent).toHaveBeenCalledWith({
            model: 'gemini-embedding-2',
            contents: twoChunks(),
            config: {
                outputDimensionality: 3072,
            },
        });
    });

    it('keeps the existing batched request behavior for Gemini Embedding 001', async () => {
        stubEmbeddings([1, 0, 0], [0, 1, 0]);

        const client = buildEmbedding('gemini-embedding-001');
        const vectors = await client.embedBatch(twoChunks());

        expect(vectors).toEqual([
            { vector: [1, 0, 0], dimension: 3 },
            { vector: [0, 1, 0], dimension: 3 },
        ]);
        // Both inputs must travel in a single API request.
        expect(mockEmbedContent).toHaveBeenCalledTimes(1);
        expect(mockEmbedContent).toHaveBeenCalledWith({
            model: 'gemini-embedding-001',
            contents: twoChunks(),
            config: {
                outputDimensionality: 3072,
            },
        });
    });

    it('throws a clear error when a batched response count does not match the inputs', async () => {
        // Only one embedding comes back for two inputs.
        stubEmbeddings([1, 0, 0]);

        const client = buildEmbedding('gemini-embedding-001');
        const pending = client.embedBatch(twoChunks());

        await expect(pending)
            .rejects
            .toThrow('Gemini API returned 1 embeddings for 2 inputs');
    });

    it('returns an empty batch without calling the Gemini API', async () => {
        const client = buildEmbedding('gemini-embedding-2');

        await expect(client.embedBatch([])).resolves.toEqual([]);
        expect(mockEmbedContent).not.toHaveBeenCalled();
    });
});
63 changes: 44 additions & 19 deletions packages/core/src/embedding/gemini-embedding.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import { GoogleGenAI } from '@google/genai';
import { Embedding, EmbeddingVector } from './base-embedding';

/**
 * Metadata describing one Gemini embedding model, as listed by
 * `GeminiEmbedding.getSupportedModels()`.
 */
type GeminiModelInfo = {
// Embedding vector size listed for the model.
dimension: number;
// Context length listed for the model (presumably a token count — confirm).
contextLength: number;
// Human-readable summary of the model.
description: string;
// Optional list of output sizes the model can be asked to produce.
supportedDimensions?: number[];
};

export interface GeminiEmbeddingConfig {
model: string;
apiKey: string;
Expand All @@ -11,7 +18,7 @@ export interface GeminiEmbeddingConfig {
export class GeminiEmbedding extends Embedding {
private client: GoogleGenAI;
private config: GeminiEmbeddingConfig;
private dimension: number = 3072; // Default dimension for gemini-embedding-001
private dimension: number = 3072; // Default dimension for Gemini embedding models
protected maxTokens: number = 2048; // Maximum tokens for Gemini embedding models

constructor(config: GeminiEmbeddingConfig) {
Expand Down Expand Up @@ -59,28 +66,17 @@ export class GeminiEmbedding extends Embedding {
const model = this.config.model || 'gemini-embedding-001';

try {
const response = await this.client.models.embedContent({
model: model,
contents: processedText,
config: {
outputDimensionality: this.config.outputDimensionality || this.dimension,
},
});

if (!response.embeddings || !response.embeddings[0] || !response.embeddings[0].values) {
throw new Error('Gemini API returned invalid response');
}

return {
vector: response.embeddings[0].values,
dimension: response.embeddings[0].values.length
};
return await this.embedProcessedText(processedText, model);
} catch (error) {
throw new Error(`Gemini embedding failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}

async embedBatch(texts: string[]): Promise<EmbeddingVector[]> {
if (texts.length === 0) {
return [];
}

const processedTexts = this.preprocessTexts(texts);
const model = this.config.model || 'gemini-embedding-001';

Expand All @@ -97,6 +93,10 @@ export class GeminiEmbedding extends Embedding {
throw new Error('Gemini API returned invalid response');
}

if (response.embeddings.length !== processedTexts.length) {
throw new Error(`Gemini API returned ${response.embeddings.length} embeddings for ${processedTexts.length} inputs`);
}

return response.embeddings.map((embedding: any) => {
if (!embedding.values) {
throw new Error('Gemini API returned invalid embedding data');
Expand All @@ -111,6 +111,25 @@ export class GeminiEmbedding extends Embedding {
}
}

/**
 * Requests an embedding for a single, already-preprocessed text and wraps the
 * first embedding of the response.
 *
 * @param processedText Text to embed, passed to the API unchanged.
 * @param model Model identifier forwarded to `embedContent`.
 * @returns The first returned vector together with its length.
 * @throws Error when the response has no embeddings, no first entry, or no values.
 */
private async embedProcessedText(processedText: string, model: string): Promise<EmbeddingVector> {
    // A configured output size takes precedence; otherwise use the instance dimension.
    const outputDimensionality = this.config.outputDimensionality || this.dimension;

    const result = await this.client.models.embedContent({
        model,
        contents: processedText,
        config: { outputDimensionality },
    });

    // Walk down to the first embedding's values, tolerating any missing level.
    const firstEmbedding = result.embeddings && result.embeddings[0];
    const values = firstEmbedding && firstEmbedding.values;
    if (!values) {
        throw new Error('Gemini API returned invalid response');
    }

    return { vector: values, dimension: values.length };
}

/**
 * Returns the dimension currently stored on this instance.
 */
getDimension(): number {
return this.dimension;
}
Expand Down Expand Up @@ -147,13 +166,19 @@ export class GeminiEmbedding extends Embedding {
/**
* Get list of supported models
*/
static getSupportedModels(): Record<string, { dimension: number; contextLength: number; description: string; supportedDimensions?: number[] }> {
static getSupportedModels(): Record<string, GeminiModelInfo> {
return {
'gemini-embedding-001': {
dimension: 3072,
contextLength: 2048,
description: 'Latest Gemini embedding model with state-of-the-art performance (recommended)',
description: 'Gemini embedding model with state-of-the-art performance',
supportedDimensions: [3072, 1536, 768, 256] // Matryoshka Representation Learning support
},
'gemini-embedding-2': {
dimension: 3072,
contextLength: 8192,
description: 'Gemini Embedding 2 model with improved embedding quality and longer context',
supportedDimensions: [3072, 1536, 768, 256]
}
};
}
Expand Down
2 changes: 1 addition & 1 deletion packages/mcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Google's Gemini provides competitive embeddings with good multilingual support.
# Required: Your Gemini API key
GEMINI_API_KEY=your-gemini-api-key

# Optional: Specify embedding model (default: gemini-embedding-001)
# Optional: Specify embedding model (default: gemini-embedding-001; gemini-embedding-2 is also supported)
EMBEDDING_MODEL=gemini-embedding-001

# Optional: Custom API base URL (for custom endpoints)
Expand Down
Loading