Skip to content

Commit 942e121

Browse files
committed
feat(ollama): return real token counts from embedding endpoint
- Switch from deprecated /api/embeddings to /api/embed
- generateEmbedding() now returns EmbeddingResult { embedding, promptTokens } instead of plain number[] — promptTokens comes from prompt_eval_count
- Add OllamaAdapter.embed() convenience method
- Update live tests to verify promptTokens > 0
- Update README with new return type and adapter example

BREAKING CHANGE: generateEmbedding() return type changed from Promise<number[]> to Promise<EmbeddingResult>
1 parent af045fe commit 942e121

3 files changed

Lines changed: 51 additions & 15 deletions

File tree

packages/ollama/README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,10 @@ await client.generate(
5050
// Pull a model to local cache
5151
await client.pullModel('mistral');
5252

53-
// Generate embeddings
54-
const embedding = await client.generateEmbedding('Compute embeddings');
55-
console.log('Embedding vector length:', embedding.length);
53+
// Generate embeddings (with token count from /api/embed)
54+
const result = await client.generateEmbedding('Compute embeddings');
55+
console.log('Embedding vector length:', result.embedding.length);
56+
console.log('Prompt tokens:', result.promptTokens);
5657

5758
// Delete a pulled model when done
5859
await client.deleteModel('mistral');
@@ -64,7 +65,7 @@ await client.deleteModel('mistral');
6465
- `.listModels(): Promise<string[]>`
6566
- `.showModel(model: string): Promise<{ capabilities?: string[] } | null>`
6667
- `.generate(input: GenerateInput, onChunk?: (chunk: string) => void): Promise<string | void>`
67-
- `.generateEmbedding(text: string, model?: string): Promise<number[]>` — defaults to `nomic-embed-text`
68+
- `.generateEmbedding(text: string, model?: string): Promise<EmbeddingResult>` returns `{ embedding: number[], promptTokens: number }`, defaults to `nomic-embed-text`
6869
- `.pullModel(model: string): Promise<void>`
6970
- `.deleteModel(model: string): Promise<void>`
7071

@@ -75,6 +76,10 @@ import { OllamaAdapter } from '@agentic-kit/ollama';
7576

7677
const provider = new OllamaAdapter('http://localhost:11434');
7778
const model = provider.createModel('llama3');
79+
80+
// Embeddings with real token counts
81+
const result = await provider.embed('Compute embeddings', 'nomic-embed-text');
82+
console.log(result.embedding.length, result.promptTokens);
7883
```
7984

8085
## Local Live Tests

packages/ollama/__tests__/ollama.live.test.ts

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,28 @@ describeExtended('Ollama live extended', () => {
205205
expect(output.trim().toLowerCase()).toContain('marble');
206206
});
207207

208-
itWithEmbeddings('generates local embeddings when an embed model is installed', async () => {
208+
itWithEmbeddings('generates local embeddings with token count via /api/embed', async () => {
209209
const client = new OllamaClient(baseUrl);
210-
const embedding = await client.generateEmbedding('hello world', embedModel);
210+
const result = await client.generateEmbedding('hello world', embedModel);
211+
212+
expect(result).toHaveProperty('embedding');
213+
expect(result).toHaveProperty('promptTokens');
214+
expect(Array.isArray(result.embedding)).toBe(true);
215+
expect(result.embedding.length).toBeGreaterThan(0);
216+
expect(result.embedding.every((value) => Number.isFinite(value))).toBe(true);
217+
expect(result.promptTokens).toBeGreaterThan(0);
218+
});
219+
220+
itWithEmbeddings('OllamaAdapter.embed() returns embedding with token count', async () => {
221+
const { OllamaAdapter } = require('../src/index');
222+
const adapter = new OllamaAdapter(baseUrl);
223+
const result = await adapter.embed('hello world', embedModel);
211224

212-
expect(Array.isArray(embedding)).toBe(true);
213-
expect(embedding.length).toBeGreaterThan(0);
214-
expect(embedding.every((value) => Number.isFinite(value))).toBe(true);
225+
expect(result).toHaveProperty('embedding');
226+
expect(result).toHaveProperty('promptTokens');
227+
expect(Array.isArray(result.embedding)).toBe(true);
228+
expect(result.embedding.length).toBeGreaterThan(0);
229+
expect(result.promptTokens).toBeGreaterThan(0);
215230
});
216231
});
217232

packages/ollama/src/index.ts

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,17 @@ interface OllamaChatLine {
246246
response?: string;
247247
}
248248

249-
interface OllamaEmbeddingResponse {
249+
interface OllamaEmbedResponse {
250+
model: string;
251+
embeddings: number[][];
252+
total_duration?: number;
253+
load_duration?: number;
254+
prompt_eval_count?: number;
255+
}
256+
257+
export interface EmbeddingResult {
250258
embedding: number[];
259+
promptTokens: number;
251260
}
252261

253262
export const OLLAMA_MODELS: ModelDescriptor[] = [];
@@ -297,18 +306,21 @@ export class OllamaClient {
297306
}
298307
}
299308

300-
async generateEmbedding(text: string, model = 'nomic-embed-text'): Promise<number[]> {
301-
const response = await fetch(`${this.baseUrl}/api/embeddings`, {
309+
async generateEmbedding(text: string, model = 'nomic-embed-text'): Promise<EmbeddingResult> {
310+
const response = await fetch(`${this.baseUrl}/api/embed`, {
302311
method: 'POST',
303312
headers: { 'Content-Type': 'application/json' },
304-
body: JSON.stringify({ model, prompt: text }),
313+
body: JSON.stringify({ model, input: text }),
305314
});
306315
if (!response.ok) {
307316
throw new Error(`generateEmbedding failed: ${response.status} ${response.statusText}`);
308317
}
309318

310-
const payload = (await response.json()) as OllamaEmbeddingResponse;
311-
return payload.embedding;
319+
const payload = (await response.json()) as OllamaEmbedResponse;
320+
return {
321+
embedding: payload.embeddings[0],
322+
promptTokens: payload.prompt_eval_count ?? 0,
323+
};
312324
}
313325

314326
async generate(input: GenerateInput): Promise<string>;
@@ -385,6 +397,10 @@ export class OllamaAdapter {
385397
return this.client.listModels();
386398
}
387399

400+
async embed(text: string, model = 'nomic-embed-text'): Promise<EmbeddingResult> {
401+
return this.client.generateEmbedding(text, model);
402+
}
403+
388404
stream(model: ModelDescriptor, context: Context, options?: StreamOptions): AssistantMessageEventStream {
389405
const stream = new DefaultAssistantMessageEventStream();
390406
const output = createAssistantMessage(model);

0 commit comments

Comments
 (0)