Skip to content

Commit 6d34ed7

Browse files
committed
fix(ollama): invoke calculateUsageCost so cost.total populates
The Ollama adapter previously assigned input/output/totalTokens but never ran the cost schedule, leaving cost.total at zero even when the model descriptor defined per-million-token rates. Apply the local calculateUsageCost helper after token assignment so the same Usage invariants hold across providers.
1 parent 649654c commit 6d34ed7

2 files changed

Lines changed: 54 additions & 0 deletions

File tree

packages/ollama/__tests__/ollama.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ describe('OllamaAdapter', () => {
146146
usage: {
147147
input: 0,
148148
output: 0,
149+
reasoning: 0,
149150
cacheRead: 0,
150151
cacheWrite: 0,
151152
totalTokens: 0,
@@ -212,6 +213,40 @@ describe('OllamaAdapter', () => {
212213
expect(message.errorMessage).toContain('aborted');
213214
});
214215

216+
// Regression test: streams a mocked two-line response whose final chunk reports
// prompt_eval_count = 100 and eval_count = 50, then checks that usage.cost on the
// resulting message is filled in from the cost schedule passed to createModel
// ({ input: 2, output: 4 }).
it('populates usage.cost when the model descriptor has a cost schedule', async () => {
217+
(fetch as jest.Mock).mockResolvedValueOnce(
218+
createLineResponse([
219+
JSON.stringify({ message: { content: 'Hi' }, done: false }),
220+
JSON.stringify({ done: true, done_reason: 'stop', prompt_eval_count: 100, eval_count: 50 }),
221+
]),
222+
);
223+
224+
const adapter = new OllamaAdapter('http://localhost:11434');
225+
const model = adapter.createModel('llama3', {
226+
cost: { input: 2, output: 4 },
227+
});
228+
const stream = adapter.stream(model, {
229+
messages: [{ role: 'user', content: 'hi', timestamp: Date.now() }],
230+
});
231+
232+
for await (const _event of stream) {
233+
// Drain stream; individual events are ignored, only the final result is inspected below.
234+
}
235+
236+
const message = await stream.result();
237+
expect(message.usage.input).toBe(100);
238+
expect(message.usage.output).toBe(50);
239+
expect(message.usage.cost.total).toBeGreaterThan(0);
240+
expect(message.usage.cost.input + message.usage.cost.output).toBeCloseTo(
241+
message.usage.cost.total,
242+
10,
243+
);
244+
// Expected values assume each rate is divided by 1_000_000 before being applied:
// 100 * (2 / 1_000_000) = 0.0002, 50 * (4 / 1_000_000) = 0.0002, total = 0.0004
245+
expect(message.usage.cost.input).toBeCloseTo(0.0002, 10);
246+
expect(message.usage.cost.output).toBeCloseTo(0.0002, 10);
247+
expect(message.usage.cost.total).toBeCloseTo(0.0004, 10);
248+
});
249+
215250
it('lists models through the client API', async () => {
216251
(fetch as jest.Mock).mockResolvedValueOnce({
217252
ok: true,

packages/ollama/src/index.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ interface ModelDescriptor {
1818
tools?: boolean;
1919
contextWindow?: number;
2020
maxOutputTokens?: number;
21+
cost?: {
22+
input?: number;
23+
output?: number;
24+
cacheRead?: number;
25+
cacheWrite?: number;
26+
};
2127
headers?: Record<string, string>;
2228
}
2329

@@ -47,6 +53,7 @@ interface ToolCallContent {
4753
interface Usage {
4854
input: number;
4955
output: number;
56+
reasoning: number;
5057
cacheRead: number;
5158
cacheWrite: number;
5259
totalTokens: number;
@@ -464,6 +471,7 @@ export class OllamaAdapter {
464471
output.usage.input = payload.prompt_eval_count ?? output.usage.input;
465472
output.usage.output = payload.eval_count ?? output.usage.output;
466473
output.usage.totalTokens = output.usage.input + output.usage.output;
474+
calculateUsageCost(model, output.usage);
467475
output.stopReason = payload.done_reason === 'length' ? 'length' : 'stop';
468476

469477
if (thinkingIndex !== undefined) {
@@ -622,6 +630,7 @@ function createAssistantMessage(model: ModelDescriptor): AssistantMessage {
622630
usage: {
623631
input: 0,
624632
output: 0,
633+
reasoning: 0,
625634
cacheRead: 0,
626635
cacheWrite: 0,
627636
totalTokens: 0,
@@ -632,6 +641,15 @@ function createAssistantMessage(model: ModelDescriptor): AssistantMessage {
632641
};
633642
}
634643

644+
/**
 * Fills in `usage.cost` in place from the token counts already stored on `usage`.
 *
 * Each rate in `model.cost` is divided by 1_000_000 and multiplied by the matching
 * token count, so rates are treated as prices per million tokens. A missing
 * `model.cost`, or a missing individual rate, is treated as 0. `usage.reasoning`
 * is not read here and contributes nothing to the cost.
 *
 * @param model - Descriptor whose optional `cost` schedule supplies the rates.
 * @param usage - Usage record to update; its `cost.input`, `cost.output`,
 *   `cost.cacheRead`, `cost.cacheWrite` and `cost.total` fields are overwritten.
 */
function calculateUsageCost(model: ModelDescriptor, usage: Usage): void {
645+
usage.cost.input = ((model.cost?.input ?? 0) / 1_000_000) * usage.input;
646+
usage.cost.output = ((model.cost?.output ?? 0) / 1_000_000) * usage.output;
647+
usage.cost.cacheRead = ((model.cost?.cacheRead ?? 0) / 1_000_000) * usage.cacheRead;
648+
usage.cost.cacheWrite = ((model.cost?.cacheWrite ?? 0) / 1_000_000) * usage.cacheWrite;
649+
// Total is the sum of the four component costs computed above.
usage.cost.total =
650+
usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
651+
}
652+
635653
function legacyInputToContext(input: GenerateInput): Context {
636654
const messages: Message[] = input.messages
637655
? input.messages
@@ -647,6 +665,7 @@ function legacyInputToContext(input: GenerateInput): Context {
647665
usage: {
648666
input: 0,
649667
output: 0,
668+
reasoning: 0,
650669
cacheRead: 0,
651670
cacheWrite: 0,
652671
totalTokens: 0,

0 commit comments

Comments
 (0)