Skip to content

Commit 649654c

Browse files
committed
fix(openai): split reasoning from output and fix cacheWrite in totalTokens fallback
- Stop double-counting reasoning by setting usage.output to completion_tokens (which already includes reasoning per OpenAI's wire contract).
- Expose reasoning as a separate, informational count on usage.reasoning (a subset of usage.output, not added on top of it).
- Include cacheWrite in the totalTokens fallback used when total_tokens is absent.
1 parent e5cbff1 commit 649654c

2 files changed

Lines changed: 63 additions & 2 deletions

File tree

packages/openai/__tests__/openai.test.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,64 @@ describe('OpenAIAdapter', () => {
7474
});
7575
});
7676

77+
it('surfaces reasoning tokens without double-counting output', async () => {
78+
(fetch as jest.Mock).mockResolvedValueOnce(
79+
createStreamingResponse([
80+
'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"completion_tokens":15,"total_tokens":35,"completion_tokens_details":{"reasoning_tokens":8}}}',
81+
'data: [DONE]',
82+
])
83+
);
84+
85+
const adapter = new OpenAIAdapter({ apiKey: 'test-key' });
86+
const model = adapter.createModel('gpt-5.4-mini');
87+
const stream = adapter.stream(model, {
88+
messages: [{ role: 'user', content: 'hi', timestamp: Date.now() }],
89+
});
90+
91+
for await (const _ of stream) { /* drain */ }
92+
const message = await stream.result();
93+
94+
// output must equal completion_tokens (15), NOT completion_tokens + reasoning_tokens (23)
95+
expect(message.usage.output).toBe(15);
96+
// reasoning is exposed as its own field
97+
expect(message.usage.reasoning).toBe(8);
98+
// totalTokens comes straight from the wire's total_tokens (35), so reasoning is not counted twice
99+
expect(message.usage.totalTokens).toBe(35);
100+
});
101+
102+
it('totalTokens fallback includes cacheWrite when total_tokens is absent', async () => {
103+
// Simulate a chunk with no total_tokens but with cached input tokens and reasoning.
104+
// The adapter always sets cacheWrite to 0, so this pins the fallback sum (input + output + cacheRead + cacheWrite) without exercising a non-zero cacheWrite.
105+
(fetch as jest.Mock).mockResolvedValueOnce(
106+
createStreamingResponse([
107+
'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":30,"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":4},"prompt_tokens_details":{"cached_tokens":6}}}',
108+
'data: [DONE]',
109+
])
110+
);
111+
112+
const adapter = new OpenAIAdapter({ apiKey: 'test-key' });
113+
const model = adapter.createModel('gpt-5.4-mini');
114+
const stream = adapter.stream(model, {
115+
messages: [{ role: 'user', content: 'hi', timestamp: Date.now() }],
116+
});
117+
118+
for await (const _ of stream) { /* drain */ }
119+
const message = await stream.result();
120+
121+
// input = prompt_tokens(30) - cached(6) = 24
122+
expect(message.usage.input).toBe(24);
123+
// output = completion_tokens (no double-count)
124+
expect(message.usage.output).toBe(10);
125+
// reasoning = completion_tokens_details.reasoning_tokens (4), a subset of output
126+
expect(message.usage.reasoning).toBe(4);
127+
// cacheRead = cached_tokens
128+
expect(message.usage.cacheRead).toBe(6);
129+
// cacheWrite = 0 (stock OpenAI doesn't emit it)
130+
expect(message.usage.cacheWrite).toBe(0);
131+
// fallback: input + output + cacheRead + cacheWrite = 24 + 10 + 6 + 0 = 40
132+
expect(message.usage.totalTokens).toBe(40);
133+
});
134+
77135
it('falls back to built-in models when no API key is configured', async () => {
78136
const adapter = new OpenAIAdapter();
79137
const models = await adapter.listModels();

packages/openai/src/index.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ interface StreamOptions {
128128
interface Usage {
129129
input: number;
130130
output: number;
131+
reasoning: number;
131132
cacheRead: number;
132133
cacheWrite: number;
133134
totalTokens: number;
@@ -797,6 +798,7 @@ function createAssistantMessage(model: ModelDescriptor): AssistantMessage {
797798
usage: {
798799
input: 0,
799800
output: 0,
801+
reasoning: 0,
800802
cacheRead: 0,
801803
cacheWrite: 0,
802804
totalTokens: 0,
@@ -825,10 +827,11 @@ function applyUsage(
825827
const reasoningTokens = payload.completion_tokens_details?.reasoning_tokens ?? 0;
826828

827829
usage.input = (payload.prompt_tokens ?? 0) - cachedTokens;
828-
usage.output = (payload.completion_tokens ?? 0) + reasoningTokens;
830+
usage.output = payload.completion_tokens ?? 0;
831+
usage.reasoning = reasoningTokens;
829832
usage.cacheRead = cachedTokens;
830833
usage.cacheWrite = 0;
831-
usage.totalTokens = payload.total_tokens ?? usage.input + usage.output + usage.cacheRead;
834+
usage.totalTokens = payload.total_tokens ?? usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
832835
calculateUsageCost(model, usage);
833836
}
834837

0 commit comments

Comments
 (0)