@@ -74,6 +74,64 @@ describe('OpenAIAdapter', () => {
7474 } ) ;
7575 } ) ;
7676
it('surfaces reasoning tokens without double-counting output', async () => {
  // One streamed `data:` line carrying the usage block, followed by the [DONE] marker.
  // The usage block reports completion_tokens=15 with reasoning_tokens=8 nested
  // under completion_tokens_details, and an explicit total_tokens=35.
  const streamLines = [
    'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"completion_tokens":15,"total_tokens":35,"completion_tokens_details":{"reasoning_tokens":8}}}',
    'data: [DONE]',
  ];
  (fetch as jest.Mock).mockResolvedValueOnce(createStreamingResponse(streamLines));

  const openai = new OpenAIAdapter({ apiKey: 'test-key' });
  const model = openai.createModel('gpt-5.4-mini');
  const responseStream = openai.stream(model, {
    messages: [{ role: 'user', content: 'hi', timestamp: Date.now() }],
  });

  // Consume every streamed event before asking for the final message.
  for await (const _event of responseStream) {
    // nothing to do per event
  }
  const { usage } = await responseStream.result();

  // Output mirrors completion_tokens (15); adding reasoning_tokens on top would give 23.
  expect(usage.output).toBe(15);
  // Reasoning tokens are reported in a dedicated field.
  expect(usage.reasoning).toBe(8);
  // The total is the total_tokens value sent in the usage block.
  expect(usage.totalTokens).toBe(35);
});
101+
it('totalTokens fallback includes cacheWrite when total_tokens is absent', async () => {
  // The usage block below omits total_tokens but carries cached prompt tokens and
  // reasoning tokens, so totalTokens has to be derived rather than read from the payload.
  // NOTE(review): cacheWrite is expected to be 0 here, so this test cannot tell
  // whether the derived total actually contains the cacheWrite term.
  const streamLines = [
    'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":30,"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":4},"prompt_tokens_details":{"cached_tokens":6}}}',
    'data: [DONE]',
  ];
  (fetch as jest.Mock).mockResolvedValueOnce(createStreamingResponse(streamLines));

  const openai = new OpenAIAdapter({ apiKey: 'test-key' });
  const model = openai.createModel('gpt-5.4-mini');
  const responseStream = openai.stream(model, {
    messages: [{ role: 'user', content: 'hi', timestamp: Date.now() }],
  });

  // Consume every streamed event before asking for the final message.
  for await (const _event of responseStream) {
    // nothing to do per event
  }
  const { usage } = await responseStream.result();

  // prompt_tokens (30) minus cached_tokens (6) leaves 24 input tokens.
  expect(usage.input).toBe(24);
  // Output mirrors completion_tokens; reasoning is not added on top.
  expect(usage.output).toBe(10);
  // Reasoning tokens are reported in a dedicated field.
  expect(usage.reasoning).toBe(4);
  // cacheRead mirrors cached_tokens.
  expect(usage.cacheRead).toBe(6);
  // No cache-write figure appears in the payload, so 0 is expected.
  expect(usage.cacheWrite).toBe(0);
  // Derived total: input + output + cacheRead + cacheWrite = 24 + 10 + 6 + 0 = 40.
  expect(usage.totalTokens).toBe(40);
});
134+
77135 it ( 'falls back to built-in models when no API key is configured' , async ( ) => {
78136 const adapter = new OpenAIAdapter ( ) ;
79137 const models = await adapter . listModels ( ) ;
0 commit comments