@@ -224,6 +224,8 @@ async function llmCall(messages, opts = {}) {
224224 const params = {
225225 messages,
226226 stream : true ,
227+ // Request token usage in streaming response (supported by OpenAI, some local servers)
228+ stream_options : { include_usage : true } ,
227229 ...( model && { model } ) ,
228230 ...( opts . temperature !== undefined && { temperature : opts . temperature } ) ,
229231 ...maxTokensParam ,
@@ -358,10 +360,24 @@ async function llmCall(messages, opts = {}) {
358360 content = reasoningContent ;
359361 }
360362
361- // Track token totals
362- results . tokenTotals . prompt += usage . prompt_tokens || 0 ;
363- results . tokenTotals . completion += usage . completion_tokens || 0 ;
364- results . tokenTotals . total += usage . total_tokens || 0 ;
363+ // Build per-call token data:
364+ // Prefer server-reported usage; fall back to chunk-counted completion tokens
365+ const promptTokens = usage . prompt_tokens || 0 ;
366+ const completionTokens = usage . completion_tokens || tokenCount ; // tokenCount = chunks with content/reasoning
367+ const totalTokens = usage . total_tokens || ( promptTokens + completionTokens ) ;
368+ const callTokens = { prompt : promptTokens , completion : completionTokens , total : totalTokens } ;
369+
370+ // Track global token totals
371+ results . tokenTotals . prompt += callTokens . prompt ;
372+ results . tokenTotals . completion += callTokens . completion ;
373+ results . tokenTotals . total += callTokens . total ;
374+
375+ // Track per-test tokens (accumulated across multiple llmCall invocations within one test)
376+ if ( _currentTestTokens ) {
377+ _currentTestTokens . prompt += callTokens . prompt ;
378+ _currentTestTokens . completion += callTokens . completion ;
379+ _currentTestTokens . total += callTokens . total ;
380+ }
365381
366382 // Capture model name from first response
367383 if ( opts . vlm ) {
@@ -370,7 +386,7 @@ async function llmCall(messages, opts = {}) {
370386 if ( ! results . model . name && model ) results . model . name = model ;
371387 }
372388
373- return { content, toolCalls, usage, model } ;
389+ return { content, toolCalls, usage : callTokens , model } ;
374390 } finally {
375391 clearTimeout ( idleTimer ) ;
376392 }
@@ -449,25 +465,33 @@ async function runSuites() {
449465 }
450466}
451467
468+ // ─── Per-test token accumulator (set by test(), read by llmCall) ──────────────
let _currentTestTokens = null;

/**
 * Run a single named test case inside the current suite and record its outcome.
 *
 * While `fn` is running, `_currentTestTokens` points at a fresh accumulator so
 * that every `llmCall` made by the test can add its token usage to it. The
 * accumulated counts are copied onto the test result, included in the pass log
 * line when non-zero, and sent with the `test_result` event.
 *
 * @param {string} name - Human-readable test name used in logs and events.
 * @param {() => any} fn - Test body; may be async. Its resolved value (if
 *   truthy) is recorded as the test detail. Any thrown value marks the test
 *   as failed.
 * @returns {Promise<void>} Resolves once the result is recorded and emitted;
 *   failures of `fn` are captured on the result rather than rethrown.
 */
async function test(name, fn) {
  // Keep a local reference so the result never depends on the global still
  // being set after `fn` settles.
  const tokens = { prompt: 0, completion: 0, total: 0 };
  const testResult = {
    name,
    status: 'pass',
    timeMs: 0,
    detail: '',
    tokens: { prompt: 0, completion: 0, total: 0 },
  };

  _currentTestTokens = tokens;
  const start = Date.now();
  try {
    const detail = await fn();
    testResult.detail = detail || '';
    currentSuite.passed++;
  } catch (err) {
    testResult.status = 'fail';
    // Thrown values are not guaranteed to be Error instances (`throw 'x'`,
    // `throw null`); fall back to their string form so `detail` is always
    // a usable string.
    testResult.detail = (err && err.message) || String(err);
    currentSuite.failed++;
  } finally {
    testResult.timeMs = Date.now() - start;
    // Snapshot, then detach the accumulator so llmCall invocations outside
    // of any test are not attributed to this one.
    testResult.tokens = { ...tokens };
    _currentTestTokens = null;
  }

  if (testResult.status === 'pass') {
    const tokInfo = tokens.total > 0 ? `, ${tokens.total} tok` : '';
    const suffix = testResult.detail ? ` — ${testResult.detail}` : '';
    log(`  ✅ ${name} (${testResult.timeMs}ms${tokInfo})${suffix}`);
  } else {
    log(`  ❌ ${name} (${testResult.timeMs}ms) — ${testResult.detail}`);
  }

  currentSuite.timeMs += testResult.timeMs;
  currentSuite.tests.push(testResult);
  emit({
    event: 'test_result',
    suite: currentSuite.name,
    test: name,
    status: testResult.status,
    timeMs: testResult.timeMs,
    // `detail` may be a non-string value returned by `fn`; coerce before
    // truncating so the event is always emitted.
    detail: String(testResult.detail).slice(0, 120),
    tokens: testResult.tokens,
  });
}
472496
473497function skip ( name , reason ) {
0 commit comments