@@ -243,12 +243,30 @@ def state_to_output(
243243 "input_tokens" : usage .get ("input_tokens" , 0.0 ),
244244 "output_tokens" : usage .get ("output_tokens" , 0.0 ),
245245 }
246- # Add context token metrics from trajectory
247- trajectory = state .get ("trajectory" , [])
248- if isinstance (trajectory , list ):
249- from verifiers .utils .usage_utils import compute_context_token_metrics
246+ # Context ("final") token metrics. v1 records these at write time from
247+ # the live Response (they can't be re-derived from the serialized trajectory,
248+ # whose responses are plain dicts), so prefer them when present. Classic envs
249+ # keep live Response objects in the trajectory, so recompute there.
250+ raw_usage = state .get ("token_usage" )
251+ final_output = (
252+ raw_usage .get ("final_output_tokens" )
253+ if isinstance (raw_usage , Mapping )
254+ else None
255+ )
256+ final_input = (
257+ raw_usage .get ("final_input_tokens" )
258+ if isinstance (raw_usage , Mapping )
259+ else None
260+ )
261+ if final_output is not None and final_input is not None :
262+ token_usage ["final_output_tokens" ] = float (final_output )
263+ token_usage ["final_input_tokens" ] = float (final_input )
264+ else :
265+ trajectory = state .get ("trajectory" , [])
266+ if isinstance (trajectory , list ):
267+ from verifiers .utils .usage_utils import compute_context_token_metrics
250268
251- token_usage .update (compute_context_token_metrics (trajectory ))
269+ token_usage .update (compute_context_token_metrics (trajectory ))
252270 output ["token_usage" ] = token_usage
253271
254272 # sanitize messages (handle None for error cases)
0 commit comments