@@ -163,17 +163,21 @@ def _calculate_token_usage(
163163
164164 if hasattr (response , "usage" ):
165165 input_tokens = _get_usage (response .usage , ["input_tokens" , "prompt_tokens" ])
166- if hasattr (response .usage , "input_tokens_details" ):
167- input_tokens_cached = _get_usage (
168- response .usage .input_tokens_details , ["cached_tokens" ]
169- )
166+ input_tokens_details = getattr (
167+ response .usage , "input_tokens_details" , None
168+ ) or getattr (response .usage , "prompt_tokens_details" , None )
169+ if input_tokens_details is not None :
170+ input_tokens_cached = _get_usage (input_tokens_details , ["cached_tokens" ])
170171
171172 output_tokens = _get_usage (
172173 response .usage , ["output_tokens" , "completion_tokens" ]
173174 )
174- if hasattr (response .usage , "output_tokens_details" ):
175+ output_tokens_details = getattr (
176+ response .usage , "output_tokens_details" , None
177+ ) or getattr (response .usage , "completion_tokens_details" , None )
178+ if output_tokens_details is not None :
175179 output_tokens_reasoning = _get_usage (
176- response . usage . output_tokens_details , ["reasoning_tokens" ]
180+ output_tokens_details , ["reasoning_tokens" ]
177181 )
178182
179183 total_tokens = _get_usage (response .usage , ["total_tokens" ])
@@ -610,10 +614,14 @@ def _set_streaming_completions_api_output_data(
610614
611615 def new_iterator () -> "Iterator[ChatCompletionChunk]" :
612616 nonlocal ttft
617+ usage_chunk = None
613618 for x in old_iterator :
614619 span .set_data (SPANDATA .GEN_AI_RESPONSE_MODEL , x .model )
615620
616621 with capture_internal_exceptions ():
622+ if hasattr (x , "usage" ) and x .usage is not None :
623+ usage_chunk = x
624+
617625 if hasattr (x , "choices" ):
618626 choice_index = 0
619627 for choice in x .choices :
@@ -643,7 +651,7 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
643651 )
644652 _calculate_token_usage (
645653 messages ,
646- response ,
654+ usage_chunk if usage_chunk is not None else response ,
647655 span ,
648656 all_responses ,
649657 integration .count_tokens ,
@@ -654,10 +662,14 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
654662
655663 async def new_iterator_async () -> "AsyncIterator[ChatCompletionChunk]" :
656664 nonlocal ttft
665+ usage_chunk = None
657666 async for x in old_iterator :
658667 span .set_data (SPANDATA .GEN_AI_RESPONSE_MODEL , x .model )
659668
660669 with capture_internal_exceptions ():
670+ if hasattr (x , "usage" ) and x .usage is not None :
671+ usage_chunk = x
672+
661673 if hasattr (x , "choices" ):
662674 choice_index = 0
663675 for choice in x .choices :
@@ -687,7 +699,7 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
687699 )
688700 _calculate_token_usage (
689701 messages ,
690- response ,
702+ usage_chunk if usage_chunk is not None else response ,
691703 span ,
692704 all_responses ,
693705 integration .count_tokens ,
0 commit comments