Skip to content

Commit f24ebc4

Browse files
committed
fix(openai): capture token usage for streaming responses when available
This requires the user to pass `stream_options={"include_usage": True}` in the streaming request.
1 parent b1f8a42 commit f24ebc4

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

sentry_sdk/integrations/openai.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,17 +163,21 @@ def _calculate_token_usage(
163163

164164
if hasattr(response, "usage"):
165165
input_tokens = _get_usage(response.usage, ["input_tokens", "prompt_tokens"])
166-
if hasattr(response.usage, "input_tokens_details"):
167-
input_tokens_cached = _get_usage(
168-
response.usage.input_tokens_details, ["cached_tokens"]
169-
)
166+
input_tokens_details = getattr(
167+
response.usage, "input_tokens_details", None
168+
) or getattr(response.usage, "prompt_tokens_details", None)
169+
if input_tokens_details is not None:
170+
input_tokens_cached = _get_usage(input_tokens_details, ["cached_tokens"])
170171

171172
output_tokens = _get_usage(
172173
response.usage, ["output_tokens", "completion_tokens"]
173174
)
174-
if hasattr(response.usage, "output_tokens_details"):
175+
output_tokens_details = getattr(
176+
response.usage, "output_tokens_details", None
177+
) or getattr(response.usage, "completion_tokens_details", None)
178+
if output_tokens_details is not None:
175179
output_tokens_reasoning = _get_usage(
176-
response.usage.output_tokens_details, ["reasoning_tokens"]
180+
output_tokens_details, ["reasoning_tokens"]
177181
)
178182

179183
total_tokens = _get_usage(response.usage, ["total_tokens"])
@@ -610,10 +614,14 @@ def _set_streaming_completions_api_output_data(
610614

611615
def new_iterator() -> "Iterator[ChatCompletionChunk]":
612616
nonlocal ttft
617+
usage_chunk = None
613618
for x in old_iterator:
614619
span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
615620

616621
with capture_internal_exceptions():
622+
if hasattr(x, "usage") and x.usage is not None:
623+
usage_chunk = x
624+
617625
if hasattr(x, "choices"):
618626
choice_index = 0
619627
for choice in x.choices:
@@ -643,7 +651,7 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
643651
)
644652
_calculate_token_usage(
645653
messages,
646-
response,
654+
usage_chunk if usage_chunk is not None else response,
647655
span,
648656
all_responses,
649657
integration.count_tokens,
@@ -654,10 +662,14 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
654662

655663
async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
656664
nonlocal ttft
665+
usage_chunk = None
657666
async for x in old_iterator:
658667
span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
659668

660669
with capture_internal_exceptions():
670+
if hasattr(x, "usage") and x.usage is not None:
671+
usage_chunk = x
672+
661673
if hasattr(x, "choices"):
662674
choice_index = 0
663675
for choice in x.choices:
@@ -687,7 +699,7 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
687699
)
688700
_calculate_token_usage(
689701
messages,
690-
response,
702+
usage_chunk if usage_chunk is not None else response,
691703
span,
692704
all_responses,
693705
integration.count_tokens,

0 commit comments

Comments (0)