Skip to content

Commit 6e7fffc

Browse files
ericapisani and claude committed
ref(openai): Fix token usage reporting for empty streams and non-streaming responses
Move _calculate_completions_token_usage outside the data_buf check so token usage from stream metadata is recorded even when no content chunks are produced (e.g. content filter). Also count output tokens from response.output when streaming_message_responses is absent in the Responses API path. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cee1173 commit 6e7fffc

File tree

2 files changed

+221
-16
lines changed

2 files changed

+221
-16
lines changed

sentry_sdk/integrations/openai.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,12 @@ def _calculate_responses_token_usage(
293293
if streaming_message_responses is not None:
294294
for message in streaming_message_responses:
295295
output_tokens += count_tokens(message)
296+
elif hasattr(response, "output"):
297+
for output_item in response.output:
298+
if hasattr(output_item, "content"):
299+
for content_item in output_item.content:
300+
if hasattr(content_item, "text"):
301+
output_tokens += count_tokens(content_item.text)
296302

297303
# Do not set token data if it is 0
298304
input_tokens = input_tokens or None
@@ -794,18 +800,20 @@ def _wrap_synchronous_completions_chunk_iterator(
794800
set_data_normalized(
795801
span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
796802
)
803+
all_responses = None
797804
if len(data_buf) > 0:
798805
all_responses = ["".join(chunk) for chunk in data_buf]
799806
if should_send_default_pii() and integration.include_prompts:
800807
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
801-
_calculate_completions_token_usage(
802-
messages=messages,
803-
response=response,
804-
span=span,
805-
streaming_message_responses=all_responses,
806-
streaming_message_total_token_usage=streaming_message_total_token_usage,
807-
count_tokens=integration.count_tokens,
808-
)
808+
809+
_calculate_completions_token_usage(
810+
messages=messages,
811+
response=response,
812+
span=span,
813+
streaming_message_responses=all_responses,
814+
streaming_message_total_token_usage=streaming_message_total_token_usage,
815+
count_tokens=integration.count_tokens,
816+
)
809817

810818
if finish_span:
811819
span.__exit__(None, None, None)
@@ -854,18 +862,20 @@ async def _wrap_asynchronous_completions_chunk_iterator(
854862
set_data_normalized(
855863
span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
856864
)
865+
all_responses = None
857866
if len(data_buf) > 0:
858867
all_responses = ["".join(chunk) for chunk in data_buf]
859868
if should_send_default_pii() and integration.include_prompts:
860869
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
861-
_calculate_completions_token_usage(
862-
messages=messages,
863-
response=response,
864-
span=span,
865-
streaming_message_responses=all_responses,
866-
streaming_message_total_token_usage=streaming_message_total_token_usage,
867-
count_tokens=integration.count_tokens,
868-
)
870+
871+
_calculate_completions_token_usage(
872+
messages=messages,
873+
response=response,
874+
span=span,
875+
streaming_message_responses=all_responses,
876+
streaming_message_total_token_usage=streaming_message_total_token_usage,
877+
count_tokens=integration.count_tokens,
878+
)
869879

870880
if finish_span:
871881
span.__exit__(None, None, None)
@@ -921,6 +931,7 @@ def _wrap_synchronous_responses_event_iterator(
921931
all_responses = ["".join(chunk) for chunk in data_buf]
922932
if should_send_default_pii() and integration.include_prompts:
923933
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
934+
924935
if count_tokens_manually:
925936
_calculate_responses_token_usage(
926937
input=input,

tests/integrations/openai/test_openai.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,136 @@ def test_streaming_chat_completion_with_usage_in_stream(
693693
assert span["data"]["gen_ai.usage.total_tokens"] == 30
694694

695695

696+
@pytest.mark.skipif(
697+
OPENAI_VERSION <= (1, 1, 0),
698+
reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
699+
)
700+
def test_streaming_chat_completion_empty_content_preserves_token_usage(
701+
sentry_init,
702+
capture_events,
703+
get_model_response,
704+
server_side_event_chunks,
705+
):
706+
"""Token usage from the stream is recorded even when no content is produced (e.g. content filter)."""
707+
sentry_init(
708+
integrations=[OpenAIIntegration(include_prompts=False)],
709+
traces_sample_rate=1.0,
710+
send_default_pii=False,
711+
)
712+
events = capture_events()
713+
714+
client = OpenAI(api_key="z")
715+
returned_stream = get_model_response(
716+
server_side_event_chunks(
717+
[
718+
ChatCompletionChunk(
719+
id="1",
720+
choices=[],
721+
created=100000,
722+
model="model-id",
723+
object="chat.completion.chunk",
724+
usage=CompletionUsage(
725+
prompt_tokens=20,
726+
completion_tokens=0,
727+
total_tokens=20,
728+
),
729+
),
730+
],
731+
include_event_type=False,
732+
)
733+
)
734+
735+
with mock.patch.object(
736+
client.chat._client._client,
737+
"send",
738+
return_value=returned_stream,
739+
):
740+
with start_transaction(name="openai tx"):
741+
response_stream = client.chat.completions.create(
742+
model="some-model",
743+
messages=[{"role": "user", "content": "hello"}],
744+
stream=True,
745+
stream_options={"include_usage": True},
746+
)
747+
for _ in response_stream:
748+
pass
749+
750+
tx = events[0]
751+
assert tx["type"] == "transaction"
752+
span = tx["spans"][0]
753+
assert span["op"] == "gen_ai.chat"
754+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
755+
assert "gen_ai.usage.output_tokens" not in span["data"]
756+
assert span["data"]["gen_ai.usage.total_tokens"] == 20
757+
758+
759+
@pytest.mark.skipif(
760+
OPENAI_VERSION <= (1, 1, 0),
761+
reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
762+
)
763+
@pytest.mark.asyncio
764+
async def test_streaming_chat_completion_empty_content_preserves_token_usage_async(
765+
sentry_init,
766+
capture_events,
767+
get_model_response,
768+
async_iterator,
769+
server_side_event_chunks,
770+
):
771+
"""Token usage from the stream is recorded even when no content is produced - async variant."""
772+
sentry_init(
773+
integrations=[OpenAIIntegration(include_prompts=False)],
774+
traces_sample_rate=1.0,
775+
send_default_pii=False,
776+
)
777+
events = capture_events()
778+
779+
client = AsyncOpenAI(api_key="z")
780+
returned_stream = get_model_response(
781+
async_iterator(
782+
server_side_event_chunks(
783+
[
784+
ChatCompletionChunk(
785+
id="1",
786+
choices=[],
787+
created=100000,
788+
model="model-id",
789+
object="chat.completion.chunk",
790+
usage=CompletionUsage(
791+
prompt_tokens=20,
792+
completion_tokens=0,
793+
total_tokens=20,
794+
),
795+
),
796+
],
797+
include_event_type=False,
798+
)
799+
)
800+
)
801+
802+
with mock.patch.object(
803+
client.chat._client._client,
804+
"send",
805+
return_value=returned_stream,
806+
):
807+
with start_transaction(name="openai tx"):
808+
response_stream = await client.chat.completions.create(
809+
model="some-model",
810+
messages=[{"role": "user", "content": "hello"}],
811+
stream=True,
812+
stream_options={"include_usage": True},
813+
)
814+
async for _ in response_stream:
815+
pass
816+
817+
tx = events[0]
818+
assert tx["type"] == "transaction"
819+
span = tx["spans"][0]
820+
assert span["op"] == "gen_ai.chat"
821+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
822+
assert "gen_ai.usage.output_tokens" not in span["data"]
823+
assert span["data"]["gen_ai.usage.total_tokens"] == 20
824+
825+
696826
@pytest.mark.skipif(
697827
OPENAI_VERSION <= (1, 1, 0),
698828
reason="OpenAI versions <=1.1.0 do not support the stream_options parameter.",
@@ -2247,6 +2377,70 @@ def count_tokens(msg):
22472377
)
22482378

22492379

2380+
@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
2381+
def test_responses_token_usage_manual_output_counting_response_output():
2382+
"""When output_tokens is missing, output tokens are counted from response.output."""
2383+
span = mock.MagicMock()
2384+
2385+
def count_tokens(msg):
2386+
return len(str(msg))
2387+
2388+
response = mock.MagicMock()
2389+
response.usage = mock.MagicMock()
2390+
response.usage.input_tokens = 20
2391+
response.usage.total_tokens = 20
2392+
response.output = [
2393+
ResponseOutputMessage(
2394+
id="msg-1",
2395+
content=[
2396+
ResponseOutputText(
2397+
annotations=[],
2398+
text="one",
2399+
type="output_text",
2400+
),
2401+
],
2402+
role="assistant",
2403+
status="completed",
2404+
type="message",
2405+
),
2406+
ResponseOutputMessage(
2407+
id="msg-2",
2408+
content=[
2409+
ResponseOutputText(
2410+
annotations=[],
2411+
text="two",
2412+
type="output_text",
2413+
),
2414+
ResponseOutputText(
2415+
annotations=[],
2416+
text="three",
2417+
type="output_text",
2418+
),
2419+
],
2420+
role="assistant",
2421+
status="completed",
2422+
type="message",
2423+
),
2424+
]
2425+
input = []
2426+
streaming_message_responses = None
2427+
2428+
with mock.patch(
2429+
"sentry_sdk.integrations.openai.record_token_usage"
2430+
) as mock_record_token_usage:
2431+
_calculate_responses_token_usage(
2432+
input, response, span, streaming_message_responses, count_tokens
2433+
)
2434+
mock_record_token_usage.assert_called_once_with(
2435+
span,
2436+
input_tokens=20,
2437+
input_tokens_cached=None,
2438+
output_tokens=11,
2439+
output_tokens_reasoning=None,
2440+
total_tokens=20,
2441+
)
2442+
2443+
22502444
@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
22512445
def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events):
22522446
sentry_init(

0 commit comments

Comments (0)