Skip to content

Commit d867724

Browse files
ericapisani and claude committed
fix(openai): Extract cached and reasoning tokens in Completions token usage
The refactor that split _calculate_token_usage into separate Completions and Responses functions dropped extraction of prompt_tokens_details.cached_tokens and completion_tokens_details.reasoning_tokens from the Completions path. This restores those fields so spans for cached prompts and reasoning models (e.g. o1/o3) report complete token usage metrics. Also fixes streaming usage priority: streaming_message_token_usage now correctly takes precedence over response.usage via elif. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d1ae0b2 commit d867724

File tree

2 files changed

+65
-2
lines changed

2 files changed

+65
-2
lines changed

sentry_sdk/integrations/openai.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,34 @@ def _calculate_completions_token_usage(
158158
) -> None:
159159
"""Extract and record token usage from a Chat Completions API response."""
160160
input_tokens: "Optional[int]" = 0
161+
input_tokens_cached: "Optional[int]" = 0
161162
output_tokens: "Optional[int]" = 0
163+
output_tokens_reasoning: "Optional[int]" = 0
162164
total_tokens: "Optional[int]" = 0
163165
usage = None
164166

165167
if streaming_message_token_usage:
166168
usage = streaming_message_token_usage
167-
168-
if hasattr(response, "usage"):
169+
elif hasattr(response, "usage"):
169170
usage = response.usage
170171

171172
if usage is not None:
172173
if hasattr(usage, "prompt_tokens") and isinstance(usage.prompt_tokens, int):
173174
input_tokens = usage.prompt_tokens
175+
if hasattr(usage, "prompt_tokens_details"):
176+
cached = getattr(usage.prompt_tokens_details, "cached_tokens", None)
177+
if isinstance(cached, int):
178+
input_tokens_cached = cached
174179
if hasattr(usage, "completion_tokens") and isinstance(
175180
usage.completion_tokens, int
176181
):
177182
output_tokens = usage.completion_tokens
183+
if hasattr(usage, "completion_tokens_details"):
184+
reasoning = getattr(
185+
usage.completion_tokens_details, "reasoning_tokens", None
186+
)
187+
if isinstance(reasoning, int):
188+
output_tokens_reasoning = reasoning
178189
if hasattr(usage, "total_tokens") and isinstance(usage.total_tokens, int):
179190
total_tokens = usage.total_tokens
180191

@@ -204,13 +215,17 @@ def _calculate_completions_token_usage(
204215

205216
# Do not set token data if it is 0
206217
input_tokens = input_tokens or None
218+
input_tokens_cached = input_tokens_cached or None
207219
output_tokens = output_tokens or None
220+
output_tokens_reasoning = output_tokens_reasoning or None
208221
total_tokens = total_tokens or None
209222

210223
record_token_usage(
211224
span,
212225
input_tokens=input_tokens,
226+
input_tokens_cached=input_tokens_cached,
213227
output_tokens=output_tokens,
228+
output_tokens_reasoning=output_tokens_reasoning,
214229
total_tokens=total_tokens,
215230
)
216231

tests/integrations/openai/test_openai.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,7 +1970,47 @@ def count_tokens(msg):
19701970
mock_record_token_usage.assert_called_once_with(
19711971
span,
19721972
input_tokens=20,
1973+
input_tokens_cached=None,
1974+
output_tokens=10,
1975+
output_tokens_reasoning=None,
1976+
total_tokens=30,
1977+
)
1978+
1979+
1980+
def test_completions_token_usage_with_detailed_fields():
1981+
"""Cached and reasoning token counts are extracted from prompt_tokens_details and completion_tokens_details."""
1982+
span = mock.MagicMock()
1983+
1984+
def count_tokens(msg):
1985+
return len(str(msg))
1986+
1987+
response = mock.MagicMock()
1988+
response.usage = mock.MagicMock()
1989+
response.usage.prompt_tokens = 20
1990+
response.usage.prompt_tokens_details = mock.MagicMock()
1991+
response.usage.prompt_tokens_details.cached_tokens = 5
1992+
response.usage.completion_tokens = 10
1993+
response.usage.completion_tokens_details = mock.MagicMock()
1994+
response.usage.completion_tokens_details.reasoning_tokens = 8
1995+
response.usage.total_tokens = 30
1996+
1997+
with mock.patch(
1998+
"sentry_sdk.integrations.openai.record_token_usage"
1999+
) as mock_record_token_usage:
2000+
_calculate_completions_token_usage(
2001+
messages=[],
2002+
response=response,
2003+
span=span,
2004+
streaming_message_responses=[],
2005+
streaming_message_token_usage=None,
2006+
count_tokens=count_tokens,
2007+
)
2008+
mock_record_token_usage.assert_called_once_with(
2009+
span,
2010+
input_tokens=20,
2011+
input_tokens_cached=5,
19732012
output_tokens=10,
2013+
output_tokens_reasoning=8,
19742014
total_tokens=30,
19752015
)
19762016

@@ -2007,7 +2047,9 @@ def count_tokens(msg):
20072047
mock_record_token_usage.assert_called_once_with(
20082048
span,
20092049
input_tokens=11,
2050+
input_tokens_cached=None,
20102051
output_tokens=10,
2052+
output_tokens_reasoning=None,
20112053
total_tokens=10,
20122054
)
20132055

@@ -2044,7 +2086,9 @@ def count_tokens(msg):
20442086
mock_record_token_usage.assert_called_once_with(
20452087
span,
20462088
input_tokens=20,
2089+
input_tokens_cached=None,
20472090
output_tokens=11,
2091+
output_tokens_reasoning=None,
20482092
total_tokens=20,
20492093
)
20502094

@@ -2082,7 +2126,9 @@ def count_tokens(msg):
20822126
mock_record_token_usage.assert_called_once_with(
20832127
span,
20842128
input_tokens=20,
2129+
input_tokens_cached=None,
20852130
output_tokens=None,
2131+
output_tokens_reasoning=None,
20862132
total_tokens=20,
20872133
)
20882134

@@ -2112,7 +2158,9 @@ def count_tokens(msg):
21122158
mock_record_token_usage.assert_called_once_with(
21132159
span,
21142160
input_tokens=None,
2161+
input_tokens_cached=None,
21152162
output_tokens=None,
2163+
output_tokens_reasoning=None,
21162164
total_tokens=None,
21172165
)
21182166

0 commit comments

Comments (0)