Commit aaf5c47

fix(anthropic): fix token accounting (#5490)
1 parent 699a1c9
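
Why this matters: Anthropic's usage.input_tokens counts only non-cached input
tokens, while the cost calculation downstream derives "regular" tokens as
input_tokens minus cached tokens. With a cache hit, that subtraction went
negative. A minimal sketch of the failing arithmetic, using the cache-hit
numbers quoted in the new tests below (the helper name is illustrative, not
Sentry's actual code):

    # Hypothetical cost helper, for illustration only.
    def regular_input_tokens(input_tokens: int, cached: int) -> int:
        # Downstream assumes input_tokens is the TOTAL and subtracts cached.
        return input_tokens - cached

    # Before this commit: the SDK forwarded Anthropic's non-cached count as-is.
    regular_input_tokens(19, 2846)         # -2827, a negative token count
    # After: the SDK reports the normalized total, 19 + 2846 = 2865.
    regular_input_tokens(19 + 2846, 2846)  # 19, as expected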

File tree

2 files changed: +219, -2 lines

sentry_sdk/integrations/anthropic.py

Lines changed: 23 additions & 2 deletions
@@ -108,6 +108,11 @@ def _get_token_usage(result: "Messages") -> "tuple[int, int, int, int]":
     ):
         cache_write_input_tokens = usage.cache_creation_input_tokens
 
+    # Anthropic's input_tokens excludes cached/cache_write tokens.
+    # Normalize to total input tokens so downstream cost calculations
+    # (input_tokens - cached) don't produce negative values.
+    input_tokens += cache_read_input_tokens + cache_write_input_tokens
+
     return (
         input_tokens,
         output_tokens,
@@ -466,11 +471,19 @@ def new_iterator() -> "Iterator[MessageStreamEvent]":
                 )
                 yield event
 
+            # Anthropic's input_tokens excludes cached/cache_write tokens.
+            # Normalize to total input tokens for correct cost calculations.
+            total_input = (
+                usage.input_tokens
+                + (usage.cache_read_input_tokens or 0)
+                + (usage.cache_write_input_tokens or 0)
+            )
+
             _set_output_data(
                 span=span,
                 integration=integration,
                 model=model,
-                input_tokens=usage.input_tokens,
+                input_tokens=total_input,
                 output_tokens=usage.output_tokens,
                 cache_read_input_tokens=usage.cache_read_input_tokens,
                 cache_write_input_tokens=usage.cache_write_input_tokens,
@@ -496,11 +509,19 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
                 )
                 yield event
 
+            # Anthropic's input_tokens excludes cached/cache_write tokens.
+            # Normalize to total input tokens for correct cost calculations.
+            total_input = (
+                usage.input_tokens
+                + (usage.cache_read_input_tokens or 0)
+                + (usage.cache_write_input_tokens or 0)
+            )
+
             _set_output_data(
                 span=span,
                 integration=integration,
                 model=model,
-                input_tokens=usage.input_tokens,
+                input_tokens=total_input,
                 output_tokens=usage.output_tokens,
                 cache_read_input_tokens=usage.cache_read_input_tokens,
                 cache_write_input_tokens=usage.cache_write_input_tokens,
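
Taken together, the three hunks apply the same normalization in the
non-streaming helper and in both streaming iterators. A minimal standalone
sketch of that normalization (simplified signature; the real code reads these
fields off the SDK's Usage object and handles missing attributes):

    def normalize_input_tokens(
        input_tokens: int,
        cache_read_input_tokens: int,
        cache_write_input_tokens: int,
    ) -> int:
        # Anthropic's input_tokens excludes cached/cache_write tokens; fold
        # them back in to get the total that cost calculations expect.
        return input_tokens + cache_read_input_tokens + cache_write_input_tokens

    assert normalize_input_tokens(100, 80, 20) == 200   # test_cache_tokens_* fixtures
    assert normalize_input_tokens(19, 2846, 0) == 2865  # real cache-hit response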

tests/integrations/anthropic/test_anthropic.py

Lines changed: 196 additions & 0 deletions
@@ -2261,10 +2261,202 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
     )
 
     (span,) = events[0]["spans"]
+    # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
 
 
+def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming).
+
+    Reproduces a real Anthropic cache-write response. Anthropic's usage.input_tokens
+    only counts non-cached tokens, but gen_ai.usage.input_tokens should be the TOTAL
+    so downstream cost calculations don't produce negative values.
+
+    Real Anthropic response (from E2E test):
+        Usage(input_tokens=19, output_tokens=14,
+              cache_creation_input_tokens=2846, cache_read_input_tokens=0)
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-sonnet-4-20250514",
+            role="assistant",
+            content=[TextBlock(type="text", text="3 + 3 equals 6.")],
+            type="message",
+            usage=Usage(
+                input_tokens=19,
+                output_tokens=14,
+                cache_read_input_tokens=0,
+                cache_creation_input_tokens=2846,
+            ),
+        )
+    )
+
+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "What is 3+3?"}],
+            model="claude-sonnet-4-20250514",
+        )
+
+    (span,) = events[0]["spans"]
+
+    # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846
+
+
+def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming).
+
+    Reproduces a real Anthropic cache-hit response. This is the scenario that
+    caused negative gen_ai.cost.input_tokens: input_tokens=19 but cached=2846,
+    so the backend computed 19 - 2846 = -2827 "regular" tokens.
+
+    Real Anthropic response (from E2E test):
+        Usage(input_tokens=19, output_tokens=14,
+              cache_creation_input_tokens=0, cache_read_input_tokens=2846)
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-sonnet-4-20250514",
+            role="assistant",
+            content=[TextBlock(type="text", text="5 + 5 = 10.")],
+            type="message",
+            usage=Usage(
+                input_tokens=19,
+                output_tokens=14,
+                cache_read_input_tokens=2846,
+                cache_creation_input_tokens=0,
+            ),
+        )
+    )
+
+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "What is 5+5?"}],
+            model="claude-sonnet-4-20250514",
+        )
+
+    (span,) = events[0]["spans"]
+
+    # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0
+
+
+def test_input_tokens_include_cache_read_streaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming).
+
+    Same cache-hit scenario as non-streaming, using realistic streaming events.
+    """
+    client = Anthropic(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
+        MessageStartEvent(
+            type="message_start",
+            message=Message(
+                id="id",
+                model="claude-sonnet-4-20250514",
+                role="assistant",
+                content=[],
+                type="message",
+                usage=Usage(
+                    input_tokens=19,
+                    output_tokens=0,
+                    cache_read_input_tokens=2846,
+                    cache_creation_input_tokens=0,
+                ),
+            ),
+        ),
+        MessageDeltaEvent(
+            type="message_delta",
+            delta=Delta(stop_reason="end_turn"),
+            usage=MessageDeltaUsage(output_tokens=14),
+        ),
+    ]
+
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client.messages._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="anthropic"):
+        for _ in client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "What is 5+5?"}],
+            model="claude-sonnet-4-20250514",
+            stream=True,
+        ):
+            pass
+
+    (span,) = events[0]["spans"]
+
+    # input_tokens should be total: 19 + 2846 = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0
+
+
+def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
+    """
+    Test that input_tokens is unchanged when there are no cached tokens.
+
+    Real Anthropic response (from E2E test, simple call without caching):
+        Usage(input_tokens=20, output_tokens=12)
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-sonnet-4-20250514",
+            role="assistant",
+            content=[TextBlock(type="text", text="2+2 equals 4.")],
+            type="message",
+            usage=Usage(
+                input_tokens=20,
+                output_tokens=12,
+            ),
+        )
+    )
+
+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "What is 2+2?"}],
+            model="claude-sonnet-4-20250514",
+        )
+
+    (span,) = events[0]["spans"]
+
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32  # 20 + 12
+
+
 def test_cache_tokens_streaming(sentry_init, capture_events):
     """Test cache tokens are tracked for streaming responses."""
     client = Anthropic(api_key="z")
@@ -2307,5 +2499,9 @@ def test_cache_tokens_streaming(sentry_init, capture_events):
             pass
 
     (span,) = events[0]["spans"]
+    # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20