@@ -2261,10 +2261,202 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
22612261 )
22622262
22632263 (span ,) = events [0 ]["spans" ]
2264+ # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200
2265+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS ] == 200
2266+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_OUTPUT_TOKENS ] == 50
2267+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_TOTAL_TOKENS ] == 250
22642268 assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS_CACHED ] == 80
22652269 assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE ] == 20
22662270
22672271
def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events):
    """
    gen_ai.usage.input_tokens must include cache_write tokens (non-streaming).

    Reproduces a real Anthropic cache-write response. Anthropic's
    ``usage.input_tokens`` counts only non-cached tokens, but
    ``gen_ai.usage.input_tokens`` should be the TOTAL so downstream cost
    calculations never produce negative values.

    Real Anthropic response (from E2E test):
        Usage(input_tokens=19, output_tokens=14,
              cache_creation_input_tokens=2846, cache_read_input_tokens=0)
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Fake a non-streaming response whose usage carries a large cache write.
    mocked_message = Message(
        id="id",
        model="claude-sonnet-4-20250514",
        role="assistant",
        content=[TextBlock(type="text", text="3 + 3 equals 6.")],
        type="message",
        usage=Usage(
            input_tokens=19,
            output_tokens=14,
            cache_read_input_tokens=0,
            cache_creation_input_tokens=2846,
        ),
    )
    client.messages._post = mock.Mock(return_value=mocked_message)

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "What is 3+3?"}],
            model="claude-sonnet-4-20250514",
        )

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # input_tokens should be the total: 19 (non-cached) + 2846 (cache_write) = 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846
2318+
2319+
def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events):
    """
    gen_ai.usage.input_tokens must include cache_read tokens (non-streaming).

    Reproduces a real Anthropic cache-hit response. This is the scenario
    that caused negative gen_ai.cost.input_tokens: with input_tokens=19 but
    cached=2846, the backend computed 19 - 2846 = -2827 "regular" tokens.

    Real Anthropic response (from E2E test):
        Usage(input_tokens=19, output_tokens=14,
              cache_creation_input_tokens=0, cache_read_input_tokens=2846)
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Fake a non-streaming response whose usage reflects a large cache hit.
    cache_hit_usage = Usage(
        input_tokens=19,
        output_tokens=14,
        cache_read_input_tokens=2846,
        cache_creation_input_tokens=0,
    )
    client.messages._post = mock.Mock(
        return_value=Message(
            id="id",
            model="claude-sonnet-4-20250514",
            role="assistant",
            content=[TextBlock(type="text", text="5 + 5 = 10.")],
            type="message",
            usage=cache_hit_usage,
        )
    )

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "What is 5+5?"}],
            model="claude-sonnet-4-20250514",
        )

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # input_tokens should be the total: 19 (non-cached) + 2846 (cache_read) = 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0
2366+
2367+
def test_input_tokens_include_cache_read_streaming(sentry_init, capture_events):
    """
    gen_ai.usage.input_tokens must include cache_read tokens (streaming).

    Same cache-hit scenario as the non-streaming test, expressed as
    realistic streaming events: usage arrives split across the
    message_start event (input side) and the message_delta event
    (output side).
    """
    client = Anthropic(api_key="z")
    returned_stream = Stream(cast_to=None, response=None, client=client)

    start_event = MessageStartEvent(
        type="message_start",
        message=Message(
            id="id",
            model="claude-sonnet-4-20250514",
            role="assistant",
            content=[],
            type="message",
            usage=Usage(
                input_tokens=19,
                output_tokens=0,
                cache_read_input_tokens=2846,
                cache_creation_input_tokens=0,
            ),
        ),
    )
    delta_event = MessageDeltaEvent(
        type="message_delta",
        delta=Delta(stop_reason="end_turn"),
        usage=MessageDeltaUsage(output_tokens=14),
    )
    returned_stream._iterator = [start_event, delta_event]

    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client.messages._post = mock.Mock(return_value=returned_stream)

    with start_transaction(name="anthropic"):
        # Drain the stream so the integration sees every usage event.
        for _ in client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "What is 5+5?"}],
            model="claude-sonnet-4-20250514",
            stream=True,
        ):
            pass

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # input_tokens should be the total: 19 + 2846 = 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0
2420+
2421+
def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
    """
    input_tokens must pass through unchanged when no tokens were cached.

    Real Anthropic response (from E2E test, simple call without caching):
        Usage(input_tokens=20, output_tokens=12)
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Plain response: usage carries no cache_read/cache_write fields at all.
    response = Message(
        id="id",
        model="claude-sonnet-4-20250514",
        role="assistant",
        content=[TextBlock(type="text", text="2+2 equals 4.")],
        type="message",
        usage=Usage(input_tokens=20, output_tokens=12),
    )
    client.messages._post = mock.Mock(return_value=response)

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "What is 2+2?"}],
            model="claude-sonnet-4-20250514",
        )

    (span,) = events[0]["spans"]

    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20
    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32  # 20 + 12
2458+
2459+
22682460def test_cache_tokens_streaming (sentry_init , capture_events ):
22692461 """Test cache tokens are tracked for streaming responses."""
22702462 client = Anthropic (api_key = "z" )
@@ -2307,5 +2499,9 @@ def test_cache_tokens_streaming(sentry_init, capture_events):
23072499 pass
23082500
23092501 (span ,) = events [0 ]["spans" ]
2502+ # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200
2503+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS ] == 200
2504+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_OUTPUT_TOKENS ] == 10
2505+ assert span ["data" ][SPANDATA .GEN_AI_USAGE_TOTAL_TOKENS ] == 210
23102506 assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS_CACHED ] == 80
23112507 assert span ["data" ][SPANDATA .GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE ] == 20
0 commit comments