Skip to content

Commit 9998646

Browse files
ericapisani and claude
committed
feat(anthropic): Record finish reasons in AI monitoring spans
Capture the stop_reason from Anthropic API responses and set it as GEN_AI_RESPONSE_FINISH_REASONS span data. Works for both streaming (via MessageDeltaEvent) and non-streaming responses.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 6345af9 commit 9998646

File tree

2 files changed

+46
-18
lines changed

2 files changed

+46
-18
lines changed

sentry_sdk/integrations/anthropic.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ def _collect_ai_data(
159159
usage: "_RecordedUsage",
160160
content_blocks: "list[str]",
161161
response_id: "str | None" = None,
162-
) -> "tuple[str | None, _RecordedUsage, list[str], str | None]":
162+
finish_reasons: "list[str] | None" = None,
163+
) -> "tuple[str | None, _RecordedUsage, list[str], str | None, list[str] | None]":
163164
"""
164165
Collect model information, token usage, and collect content blocks from the AI streaming response.
165166
"""
@@ -197,6 +198,7 @@ def _collect_ai_data(
197198
usage,
198199
content_blocks,
199200
response_id,
201+
finish_reasons,
200202
)
201203

202204
# Counterintuitive, but message_delta contains cumulative token counts :)
@@ -221,18 +223,18 @@ def _collect_ai_data(
221223
usage.cache_read_input_tokens = cache_read_input_tokens
222224
# TODO: Record event.usage.server_tool_use
223225

224-
return (
225-
model,
226-
usage,
227-
content_blocks,
228-
response_id,
229-
)
226+
stop_reason = getattr(event.delta, "stop_reason", None)
227+
if stop_reason is not None:
228+
finish_reasons = [stop_reason]
229+
230+
return (model, usage, content_blocks, response_id, finish_reasons)
230231

231232
return (
232233
model,
233234
usage,
234235
content_blocks,
235236
response_id,
237+
finish_reasons,
236238
)
237239

238240

@@ -411,6 +413,7 @@ def _wrap_synchronous_message_iterator(
411413
usage = _RecordedUsage()
412414
content_blocks: "list[str]" = []
413415
response_id = None
416+
finish_reasons = None
414417

415418
try:
416419
for event in iterator:
@@ -430,12 +433,15 @@ def _wrap_synchronous_message_iterator(
430433
yield event
431434
continue
432435

433-
(model, usage, content_blocks, response_id) = _collect_ai_data(
434-
event,
435-
model,
436-
usage,
437-
content_blocks,
438-
response_id,
436+
(model, usage, content_blocks, response_id, finish_reasons) = (
437+
_collect_ai_data(
438+
event,
439+
model,
440+
usage,
441+
content_blocks,
442+
response_id,
443+
finish_reasons,
444+
)
439445
)
440446
yield event
441447
finally:
@@ -459,6 +465,7 @@ def _wrap_synchronous_message_iterator(
459465
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
460466
finish_span=True,
461467
response_id=response_id,
468+
finish_reasons=finish_reasons,
462469
)
463470

464471

@@ -475,6 +482,7 @@ async def _wrap_asynchronous_message_iterator(
475482
usage = _RecordedUsage()
476483
content_blocks: "list[str]" = []
477484
response_id = None
485+
finish_reasons = None
478486

479487
try:
480488
async for event in iterator:
@@ -499,12 +507,14 @@ async def _wrap_asynchronous_message_iterator(
499507
usage,
500508
content_blocks,
501509
response_id,
510+
finish_reasons,
502511
) = _collect_ai_data(
503512
event,
504513
model,
505514
usage,
506515
content_blocks,
507516
response_id,
517+
finish_reasons,
508518
)
509519
yield event
510520
finally:
@@ -528,6 +538,7 @@ async def _wrap_asynchronous_message_iterator(
528538
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
529539
finish_span=True,
530540
response_id=response_id,
541+
finish_reasons=finish_reasons,
531542
)
532543

533544

@@ -542,12 +553,15 @@ def _set_output_data(
542553
content_blocks: "list[Any]",
543554
finish_span: bool = False,
544555
response_id: "str | None" = None,
556+
finish_reasons: "list[str] | None" = None,
545557
) -> None:
546558
"""
547559
Set output data for the span based on the AI response."""
548560
span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
549561
if response_id is not None:
550562
span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id)
563+
if finish_reasons is not None:
564+
span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons)
551565
if should_send_default_pii() and integration.include_prompts:
552566
output_messages: "dict[str, list[Any]]" = {
553567
"response": [],
@@ -641,6 +655,10 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
641655
elif hasattr(content_block, "text"):
642656
content_blocks.append({"type": "text", "text": content_block.text})
643657

658+
finish_reasons = None
659+
if getattr(result, "stop_reason", None) is not None:
660+
finish_reasons = [getattr(result, "stop_reason")]
661+
644662
_set_output_data(
645663
span=span,
646664
integration=integration,
@@ -652,6 +670,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
652670
content_blocks=content_blocks,
653671
finish_span=True,
654672
response_id=getattr(result, "id", None),
673+
finish_reasons=finish_reasons,
655674
)
656675
else:
657676
span.set_data("unknown_response", True)

tests/integrations/anthropic/test_anthropic.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ async def __call__(self, *args, **kwargs):
6363
role="assistant",
6464
content=[TextBlock(type="text", text="Hi, I'm Claude.")],
6565
type="message",
66+
stop_reason="end_turn",
6667
usage=Usage(input_tokens=10, output_tokens=20),
6768
)
6869

@@ -136,6 +137,7 @@ def test_nonstreaming_create_message(
136137
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
137138
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
138139
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
140+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
139141

140142

141143
@pytest.mark.asyncio
@@ -258,7 +260,7 @@ def test_streaming_create_message(
258260
),
259261
ContentBlockStopEvent(type="content_block_stop", index=0),
260262
MessageDeltaEvent(
261-
delta=Delta(),
263+
delta=Delta(stop_reason="max_tokens"),
262264
usage=MessageDeltaUsage(output_tokens=10),
263265
type="message_delta",
264266
),
@@ -323,6 +325,7 @@ def test_streaming_create_message(
323325
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
324326
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
325327
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
328+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
326329

327330

328331
@pytest.mark.parametrize(
@@ -373,7 +376,7 @@ def test_stream_messages(
373376
),
374377
ContentBlockStopEvent(type="content_block_stop", index=0),
375378
MessageDeltaEvent(
376-
delta=Delta(),
379+
delta=Delta(stop_reason="max_tokens"),
377380
usage=MessageDeltaUsage(output_tokens=10),
378381
type="message_delta",
379382
),
@@ -439,6 +442,7 @@ def test_stream_messages(
439442
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
440443
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
441444
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
445+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
442446

443447

444448
@pytest.mark.asyncio
@@ -492,7 +496,7 @@ async def test_streaming_create_message_async(
492496
),
493497
ContentBlockStopEvent(type="content_block_stop", index=0),
494498
MessageDeltaEvent(
495-
delta=Delta(),
499+
delta=Delta(stop_reason="max_tokens"),
496500
usage=MessageDeltaUsage(output_tokens=10),
497501
type="message_delta",
498502
),
@@ -504,6 +508,7 @@ async def test_streaming_create_message_async(
504508
sentry_init(
505509
integrations=[AnthropicIntegration(include_prompts=include_prompts)],
506510
traces_sample_rate=1.0,
511+
default_integrations=False,
507512
send_default_pii=send_default_pii,
508513
)
509514
events = capture_events()
@@ -559,6 +564,7 @@ async def test_streaming_create_message_async(
559564
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
560565
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
561566
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
567+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
562568

563569

564570
@pytest.mark.asyncio
@@ -1471,14 +1477,15 @@ def test_collect_ai_data_with_input_json_delta():
14711477

14721478
content_blocks = []
14731479

1474-
model, new_usage, new_content_blocks, response_id = _collect_ai_data(
1475-
event, model, usage, content_blocks
1480+
model, new_usage, new_content_blocks, response_id, finish_reasons = (
1481+
_collect_ai_data(event, model, usage, content_blocks)
14761482
)
14771483
assert model is None
14781484
assert new_usage.input_tokens == usage.input_tokens
14791485
assert new_usage.output_tokens == usage.output_tokens
14801486
assert new_content_blocks == ["test"]
14811487
assert response_id is None
1488+
assert finish_reasons is None
14821489

14831490

14841491
@pytest.mark.skipif(
@@ -1766,6 +1773,7 @@ def test_nonstreaming_create_message_with_system_prompt(
17661773
assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
17671774
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
17681775
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
1776+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
17691777

17701778

17711779
@pytest.mark.asyncio
@@ -1851,6 +1859,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async(
18511859
assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
18521860
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
18531861
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
1862+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
18541863

18551864

18561865
@pytest.mark.parametrize(

0 commit comments

Comments (0)