Skip to content

Commit 9998646

Browse files
ericapisani and claude
committed
feat(anthropic): Record finish reasons in AI monitoring spans
Capture the stop_reason from Anthropic API responses and set it as GEN_AI_RESPONSE_FINISH_REASONS span data. Works for both streaming (via MessageDeltaEvent) and non-streaming responses.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 6345af9 commit 9998646

File tree

2 files changed

+46
-18
lines changed

2 files changed

+46
-18
lines changed

sentry_sdk/integrations/anthropic.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ def _collect_ai_data(
159159
usage: "_RecordedUsage",
160160
content_blocks: "list[str]",
161161
response_id: "str | None" = None,
162-
) -> "tuple[str | None, _RecordedUsage, list[str], str | None]":
162+
finish_reasons: "list[str] | None" = None,
163+
) -> "tuple[str | None, _RecordedUsage, list[str], str | None, list[str] | None]":
163164
"""
164165
Collect model information, token usage, and collect content blocks from the AI streaming response.
165166
"""
@@ -197,6 +198,7 @@ def _collect_ai_data(
197198
usage,
198199
content_blocks,
199200
response_id,
201+
finish_reasons,
200202
)
201203

202204
# Counterintuitive, but message_delta contains cumulative token counts :)
@@ -221,18 +223,18 @@ def _collect_ai_data(
221223
usage.cache_read_input_tokens = cache_read_input_tokens
222224
# TODO: Record event.usage.server_tool_use
223225

224-
return (
225-
model,
226-
usage,
227-
content_blocks,
228-
response_id,
229-
)
226+
stop_reason = getattr(event.delta, "stop_reason", None)
227+
if stop_reason is not None:
228+
finish_reasons = [stop_reason]
229+
230+
return (model, usage, content_blocks, response_id, finish_reasons)
230231

231232
return (
232233
model,
233234
usage,
234235
content_blocks,
235236
response_id,
237+
finish_reasons,
236238
)
237239

238240

@@ -411,6 +413,7 @@ def _wrap_synchronous_message_iterator(
411413
usage = _RecordedUsage()
412414
content_blocks: "list[str]" = []
413415
response_id = None
416+
finish_reasons = None
414417

415418
try:
416419
for event in iterator:
@@ -430,12 +433,15 @@ def _wrap_synchronous_message_iterator(
430433
yield event
431434
continue
432435

433-
(model, usage, content_blocks, response_id) = _collect_ai_data(
434-
event,
435-
model,
436-
usage,
437-
content_blocks,
438-
response_id,
436+
(model, usage, content_blocks, response_id, finish_reasons) = (
437+
_collect_ai_data(
438+
event,
439+
model,
440+
usage,
441+
content_blocks,
442+
response_id,
443+
finish_reasons,
444+
)
439445
)
440446
yield event
441447
finally:
@@ -459,6 +465,7 @@ def _wrap_synchronous_message_iterator(
459465
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
460466
finish_span=True,
461467
response_id=response_id,
468+
finish_reasons=finish_reasons,
462469
)
463470

464471

@@ -475,6 +482,7 @@ async def _wrap_asynchronous_message_iterator(
475482
usage = _RecordedUsage()
476483
content_blocks: "list[str]" = []
477484
response_id = None
485+
finish_reasons = None
478486

479487
try:
480488
async for event in iterator:
@@ -499,12 +507,14 @@ async def _wrap_asynchronous_message_iterator(
499507
usage,
500508
content_blocks,
501509
response_id,
510+
finish_reasons,
502511
) = _collect_ai_data(
503512
event,
504513
model,
505514
usage,
506515
content_blocks,
507516
response_id,
517+
finish_reasons,
508518
)
509519
yield event
510520
finally:
@@ -528,6 +538,7 @@ async def _wrap_asynchronous_message_iterator(
528538
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
529539
finish_span=True,
530540
response_id=response_id,
541+
finish_reasons=finish_reasons,
531542
)
532543

533544

@@ -542,12 +553,15 @@ def _set_output_data(
542553
content_blocks: "list[Any]",
543554
finish_span: bool = False,
544555
response_id: "str | None" = None,
556+
finish_reasons: "list[str] | None" = None,
545557
) -> None:
546558
"""
547559
Set output data for the span based on the AI response."""
548560
span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
549561
if response_id is not None:
550562
span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id)
563+
if finish_reasons is not None:
564+
span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons)
551565
if should_send_default_pii() and integration.include_prompts:
552566
output_messages: "dict[str, list[Any]]" = {
553567
"response": [],
@@ -641,6 +655,10 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
641655
elif hasattr(content_block, "text"):
642656
content_blocks.append({"type": "text", "text": content_block.text})
643657

658+
finish_reasons = None
659+
if getattr(result, "stop_reason", None) is not None:
660+
finish_reasons = [getattr(result, "stop_reason")]
661+
644662
_set_output_data(
645663
span=span,
646664
integration=integration,
@@ -652,6 +670,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
652670
content_blocks=content_blocks,
653671
finish_span=True,
654672
response_id=getattr(result, "id", None),
673+
finish_reasons=finish_reasons,
655674
)
656675
else:
657676
span.set_data("unknown_response", True)

tests/integrations/anthropic/test_anthropic.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ async def __call__(self, *args, **kwargs):
6363
role="assistant",
6464
content=[TextBlock(type="text", text="Hi, I'm Claude.")],
6565
type="message",
66+
stop_reason="end_turn",
6667
usage=Usage(input_tokens=10, output_tokens=20),
6768
)
6869

@@ -136,6 +137,7 @@ def test_nonstreaming_create_message(
136137
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
137138
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
138139
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
140+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
139141

140142

141143
@pytest.mark.asyncio
@@ -258,7 +260,7 @@ def test_streaming_create_message(
258260
),
259261
ContentBlockStopEvent(type="content_block_stop", index=0),
260262
MessageDeltaEvent(
261-
delta=Delta(),
263+
delta=Delta(stop_reason="max_tokens"),
262264
usage=MessageDeltaUsage(output_tokens=10),
263265
type="message_delta",
264266
),
@@ -323,6 +325,7 @@ def test_streaming_create_message(
323325
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
324326
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
325327
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
328+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
326329

327330

328331
@pytest.mark.parametrize(
@@ -373,7 +376,7 @@ def test_stream_messages(
373376
),
374377
ContentBlockStopEvent(type="content_block_stop", index=0),
375378
MessageDeltaEvent(
376-
delta=Delta(),
379+
delta=Delta(stop_reason="max_tokens"),
377380
usage=MessageDeltaUsage(output_tokens=10),
378381
type="message_delta",
379382
),
@@ -439,6 +442,7 @@ def test_stream_messages(
439442
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
440443
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
441444
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
445+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
442446

443447

444448
@pytest.mark.asyncio
@@ -492,7 +496,7 @@ async def test_streaming_create_message_async(
492496
),
493497
ContentBlockStopEvent(type="content_block_stop", index=0),
494498
MessageDeltaEvent(
495-
delta=Delta(),
499+
delta=Delta(stop_reason="max_tokens"),
496500
usage=MessageDeltaUsage(output_tokens=10),
497501
type="message_delta",
498502
),
@@ -504,6 +508,7 @@ async def test_streaming_create_message_async(
504508
sentry_init(
505509
integrations=[AnthropicIntegration(include_prompts=include_prompts)],
506510
traces_sample_rate=1.0,
511+
default_integrations=False,
507512
send_default_pii=send_default_pii,
508513
)
509514
events = capture_events()
@@ -559,6 +564,7 @@ async def test_streaming_create_message_async(
559564
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
560565
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
561566
assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
567+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]
562568

563569

564570
@pytest.mark.asyncio
@@ -1471,14 +1477,15 @@ def test_collect_ai_data_with_input_json_delta():
14711477

14721478
content_blocks = []
14731479

1474-
model, new_usage, new_content_blocks, response_id = _collect_ai_data(
1475-
event, model, usage, content_blocks
1480+
model, new_usage, new_content_blocks, response_id, finish_reasons = (
1481+
_collect_ai_data(event, model, usage, content_blocks)
14761482
)
14771483
assert model is None
14781484
assert new_usage.input_tokens == usage.input_tokens
14791485
assert new_usage.output_tokens == usage.output_tokens
14801486
assert new_content_blocks == ["test"]
14811487
assert response_id is None
1488+
assert finish_reasons is None
14821489

14831490

14841491
@pytest.mark.skipif(
@@ -1766,6 +1773,7 @@ def test_nonstreaming_create_message_with_system_prompt(
17661773
assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
17671774
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
17681775
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
1776+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
17691777

17701778

17711779
@pytest.mark.asyncio
@@ -1851,6 +1859,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async(
18511859
assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
18521860
assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
18531861
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
1862+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
18541863

18551864

18561865
@pytest.mark.parametrize(

0 commit comments

Comments (0)