Skip to content

Commit f6b8909

Browse files
merge master
2 parents a3cc18f + b905cd3 commit f6b8909

File tree: 4 files changed (+48 −13 lines)

sentry_sdk/integrations/anthropic.py — 27 additions & 8 deletions

@@ -190,7 +190,8 @@ def _collect_ai_data(
     usage: "_RecordedUsage",
     content_blocks: "list[str]",
     response_id: "str | None" = None,
-) -> "tuple[str | None, _RecordedUsage, list[str], str | None]":
+    finish_reason: "str | None" = None,
+) -> "tuple[str | None, _RecordedUsage, list[str], str | None, str | None]":
     """
     Collect model information, token usage, and collect content blocks from the AI streaming response.
     """
@@ -228,6 +229,7 @@ def _collect_ai_data(
             usage,
             content_blocks,
             response_id,
+            finish_reason,
         )

         # Counterintuitive, but message_delta contains cumulative token counts :)
@@ -252,18 +254,17 @@ def _collect_ai_data(
         usage.cache_read_input_tokens = cache_read_input_tokens
         # TODO: Record event.usage.server_tool_use

-        return (
-            model,
-            usage,
-            content_blocks,
-            response_id,
-        )
+        if event.delta.stop_reason is not None:
+            finish_reason = event.delta.stop_reason
+
+        return (model, usage, content_blocks, response_id, finish_reason)

     return (
         model,
         usage,
         content_blocks,
         response_id,
+        finish_reason,
     )

@@ -472,6 +473,7 @@ def _wrap_synchronous_message_iterator(
             stream._usage,
             stream._content_blocks,
             stream._response_id,
+            stream._finish_reason,
         )
         del stream._span

@@ -489,6 +491,7 @@ async def _wrap_asynchronous_message_iterator(
     usage = _RecordedUsage()
     content_blocks: "list[str]" = []
     response_id = None
+    finish_reason = None

     try:
         async for event in iterator:
@@ -513,12 +516,14 @@ async def _wrap_asynchronous_message_iterator(
                 usage,
                 content_blocks,
                 response_id,
+                finish_reason,
             ) = _collect_ai_data(
                 event,
                 model,
                 usage,
                 content_blocks,
                 response_id,
+                finish_reason,
             )
             yield event
     finally:
@@ -542,6 +547,7 @@ async def _wrap_asynchronous_message_iterator(
             content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
             finish_span=True,
             response_id=response_id,
+            finish_reason=finish_reason,
         )

@@ -556,12 +562,15 @@ def _set_output_data(
     content_blocks: "list[Any]",
     finish_span: bool = False,
     response_id: "str | None" = None,
+    finish_reason: "str | None" = None,
 ) -> None:
     """
     Set output data for the span based on the AI response."""
     span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
     if response_id is not None:
         span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id)
+    if finish_reason is not None:
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason])
     if should_send_default_pii() and integration.include_prompts:
         output_messages: "dict[str, list[Any]]" = {
             "response": [],

@@ -665,6 +674,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any":
                 content_blocks=content_blocks,
                 finish_span=True,
                 response_id=getattr(result, "id", None),
+                finish_reason=getattr(result, "stop_reason", None),
             )
         else:
             span.set_data("unknown_response", True)

@@ -720,6 +730,7 @@ def _initialize_data_accumulation_state(stream: "Union[Stream, MessageStream]")
     stream._usage = _RecordedUsage()
     stream._content_blocks = []
     stream._response_id = None
+    stream._finish_reason = None


 def _accumulate_event_data(
@@ -729,18 +740,20 @@ def _accumulate_event_data(
     """
     Update accumulated output from a single stream event.
     """
-    (model, usage, content_blocks, response_id) = _collect_ai_data(
+    (model, usage, content_blocks, response_id, finish_reason) = _collect_ai_data(
         event,
         stream._model,
         stream._usage,
         stream._content_blocks,
         stream._response_id,
+        stream._finish_reason,
     )

     stream._model = model
     stream._usage = usage
     stream._content_blocks = content_blocks
     stream._response_id = response_id
+    stream._finish_reason = finish_reason


 def _finish_streaming_span(
@@ -750,6 +763,7 @@ def _finish_streaming_span(
     usage: "_RecordedUsage",
     content_blocks: "list[str]",
     response_id: "Optional[str]",
+    finish_reason: "Optional[str]",
 ) -> None:
     """
     Set output attributes on the AI Client Span and end the span.
@@ -773,6 +787,7 @@ def _finish_streaming_span(
         content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
         finish_span=True,
         response_id=response_id,
+        finish_reason=finish_reason,
     )

@@ -822,6 +837,7 @@ def __next__(self: "Stream") -> "RawMessageStreamEvent":
             self._usage,
             self._content_blocks,
             self._response_id,
+            self._finish_reason,
         )
         del self._span
         reraise(*exc_info)
@@ -854,6 +870,7 @@ def close(self: "Stream") -> None:
             self._usage,
             self._content_blocks,
             self._response_id,
+            self._finish_reason,
         )
         del self._span

@@ -1023,6 +1040,7 @@ def __next__(self: "MessageStream") -> "MessageStreamEvent":
             self._usage,
             self._content_blocks,
             self._response_id,
+            self._finish_reason,
         )
         del self._span
         reraise(*exc_info)
@@ -1055,6 +1073,7 @@ def close(self: "MessageStream") -> None:
             self._usage,
             self._content_blocks,
             self._response_id,
+            self._finish_reason,
         )
         del self._span

sentry_sdk/integrations/langchain.py — 2 additions & 1 deletion

@@ -554,7 +554,8 @@ def on_llm_end(
             finish_reason = generation.generation_info.get("finish_reason")
             if finish_reason is not None:
                 span.set_data(
-                    SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason
+                    SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS,
+                    [finish_reason],
                 )
         except AttributeError:
             pass

tests/integrations/anthropic/test_anthropic.py — 13 additions & 4 deletions

@@ -69,6 +69,7 @@ async def __call__(self, *args, **kwargs):
     role="assistant",
     content=[TextBlock(type="text", text="Hi, I'm Claude.")],
     type="message",
+    stop_reason="end_turn",
     usage=Usage(input_tokens=10, output_tokens=20),
 )

@@ -142,6 +143,7 @@ def test_nonstreaming_create_message(
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]


 @pytest.mark.asyncio
@@ -264,7 +266,7 @@ def test_streaming_create_message(
         ),
         ContentBlockStopEvent(type="content_block_stop", index=0),
         MessageDeltaEvent(
-            delta=Delta(),
+            delta=Delta(stop_reason="max_tokens"),
             usage=MessageDeltaUsage(output_tokens=10),
             type="message_delta",
         ),
@@ -329,6 +331,7 @@ def test_streaming_create_message(
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]


 def test_streaming_create_message_next_consumption(
@@ -580,7 +583,7 @@ def test_stream_messages(
         ),
         ContentBlockStopEvent(type="content_block_stop", index=0),
         MessageDeltaEvent(
-            delta=Delta(),
+            delta=Delta(stop_reason="max_tokens"),
             usage=MessageDeltaUsage(output_tokens=10),
             type="message_delta",
         ),
@@ -646,6 +649,7 @@ def test_stream_messages(
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]


 def test_stream_messages_next_consumption(
@@ -905,7 +909,7 @@ async def test_streaming_create_message_async(
         ),
         ContentBlockStopEvent(type="content_block_stop", index=0),
         MessageDeltaEvent(
-            delta=Delta(),
+            delta=Delta(stop_reason="max_tokens"),
             usage=MessageDeltaUsage(output_tokens=10),
             type="message_delta",
         ),
@@ -917,6 +921,7 @@ async def test_streaming_create_message_async(
     sentry_init(
         integrations=[AnthropicIntegration(include_prompts=include_prompts)],
         traces_sample_rate=1.0,
+        default_integrations=False,
         send_default_pii=send_default_pii,
     )
     events = capture_events()
@@ -972,6 +977,7 @@ async def test_streaming_create_message_async(
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL"
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"]


 @pytest.mark.asyncio
@@ -1884,14 +1890,15 @@ def test_collect_ai_data_with_input_json_delta():

     content_blocks = []

-    model, new_usage, new_content_blocks, response_id = _collect_ai_data(
+    model, new_usage, new_content_blocks, response_id, finish_reason = _collect_ai_data(
         event, model, usage, content_blocks
     )
     assert model is None
     assert new_usage.input_tokens == usage.input_tokens
     assert new_usage.output_tokens == usage.output_tokens
     assert new_content_blocks == ["test"]
     assert response_id is None
+    assert finish_reason is None


 @pytest.mark.skipif(
@@ -2179,6 +2186,7 @@ def test_nonstreaming_create_message_with_system_prompt(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]


 @pytest.mark.asyncio
@@ -2264,6 +2272,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]


 @pytest.mark.parametrize(

tests/integrations/langchain/test_langchain.py — 6 additions & 0 deletions

@@ -297,6 +297,12 @@ def test_langchain_agent(
         f"and include_prompts={include_prompts}"
     )

+    # Verify finish_reasons is always an array of strings
+    assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [
+        "function_call"
+    ]
+    assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"]
+
     # Verify that available tools are always recorded regardless of PII settings
     for chat_span in chat_spans:
         span_data = chat_span.get("data", {})

0 commit comments

Comments
 (0)