Merge branch 'master' into feat/span-first

sentrivana · sentrivana · commit 6ce081b05364 · 2026-03-16T14:22:55.000+01:00
diff --git a/.github/workflows/ai-integration-test.yml b/.github/workflows/ai-integration-test.yml
@@ -34,7 +34,7 @@ jobs:
           token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Run Python SDK Tests
-        uses: getsentry/testing-ai-sdk-integrations@285c012e522f241581534dfc89bd99ec3b1da4f6
+        uses: getsentry/testing-ai-sdk-integrations@6b1f51ec8af03e19087df452b426aa7e46d2b20a
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -22,7 +22,7 @@ jobs:
     steps:
     - name: Get auth token
       id: token
-      uses: actions/create-github-app-token@29824e69f54612133e76f7eaac726eef6c875baf # v2
+      uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v2
       with:
         app-id: ${{ vars.SENTRY_RELEASE_BOT_CLIENT_ID }}
         private-key: ${{ secrets.SENTRY_RELEASE_BOT_PRIVATE_KEY }}
diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
@@ -556,10 +556,25 @@ def _truncate_single_message_content_if_present(
         return message
     content = message["content"]
 
-    if not isinstance(content, str) or len(content) <= max_chars:
+    if isinstance(content, str):
+        if len(content) <= max_chars:
+            return message
+        message["content"] = content[:max_chars] + "..."
+        return message
+
+    if isinstance(content, list):
+        remaining = max_chars
+        for item in content:
+            if isinstance(item, dict) and "text" in item:
+                text = item["text"]
+                if isinstance(text, str):
+                    if len(text) > remaining:
+                        item["text"] = text[:remaining] + "..."
+                        remaining = 0
+                    else:
+                        remaining -= len(text)
         return message
 
-    message["content"] = content[:max_chars] + "..."
     return message
 
 
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
@@ -554,7 +554,8 @@ def on_llm_end(
                     finish_reason = generation.generation_info.get("finish_reason")
                     if finish_reason is not None:
                         span.set_data(
-                            SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason
+                            SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS,
+                            [finish_reason],
                         )
                 except AttributeError:
                     pass
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
@@ -297,6 +297,12 @@ def test_langchain_agent(
             f"and include_prompts={include_prompts}"
         )
 
+    # Verify finish_reasons is always an array of strings
+    assert chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == [
+        "function_call"
+    ]
+    assert chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"]
+
     # Verify that available tools are always recorded regardless of PII settings
     for chat_span in chat_spans:
         span_data = chat_span.get("data", {})
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
@@ -312,6 +312,105 @@ def test_single_message_truncation(self):
         assert user_msgs[0]["content"].endswith("...")
         assert len(user_msgs[0]["content"]) < len(large_content)
 
+    def test_single_message_truncation_list_content_exceeds_limit(self):
+        """Test that list-based content (e.g. pydantic-ai multimodal format) is truncated."""
+        large_text = "A" * 200_000
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": large_text},
+                ],
+            },
+        ]
+
+        result, _ = truncate_messages_by_size(messages)
+
+        text_part = result[0]["content"][0]
+        assert text_part["text"].endswith("...")
+        assert len(text_part["text"]) == MAX_SINGLE_MESSAGE_CONTENT_CHARS + 3
+
+    def test_single_message_truncation_list_content_under_limit(self):
+        """Test that small text parts are preserved when non-text parts push the size over the byte limit."""
+        short_text = "Hello world"
+        large_data_url = "data:image/png;base64," + "A" * 200_000
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": short_text},
+                    {"type": "image_url", "image_url": {"url": large_data_url}},
+                ],
+            },
+        ]
+
+        result, _ = truncate_messages_by_size(messages)
+
+        text_part = result[0]["content"][0]
+        assert text_part["text"] == short_text
+
+    def test_single_message_truncation_list_content_mixed_parts(self):
+        """Test truncation with mixed content types (text + non-text parts)."""
+        max_chars = 50
+        large_data_url = "data:image/png;base64," + "X" * 200_000
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "A" * 30},
+                    {"type": "image_url", "image_url": {"url": large_data_url}},
+                    {"type": "text", "text": "B" * 30},
+                ],
+            },
+        ]
+
+        result, _ = truncate_messages_by_size(
+            messages, max_single_message_chars=max_chars
+        )
+
+        parts = result[0]["content"]
+        # First text part uses 30 chars of the 50 budget
+        assert parts[0]["text"] == "A" * 30
+        # Image part is unchanged
+        assert parts[1]["type"] == "image_url"
+        # Second text part is truncated to remaining 20 chars
+        assert parts[2]["text"] == "B" * 20 + "..."
+
+    def test_single_message_truncation_list_content_multiple_text_parts(self):
+        """Test that budget is distributed across multiple text parts."""
+        max_chars = 10
+        # Two large text parts that together exceed the 128KB byte limit
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "A" * 100_000},
+                    {"type": "text", "text": "B" * 100_000},
+                ],
+            },
+        ]
+
+        result, _ = truncate_messages_by_size(
+            messages, max_single_message_chars=max_chars
+        )
+
+        parts = result[0]["content"]
+        # First part is truncated to the full budget
+        assert parts[0]["text"] == "A" * 10 + "..."
+        # Second part gets truncated to 0 chars + ellipsis
+        assert parts[1]["text"] == "..."
+
+    @pytest.mark.parametrize("content", [None, 42, 3.14, True])
+    def test_single_message_truncation_non_str_non_list_content(self, content):
+        messages = [{"role": "user", "content": content}]
+
+        result, _ = truncate_messages_by_size(messages)
+
+        assert result[0]["content"] is content
+
 
 class TestTruncateAndAnnotateMessages:
     def test_only_keeps_last_message(self, sample_messages):