Merge branch 'master' into ivana/span-first-10-random-improvements

sentrivana · sentrivana · commit 8a31eebebfa0 · 2026-03-10T12:06:45.000+01:00
diff --git a/.github/workflows/ai-integration-test.yml b/.github/workflows/ai-integration-test.yml
@@ -34,7 +34,7 @@ jobs:
           token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Run Python SDK Tests
-        uses: getsentry/testing-ai-sdk-integrations@121da677853244cedfe11e95184b2b431af102eb
+        uses: getsentry/testing-ai-sdk-integrations@285c012e522f241581534dfc89bd99ec3b1da4f6
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
diff --git a/scripts/populate_tox/package_dependencies.jsonl b/scripts/populate_tox/package_dependencies.jsonl
diff --git a/scripts/populate_tox/populate_tox.py b/scripts/populate_tox/populate_tox.py
@@ -4,6 +4,7 @@
 See scripts/populate_tox/README.md for more info.
 """
 
+import re
 import functools
 import hashlib
 import json
@@ -872,7 +873,10 @@ def get_last_updated() -> Optional[datetime]:
 
 
 def _normalize_name(package: str) -> str:
-    return package.lower().replace("-", "_")
+    # From https://peps.python.org/pep-0503/#normalized-names
+    # but normalizing to underscores instead of hyphens since tox-formatted packages
+    # use underscores.
+    return re.sub(r"[-_.]+", "_", package).lower()
 
 
 def _extract_wheel_info_to_cache(wheel: dict):
diff --git a/scripts/populate_tox/releases.jsonl b/scripts/populate_tox/releases.jsonl
diff --git a/sentry_sdk/integrations/httpx.py b/sentry_sdk/integrations/httpx.py
@@ -23,7 +23,7 @@
 
 
 try:
-    from httpx import AsyncClient, Client, Request, Response  # type: ignore
+    from httpx import AsyncClient, Client, Request, Response
 except ImportError:
     raise DidNotEnable("httpx is not installed")
 
@@ -94,7 +94,7 @@ def send(self: "Client", request: "Request", **kwargs: "Any") -> "Response":
 
         return rv
 
-    Client.send = send
+    Client.send = send  # type: ignore
 
 
 def _install_httpx_async_client() -> None:
@@ -150,4 +150,4 @@ async def send(
 
         return rv
 
-    AsyncClient.send = send
+    AsyncClient.send = send  # type: ignore
diff --git a/sentry_sdk/integrations/pydantic_ai/patches/tools.py b/sentry_sdk/integrations/pydantic_ai/patches/tools.py
@@ -50,6 +50,7 @@ async def wrapped_execute_tool_call(
         call = validated.call
         name = call.tool_name
         tool = self.tools.get(name) if self.tools else None
+        selected_tool_definition = getattr(tool, "tool_def", None)
 
         # Determine tool type by checking tool.toolset
         tool_type = "function"
@@ -73,6 +74,7 @@ async def wrapped_execute_tool_call(
                     args_dict,
                     agent,
                     tool_type=tool_type,
+                    tool_definition=selected_tool_definition,
                 ) as span:
                     try:
                         result = await original_execute_tool_call(
@@ -127,6 +129,7 @@ async def wrapped_call_tool(
         # Extract tool info before calling original
         name = call.tool_name
         tool = self.tools.get(name) if self.tools else None
+        selected_tool_definition = getattr(tool, "tool_def", None)
 
         # Determine tool type by checking tool.toolset
         tool_type = "function"  # default
@@ -150,6 +153,7 @@ async def wrapped_call_tool(
                     args_dict,
                     agent,
                     tool_type=tool_type,
+                    tool_definition=selected_tool_definition,
                 ) as span:
                     try:
                         result = await original_call_tool(
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py b/sentry_sdk/integrations/pydantic_ai/spans/execute_tool.py
@@ -9,10 +9,15 @@
 
 if TYPE_CHECKING:
     from typing import Any, Optional
+    from pydantic_ai._tool_manager import ToolDefinition  # type: ignore
 
 
 def execute_tool_span(
-    tool_name: str, tool_args: "Any", agent: "Any", tool_type: str = "function"
+    tool_name: str,
+    tool_args: "Any",
+    agent: "Any",
+    tool_type: str = "function",
+    tool_definition: "Optional[ToolDefinition]" = None,
 ) -> "sentry_sdk.tracing.Span":
     """Create a span for tool execution.
 
@@ -21,6 +26,7 @@ def execute_tool_span(
         tool_args: The arguments passed to the tool
         agent: The agent executing the tool
         tool_type: The type of tool ("function" for regular tools, "mcp" for MCP services)
+        tool_definition: The definition of the tool, if available
     """
     span = sentry_sdk.start_span(
         op=OP.GEN_AI_EXECUTE_TOOL,
@@ -32,6 +38,12 @@ def execute_tool_span(
     span.set_data(SPANDATA.GEN_AI_TOOL_TYPE, tool_type)
     span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool_name)
 
+    if tool_definition is not None and hasattr(tool_definition, "description"):
+        span.set_data(
+            SPANDATA.GEN_AI_TOOL_DESCRIPTION,
+            tool_definition.description,
+        )
+
     _set_agent_data(span, agent)
 
     if _should_send_prompts() and tool_args is not None:
diff --git a/sentry_sdk/traces.py b/sentry_sdk/traces.py
@@ -288,8 +288,12 @@ def __enter__(self) -> "StreamedSpan":
     def __exit__(
         self, ty: "Optional[Any]", value: "Optional[Any]", tb: "Optional[Any]"
     ) -> None:
+        if self._timestamp is not None:
+            # This span is already finished, ignore
+            return
+
         if value is not None and should_be_treated_as_error(ty, value):
-            self.status = SpanStatus.ERROR
+            self.status = SpanStatus.ERROR.value
 
         self._end()
 
@@ -329,7 +333,9 @@ def _end(self, end_timestamp: "Optional[Union[float, datetime]]" = None) -> None
                 del self._previous_span_on_scope
                 self._scope.span = old_span
 
-        # Set attributes from the segment
+        # Set attributes from the segment. These are set on span end on purpose
+        # so that we have the best chance to capture the segment's final name
+        # (since it might change during its lifetime)
         self.set_attribute("sentry.segment.id", self._segment.span_id)
         self.set_attribute("sentry.segment.name", self._segment.name)
 
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
@@ -1220,55 +1220,57 @@ def simple_test_tool(message: str) -> str:
     )
     tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
 
-    available_tools = [
-        {
-            "name": "simple_test_tool",
-            "description": "A simple tool",
-            "params_json_schema": {
-                "properties": {"message": {"title": "Message", "type": "string"}},
-                "required": ["message"],
-                "title": "simple_test_tool_args",
-                "type": "object",
-                "additionalProperties": False,
-            },
-            "on_invoke_tool": "<function agents.tool.function_tool.<locals>._create_function_tool.<locals>._on_invoke_tool>",
-            "strict_json_schema": True,
-            "is_enabled": True,
-        }
-    ]
+    available_tool = {
+        "name": "simple_test_tool",
+        "description": "A simple tool",
+        "params_json_schema": {
+            "properties": {"message": {"title": "Message", "type": "string"}},
+            "required": ["message"],
+            "title": "simple_test_tool_args",
+            "type": "object",
+            "additionalProperties": False,
+        },
+        "on_invoke_tool": mock.ANY,
+        "strict_json_schema": True,
+        "is_enabled": True,
+    }
+
     if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3):
-        available_tools[0].update(
+        available_tool.update(
             {"tool_input_guardrails": None, "tool_output_guardrails": None}
         )
 
     if parse_version(OPENAI_AGENTS_VERSION) >= (
         0,
         8,
     ):
-        available_tools[0]["needs_approval"] = False
+        available_tool["needs_approval"] = False
     if parse_version(OPENAI_AGENTS_VERSION) >= (
         0,
         9,
         0,
     ):
-        available_tools[0].update(
+        available_tool.update(
             {
                 "timeout_seconds": None,
                 "timeout_behavior": "error_as_result",
                 "timeout_error_function": None,
             }
         )
 
-    available_tools = safe_serialize(available_tools)
-
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
 
     assert agent_span["description"] == "invoke_agent test_agent"
     assert agent_span["origin"] == "auto.ai.openai_agents"
     assert agent_span["data"]["gen_ai.agent.name"] == "test_agent"
     assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent"
-    assert agent_span["data"]["gen_ai.request.available_tools"] == available_tools
+
+    agent_span_available_tool = json.loads(
+        agent_span["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(agent_span_available_tool[k] == v for k, v in available_tool.items())
+
     assert agent_span["data"]["gen_ai.request.max_tokens"] == 100
     assert agent_span["data"]["gen_ai.request.model"] == "gpt-4"
     assert agent_span["data"]["gen_ai.request.temperature"] == 0.7
@@ -1279,7 +1281,14 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat"
     assert ai_client_span1["data"]["gen_ai.system"] == "openai"
     assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
-    assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools
+
+    ai_client_span1_available_tool = json.loads(
+        ai_client_span1["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(
+        ai_client_span1_available_tool[k] == v for k, v in available_tool.items()
+    )
+
     assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
     assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
         [
@@ -1319,14 +1328,12 @@ def simple_test_tool(message: str) -> str:
     assert tool_span["description"] == "execute_tool simple_test_tool"
     assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
     assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
-    assert (
-        re.sub(
-            "<.*>(,)",
-            r"'NOT_CHECKED'\1",
-            agent_span["data"]["gen_ai.request.available_tools"],
-        )
-        == available_tools
-    )
+
+    tool_span_available_tool = json.loads(
+        tool_span["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(tool_span_available_tool[k] == v for k, v in available_tool.items())
+
     assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
     assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
     assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
@@ -1341,14 +1348,14 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span2["description"] == "chat gpt-4"
     assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
     assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
-    assert (
-        re.sub(
-            "<.*>(,)",
-            r"'NOT_CHECKED'\1",
-            agent_span["data"]["gen_ai.request.available_tools"],
-        )
-        == available_tools
+
+    ai_client_span2_available_tool = json.loads(
+        ai_client_span2["data"]["gen_ai.request.available_tools"]
+    )[0]
+    assert all(
+        ai_client_span2_available_tool[k] == v for k, v in available_tool.items()
     )
+
     assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
     assert ai_client_span2["data"]["gen_ai.request.messages"] == safe_serialize(
         [
@@ -1425,6 +1432,15 @@ async def test_hosted_mcp_tool_propagation_header_streamed(
         "/responses",
     )
 
+    # openai-agents calls with_streaming_response() if available starting with
+    # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0.
+    # When using with_streaming_response() the header set below changes the response type:
+    # https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_response.py#L67.
+    if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr(
+        agent_with_tool.model._client.responses, "with_streaming_response"
+    ):
+        request.headers["X-Stainless-Raw-Response"] = "stream"
+
     response = httpx.Response(
         200,
         request=request,
@@ -3178,6 +3194,15 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterat
         "/responses",
     )
 
+    # openai-agents calls with_streaming_response() if available starting with
+    # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0.
+    # When using with_streaming_response() the header set below changes the response type:
+    # https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_response.py#L67.
+    if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr(
+        agent_with_tool.model._client.responses, "with_streaming_response"
+    ):
+        request.headers["X-Stainless-Raw-Response"] = "stream"
+
     response = httpx.Response(
         200,
         request=request,
diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py
@@ -16,7 +16,6 @@
 from pydantic_ai import Agent
 from pydantic_ai.messages import BinaryContent, UserPromptPart
 from pydantic_ai.usage import RequestUsage
-from pydantic_ai.models.test import TestModel
 from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior
 
 
@@ -2386,7 +2385,9 @@ async def test_execute_tool_span_with_mcp_type(sentry_init, capture_events):
     Test execute_tool span with MCP tool type.
     """
     import sentry_sdk
-    from sentry_sdk.integrations.pydantic_ai.spans.execute_tool import execute_tool_span
+    from sentry_sdk.integrations.pydantic_ai.spans.execute_tool import (
+        execute_tool_span,
+    )
 
     sentry_init(
         integrations=[PydanticAIIntegration()],
@@ -2794,3 +2795,42 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events):
     (span_data,) = event["spans"]
     assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
     assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
+
+
+@pytest.mark.asyncio
+async def test_tool_description_in_execute_tool_span(sentry_init, capture_events):
+    """
+    Test that tool description from the tool's docstring is included in execute_tool spans.
+    """
+    agent = Agent(
+        "test",
+        name="test_agent",
+        system_prompt="You are a helpful test assistant.",
+    )
+
+    @agent.tool_plain
+    def multiply_numbers(a: int, b: int) -> int:
+        """Multiply two numbers and return the product."""
+        return a * b
+
+    sentry_init(
+        integrations=[PydanticAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    events = capture_events()
+
+    result = await agent.run("What is 5 times 3?")
+    assert result is not None
+
+    (transaction,) = events
+    spans = transaction["spans"]
+
+    tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"]
+    assert len(tool_spans) >= 1
+
+    tool_span = tool_spans[0]
+    assert tool_span["data"]["gen_ai.tool.name"] == "multiply_numbers"
+    assert SPANDATA.GEN_AI_TOOL_DESCRIPTION in tool_span["data"]
+    assert "Multiply two numbers" in tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION]
diff --git a/tests/integrations/pyramid/test_pyramid.py b/tests/integrations/pyramid/test_pyramid.py
@@ -6,6 +6,7 @@
 import pytest
 from pyramid.authorization import ACLAuthorizationPolicy
 from pyramid.response import Response
+from packaging.version import Version
 from werkzeug.test import Client
 
 from sentry_sdk import capture_message, add_breadcrumb
@@ -18,7 +19,7 @@
 try:
     from importlib.metadata import version
 
-    PYRAMID_VERSION = tuple(map(int, version("pyramid").split(".")))
+    PYRAMID_VERSION = Version(version("pyramid")).release
 
 except ImportError:
     # < py3.8
diff --git a/tox.ini b/tox.ini