test(openai-agents): Replace mocks with httpx in handoff tests

alexander-alderman-webb · alexander-alderman-webb · commit a85b401eaaac · 2026-03-06T14:14:19.000+01:00
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,10 +5,11 @@
 import os
 import json
 import logging
+import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, OP
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
@@ -314,6 +315,25 @@ def test_agent_custom_model():
     )
 
 
+@pytest.fixture
+def get_model_response():
+    """Return a factory wrapping a response model in a 200 ``httpx.Response``."""
+
+    def build_response(response_content):
+        # The response carries the POST /responses request it answers.
+        request = httpx.Request("POST", "/responses")
+        # Body: the model dumped to a dict, JSON-encoded, then UTF-8 bytes.
+        body = json.dumps(response_content.model_dump()).encode("utf-8")
+
+        return httpx.Response(
+            200,
+            request=request,
+            content=body,
+        )
+
+    return build_response
+
+
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
     sentry_init, capture_events, test_agent, mock_model_response
@@ -917,85 +937,120 @@ def test_agent_invocation_span_sync(
 
 
 @pytest.mark.asyncio
-async def test_handoff_span(sentry_init, capture_events, mock_usage):
+async def test_handoff_span(sentry_init, capture_events, get_model_response):
     """
     Test that handoff spans are created when agents hand off to other agents.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4-mini", openai_client=client)
+
     # Create two simple agents with a handoff relationship
     secondary_agent = agents.Agent(
         name="secondary_agent",
         instructions="You are a secondary agent.",
-        model="gpt-4o-mini",
+        model=model,
     )
 
     primary_agent = agents.Agent(
         name="primary_agent",
         instructions="You are a primary agent that hands off to secondary agent.",
-        model="gpt-4o-mini",
+        model=model,
         handoffs=[secondary_agent],
     )
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Mock two responses:
-            # 1. Primary agent calls handoff tool
-            # 2. Secondary agent provides final response
-            handoff_response = ModelResponse(
-                output=[
-                    ResponseFunctionToolCall(
-                        id="call_handoff_123",
-                        call_id="call_handoff_123",
-                        name="transfer_to_secondary_agent",
-                        type="function_call",
-                        arguments="{}",
-                    )
-                ],
-                usage=mock_usage,
-                response_id="resp_handoff_123",
-            )
-
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="I'm the specialist and I can help with that!",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=mock_usage,
-                response_id="resp_final_123",
-            )
+    handoff_response = get_model_response(
+        Response(
+            id="resp_tool_123",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_handoff_123",
+                    call_id="call_handoff_123",
+                    name="transfer_to_secondary_agent",
+                    type="function_call",
+                    arguments="{}",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [handoff_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="I'm the specialist and I can help with that!",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    with patch.object(
+        primary_agent.model._client._client,
+        "send",
+        side_effect=[handoff_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                primary_agent,
-                "Please hand off to secondary agent",
-                run_config=test_run_config,
-            )
+        result = await agents.Runner.run(
+            primary_agent,
+            "Please hand off to secondary agent",
+            run_config=test_run_config,
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
-    handoff_span = spans[2]
+    handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF)
 
     # Verify handoff span was created
     assert handoff_span is not None
@@ -1006,85 +1061,122 @@ async def test_handoff_span(sentry_init, capture_events, mock_usage):
 
 
 @pytest.mark.asyncio
-async def test_max_turns_before_handoff_span(sentry_init, capture_events, mock_usage):
+async def test_max_turns_before_handoff_span(
+    sentry_init, capture_events, mock_usage, get_model_response
+):
     """
     Example raising agents.exceptions.AgentsException after the agent invocation span is complete.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4-mini", openai_client=client)
+
     # Create two simple agents with a handoff relationship
     secondary_agent = agents.Agent(
         name="secondary_agent",
         instructions="You are a secondary agent.",
-        model="gpt-4o-mini",
+        model=model,
     )
 
     primary_agent = agents.Agent(
         name="primary_agent",
         instructions="You are a primary agent that hands off to secondary agent.",
-        model="gpt-4o-mini",
+        model=model,
         handoffs=[secondary_agent],
     )
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Mock two responses:
-            # 1. Primary agent calls handoff tool
-            # 2. Secondary agent provides final response
-            handoff_response = ModelResponse(
-                output=[
-                    ResponseFunctionToolCall(
-                        id="call_handoff_123",
-                        call_id="call_handoff_123",
-                        name="transfer_to_secondary_agent",
-                        type="function_call",
-                        arguments="{}",
-                    )
-                ],
-                usage=mock_usage,
-                response_id="resp_handoff_123",
-            )
-
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="I'm the specialist and I can help with that!",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=mock_usage,
-                response_id="resp_final_123",
-            )
+    handoff_response = get_model_response(
+        Response(
+            id="resp_tool_123",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_handoff_123",
+                    call_id="call_handoff_123",
+                    name="transfer_to_secondary_agent",
+                    type="function_call",
+                    arguments="{}",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [handoff_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="I'm the specialist and I can help with that!",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=20,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    with patch.object(
+        primary_agent.model._client._client,
+        "send",
+        side_effect=[handoff_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            with pytest.raises(MaxTurnsExceeded):
-                await agents.Runner.run(
-                    primary_agent,
-                    "Please hand off to secondary agent",
-                    run_config=test_run_config,
-                    max_turns=1,
-                )
+        with pytest.raises(MaxTurnsExceeded):
+            await agents.Runner.run(
+                primary_agent,
+                "Please hand off to secondary agent",
+                run_config=test_run_config,
+                max_turns=1,
+            )
 
     (error, transaction) = events
     spans = transaction["spans"]
-    handoff_span = spans[2]
+    handoff_span = next(span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF)
 
     # Verify handoff span was created
     assert handoff_span is not None