55import os
66import json
77import logging
8+ import httpx
89
910import sentry_sdk
1011from sentry_sdk import start_span
11- from sentry_sdk .consts import SPANDATA
12+ from sentry_sdk .consts import SPANDATA , OP
1213from sentry_sdk .integrations .logging import LoggingIntegration
1314from sentry_sdk .integrations .openai_agents import OpenAIAgentsIntegration
1415from sentry_sdk .integrations .openai_agents .utils import _set_input_data , safe_serialize
@@ -314,6 +315,25 @@ def test_agent_custom_model():
314315 )
315316
316317
@pytest.fixture
def get_model_response():
    """
    Provide a factory that wraps a response model in an ``httpx.Response``.

    The returned callable takes an object exposing ``model_dump()``, serializes
    the dumped data as UTF-8 encoded JSON and uses it as the body of a 200
    response attached to a ``POST /responses`` request.
    """

    def build_response(response_content):
        # The response is tied to a request object, mirroring what a real
        # HTTP round trip would produce.
        request = httpx.Request("POST", "/responses")

        # Dump the model to plain data, then encode it as JSON bytes.
        payload = json.dumps(response_content.model_dump())
        body = payload.encode("utf-8")

        return httpx.Response(200, request=request, content=body)

    return build_response
336+
317337@pytest .mark .asyncio
318338async def test_agent_invocation_span_no_pii (
319339 sentry_init , capture_events , test_agent , mock_model_response
@@ -917,85 +937,120 @@ def test_agent_invocation_span_sync(
917937
918938
919939@pytest .mark .asyncio
920- async def test_handoff_span (sentry_init , capture_events , mock_usage ):
940+ async def test_handoff_span (sentry_init , capture_events , get_model_response ):
921941 """
922942 Test that handoff spans are created when agents hand off to other agents.
923943 """
944+ client = AsyncOpenAI (api_key = "test-key" )
945+ model = OpenAIResponsesModel (model = "gpt-4-mini" , openai_client = client )
946+
924947 # Create two simple agents with a handoff relationship
925948 secondary_agent = agents .Agent (
926949 name = "secondary_agent" ,
927950 instructions = "You are a secondary agent." ,
928- model = "gpt-4o-mini" ,
951+ model = model ,
929952 )
930953
931954 primary_agent = agents .Agent (
932955 name = "primary_agent" ,
933956 instructions = "You are a primary agent that hands off to secondary agent." ,
934- model = "gpt-4o-mini" ,
957+ model = model ,
935958 handoffs = [secondary_agent ],
936959 )
937960
938- with patch .dict (os .environ , {"OPENAI_API_KEY" : "test-key" }):
939- with patch (
940- "agents.models.openai_responses.OpenAIResponsesModel.get_response"
941- ) as mock_get_response :
942- # Mock two responses:
943- # 1. Primary agent calls handoff tool
944- # 2. Secondary agent provides final response
945- handoff_response = ModelResponse (
946- output = [
947- ResponseFunctionToolCall (
948- id = "call_handoff_123" ,
949- call_id = "call_handoff_123" ,
950- name = "transfer_to_secondary_agent" ,
951- type = "function_call" ,
952- arguments = "{}" ,
953- )
954- ],
955- usage = mock_usage ,
956- response_id = "resp_handoff_123" ,
957- )
958-
959- final_response = ModelResponse (
960- output = [
961- ResponseOutputMessage (
962- id = "msg_final" ,
963- type = "message" ,
964- status = "completed" ,
965- content = [
966- ResponseOutputText (
967- text = "I'm the specialist and I can help with that!" ,
968- type = "output_text" ,
969- annotations = [],
970- )
971- ],
972- role = "assistant" ,
973- )
974- ],
975- usage = mock_usage ,
976- response_id = "resp_final_123" ,
977- )
961+ handoff_response = get_model_response (
962+ Response (
963+ id = "resp_tool_123" ,
964+ output = [
965+ ResponseFunctionToolCall (
966+ id = "call_handoff_123" ,
967+ call_id = "call_handoff_123" ,
968+ name = "transfer_to_secondary_agent" ,
969+ type = "function_call" ,
970+ arguments = "{}" ,
971+ )
972+ ],
973+ parallel_tool_calls = False ,
974+ tool_choice = "none" ,
975+ tools = [],
976+ created_at = 10000000 ,
977+ model = "gpt-4" ,
978+ object = "response" ,
979+ usage = ResponseUsage (
980+ input_tokens = 10 ,
981+ input_tokens_details = InputTokensDetails (
982+ cached_tokens = 0 ,
983+ ),
984+ output_tokens = 20 ,
985+ output_tokens_details = OutputTokensDetails (
986+ reasoning_tokens = 5 ,
987+ ),
988+ total_tokens = 30 ,
989+ ),
990+ )
991+ )
978992
979- mock_get_response .side_effect = [handoff_response , final_response ]
993+ final_response = get_model_response (
994+ Response (
995+ id = "resp_final_123" ,
996+ output = [
997+ ResponseOutputMessage (
998+ id = "msg_final" ,
999+ type = "message" ,
1000+ status = "completed" ,
1001+ content = [
1002+ ResponseOutputText (
1003+ text = "I'm the specialist and I can help with that!" ,
1004+ type = "output_text" ,
1005+ annotations = [],
1006+ )
1007+ ],
1008+ role = "assistant" ,
1009+ )
1010+ ],
1011+ parallel_tool_calls = False ,
1012+ tool_choice = "none" ,
1013+ tools = [],
1014+ created_at = 10000000 ,
1015+ model = "gpt-4" ,
1016+ object = "response" ,
1017+ usage = ResponseUsage (
1018+ input_tokens = 10 ,
1019+ input_tokens_details = InputTokensDetails (
1020+ cached_tokens = 0 ,
1021+ ),
1022+ output_tokens = 20 ,
1023+ output_tokens_details = OutputTokensDetails (
1024+ reasoning_tokens = 5 ,
1025+ ),
1026+ total_tokens = 30 ,
1027+ ),
1028+ )
1029+ )
9801030
981- sentry_init (
982- integrations = [OpenAIAgentsIntegration ()],
983- traces_sample_rate = 1.0 ,
984- )
1031+ with patch .object (
1032+ primary_agent .model ._client ._client ,
1033+ "send" ,
1034+ side_effect = [handoff_response , final_response ],
1035+ ) as _ :
1036+ sentry_init (
1037+ integrations = [OpenAIAgentsIntegration ()],
1038+ traces_sample_rate = 1.0 ,
1039+ )
9851040
986- events = capture_events ()
1041+ events = capture_events ()
9871042
988- result = await agents .Runner .run (
989- primary_agent ,
990- "Please hand off to secondary agent" ,
991- run_config = test_run_config ,
992- )
1043+ result = await agents .Runner .run (
1044+ primary_agent ,
1045+ "Please hand off to secondary agent" ,
1046+ run_config = test_run_config ,
1047+ )
9931048
994- assert result is not None
1049+ assert result is not None
9951050
9961051 (transaction ,) = events
9971052 spans = transaction ["spans" ]
998- handoff_span = spans [ 2 ]
1053+ handoff_span = next ( span for span in spans if span . get ( "op" ) == OP . GEN_AI_HANDOFF )
9991054
10001055 # Verify handoff span was created
10011056 assert handoff_span is not None
@@ -1006,85 +1061,122 @@ async def test_handoff_span(sentry_init, capture_events, mock_usage):
10061061
10071062
10081063@pytest .mark .asyncio
1009- async def test_max_turns_before_handoff_span (sentry_init , capture_events , mock_usage ):
1064+ async def test_max_turns_before_handoff_span (
1065+ sentry_init , capture_events , mock_usage , get_model_response
1066+ ):
10101067 """
10111068 Example raising agents.exceptions.AgentsException after the agent invocation span is complete.
10121069 """
1070+ client = AsyncOpenAI (api_key = "test-key" )
1071+ model = OpenAIResponsesModel (model = "gpt-4-mini" , openai_client = client )
1072+
10131073 # Create two simple agents with a handoff relationship
10141074 secondary_agent = agents .Agent (
10151075 name = "secondary_agent" ,
10161076 instructions = "You are a secondary agent." ,
1017- model = "gpt-4o-mini" ,
1077+ model = model ,
10181078 )
10191079
10201080 primary_agent = agents .Agent (
10211081 name = "primary_agent" ,
10221082 instructions = "You are a primary agent that hands off to secondary agent." ,
1023- model = "gpt-4o-mini" ,
1083+ model = model ,
10241084 handoffs = [secondary_agent ],
10251085 )
10261086
1027- with patch .dict (os .environ , {"OPENAI_API_KEY" : "test-key" }):
1028- with patch (
1029- "agents.models.openai_responses.OpenAIResponsesModel.get_response"
1030- ) as mock_get_response :
1031- # Mock two responses:
1032- # 1. Primary agent calls handoff tool
1033- # 2. Secondary agent provides final response
1034- handoff_response = ModelResponse (
1035- output = [
1036- ResponseFunctionToolCall (
1037- id = "call_handoff_123" ,
1038- call_id = "call_handoff_123" ,
1039- name = "transfer_to_secondary_agent" ,
1040- type = "function_call" ,
1041- arguments = "{}" ,
1042- )
1043- ],
1044- usage = mock_usage ,
1045- response_id = "resp_handoff_123" ,
1046- )
1047-
1048- final_response = ModelResponse (
1049- output = [
1050- ResponseOutputMessage (
1051- id = "msg_final" ,
1052- type = "message" ,
1053- status = "completed" ,
1054- content = [
1055- ResponseOutputText (
1056- text = "I'm the specialist and I can help with that!" ,
1057- type = "output_text" ,
1058- annotations = [],
1059- )
1060- ],
1061- role = "assistant" ,
1062- )
1063- ],
1064- usage = mock_usage ,
1065- response_id = "resp_final_123" ,
1066- )
1087+ handoff_response = get_model_response (
1088+ Response (
1089+ id = "resp_tool_123" ,
1090+ output = [
1091+ ResponseFunctionToolCall (
1092+ id = "call_handoff_123" ,
1093+ call_id = "call_handoff_123" ,
1094+ name = "transfer_to_secondary_agent" ,
1095+ type = "function_call" ,
1096+ arguments = "{}" ,
1097+ )
1098+ ],
1099+ parallel_tool_calls = False ,
1100+ tool_choice = "none" ,
1101+ tools = [],
1102+ created_at = 10000000 ,
1103+ model = "gpt-4" ,
1104+ object = "response" ,
1105+ usage = ResponseUsage (
1106+ input_tokens = 10 ,
1107+ input_tokens_details = InputTokensDetails (
1108+ cached_tokens = 0 ,
1109+ ),
1110+ output_tokens = 20 ,
1111+ output_tokens_details = OutputTokensDetails (
1112+ reasoning_tokens = 5 ,
1113+ ),
1114+ total_tokens = 30 ,
1115+ ),
1116+ )
1117+ )
10671118
1068- mock_get_response .side_effect = [handoff_response , final_response ]
1119+ final_response = get_model_response (
1120+ Response (
1121+ id = "resp_final_123" ,
1122+ output = [
1123+ ResponseOutputMessage (
1124+ id = "msg_final" ,
1125+ type = "message" ,
1126+ status = "completed" ,
1127+ content = [
1128+ ResponseOutputText (
1129+ text = "I'm the specialist and I can help with that!" ,
1130+ type = "output_text" ,
1131+ annotations = [],
1132+ )
1133+ ],
1134+ role = "assistant" ,
1135+ )
1136+ ],
1137+ parallel_tool_calls = False ,
1138+ tool_choice = "none" ,
1139+ tools = [],
1140+ created_at = 10000000 ,
1141+ model = "gpt-4" ,
1142+ object = "response" ,
1143+ usage = ResponseUsage (
1144+ input_tokens = 10 ,
1145+ input_tokens_details = InputTokensDetails (
1146+ cached_tokens = 0 ,
1147+ ),
1148+ output_tokens = 20 ,
1149+ output_tokens_details = OutputTokensDetails (
1150+ reasoning_tokens = 5 ,
1151+ ),
1152+ total_tokens = 30 ,
1153+ ),
1154+ )
1155+ )
10691156
1070- sentry_init (
1071- integrations = [OpenAIAgentsIntegration ()],
1072- traces_sample_rate = 1.0 ,
1073- )
1157+ with patch .object (
1158+ primary_agent .model ._client ._client ,
1159+ "send" ,
1160+ side_effect = [handoff_response , final_response ],
1161+ ) as _ :
1162+ sentry_init (
1163+ integrations = [OpenAIAgentsIntegration ()],
1164+ traces_sample_rate = 1.0 ,
1165+ )
10741166
1075- events = capture_events ()
1167+ events = capture_events ()
10761168
1077- with pytest .raises (MaxTurnsExceeded ):
1078- await agents .Runner .run (
1079- primary_agent ,
1080- "Please hand off to secondary agent" ,
1081- run_config = test_run_config ,
1082- max_turns = 1 ,
1083- )
1169+ with pytest .raises (MaxTurnsExceeded ):
1170+ await agents .Runner .run (
1171+ primary_agent ,
1172+ "Please hand off to secondary agent" ,
1173+ run_config = test_run_config ,
1174+ max_turns = 1 ,
1175+ )
10841176
10851177 (error , transaction ) = events
10861178 spans = transaction ["spans" ]
1087- handoff_span = spans [ 2 ]
1179+ handoff_span = next ( span for span in spans if span . get ( "op" ) == OP . GEN_AI_HANDOFF )
10881180
10891181 # Verify handoff span was created
10901182 assert handoff_span is not None
0 commit comments