Skip to content

Commit dc27740

Browse files
giles17CopilotCopilot
authored
Python: Fix streaming path to emit mcp_server_tool_result on output_item.done instead of output_item.added (microsoft#4821)
* Fix streaming path to deliver mcp_server_tool_result content (microsoft#4814) Remove premature mcp_server_tool_result emission from the response.output_item.added/mcp_call handler — at that point the MCP server has not yet responded and output is always None. Add a handler for response.mcp_call.completed that emits mcp_server_tool_result with the actual tool output, matching the non-streaming path behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix streaming path to deliver mcp_server_tool_result content (microsoft#4814) Stop eagerly emitting mcp_server_tool_result on response.output_item.added (when output is always None). Instead, handle response.output_item.done for mcp_call items, which carries the full McpCall with populated output. This matches the non-streaming path which guards with 'if item.output is not None' before emitting the result. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Fix test docstring to match actual implementation event name Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Address review: call_id fallback and raw_representation consistency (microsoft#4814) - Add call_id fallback in response.output_item.done mcp_call handler to match the output_item.added handler pattern - Use done_item instead of event for raw_representation to keep consistent with other output_item branches and non-streaming path - Add test for call_id fallback when id attribute is missing - Add raw_representation assertions to existing done handler tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Address review: call_id fallback for non-streaming path and test coverage (microsoft#4814) - Apply defensive call_id fallback (getattr with id/call_id/empty) to non-streaming mcp_call path for consistency with streaming path - Add raw_representation assertion to call_id fallback test - Add test for empty-string fallback when neither id nor call_id exist Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <copilot@github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent c1435ac commit dc27740

2 files changed

Lines changed: 136 additions & 32 deletions

File tree

python/packages/openai/agent_framework_openai/_chat_client.py

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,7 +1728,7 @@ def _parse_response_from_openai(
17281728
)
17291729
)
17301730
case "mcp_call":
1731-
call_id = item.id
1731+
call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or ""
17321732
contents.append(
17331733
Content.from_mcp_server_tool_call(
17341734
call_id=call_id,
@@ -2118,27 +2118,7 @@ def _parse_chunk_from_openai(
21182118
raw_representation=event_item,
21192119
)
21202120
)
2121-
result_output = (
2122-
getattr(event_item, "result", None)
2123-
or getattr(event_item, "output", None)
2124-
or getattr(event_item, "outputs", None)
2125-
)
2126-
parsed_output: list[Content] | None = None
2127-
if result_output:
2128-
normalized = ( # pyright: ignore[reportUnknownVariableType]
2129-
result_output
2130-
if isinstance(result_output, Sequence)
2131-
and not isinstance(result_output, (str, bytes, MutableMapping))
2132-
else [result_output]
2133-
)
2134-
parsed_output = [Content.from_dict(output_item) for output_item in normalized] # pyright: ignore[reportArgumentType,reportUnknownVariableType]
2135-
contents.append(
2136-
Content.from_mcp_server_tool_result(
2137-
call_id=call_id,
2138-
output=parsed_output,
2139-
raw_representation=event_item,
2140-
)
2141-
)
2121+
# Result deferred to response.output_item.done
21422122
case "code_interpreter_call": # ResponseOutputCodeInterpreterCall
21432123
call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None)
21442124
outputs: list[Content] = []
@@ -2408,6 +2388,21 @@ def _get_ann_value(key: str) -> Any:
24082388
)
24092389
else:
24102390
logger.debug("Unparsed annotation type in streaming: %s", ann_type)
2391+
case "response.output_item.done":
2392+
done_item = event.item
2393+
if getattr(done_item, "type", None) == "mcp_call":
2394+
call_id = getattr(done_item, "id", None) or getattr(done_item, "call_id", None) or ""
2395+
output_text = getattr(done_item, "output", None)
2396+
parsed_output: list[Content] | None = (
2397+
[Content.from_text(text=output_text)] if isinstance(output_text, str) else None
2398+
)
2399+
contents.append(
2400+
Content.from_mcp_server_tool_result(
2401+
call_id=call_id,
2402+
output=parsed_output,
2403+
raw_representation=done_item,
2404+
)
2405+
)
24112406
case _:
24122407
logger.debug("Unparsed event of type: %s: %s", event.type, event)
24132408

python/packages/openai/tests/openai/test_openai_chat_client.py

Lines changed: 119 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,11 +1184,13 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None:
11841184
assert result_content.output is not None
11851185

11861186

1187-
def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
1188-
"""Test _parse_chunk_from_openai with MCP call output."""
1187+
def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None:
1188+
"""Test that response.output_item.added for mcp_call emits only the call, not the result.
1189+
1190+
The result is deferred to response.output_item.done.
1191+
"""
11891192
client = OpenAIChatClient(model="test-model", api_key="test-key")
11901193

1191-
# Mock event with MCP call that has output
11921194
mock_event = MagicMock()
11931195
mock_event.type = "response.output_item.added"
11941196

@@ -1199,8 +1201,9 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
11991201
mock_item.name = "fetch_resource"
12001202
mock_item.server_label = "ResourceServer"
12011203
mock_item.arguments = {"resource_id": "123"}
1202-
# Use proper content structure that _parse_content can handle
1203-
mock_item.result = [{"type": "text", "text": "test result"}]
1204+
mock_item.result = None
1205+
mock_item.output = None
1206+
mock_item.outputs = None
12041207

12051208
mock_event.item = mock_item
12061209
mock_event.output_index = 0
@@ -1209,18 +1212,124 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
12091212

12101213
update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
12111214

1212-
# Should have both call and result in contents
1213-
assert len(update.contents) == 2
1214-
call_content, result_content = update.contents
1215+
# Should have only the call content — result is deferred
1216+
assert len(update.contents) == 1
1217+
call_content = update.contents[0]
12151218

12161219
assert call_content.type == "mcp_server_tool_call"
12171220
assert call_content.call_id in ["mcp_call_456", "call_456"]
12181221
assert call_content.tool_name == "fetch_resource"
12191222

1223+
# No result should be emitted at this point
1224+
result_contents = [c for c in update.contents if c.type == "mcp_server_tool_result"]
1225+
assert len(result_contents) == 0
1226+
1227+
1228+
def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None:
1229+
"""Test that response.output_item.done for mcp_call emits mcp_server_tool_result with output."""
1230+
client = OpenAIChatClient(model="test-model", api_key="test-key")
1231+
1232+
mock_event = MagicMock()
1233+
mock_event.type = "response.output_item.done"
1234+
1235+
mock_item = MagicMock()
1236+
mock_item.type = "mcp_call"
1237+
mock_item.id = "mcp_call_456"
1238+
mock_item.output = "The weather in Seattle is 72F and sunny."
1239+
mock_event.item = mock_item
1240+
1241+
function_call_ids: dict[int, tuple[str, str]] = {}
1242+
1243+
update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
1244+
1245+
assert len(update.contents) == 1
1246+
result_content = update.contents[0]
1247+
1248+
assert result_content.type == "mcp_server_tool_result"
1249+
assert result_content.call_id == "mcp_call_456"
1250+
assert result_content.output is not None
1251+
assert len(result_content.output) == 1
1252+
assert result_content.output[0].text == "The weather in Seattle is 72F and sunny."
1253+
assert result_content.raw_representation is mock_item
1254+
1255+
1256+
def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None:
1257+
"""Test that response.output_item.done for mcp_call with no output emits result with None output."""
1258+
client = OpenAIChatClient(model="test-model", api_key="test-key")
1259+
1260+
mock_event = MagicMock()
1261+
mock_event.type = "response.output_item.done"
1262+
1263+
mock_item = MagicMock()
1264+
mock_item.type = "mcp_call"
1265+
mock_item.id = "mcp_call_789"
1266+
mock_item.output = None
1267+
mock_event.item = mock_item
1268+
1269+
function_call_ids: dict[int, tuple[str, str]] = {}
1270+
1271+
update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
1272+
1273+
assert len(update.contents) == 1
1274+
result_content = update.contents[0]
1275+
1276+
assert result_content.type == "mcp_server_tool_result"
1277+
assert result_content.call_id == "mcp_call_789"
1278+
assert result_content.output is None
1279+
assert result_content.raw_representation is mock_item
1280+
1281+
1282+
def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() -> None:
1283+
"""Test that response.output_item.done for mcp_call falls back to call_id when id is missing."""
1284+
client = OpenAIChatClient(model="test-model", api_key="test-key")
1285+
1286+
mock_event = MagicMock()
1287+
mock_event.type = "response.output_item.done"
1288+
1289+
mock_item = MagicMock(spec=[])
1290+
mock_item.type = "mcp_call"
1291+
mock_item.call_id = "mcp_fallback_123"
1292+
mock_item.output = "fallback result"
1293+
mock_event.item = mock_item
1294+
1295+
function_call_ids: dict[int, tuple[str, str]] = {}
1296+
1297+
update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
1298+
1299+
assert len(update.contents) == 1
1300+
result_content = update.contents[0]
1301+
1302+
assert result_content.type == "mcp_server_tool_result"
1303+
assert result_content.call_id == "mcp_fallback_123"
1304+
assert result_content.output is not None
1305+
assert result_content.output[0].text == "fallback result"
1306+
assert result_content.raw_representation is mock_item
1307+
1308+
1309+
def test_parse_chunk_from_openai_with_mcp_output_item_done_no_id_fallback() -> None:
1310+
"""Test that response.output_item.done for mcp_call falls back to empty string when neither id nor call_id exist."""
1311+
client = OpenAIChatClient(model="test-model", api_key="test-key")
1312+
1313+
mock_event = MagicMock()
1314+
mock_event.type = "response.output_item.done"
1315+
1316+
mock_item = MagicMock(spec=[])
1317+
mock_item.type = "mcp_call"
1318+
mock_item.output = "some result"
1319+
mock_event.item = mock_item
1320+
1321+
function_call_ids: dict[int, tuple[str, str]] = {}
1322+
1323+
update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
1324+
1325+
assert len(update.contents) == 1
1326+
result_content = update.contents[0]
1327+
12201328
assert result_content.type == "mcp_server_tool_result"
1221-
assert result_content.call_id in ["mcp_call_456", "call_456"]
1222-
# Verify the output was parsed
1329+
assert result_content.call_id == ""
12231330
assert result_content.output is not None
1331+
assert result_content.output[0].text == "some result"
1332+
assert result_content.raw_representation is mock_item
12241333

12251334

12261335
def test_prepare_message_for_openai_with_function_approval_response() -> None:

0 commit comments

Comments
 (0)