Skip to content

Commit d813b8e

Browse files
Fix Azure AI Search citation enrichment follow-ups
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent bfd5a6f commit d813b8e

3 files changed

Lines changed: 114 additions & 12 deletions

File tree

python/packages/foundry/tests/foundry/test_foundry_agent.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ def _get_foundry_azure_ai_search_model() -> str | None:
5757

5858
skip_if_foundry_azure_ai_search_integration_tests_disabled = pytest.mark.skipif(
5959
os.getenv("FOUNDRY_PROJECT_ENDPOINT", "") in ("", "https://test-project.services.ai.azure.com/")
60-
or os.getenv("AZURE_SEARCH_ENDPOINT", "") == ""
6160
or os.getenv("AZURE_SEARCH_INDEX_NAME", "") == ""
6261
or _get_foundry_azure_ai_search_model() is None,
6362
reason="No live Foundry project, Azure Search index, or model provided for Azure AI Search integration tests.",

python/packages/openai/agent_framework_openai/_chat_client.py

Lines changed: 29 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,7 @@
122122
OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL = "local_shell_call_output"
123123
_AZURE_AI_SEARCH_CALL_OUTPUT_TYPE = "azure_ai_search_call_output"
124124
_AZURE_AI_SEARCH_OUTPUT_EVENT_TYPES = {"response.output_item.added", "response.output_item.done"}
125+
_AZURE_AI_SEARCH_OUTPUT_EVENT_PREFIX = "response.azure_ai_search_call_output."
125126

126127
# Internal marker emitted by `_prepare_content_for_openai` for an
127128
# `mcp_server_tool_result` Content. The Responses API expects an `mcp_call`
@@ -1978,16 +1979,27 @@ def _parse_azure_ai_search_output_payload(output: Any) -> Mapping[str, Any] | No
19781979
return None
19791980

19801981
@staticmethod
1981-
def _extract_azure_ai_search_get_urls(event: Any) -> list[str]:
1982-
"""Extract per-document Azure AI Search REST URLs from a streamed Responses event."""
1983-
if getattr(event, "type", None) not in _AZURE_AI_SEARCH_OUTPUT_EVENT_TYPES:
1984-
return []
1982+
def _extract_azure_ai_search_output_payload(event: Any) -> Mapping[str, Any] | None:
1983+
"""Return Azure AI Search output payload from either a top-level event or its nested item."""
1984+
payload = RawOpenAIChatClient._parse_azure_ai_search_output_payload(getattr(event, "output", None))
1985+
if payload is not None:
1986+
return payload
19851987

19861988
item = getattr(event, "item", None)
1987-
if getattr(item, "type", None) != _AZURE_AI_SEARCH_CALL_OUTPUT_TYPE:
1989+
if getattr(item, "type", None) == _AZURE_AI_SEARCH_CALL_OUTPUT_TYPE:
1990+
return RawOpenAIChatClient._parse_azure_ai_search_output_payload(getattr(item, "output", None))
1991+
return None
1992+
1993+
@staticmethod
1994+
def _extract_azure_ai_search_get_urls(event: Any) -> list[str]:
1995+
"""Extract per-document Azure AI Search REST URLs from a streamed Responses event."""
1996+
event_type = getattr(event, "type", None)
1997+
if event_type not in _AZURE_AI_SEARCH_OUTPUT_EVENT_TYPES and not (
1998+
isinstance(event_type, str) and event_type.startswith(_AZURE_AI_SEARCH_OUTPUT_EVENT_PREFIX)
1999+
):
19882000
return []
19892001

1990-
payload = RawOpenAIChatClient._parse_azure_ai_search_output_payload(getattr(item, "output", None))
2002+
payload = RawOpenAIChatClient._extract_azure_ai_search_output_payload(event)
19912003
if payload is None:
19922004
return []
19932005

@@ -2028,12 +2040,18 @@ def _enrich_streamed_azure_ai_search_citations(cls, updates: Sequence[ChatRespon
20282040
for annotation in content.annotations:
20292041
if annotation.get("type") != "citation" or annotation.get("file_id"):
20302042
continue
2031-
additional_properties = annotation.setdefault("additional_properties", {})
2032-
if "get_url" in additional_properties:
2033-
continue
2043+
20342044
doc_index = cls._azure_ai_search_doc_index(annotation)
20352045
if doc_index is None or doc_index >= len(get_urls):
20362046
continue
2047+
2048+
additional_properties = annotation.get("additional_properties")
2049+
if not isinstance(additional_properties, dict):
2050+
additional_properties = {}
2051+
annotation["additional_properties"] = additional_properties
2052+
if "get_url" in additional_properties:
2053+
continue
2054+
20372055
additional_properties["get_url"] = get_urls[doc_index]
20382056

20392057
@staticmethod
@@ -3136,7 +3154,8 @@ def _get_ann_value(key: str) -> Any:
31363154
elif getattr(done_item, "type", None) == _AZURE_AI_SEARCH_CALL_OUTPUT_TYPE:
31373155
pass
31383156
case _:
3139-
logger.debug("Unparsed event of type: %s: %s", event.type, event)
3157+
if not isinstance(event.type, str) or not event.type.startswith(_AZURE_AI_SEARCH_OUTPUT_EVENT_PREFIX):
3158+
logger.debug("Unparsed event of type: %s: %s", event.type, event)
31403159

31413160
return ChatResponseUpdate(
31423161
contents=contents,

python/packages/openai/tests/openai/test_openai_chat_client.py

Lines changed: 85 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import pytest
1414
from agent_framework import (
1515
Agent,
16+
Annotation,
1617
ChatOptions,
1718
ChatResponse,
1819
ChatResponseUpdate,
@@ -3618,9 +3619,17 @@ def _make_mcp_call_done_event(output: str) -> MagicMock:
36183619
return event
36193620

36203621

3621-
def _make_azure_ai_search_output_event(output: Any, *, event_type: str = "response.output_item.done") -> MagicMock:
3622+
def _make_azure_ai_search_output_event(
3623+
output: Any,
3624+
*,
3625+
event_type: str = "response.output_item.done",
3626+
top_level_output: bool = False,
3627+
) -> MagicMock:
36223628
event = MagicMock()
36233629
event.type = event_type
3630+
if top_level_output:
3631+
event.output = output
3632+
return event
36243633
event.item = MagicMock()
36253634
event.item.type = "azure_ai_search_call_output"
36263635
event.item.output = output
@@ -3719,6 +3728,81 @@ def test_streaming_azure_ai_search_output_does_not_overwrite_existing_get_url()
37193728
assert annotation["additional_properties"]["get_url"] == existing_get_url
37203729

37213730

3731+
def test_streaming_azure_ai_search_output_normalizes_non_dict_additional_properties() -> None:
3732+
"""Existing non-dict additional_properties should be normalized before enriching get_url."""
3733+
client = OpenAIChatClient(model="test-model", api_key="test-key")
3734+
chat_options = ChatOptions()
3735+
function_call_ids: dict[int, tuple[str, str]] = {}
3736+
get_url = "https://example.search.windows.net/indexes/my-index/docs/doc-123?api-version=2024-07-01"
3737+
3738+
citation_update = client._parse_chunk_from_openai(
3739+
_make_url_citation_event(title="doc_0"),
3740+
chat_options,
3741+
function_call_ids,
3742+
)
3743+
citation_update.contents[0].annotations[0]["additional_properties"] = None
3744+
search_update = client._parse_chunk_from_openai(
3745+
_make_azure_ai_search_output_event(json.dumps({"get_urls": [get_url]})),
3746+
chat_options,
3747+
function_call_ids,
3748+
)
3749+
3750+
response = client._finalize_response_updates([citation_update, search_update])
3751+
3752+
annotation = response.messages[0].contents[0].annotations[0]
3753+
assert annotation["additional_properties"] == {"get_url": get_url}
3754+
3755+
3756+
def test_streaming_azure_ai_search_output_does_not_create_additional_properties_for_unusable_citation() -> None:
3757+
"""Unenrichable Azure AI Search citations should keep their original annotation shape."""
3758+
update = ChatResponseUpdate(
3759+
contents=[
3760+
Content.from_text(
3761+
text="hello",
3762+
annotations=[Annotation(type="citation", title="source_0", url="https://example.invalid")],
3763+
)
3764+
],
3765+
raw_representation=_make_azure_ai_search_output_event(
3766+
json.dumps({"get_urls": ["https://example.search.windows.net/indexes/my-index/docs/doc-0"]})
3767+
),
3768+
)
3769+
3770+
RawOpenAIChatClient._enrich_streamed_azure_ai_search_citations([update])
3771+
3772+
annotation = update.contents[0].annotations[0]
3773+
assert annotation.get("additional_properties") is None
3774+
3775+
3776+
def test_extract_azure_ai_search_get_urls_accepts_dedicated_output_event() -> None:
3777+
"""Dedicated response.azure_ai_search_call_output.* events should yield get_urls too."""
3778+
get_url = "https://example.search.windows.net/indexes/my-index/docs/doc-123?api-version=2024-07-01"
3779+
event = _make_azure_ai_search_output_event(
3780+
json.dumps({"get_urls": [get_url]}),
3781+
event_type="response.azure_ai_search_call_output.done",
3782+
top_level_output=True,
3783+
)
3784+
3785+
assert RawOpenAIChatClient._extract_azure_ai_search_get_urls(event) == [get_url]
3786+
3787+
3788+
def test_parse_chunk_from_openai_ignores_dedicated_azure_ai_search_events() -> None:
3789+
"""Dedicated Azure AI Search events should be treated as intentional no-op updates."""
3790+
client = OpenAIChatClient(model="test-model", api_key="test-key")
3791+
chat_options = ChatOptions()
3792+
function_call_ids: dict[int, tuple[str, str]] = {}
3793+
event = _make_azure_ai_search_output_event(
3794+
json.dumps({"get_urls": ["https://example.search.windows.net/indexes/my-index/docs/doc-0"]}),
3795+
event_type="response.azure_ai_search_call_output.done",
3796+
top_level_output=True,
3797+
)
3798+
3799+
with patch("agent_framework_openai._chat_client.logger.debug") as mock_debug:
3800+
update = client._parse_chunk_from_openai(event, chat_options, function_call_ids)
3801+
3802+
assert update.contents == []
3803+
mock_debug.assert_not_called()
3804+
3805+
37223806
@pytest.mark.parametrize(
37233807
("title", "output"),
37243808
[

0 commit comments

Comments
 (0)