From bf1906aec9f5cd5fbc3326b32fddc71dfd1dc6c0 Mon Sep 17 00:00:00 2001
From: Harsh-Microsoft <v-hbangera@microsoft.com>
Date: Thu, 21 May 2026 21:03:35 +0530
Subject: [PATCH 1/2] chore: upgrade agent-framework to 1.3.0 and
 azure-ai-projects to 2.1.0

Library upgrades (US #43549):
- agent-framework-core: 1.0.0rc2 -> 1.3.0
- azure-ai-projects: 2.0.0b3 -> 2.1.0
- Replace retired agent-framework-azure-ai/azure-ai-agents with
  agent-framework-foundry==1.3.0 and azure-ai-inference==1.0.0b9

Code changes for the new GA API surface:
- chat_service / history_service: switch from AzureAIProjectAgentProvider
  to FoundryAgent; chat_service passes AgentSession(service_session_id=...)
  in place of the removed conversation_id streaming kwarg.
- infra agent/index scripts: align with the new SDK imports and GA APIs.

Citation regression workaround (microsoft/agent-framework#5995):
- Add services/_patches/agent_framework_search_citations.py, a runtime
  monkey-patch of RawOpenAIChatClient._parse_chunk_from_openai that
  re-attaches per-document REST URLs (additional_properties.get_url) on
  Azure AI Search url_citation annotations. Mapping uses the doc index
  parsed from the citation title (doc_<N>) so duplicate citations resolve
  correctly. Patch is idempotent and degrades gracefully if upstream
  fixes the regression or renames the target method.
- chat_service citation extraction now prefers additional_properties.get_url
  over the search-service root url so titles/links are per-document.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../scripts/agent_scripts/01_create_agents.py |   4 +-
 infra/scripts/agent_scripts/requirements.txt  |   2 +-
 .../index_scripts/03_cu_process_data_text.py  |  18 +-
 .../04_cu_process_custom_data.py              |  18 +-
 infra/scripts/index_scripts/requirements.txt  |   8 +-
 src/api/requirements.txt                      |   8 +-
 src/api/services/_patches/__init__.py         |   1 +
 .../agent_framework_search_citations.py       | 175 ++++++++++++++++++
 src/api/services/chat_service.py              |  35 ++--
 src/api/services/history_service.py           |   9 +-
 src/tests/api/services/test_chat_service.py   |  76 +++-----
 .../api/services/test_history_service.py      |   6 +-
 12 files changed, 256 insertions(+), 104 deletions(-)
 create mode 100644 src/api/services/_patches/__init__.py
 create mode 100644 src/api/services/_patches/agent_framework_search_citations.py
diff --git a/infra/scripts/agent_scripts/01_create_agents.py b/infra/scripts/agent_scripts/01_create_agents.py
index 9374a0235..db73810c9 100644
--- a/infra/scripts/agent_scripts/01_create_agents.py
+++ b/infra/scripts/agent_scripts/01_create_agents.py
@@ -6,7 +6,7 @@
 from azure.identity.aio import AzureCliCredential
 from azure.ai.projects.models import (
     PromptAgentDefinition,
-    AzureAISearchAgentTool,
+    AzureAISearchTool,
     FunctionTool,
     AzureAISearchToolResource,
     AISearchIndexResource,
@@ -115,7 +115,7 @@ async def main():
                         }
                     ),
                     # Azure AI Search - built-in service tool (no client implementation needed)
-                    AzureAISearchAgentTool(
+                    AzureAISearchTool(
                         azure_ai_search=AzureAISearchToolResource(
                             indexes=[
                                 AISearchIndexResource(
diff --git a/infra/scripts/agent_scripts/requirements.txt b/infra/scripts/agent_scripts/requirements.txt
index b1179006b..fad61a0a1 100644
--- a/infra/scripts/agent_scripts/requirements.txt
+++ b/infra/scripts/agent_scripts/requirements.txt
@@ -1,3 +1,3 @@
 aiohttp==3.13.4
 azure-identity==1.25.2
-azure-ai-projects==2.0.0b3
+azure-ai-projects==2.1.0
diff --git a/infra/scripts/index_scripts/03_cu_process_data_text.py b/infra/scripts/index_scripts/03_cu_process_data_text.py
index 4cfaa5e40..3f8d34ec8 100644
--- a/infra/scripts/index_scripts/03_cu_process_data_text.py
+++ b/infra/scripts/index_scripts/03_cu_process_data_text.py
@@ -16,7 +16,7 @@
 
 # Suppress informational warnings from agent_framework about runtime
 # tool/structured_output overrides not being supported by AzureAIClient.
-logging.getLogger("agent_framework.azure").setLevel(logging.ERROR)
+logging.getLogger("agent_framework.foundry").setLevel(logging.ERROR)
 
 import pandas as pd
 import pyodbc
@@ -29,7 +29,7 @@
 from azure.search.documents.indexes import SearchIndexClient
 from azure.storage.filedatalake import DataLakeServiceClient
 
-from agent_framework.azure import AzureAIProjectAgentProvider
+from agent_framework_foundry import FoundryAgent
 
 from content_understanding_client import AzureContentUnderstandingClient, sanitize_cu_output
 
@@ -514,11 +514,8 @@ async def call_topic_mining_agent(topics_str1):
             AsyncAzureCliCredential(process_timeout=30) as async_cred,
             AIProjectClient(endpoint=AI_PROJECT_ENDPOINT, credential=async_cred) as project_client,
         ):
-            # Create provider for agent management
-            provider = AzureAIProjectAgentProvider(project_client=project_client)
-            
-            # Get agent using provider
-            agent = await provider.get_agent(name=TOPIC_MINING_AGENT_NAME)
+            # Create agent using FoundryAgent
+            agent = FoundryAgent(project_client=project_client, agent_name=TOPIC_MINING_AGENT_NAME)
             
             # Query with the topics string
             query = f"Analyze these conversation topics and identify distinct categories: {topics_str1}"
@@ -561,11 +558,8 @@ async def map_all_topics():
             AsyncAzureCliCredential(process_timeout=30) as async_cred,
             AIProjectClient(endpoint=AI_PROJECT_ENDPOINT, credential=async_cred) as project_client,
         ):
-            # Create provider for agent management
-            provider = AzureAIProjectAgentProvider(project_client=project_client)
-            
-            # Get agent using provider
-            agent = await provider.get_agent(name=TOPIC_MAPPING_AGENT_NAME)
+            # Create agent using FoundryAgent
+            agent = FoundryAgent(project_client=project_client, agent_name=TOPIC_MAPPING_AGENT_NAME)
             
             # Process all rows using the same agent instance
             for _, row in df_processed_data.iterrows():
diff --git a/infra/scripts/index_scripts/04_cu_process_custom_data.py b/infra/scripts/index_scripts/04_cu_process_custom_data.py
index 3ac1166b8..1bd4ea967 100644
--- a/infra/scripts/index_scripts/04_cu_process_custom_data.py
+++ b/infra/scripts/index_scripts/04_cu_process_custom_data.py
@@ -16,7 +16,7 @@
 
 # Suppress informational warnings from agent_framework about runtime
 # tool/structured_output overrides not being supported by AzureAIClient.
-logging.getLogger("agent_framework.azure").setLevel(logging.ERROR)
+logging.getLogger("agent_framework.foundry").setLevel(logging.ERROR)
 
 import pandas as pd
 import pyodbc
@@ -43,7 +43,7 @@
 )
 from azure.storage.filedatalake import DataLakeServiceClient
 
-from agent_framework.azure import AzureAIProjectAgentProvider
+from agent_framework_foundry import FoundryAgent
 
 from content_understanding_client import AzureContentUnderstandingClient, sanitize_cu_output
 
@@ -610,11 +610,8 @@ async def call_topic_mining_agent(topics_str1):
             AsyncAzureCliCredential(process_timeout=30) as async_cred,
             AIProjectClient(endpoint=AI_PROJECT_ENDPOINT, credential=async_cred) as project_client,
         ):
-            # Create provider for agent management
-            provider = AzureAIProjectAgentProvider(project_client=project_client)
-            
-            # Get agent using provider
-            agent = await provider.get_agent(name=TOPIC_MINING_AGENT_NAME)
+            # Create agent using FoundryAgent
+            agent = FoundryAgent(project_client=project_client, agent_name=TOPIC_MINING_AGENT_NAME)
             
             # Query with the topics string
             query = f"Analyze these conversation topics and identify distinct categories: {topics_str1}"
@@ -657,11 +654,8 @@ async def map_all_topics():
             AsyncAzureCliCredential(process_timeout=30) as async_cred,
             AIProjectClient(endpoint=AI_PROJECT_ENDPOINT, credential=async_cred) as project_client,
         ):
-            # Create provider for agent management
-            provider = AzureAIProjectAgentProvider(project_client=project_client)
-            
-            # Get agent using provider
-            agent = await provider.get_agent(name=TOPIC_MAPPING_AGENT_NAME)
+            # Create agent using FoundryAgent
+            agent = FoundryAgent(project_client=project_client, agent_name=TOPIC_MAPPING_AGENT_NAME)
             
             # Process all rows using the same agent instance
             for _, row in df_processed_data.iterrows():
diff --git a/infra/scripts/index_scripts/requirements.txt b/infra/scripts/index_scripts/requirements.txt
index 466989985..f1f711b71 100644
--- a/infra/scripts/index_scripts/requirements.txt
+++ b/infra/scripts/index_scripts/requirements.txt
@@ -1,9 +1,9 @@
 azure-storage-file-datalake==12.23.0
 openai==2.24.0
-azure-ai-projects==2.0.0b3
-azure-ai-agents==1.2.0b5
-agent-framework-core==1.0.0rc2
-agent-framework-azure-ai==1.0.0rc2
+azure-ai-projects==2.1.0
+azure-ai-inference==1.0.0b9
+agent-framework-core==1.3.0
+agent-framework-foundry==1.3.0
 pypdf==6.10.2
 tiktoken==0.12.0
 azure-identity==1.25.2
diff --git a/src/api/requirements.txt b/src/api/requirements.txt
index 27307e234..0bbd4a80c 100644
--- a/src/api/requirements.txt
+++ b/src/api/requirements.txt
@@ -14,10 +14,10 @@ aiohttp==3.13.5
 # Azure Services
 azure-identity==1.25.3
 azure-search-documents==11.6.0
-azure-ai-projects==2.0.0b3
-azure-ai-agents==1.2.0b5
-agent-framework-core==1.0.0rc2
-agent-framework-azure-ai==1.0.0rc2
+azure-ai-projects==2.1.0
+azure-ai-inference==1.0.0b9
+agent-framework-core==1.3.0
+agent-framework-foundry==1.3.0
 azure-cosmos==4.15.0
 
 # Additional utilities
diff --git a/src/api/services/_patches/__init__.py b/src/api/services/_patches/__init__.py
new file mode 100644
index 000000000..9e04642ff
--- /dev/null
+++ b/src/api/services/_patches/__init__.py
@@ -0,0 +1 @@
+"""Runtime patches applied to third-party packages used by this app."""
diff --git a/src/api/services/_patches/agent_framework_search_citations.py b/src/api/services/_patches/agent_framework_search_citations.py
new file mode 100644
index 000000000..b13f33bc8
--- /dev/null
+++ b/src/api/services/_patches/agent_framework_search_citations.py
@@ -0,0 +1,175 @@
+"""Restore Azure AI Search ``get_url`` enrichment on streaming citations.
+
+Pre-GA ``agent-framework-azure-ai==1.0.0rc2`` enriched ``url_citation``
+streaming annotations with a per-document REST URL exposed under
+``additional_properties.get_url``. That subclass was removed when the
+``azure-ai`` package was retired at GA.
+
+In the GA ``agent_framework_openai._chat_client._parse_chunk_from_openai``:
+
+1. ``response.azure_ai_search_call_output.done`` events fall through to
+   the default ``case _:`` debug log, so the ``output.get_urls[]`` array
+   carrying per-document REST URLs is silently dropped.
+2. ``url_citation`` annotations (added by upstream PR #5071) emit only
+   ``title`` and ``url`` (the search-service root URL).
+
+This patch wraps that method to:
+
+1. Cache ``get_urls`` per-stream when seeing the search-call-output event.
+2. Inject ``additional_properties.get_url`` on ``url_citation`` annotations,
+   keyed by the ``doc_<N>`` title index (``annotation_index`` as fallback),
+   per the pre-GA contract read by citation extraction in ``chat_service.py``.
+
+Tracking upstream: https://github.com/microsoft/agent-framework/issues/5995
+Safe to remove once upstream ports the ``get_url`` enrichment into
+``agent_framework_openai`` or ``agent_framework_foundry``.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Any
+
+_DOC_INDEX_RE = re.compile(r"^doc_(\d+)$")
+
+logger = logging.getLogger(__name__)
+
+_PATCH_APPLIED = False
+_CACHE_ATTR = "_kmsa_search_get_urls_cache"
+_TARGET_METHOD = "_parse_chunk_from_openai"
+_TARGET_CLASS = "RawOpenAIChatClient"
+_UPSTREAM_ISSUE = "https://github.com/microsoft/agent-framework/issues/5995"
+
+
+def apply() -> None:
+    """Idempotently patch RawOpenAIChatClient._parse_chunk_from_openai.
+
+    Safe to call multiple times; once the patch is applied, later calls are
+    no-ops. Logs a warning (does not raise) if the package is missing or
+    upstream has renamed the target, so startup succeeds with degraded citations.
+    """
+    global _PATCH_APPLIED
+    if _PATCH_APPLIED:
+        return
+
+    try:
+        from agent_framework_openai import _chat_client as _cc
+    except ImportError:
+        logger.warning(
+            "agent_framework_openai not installed; "
+            "Azure AI Search citation patch skipped"
+        )
+        return
+
+    target_cls = getattr(_cc, _TARGET_CLASS, None)
+    if target_cls is None or not hasattr(target_cls, _TARGET_METHOD):
+        logger.warning(
+            "agent-framework upgrade broke citation patch: %s.%s no longer exists. "
+            "Per-document URLs (get_url) will be missing on Azure AI Search "
+            "citations. See %s",
+            _TARGET_CLASS, _TARGET_METHOD, _UPSTREAM_ISSUE,
+        )
+        return
+
+    _original = getattr(target_cls, _TARGET_METHOD)
+
+    def _patched(self: Any, event: Any, *args: Any, **kwargs: Any) -> Any:
+        event_type = getattr(event, "type", None)
+
+        # Reset per-stream cache so back-to-back requests on the same client
+        # instance don't cross-pollute citation enrichment.
+        if event_type in ("response.created", "response.in_progress"):
+            setattr(self, _CACHE_ATTR, [])
+
+        # Capture get_urls from azure_ai_search_call_output items on .done.
+        # The .added event for this item has output='[]'; the .done event
+        # carries the actual documents + get_urls.
+        if event_type == "response.output_item.done":
+            try:
+                done_item = getattr(event, "item", None)
+                if getattr(done_item, "type", None) == "azure_ai_search_call_output":
+                    output = getattr(done_item, "output", None)
+                    get_urls = getattr(output, "get_urls", None)
+                    if get_urls is None and isinstance(output, dict):
+                        get_urls = output.get("get_urls")
+                    if get_urls is None and isinstance(output, str):
+                        # Some SDK versions deliver `output` as a JSON string.
+                        import json as _json
+                        try:
+                            parsed = _json.loads(output)
+                            if isinstance(parsed, dict):
+                                get_urls = parsed.get("get_urls")
+                        except Exception:  # noqa: BLE001
+                            pass
+                    if get_urls:
+                        cache = getattr(self, _CACHE_ATTR, None)
+                        if cache is None:
+                            cache = []
+                            setattr(self, _CACHE_ATTR, cache)
+                        cache.extend(get_urls)
+            except Exception:  # noqa: BLE001 - defensive: never break streaming
+                logger.debug(
+                    "search-citation patch: failed to capture get_urls",
+                    exc_info=True,
+                )
+
+        result = _original(self, event, *args, **kwargs)
+
+        # Enrich url_citation annotations emitted by the base method's
+        # response.output_text.annotation.added branch.
+        if event_type == "response.output_text.annotation.added":
+            try:
+                cache = getattr(self, _CACHE_ATTR, None) or []
+                if cache:
+                    for content in (getattr(result, "contents", None) or []):
+                        for ann in (getattr(content, "annotations", None) or []):
+                            if not isinstance(ann, dict):
+                                continue
+                            if ann.get("type") != "citation":
+                                continue
+                            add_props = ann.get("additional_properties") or {}
+                            # Idempotent: do not overwrite if upstream ever ships
+                            # the fix and starts populating get_url itself.
+                            if add_props.get("get_url"):
+                                continue
+                            # Map by title "doc_<N>" where N is the index into
+                            # the search results (and thus into get_urls). The
+                            # model can cite the same doc multiple times, so
+                            # annotation_index (a running counter) is unreliable.
+                            title = ann.get("title") or ""
+                            m = _DOC_INDEX_RE.match(str(title))
+                            doc_idx = int(m.group(1)) if m else None
+                            if doc_idx is None:
+                                doc_idx = add_props.get("annotation_index")
+                            if isinstance(doc_idx, int) and 0 <= doc_idx < len(cache):
+                                add_props["get_url"] = cache[doc_idx]
+                                ann["additional_properties"] = add_props
+            except Exception:  # noqa: BLE001
+                logger.debug(
+                    "search-citation patch: failed to enrich annotation",
+                    exc_info=True,
+                )
+
+        # Release per-stream state once the response completes.
+        if event_type == "response.completed":
+            try:
+                if hasattr(self, _CACHE_ATTR):
+                    delattr(self, _CACHE_ATTR)
+            except Exception:  # noqa: BLE001
+                pass
+
+        return result
+
+    setattr(target_cls, _TARGET_METHOD, _patched)
+    _PATCH_APPLIED = True
+    logger.info(
+        "Applied Azure AI Search citation patch on %s.%s (workaround for %s)",
+        _TARGET_CLASS, _TARGET_METHOD, _UPSTREAM_ISSUE,
+    )
+
+
+# Apply on import so a single
+# `import services._patches.agent_framework_search_citations`
+# from chat_service.py is enough.
+apply()
diff --git a/src/api/services/chat_service.py b/src/api/services/chat_service.py
index ca972f898..afaa3d7d8 100644
--- a/src/api/services/chat_service.py
+++ b/src/api/services/chat_service.py
@@ -22,7 +22,12 @@
 
 from azure.ai.projects.aio import AIProjectClient
 
-from agent_framework.azure import AzureAIProjectAgentProvider
+from agent_framework import AgentSession
+from agent_framework_foundry import FoundryAgent
+# Restore Azure AI Search per-document URL enrichment on streaming citations
+# (regression at agent-framework GA; tracked upstream as microsoft/agent-framework#5995).
+# Must be imported BEFORE the first FoundryAgent.run() call.
+from services._patches import agent_framework_search_citations  # noqa: F401
 
 from cachetools import TTLCache
 
@@ -38,7 +43,7 @@
 # tool/structured_output overrides not being supported by AzureAIClient.
 # This can be made configurable via env var if needed for debugging.
 agent_log_level = os.getenv("AGENT_FRAMEWORK_LOG_LEVEL", "ERROR").upper()
-logging.getLogger("agent_framework.azure").setLevel(getattr(logging, agent_log_level, logging.ERROR))
+logging.getLogger("agent_framework.foundry").setLevel(getattr(logging, agent_log_level, logging.ERROR))
 
 
 class ExpCache(TTLCache):
@@ -136,9 +141,6 @@ async def stream_openai_text(self, conversation_id: str, query: str, user_id: st
                 if not query:
                     query = "Please provide a query."
 
-                # Create provider for agent management
-                provider = AzureAIProjectAgentProvider(project_client=project_client)
-
                 db_conn = await get_sqldb_connection()
                 custom_tool = SQLTool(conn=db_conn)
 
@@ -149,11 +151,12 @@ async def stream_openai_text(self, conversation_id: str, query: str, user_id: st
                     logger.info("Reusing existing thread %s for conversation %s",
                                 thread_conversation_id, conversation_id)
 
-                # Get agent with tools using provider
+                # Create agent using FoundryAgent
                 logger.info("Retrieving orchestrator agent: '%s'", self.orchestrator_agent_name)
-                agent = await provider.get_agent(
-                    name=self.orchestrator_agent_name,
-                    tools=custom_tool.get_sql_response
+                agent = FoundryAgent(
+                    project_client=project_client,
+                    agent_name=self.orchestrator_agent_name,
+                    tools=[custom_tool.get_sql_response]
                 )
                 logger.info("Orchestrator agent retrieved successfully: '%s'", self.orchestrator_agent_name)
 
@@ -179,7 +182,8 @@ def replace_citation_marker(match):
 
                 logger.info("Starting agent.run stream for conversation %s, thread %s",
                             conversation_id, thread_conversation_id)
-                async for chunk in agent.run(query, stream=True, conversation_id=thread_conversation_id):
+                session = AgentSession(service_session_id=thread_conversation_id)
+                async for chunk in agent.run(query, stream=True, session=session):
                     # Collect citations from Azure AI Search responses
                     for content in getattr(chunk, "contents", []):
                         annotations = getattr(content, "annotations", [])
@@ -212,7 +216,16 @@ def replace_citation_marker(match):
                     seen_doc_ids = set()  # Track unique document IDs to avoid duplicates
 
                     for citation in citations:
-                        get_url = (citation.get("additional_properties") or {}).get("get_url")
+                        add_props = citation.get("additional_properties") or {}
+                        # Prefer the per-document URL (additional_properties.get_url, populated
+                        # by the agent_framework_search_citations patch) over the top-level `url`,
+                        # which carries only the search-service root URL on GA agent-framework
+                        # (see microsoft/agent-framework#5995).
+                        get_url = (
+                            add_props.get("get_url")
+                            or citation.get("url")
+                            or add_props.get("url")
+                        )
                         url = get_url if get_url else 'N/A'
                         title = citation.get('title', 'N/A')
 
diff --git a/src/api/services/history_service.py b/src/api/services/history_service.py
index 9b35e6e80..01d1433a7 100644
--- a/src/api/services/history_service.py
+++ b/src/api/services/history_service.py
@@ -7,7 +7,7 @@
 from common.database.cosmosdb_service import CosmosConversationClient
 from helpers.azure_credential_utils import get_azure_credential_async, build_async_azure_credential
 
-from agent_framework.azure import AzureAIProjectAgentProvider
+from agent_framework_foundry import FoundryAgent
 
 logger = logging.getLogger(__name__)
 
@@ -73,11 +73,8 @@ async def generate_title(self, conversation_messages):
                 await get_azure_credential_async(client_id=self.azure_client_id) as credential,
                 AIProjectClient(endpoint=self.ai_project_endpoint, credential=credential) as project_client,
             ):
-                # Create provider for agent management
-                provider = AzureAIProjectAgentProvider(project_client=project_client)
-
-                # Get title agent using provider
-                agent = await provider.get_agent(name=self.title_agent_name)
+                # Create agent using FoundryAgent
+                agent = FoundryAgent(project_client=project_client, agent_name=self.title_agent_name)
 
                 # Generate title using agent
                 result = await agent.run(final_prompt)
diff --git a/src/tests/api/services/test_chat_service.py b/src/tests/api/services/test_chat_service.py
index 9f372483a..5ca280d47 100644
--- a/src/tests/api/services/test_chat_service.py
+++ b/src/tests/api/services/test_chat_service.py
@@ -173,11 +173,11 @@ def test_get_thread_cache(self, chat_service):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_success(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test successful streaming with valid query."""
@@ -212,9 +212,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -240,11 +238,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_empty_query(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test streaming with empty query - should use default query."""
@@ -277,9 +275,7 @@ async def mock_run(query, *args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -297,11 +293,11 @@ async def mock_run(query, *args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_with_citations(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test streaming with citations in response."""
@@ -343,9 +339,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -374,11 +368,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_with_citation_markers(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test streaming replaces citation markers correctly."""
@@ -409,9 +403,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -434,11 +426,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_with_citation_markers_without_dagger(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test streaming replaces citation markers that lack the † character."""
@@ -469,9 +461,7 @@ async def mock_run(*args, **kwargs):
 
         mock_agent.run = mock_run
 
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -494,11 +484,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_cached_thread(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test streaming with cached thread ID."""
@@ -526,16 +516,14 @@ async def test_stream_openai_text_cached_thread(
         mock_chunk.text = "Response"
         mock_chunk.contents = []
         
-        async def mock_run(query, stream=False, conversation_id=None):
+        async def mock_run(query, stream=False, session=None):
             # Verify cached thread ID is used
-            assert conversation_id == "cached-thread-id"
+            assert session is not None and session.service_session_id == "cached-thread-id"
             yield mock_chunk
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -554,11 +542,11 @@ async def mock_run(query, stream=False, conversation_id=None):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_rate_limit_error(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test handling of rate limit errors."""
@@ -594,9 +582,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         # Execute and verify HTTPException with 429 status
         with pytest.raises(HTTPException) as exc_info:
@@ -609,11 +595,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_general_exception(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test handling of general exceptions."""
@@ -637,9 +623,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
@@ -656,11 +640,11 @@ async def mock_run(*args, **kwargs):
     @pytest.mark.asyncio
     @patch("services.chat_service.SQLTool")
     @patch("services.chat_service.get_sqldb_connection", new_callable=AsyncMock)
-    @patch("services.chat_service.AzureAIProjectAgentProvider")
+    @patch("services.chat_service.FoundryAgent")
     @patch("services.chat_service.AIProjectClient")
     @patch("services.chat_service.get_azure_credential_async", new_callable=AsyncMock)
     async def test_stream_openai_text_no_response(
-        self, mock_credential, mock_project_client_class, mock_provider_class,
+        self, mock_credential, mock_project_client_class, mock_foundry_agent_class,
         mock_sqldb_conn, mock_sql_tool, chat_service
     ):
         """Test handling when agent returns no text."""
@@ -692,9 +676,7 @@ async def mock_run(*args, **kwargs):
         
         mock_agent.run = mock_run
         
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
-        mock_provider_class.return_value = mock_provider
+        mock_foundry_agent_class.return_value = mock_agent
 
         mock_sqldb_conn.return_value = AsyncMock()
         mock_tool_instance = MagicMock()
diff --git a/src/tests/api/services/test_history_service.py b/src/tests/api/services/test_history_service.py
index a598d2f85..216bfd408 100644
--- a/src/tests/api/services/test_history_service.py
+++ b/src/tests/api/services/test_history_service.py
@@ -93,15 +93,11 @@ async def test_generate_title(self, history_service):
         # Mock agent
         mock_agent = MagicMock()
         mock_agent.run = AsyncMock(return_value=mock_result)
-        
-        # Mock provider
-        mock_provider = MagicMock()
-        mock_provider.get_agent = AsyncMock(return_value=mock_agent)
 
         with patch("services.history_service.get_azure_credential_async", new_callable=AsyncMock) as mock_get_cred:
             mock_get_cred.return_value = mock_credential
             with patch("services.history_service.AIProjectClient", return_value=mock_project_client):
-                with patch("services.history_service.AzureAIProjectAgentProvider", return_value=mock_provider):
+                with patch("services.history_service.FoundryAgent", return_value=mock_agent):
                     result = await history_service.generate_title(conversation_messages)
                     assert result == "Billing Help Request"
                     mock_agent.run.assert_called_once()

From d3071ebbe306a4609217d796d7d5dc1b6aac3e43 Mon Sep 17 00:00:00 2001
From: Harsh-Microsoft <v-hbangera@microsoft.com>
Date: Fri, 22 May 2026 12:52:32 +0530
Subject: [PATCH 2/2] chore: pin agent-framework-openai==1.3.0 explicitly

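The services/_patches/agent_framework_search_citations.py monkey-patch
overrides a private method of agent_framework_openai
(RawOpenAIChatClient._parse_chunk_from_openai), so its behavior depends
on the exact installed version of that package. Pin the version
explicitly instead of relying on transitive resolution via
agent-framework-foundry.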

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/api/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/api/requirements.txt b/src/api/requirements.txt
index 0bbd4a80c..06b62be49 100644
--- a/src/api/requirements.txt
+++ b/src/api/requirements.txt
@@ -18,6 +18,7 @@ azure-ai-projects==2.1.0
 azure-ai-inference==1.0.0b9
 agent-framework-core==1.3.0
 agent-framework-foundry==1.3.0
+agent-framework-openai==1.3.0
 azure-cosmos==4.15.0
 
 # Additional utilities