Merge pull request #866 from microsoft/dev

Roopan-Microsoft · web-flow · commit bcdb2807f2c5 · 2026-06-08T11:14:37.000+05:30
chore: dev to main merge
diff --git a/infra/vscode_web/requirements.txt b/infra/vscode_web/requirements.txt
@@ -1,3 +1,3 @@
-azure-ai-projects==2.0.0b3
+azure-ai-projects==2.1.0
 azure-identity==1.20.0
 ansible-core~=2.17.0
diff --git a/src/backend/orchestrator.py b/src/backend/orchestrator.py
@@ -22,14 +22,12 @@
 from typing import AsyncIterator, Optional, cast
 
 from agent_framework import (
-    ChatMessage,
-    HandoffBuilder,
-    HandoffAgentUserRequest,
-    RequestInfoEvent,
-    WorkflowOutputEvent,
-    WorkflowStatusEvent,
+    Agent,
+    Message,
+    WorkflowEventType,
 )
-from agent_framework.azure import AzureOpenAIChatClient
+from agent_framework.orchestrations import HandoffBuilder, HandoffAgentUserRequest
+from agent_framework.openai import OpenAIChatCompletionClient
 from azure.identity import DefaultAzureCredential
 
 # Foundry imports - only used when USE_FOUNDRY=true
@@ -48,6 +46,11 @@
 # Token endpoint for Azure Cognitive Services (used for Azure OpenAI)
 TOKEN_ENDPOINT = "https://cognitiveservices.azure.com/.default"
 
+# Event type constants for type-safe dispatch (avoids string typos)
+EVENT_STATUS: WorkflowEventType = "status"
+EVENT_REQUEST_INFO: WorkflowEventType = "request_info"
+EVENT_OUTPUT: WorkflowEventType = "output"
+
 
 # Harmful content patterns to detect in USER INPUT before processing
 # This provides proactive content safety by blocking harmful requests at the input layer
@@ -120,9 +123,9 @@ def _check_input_for_harmful_content(message: str) -> tuple[bool, str]:
     r"You are a Text Content Agent",
     r"You are an Image Content Agent",
     r"You are a Compliance Agent",
-    # Handoff instructions
-    r"hand off to \w+_agent",
-    r"hand back to \w+_agent",
+    # Handoff instructions (match both underscore and hyphen agent names)
+    r"hand off to [\w\-]+[_\-]agent",
+    r"hand back to [\w\-]+[_\-]agent",
     r"may hand off to",
     r"After (?:generating|completing|validation|parsing)",
     # Internal workflow markers
@@ -139,8 +142,8 @@ def _check_input_for_harmful_content(message: str) -> tuple[bool, str]:
     # RAI internal instructions
     r"NEVER generate images that contain:",
     r"Responsible AI - Image Generation Rules",
-    # Agent framework references
-    r"compliance_agent|triage_agent|planning_agent|research_agent|text_content_agent|image_content_agent",
+    # Agent framework references (match both underscore and hyphen separators)
+    r"compliance[_\-]agent|triage[_\-]agent|planning[_\-]agent|research[_\-]agent|text[_\-]content[_\-]agent|image[_\-]content[_\-]agent",
 ]
 
 _SYSTEM_PROMPT_PATTERNS_COMPILED = [re.compile(pattern, re.IGNORECASE | re.DOTALL) for pattern in SYSTEM_PROMPT_PATTERNS]
@@ -485,7 +488,7 @@ class ContentGenerationOrchestrator:
     Microsoft Agent Framework's HandoffBuilder.
 
     Supports two modes:
-    1. Azure OpenAI Direct (default): Uses AzureOpenAIChatClient with ad_token_provider
+    1. Azure OpenAI Direct (default): Uses OpenAIChatCompletionClient with DefaultAzureCredential
     2. Azure AI Foundry: Uses AIProjectClient with project endpoint (set USE_FOUNDRY=true)
 
     Agents:
@@ -498,7 +501,7 @@ class ContentGenerationOrchestrator:
     """
 
     def __init__(self):
-        self._chat_client = None  # Always AzureOpenAIChatClient
+        self._chat_client = None  # OpenAIChatCompletionClient instance
         self._project_client = None  # AIProjectClient for Foundry mode (used for image generation)
         self._agents: dict = {}
         self._rai_agent = None
@@ -536,45 +539,34 @@ def _get_chat_client(self):
                 # Store the project client for image generation
                 self._project_client = project_client
 
-                # For chat completions, use the direct Azure OpenAI endpoint
                 # The Foundry project uses Azure OpenAI under the hood, and we need the direct endpoint
                 # to properly authenticate with Cognitive Services token
                 azure_endpoint = app_settings.azure_openai.endpoint
                 if not azure_endpoint:
                     raise ValueError("AZURE_OPENAI_ENDPOINT is required for Foundry mode chat completions")
 
-                def get_token() -> str:
-                    """Token provider callable - invoked for each request to ensure fresh tokens."""
-                    token = self._credential.get_token(TOKEN_ENDPOINT)
-                    return token.token
-
                 model_deployment = app_settings.ai_foundry.model_deployment or app_settings.azure_openai.gpt_model
                 api_version = app_settings.azure_openai.api_version
 
                 logger.info(f"Foundry mode using Azure OpenAI endpoint: {azure_endpoint}, deployment: {model_deployment}")
-                self._chat_client = AzureOpenAIChatClient(
-                    endpoint=azure_endpoint,
-                    deployment_name=model_deployment,
+                self._chat_client = OpenAIChatCompletionClient(
+                    azure_endpoint=azure_endpoint,
+                    model=model_deployment,
                     api_version=api_version,
-                    ad_token_provider=get_token,
+                    credential=self._credential,
                 )
             else:
                 # Azure OpenAI Direct mode
                 endpoint = app_settings.azure_openai.endpoint
                 if not endpoint:
                     raise ValueError("AZURE_OPENAI_ENDPOINT is not configured")
 
-                def get_token() -> str:
-                    """Token provider callable - invoked for each request to ensure fresh tokens."""
-                    token = self._credential.get_token(TOKEN_ENDPOINT)
-                    return token.token
-
-                logger.info("Using Azure OpenAI Direct mode with ad_token_provider")
-                self._chat_client = AzureOpenAIChatClient(
-                    endpoint=endpoint,
-                    deployment_name=app_settings.azure_openai.gpt_model,
+                logger.info("Using Azure OpenAI Direct mode with DefaultAzureCredential")
+                self._chat_client = OpenAIChatCompletionClient(
+                    azure_endpoint=endpoint,
+                    model=app_settings.azure_openai.gpt_model,
                     api_version=app_settings.azure_openai.api_version,
-                    ad_token_provider=get_token,
+                    credential=self._credential,
                 )
         return self._chat_client
 
@@ -589,40 +581,60 @@ def initialize(self) -> None:
         # Get the chat client
         chat_client = self._get_chat_client()
 
-        # Agent names - use underscores (AzureOpenAIChatClient works with both modes now)
+        # Agent names - always use underscores so that instruction strings
+        # (TRIAGE_INSTRUCTIONS, *_CONTENT_INSTRUCTIONS, etc.) and the
+        # SYSTEM_PROMPT_PATTERNS leakage-detection regexes stay in sync.
+        # Foundry workflows accept underscore names; no hyphen conversion needed.
         name_sep = "_"
 
         # Create all agents
-        triage_agent = chat_client.create_agent(
+        # NOTE: Handoff workflow participants must set
+        # require_per_service_call_history_persistence=True so local conversation
+        # history stays consistent with the service across handoff tool-call
+        # short-circuits (required by agent_framework.orchestrations.HandoffBuilder).
+        triage_agent = Agent(
+            client=chat_client,
             name=f"triage{name_sep}agent",
             instructions=TRIAGE_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        planning_agent = chat_client.create_agent(
+        planning_agent = Agent(
+            client=chat_client,
             name=f"planning{name_sep}agent",
             instructions=PLANNING_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        research_agent = chat_client.create_agent(
+        research_agent = Agent(
+            client=chat_client,
             name=f"research{name_sep}agent",
             instructions=RESEARCH_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        text_content_agent = chat_client.create_agent(
+        text_content_agent = Agent(
+            client=chat_client,
             name=f"text{name_sep}content{name_sep}agent",
             instructions=TEXT_CONTENT_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        image_content_agent = chat_client.create_agent(
+        image_content_agent = Agent(
+            client=chat_client,
             name=f"image{name_sep}content{name_sep}agent",
             instructions=IMAGE_CONTENT_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        compliance_agent = chat_client.create_agent(
+        compliance_agent = Agent(
+            client=chat_client,
             name=f"compliance{name_sep}agent",
             instructions=COMPLIANCE_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
-        self._rai_agent = chat_client.create_agent(
+        self._rai_agent = Agent(
+            client=chat_client,
             name=f"rai{name_sep}agent",
             instructions=RAI_INSTRUCTIONS,
         )
@@ -636,7 +648,7 @@ def initialize(self) -> None:
             "compliance": compliance_agent,
         }
 
-        # Workflow name - Foundry requires hyphens
+        # Workflow name
         workflow_name = f"content{name_sep}generation{name_sep}workflow"
 
         # Build the handoff workflow
@@ -736,35 +748,35 @@ async def process_message(
                 events.append(event)
 
                 # Handle different event types from the workflow
-                if isinstance(event, WorkflowStatusEvent):
+                if event.type == EVENT_STATUS:
+                    status_name = event.state.name if event.state else str(event.data)
                     yield {
                         "type": "status",
-                        "content": event.state.name,
+                        "content": status_name,
                         "is_final": False,
                         "metadata": {"conversation_id": conversation_id}
                     }
 
-                elif isinstance(event, RequestInfoEvent):
+                elif event.type == EVENT_REQUEST_INFO:
                     # Workflow is requesting user input
                     if isinstance(event.data, HandoffAgentUserRequest):
-                        # Extract conversation history from agent_response.messages (updated API)
-                        messages = event.data.agent_response.messages if hasattr(event.data, 'agent_response') and event.data.agent_response else []
-                        if not isinstance(messages, list):
-                            messages = [messages] if messages else []
+                        # Extract conversation history from agent_response.messages
+                        agent_resp = event.data.agent_response
+                        messages = list(agent_resp.messages) if agent_resp and agent_resp.messages else []
 
                         conversation_text = "\n".join([
                             f"{msg.author_name or msg.role.value}: {msg.text}"
                             for msg in messages
                         ])
 
                         # Get the last message content and filter any system prompt leakage
-                        last_msg_content = messages[-1].text if messages else (event.data.agent_response.text if hasattr(event.data, 'agent_response') and event.data.agent_response else "")
+                        last_msg_content = messages[-1].text if messages else (agent_resp.text if agent_resp else "")
                         last_msg_content = _filter_system_prompt_from_response(last_msg_content)
-                        last_msg_agent = messages[-1].author_name if messages and hasattr(messages[-1], 'author_name') else "unknown"
+                        last_msg_agent = messages[-1].author_name if messages else "unknown"
 
                         yield {
                             "type": "agent_response",
-                            "agent": last_msg_agent,
+                            "agent": last_msg_agent or "unknown",
                             "content": last_msg_content,
                             "conversation_history": conversation_text,
                             "is_final": False,
@@ -773,9 +785,8 @@ async def process_message(
                             "metadata": {"conversation_id": conversation_id}
                         }
 
-                elif isinstance(event, WorkflowOutputEvent):
-                    # Final output from the workflow
-                    conversation = cast(list[ChatMessage], event.data)
+                elif event.type == EVENT_OUTPUT:
+                    conversation = cast(list[Message], event.data)
                     if isinstance(conversation, list) and conversation:
                         # Get the last assistant message as the final response
                         assistant_messages = [
@@ -841,38 +852,38 @@ async def send_user_response(
         try:
             responses = {request_id: user_response}
             async for event in self._workflow.send_responses_streaming(responses):
-                if isinstance(event, WorkflowStatusEvent):
+                if event.type == EVENT_STATUS:
+                    status_name = event.state.name if event.state else str(event.data)
                     yield {
                         "type": "status",
-                        "content": event.state.name,
+                        "content": status_name,
                         "is_final": False,
                         "metadata": {"conversation_id": conversation_id}
                     }
 
-                elif isinstance(event, RequestInfoEvent):
+                elif event.type == EVENT_REQUEST_INFO:
                     if isinstance(event.data, HandoffAgentUserRequest):
-                        # Get messages from agent_response (updated API)
-                        messages = event.data.agent_response.messages if hasattr(event.data, 'agent_response') and event.data.agent_response else []
-                        if not isinstance(messages, list):
-                            messages = [messages] if messages else []
+                        # Get messages from agent_response
+                        agent_resp = event.data.agent_response
+                        messages = list(agent_resp.messages) if agent_resp and agent_resp.messages else []
 
                         # Get the last message content and filter any system prompt leakage
-                        last_msg_content = messages[-1].text if messages else (event.data.agent_response.text if hasattr(event.data, 'agent_response') and event.data.agent_response else "")
+                        last_msg_content = messages[-1].text if messages else (agent_resp.text if agent_resp else "")
                         last_msg_content = _filter_system_prompt_from_response(last_msg_content)
-                        last_msg_agent = messages[-1].author_name if messages and hasattr(messages[-1], 'author_name') else "unknown"
+                        last_msg_agent = messages[-1].author_name if messages else "unknown"
 
                         yield {
                             "type": "agent_response",
-                            "agent": last_msg_agent,
+                            "agent": last_msg_agent or "unknown",
                             "content": last_msg_content,
                             "is_final": False,
                             "requires_user_input": True,
                             "request_id": event.request_id,
                             "metadata": {"conversation_id": conversation_id}
                         }
 
-                elif isinstance(event, WorkflowOutputEvent):
-                    conversation = cast(list[ChatMessage], event.data)
+                elif event.type == EVENT_OUTPUT:
+                    conversation = cast(list[Message], event.data)
                     if isinstance(conversation, list) and conversation:
                         assistant_messages = [
                             msg for msg in conversation
diff --git a/src/backend/requirements.txt b/src/backend/requirements.txt
@@ -6,8 +6,9 @@ quart-cors>=0.7.0
 hypercorn>=0.17.0
 
 # Microsoft Agent Framework
-agent-framework-azure-ai==1.0.0b260114
-agent-framework-core==1.0.0b260114
+agent-framework-foundry==1.1.1
+agent-framework-core==1.1.1
+agent-framework-orchestrations==1.0.0b260421
 
 # OpenTelemetry (required by agent-framework)
 opentelemetry-semantic-conventions-ai==0.4.13
@@ -18,7 +19,7 @@ azure-cosmos>=4.7.0
 azure-storage-blob>=12.22.0
 azure-search-documents>=11.4.0
 azure-ai-contentsafety>=1.0.0
-azure-ai-projects==2.0.0b3  # Azure AI Foundry SDK (optional, for USE_FOUNDRY=true)
+azure-ai-projects==2.1.0  # Azure AI Foundry SDK (optional, for USE_FOUNDRY=true)
 
 # OpenAI
 openai>=1.45.0
diff --git a/src/backend/services/title_service.py b/src/backend/services/title_service.py
@@ -9,16 +9,14 @@
 import re
 from typing import Optional
 
-from agent_framework.azure import AzureOpenAIChatClient
+from agent_framework import Agent
+from agent_framework.openai import OpenAIChatCompletionClient
 from azure.identity import DefaultAzureCredential
 
 from settings import app_settings
 
 logger = logging.getLogger(__name__)
 
-# Token endpoint for Azure OpenAI authentication
-TOKEN_ENDPOINT = "https://cognitiveservices.azure.com/.default"
-
 # Title generation instructions (from MS reference accelerator)
 TITLE_INSTRUCTIONS = """Summarize the conversation so far into a 4-word or less title.
 Do not use any quotation marks or punctuation.
@@ -57,20 +55,15 @@ def initialize(self) -> None:
 
             api_version = app_settings.azure_openai.api_version
 
-            # Create token provider function
-            def get_token() -> str:
-                """Token provider callable - invoked for each request to ensure fresh tokens."""
-                token = self._credential.get_token(TOKEN_ENDPOINT)
-                return token.token
-
-            chat_client = AzureOpenAIChatClient(
-                endpoint=endpoint,
-                deployment_name=deployment,
+            chat_client = OpenAIChatCompletionClient(
+                azure_endpoint=endpoint,
+                model=deployment,
                 api_version=api_version,
-                ad_token_provider=get_token,
+                credential=self._credential,
             )
 
-            self._agent = chat_client.create_agent(
+            self._agent = Agent(
+                client=chat_client,
                 name="title_agent",
                 instructions=TITLE_INSTRUCTIONS,
             )
diff --git a/src/tests/services/test_orchestrator.py b/src/tests/services/test_orchestrator.py
diff --git a/src/tests/test_title_generation_service.py b/src/tests/test_title_generation_service.py