From 27261c7ecc81fdbc71e2a77f1a065e0c296ac197 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 17:50:25 +0530 Subject: [PATCH 01/24] increase the tokens --- src/processor/src/libs/base/orchestrator_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processor/src/libs/base/orchestrator_base.py b/src/processor/src/libs/base/orchestrator_base.py index fbcb39e2..9203772f 100644 --- a/src/processor/src/libs/base/orchestrator_base.py +++ b/src/processor/src/libs/base/orchestrator_base.py @@ -183,7 +183,7 @@ async def create_agents( builder .with_temperature(0.0) .with_response_format(ManagerSelectionResponse) - .with_max_tokens(4_000) + .with_max_tokens(10_000) .with_tools(agent_info.tools) # for checking file existence ) elif agent_info.agent_name == "ResultGenerator": From 49eb5cdf33d9a3b4c70a71762ecf1c9e389a6c5d Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Thu, 11 Jun 2026 21:19:20 +0530 Subject: [PATCH 02/24] Upgrade agent-framework to 1.3.0 and azure-ai-projects to 2.1.0 - Update agent-framework from 1.0.0b260107 to 1.3.0 in pyproject.toml - Update azure-ai-projects from 1.0.0b12 to 2.1.0 in requirements.txt - Migrate ChatAgent to Agent (client=, default_options=ChatOptions) - Migrate agent_framework.azure to agent_framework.openai module paths - Migrate ChatMessage to Message with Content.from_text() - Migrate Role enum to string literals - Migrate AgentRunContext to AgentContext - Migrate WorkflowBuilder to new API (start_executor=, add_chain) - Migrate event handling from isinstance checks to WorkflowEvent.type - Migrate GroupChatBuilder to agent_framework.orchestrations module - Migrate ContextProvider to before_run/after_run interface - Remove ToolProtocol (use Any), AgentProtocol (use SupportsAgentRun) - Define ManagerSelectionResponse locally (removed from framework) - Update MCP tool files for Agent import - Update all unit tests for new APIs (812 tests passing) - Update 
docs/ProcessFrameworkGuide.md with new WorkflowBuilder example - Update docs/LocalDevelopmentSetup.md prerelease note - Regenerate uv.lock Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/LocalDevelopmentSetup.md | 2 +- docs/ProcessFrameworkGuide.md | 20 +- infra/vscode_web/codeSample.py | 19 +- infra/vscode_web/requirements.txt | 2 +- src/processor/pyproject.toml | 2 +- .../src/libs/agent_framework/agent_builder.py | 264 ++++++++++----- .../agent_framework/agent_framework_helper.py | 88 ++--- .../src/libs/agent_framework/agent_info.py | 5 +- .../agent_framework/agent_speaking_capture.py | 4 +- .../azure_openai_response_retry.py | 159 +-------- .../agent_framework/groupchat_orchestrator.py | 93 +++--- .../src/libs/agent_framework/middlewares.py | 18 +- .../shared_memory_context_provider.py | 70 ++-- .../src/libs/base/orchestrator_base.py | 15 +- .../src/libs/mcp_server/MCPBlobIOTool.py | 14 +- .../src/libs/mcp_server/MCPDatetimeTool.py | 12 +- .../src/libs/mcp_server/MCPMicrosoftDocs.py | 8 +- .../orchestration/analysis_orchestrator.py | 6 +- .../yaml_convert_orchestrator.py | 5 +- .../orchestration/design_orchestrator.py | 5 +- .../documentation_orchestrator.py | 5 +- .../src/steps/migration_processor.py | 98 +++--- .../agent_framework/test_agent_builder.py | 25 +- .../test_agent_framework_helper.py | 75 +++-- .../test_groupchat_orchestrator_internals.py | 89 +++-- .../test_input_observer_middleware.py | 6 +- .../test_middlewares_extras.py | 8 +- .../test_shared_memory_context_provider.py | 129 +++++--- .../steps/test_migration_processor_run.py | 95 +++--- src/processor/uv.lock | 305 +++++++++++++++--- 30 files changed, 900 insertions(+), 746 deletions(-) diff --git a/docs/LocalDevelopmentSetup.md b/docs/LocalDevelopmentSetup.md index dc82ed3f..7df64705 100644 --- a/docs/LocalDevelopmentSetup.md +++ b/docs/LocalDevelopmentSetup.md @@ -337,7 +337,7 @@ py -3.12 -m uv venv .venv py -3.12 -m uv sync --prerelease=allow ``` -> **⚠️ 
Important**: This repo currently depends on a prerelease/dev version of Microsoft Agent Framework. Always run `uv sync --prerelease=allow` (or `py -3.12 -m uv sync --prerelease=allow` on Windows) after creating the virtual environment to install all required dependencies. Missing dependencies will cause runtime errors like `ModuleNotFoundError: No module named 'pydantic'` or DNS resolution failures. +> **⚠️ Important**: Always run `uv sync --prerelease=allow` (or `py -3.12 -m uv sync --prerelease=allow` on Windows) after creating the virtual environment to install all required dependencies. The `--prerelease=allow` flag is needed because some transitive dependencies may still use pre-release versions. Missing dependencies will cause runtime errors like `ModuleNotFoundError: No module named 'pydantic'` or DNS resolution failures. ### 5.4. Run the Processor diff --git a/docs/ProcessFrameworkGuide.md b/docs/ProcessFrameworkGuide.md index 575d4a1c..8732ec49 100644 --- a/docs/ProcessFrameworkGuide.md +++ b/docs/ProcessFrameworkGuide.md @@ -118,23 +118,21 @@ Inside each step, the orchestrator can use multi-agent patterns (maker-checker l - Implementation: [src/processor/src/steps/migration_processor.py](../src/processor/src/steps/migration_processor.py) - The processor creates a workflow with `WorkflowBuilder`. -- It registers four executors, sets the start executor, and defines edges. +- It instantiates four executors, passes the start executor to `WorkflowBuilder`, and chains them with `add_chain`. 
Example from the repo (simplified): ```python from agent_framework import WorkflowBuilder +analysis_exec = AnalysisExecutor(id="analysis", app_context=app_context) +design_exec = DesignExecutor(id="design", app_context=app_context) +yaml_exec = YamlConvertExecutor(id="yaml", app_context=app_context) +docs_exec = DocumentationExecutor(id="documentation", app_context=app_context) + workflow = ( - WorkflowBuilder() - .register_executor(lambda: AnalysisExecutor(id="analysis", app_context=app_context), name="analysis") - .register_executor(lambda: DesignExecutor(id="design", app_context=app_context), name="design") - .register_executor(lambda: YamlConvertExecutor(id="yaml", app_context=app_context), name="yaml") - .register_executor(lambda: DocumentationExecutor(id="documentation", app_context=app_context), name="documentation") - .set_start_executor("analysis") - .add_edge("analysis", "design") - .add_edge("design", "yaml") - .add_edge("yaml", "documentation") + WorkflowBuilder(start_executor=analysis_exec) + .add_chain([analysis_exec, design_exec, yaml_exec, docs_exec]) .build() ) ``` @@ -355,7 +353,7 @@ To run processor unit tests locally (example): ```bash cd src/processor -uv run --prerelease=allow python -m pytest src/processor/src/tests/unit -v +uv run python -m pytest src/tests/unit -v ``` ## Extending the pipeline diff --git a/infra/vscode_web/codeSample.py b/infra/vscode_web/codeSample.py index 37224009..288a5084 100644 --- a/infra/vscode_web/codeSample.py +++ b/infra/vscode_web/codeSample.py @@ -1,3 +1,4 @@ +from azure.ai.agents.models import ListSortOrder from azure.ai.projects import AIProjectClient from azure.identity import DefaultAzureCredential @@ -7,19 +8,25 @@ agent = project_client.agents.get_agent("<%= agentId %>") -thread = project_client.agents.create_thread() +thread = project_client.agents.threads.create() print(f"Created thread, ID: {thread.id}") -message = project_client.agents.create_message( +message = project_client.agents.messages.create( 
thread_id=thread.id, role="user", content="<%= userMessage %>" ) -run = project_client.agents.create_and_process_run( +run = project_client.agents.runs.create_and_process( thread_id=thread.id, agent_id=agent.id) -messages = project_client.agents.list_messages(thread_id=thread.id) -for text_message in messages.text_messages: - print(text_message.as_dict()) +if run.status == "failed": + print(f"Run failed: {run.last_error}") +else: + messages = project_client.agents.messages.list( + thread_id=thread.id, order=ListSortOrder.ASCENDING) + + for message in messages: + if message.text_messages: + print(f"{message.role}: {message.text_messages[-1].text.value}") diff --git a/infra/vscode_web/requirements.txt b/infra/vscode_web/requirements.txt index 18d6803e..d7ff98e4 100644 --- a/infra/vscode_web/requirements.txt +++ b/infra/vscode_web/requirements.txt @@ -1,3 +1,3 @@ -azure-ai-projects==1.0.0b12 +azure-ai-projects==2.1.0 azure-identity==1.20.0 ansible-core~=2.17.0 \ No newline at end of file diff --git a/src/processor/pyproject.toml b/src/processor/pyproject.toml index b6c2c2aa..d72e9c5f 100644 --- a/src/processor/pyproject.toml +++ b/src/processor/pyproject.toml @@ -5,7 +5,7 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.12" dependencies = [ - "agent-framework==1.0.0b260107", + "agent-framework==1.3.0", "aiohttp==3.13.5", "art==6.5", "azure-ai-agents==1.2.0b5", diff --git a/src/processor/src/libs/agent_framework/agent_builder.py b/src/processor/src/libs/agent_framework/agent_builder.py index 8b9c629e..888e2dae 100644 --- a/src/processor/src/libs/agent_framework/agent_builder.py +++ b/src/processor/src/libs/agent_framework/agent_builder.py @@ -1,26 +1,68 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-"""Fluent builder for constructing ChatAgent instances with chainable configuration.""" +"""Fluent builder for constructing Agent instances with chainable configuration.""" +import logging from collections.abc import Callable, MutableMapping, Sequence from typing import Any, Literal from agent_framework import ( - AggregateContextProvider, - ChatAgent, - ChatClientProtocol, - ChatMessageStoreProtocol, + Agent, + AgentMiddleware, + ChatMiddleware, + ChatOptions, ContextProvider, - Middleware, + HistoryProvider, + SupportsChatGetResponse, ToolMode, - ToolProtocol, ) from pydantic import BaseModel from libs.agent_framework.agent_info import AgentInfo from utils.credential_util import get_bearer_token_provider +logger = logging.getLogger(__name__) + +# Reasoning models that do not support custom temperature/top_p values. +_REASONING_MODEL_PREFIXES = ("o1", "o3", "o4", "gpt-5") + + +def is_reasoning_model(model_name: str) -> bool: + """Check if a model is a reasoning model based on its deployment name.""" + name = model_name.lower() + return any(name.startswith(prefix) for prefix in _REASONING_MODEL_PREFIXES) + + +def resolve_model_name(client: Any, model_id: str | None = None) -> str | None: + """Extract model/deployment name from the client or explicit model_id.""" + if model_id: + return model_id + for attr in ("model", "_model", "model_id"): + val = getattr(client, attr, None) + if isinstance(val, str): + return val + return None + + +def _strip_unsupported_reasoning_params( + options: dict[str, Any], model_name: str | None +) -> dict[str, Any]: + """Remove temperature and top_p for reasoning models that don't support them.""" + if model_name and is_reasoning_model(model_name): + removed = [] + for param in ("temperature", "top_p"): + if param in options: + del options[param] + removed.append(param) + if removed: + logger.info( + "Stripped unsupported params %s for reasoning model '%s'", + removed, + model_name, + ) + return options + class AgentBuilder: """Fluent 
builder for creating ChatAgent instances with a chainable API. @@ -59,7 +101,7 @@ class AgentBuilder: ) """ - def __init__(self, chat_client: ChatClientProtocol): + def __init__(self, chat_client: SupportsChatGetResponse): """Initialize the builder with a chat client. Args: @@ -70,14 +112,15 @@ def __init__(self, chat_client: ChatClientProtocol): self._id: str | None = None self._name: str | None = None self._description: str | None = None - self._chat_message_store_factory: ( - Callable[[], ChatMessageStoreProtocol] | None - ) = None + self._chat_message_store_factory: Callable[[], HistoryProvider] | None = None self._conversation_id: str | None = None - self._context_providers: ( - ContextProvider | list[ContextProvider] | AggregateContextProvider | None + self._context_providers: ContextProvider | list[ContextProvider] | None = None + self._middleware: ( + AgentMiddleware + | ChatMiddleware + | list[AgentMiddleware | ChatMiddleware] + | None ) = None - self._middleware: Middleware | list[Middleware] | None = None self._frequency_penalty: float | None = None self._logit_bias: dict[str | int, float] | None = None self._max_tokens: int | None = None @@ -93,10 +136,10 @@ def __init__(self, chat_client: ChatClientProtocol): ToolMode | Literal["auto", "required", "none"] | dict[str, Any] | None ) = "auto" self._tools: ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] | None ) = None self._top_p: float | None = None @@ -178,10 +221,10 @@ def with_max_tokens(self, max_tokens: int) -> "AgentBuilder": def with_tools( self, - tools: ToolProtocol + tools: Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]], + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]], ) -> "AgentBuilder": """Set the tools available to the agent. 
@@ -210,7 +253,10 @@ def with_tool_choice( return self def with_middleware( - self, middleware: Middleware | list[Middleware] + self, + middleware: AgentMiddleware + | ChatMiddleware + | list[AgentMiddleware | ChatMiddleware], ) -> "AgentBuilder": """Set middleware for request/response processing. @@ -225,9 +271,7 @@ def with_middleware( def with_context_providers( self, - context_providers: ContextProvider - | list[ContextProvider] - | AggregateContextProvider, + context_providers: ContextProvider | list[ContextProvider], ) -> "AgentBuilder": """Set context providers for additional conversation context. @@ -385,7 +429,7 @@ def with_store(self, store: bool) -> "AgentBuilder": return self def with_message_store_factory( - self, factory: Callable[[], ChatMessageStoreProtocol] + self, factory: Callable[[], HistoryProvider] ) -> "AgentBuilder": """Set the message store factory. @@ -422,11 +466,11 @@ def with_kwargs(self, **kwargs: Any) -> "AgentBuilder": self._kwargs.update(kwargs) return self - def build(self) -> ChatAgent: - """Build and return the configured ChatAgent. + def build(self) -> Agent: + """Build and return the configured Agent. Returns: - ChatAgent: Configured agent instance ready for use + Agent: Configured agent instance ready for use Example: .. 
code-block:: python @@ -442,32 +486,55 @@ def build(self) -> ChatAgent: async with agent: response = await agent.run("Hello!") """ - return ChatAgent( - chat_client=self._chat_client, + options: dict[str, Any] = {} + if self._frequency_penalty is not None: + options["frequency_penalty"] = self._frequency_penalty + if self._logit_bias is not None: + options["logit_bias"] = self._logit_bias + if self._max_tokens is not None: + options["max_tokens"] = self._max_tokens + if self._metadata is not None: + options["metadata"] = self._metadata + if self._model_id is not None: + options["model"] = self._model_id + if self._presence_penalty is not None: + options["presence_penalty"] = self._presence_penalty + if self._response_format is not None: + options["response_format"] = self._response_format + if self._seed is not None: + options["seed"] = self._seed + if self._stop is not None: + options["stop"] = self._stop + if self._store is not None: + options["store"] = self._store + if self._temperature is not None: + options["temperature"] = self._temperature + if self._tool_choice is not None: + options["tool_choice"] = self._tool_choice + if self._top_p is not None: + options["top_p"] = self._top_p + if self._user is not None: + options["user"] = self._user + if self._conversation_id is not None: + options["conversation_id"] = self._conversation_id + if self._additional_chat_options: + options.update(self._additional_chat_options) + + model_name = resolve_model_name(self._chat_client, self._model_id) + _strip_unsupported_reasoning_params(options, model_name) + + default_options: ChatOptions | None = ChatOptions(**options) if options else None + + return Agent( + client=self._chat_client, instructions=self._instructions, id=self._id, name=self._name, description=self._description, - chat_message_store_factory=self._chat_message_store_factory, - conversation_id=self._conversation_id, + tools=self._tools, + default_options=default_options, 
context_providers=self._context_providers, middleware=self._middleware, - frequency_penalty=self._frequency_penalty, - logit_bias=self._logit_bias, - max_tokens=self._max_tokens, - metadata=self._metadata, - model_id=self._model_id, - presence_penalty=self._presence_penalty, - response_format=self._response_format, - seed=self._seed, - stop=self._stop, - store=self._store, - temperature=self._temperature, - tool_choice=self._tool_choice, - tools=self._tools, - top_p=self._top_p, - user=self._user, - additional_chat_options=self._additional_chat_options, **self._kwargs, ) @@ -477,14 +544,12 @@ def create_agent_by_agentinfo( agent_info: AgentInfo, *, id: str | None = None, - chat_message_store_factory: Callable[[], ChatMessageStoreProtocol] - | None = None, + chat_message_store_factory: Callable[[], HistoryProvider] | None = None, conversation_id: str | None = None, - context_providers: ContextProvider - | list[ContextProvider] - | AggregateContextProvider - | None = None, - middleware: Middleware | list[Middleware] | None = None, + context_providers: ContextProvider | list[ContextProvider] | None = None, + middleware: ( + AgentMiddleware | ChatMiddleware | list[AgentMiddleware | ChatMiddleware] | None + ) = None, frequency_penalty: float | None = None, logit_bias: dict[str | int, float] | None = None, max_tokens: int | None = None, @@ -500,16 +565,16 @@ def create_agent_by_agentinfo( | Literal["auto", "required", "none"] | dict[str, Any] | None = "auto", - tools: ToolProtocol + tools: Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] | None = None, top_p: float | None = None, user: str | None = None, additional_chat_options: dict[str, Any] | None = None, **kwargs: Any, - ) -> ChatAgent: + ) -> Agent: """Create an agent using AgentInfo configuration with full parameter support. 
This method creates a chat client from the service configuration and then @@ -611,20 +676,18 @@ def create_agent_by_agentinfo( @staticmethod def create_agent( - chat_client: ChatClientProtocol, + chat_client: SupportsChatGetResponse, instructions: str | None = None, *, id: str | None = None, name: str | None = None, description: str | None = None, - chat_message_store_factory: Callable[[], ChatMessageStoreProtocol] - | None = None, + chat_message_store_factory: Callable[[], HistoryProvider] | None = None, conversation_id: str | None = None, - context_providers: ContextProvider - | list[ContextProvider] - | AggregateContextProvider - | None = None, - middleware: Middleware | list[Middleware] | None = None, + context_providers: ContextProvider | list[ContextProvider] | None = None, + middleware: ( + AgentMiddleware | ChatMiddleware | list[AgentMiddleware | ChatMiddleware] | None + ) = None, frequency_penalty: float | None = None, logit_bias: dict[str | int, float] | None = None, max_tokens: int | None = None, @@ -640,16 +703,16 @@ def create_agent( | Literal["auto", "required", "none"] | dict[str, Any] | None = "auto", - tools: ToolProtocol + tools: Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] | None = None, top_p: float | None = None, user: str | None = None, additional_chat_options: dict[str, Any] | None = None, **kwargs: Any, - ) -> ChatAgent: + ) -> Agent: """Create a Chat Client Agent. Factory method that creates a ChatAgent instance with the specified configuration. 
@@ -751,7 +814,7 @@ def create_agent( temperature=0.7, max_tokens=500, additional_chat_options={ - "reasoning": {"effort": "high", "summary": "concise"} + "reasoning_effort": "high" }, # OpenAI-specific reasoning options ) @@ -761,34 +824,57 @@ def create_agent( Note: When the agent has MCP tools or needs proper resource cleanup, use it with - ``async with`` to ensure proper initialization and cleanup via the ChatAgent's + ``async with`` to ensure proper initialization and cleanup via the Agent's async context manager protocol. """ - return ChatAgent( - chat_client=chat_client, + options: dict[str, Any] = {} + if frequency_penalty is not None: + options["frequency_penalty"] = frequency_penalty + if logit_bias is not None: + options["logit_bias"] = logit_bias + if max_tokens is not None: + options["max_tokens"] = max_tokens + if metadata is not None: + options["metadata"] = metadata + if model_id is not None: + options["model"] = model_id + if presence_penalty is not None: + options["presence_penalty"] = presence_penalty + if response_format is not None: + options["response_format"] = response_format + if seed is not None: + options["seed"] = seed + if stop is not None: + options["stop"] = stop + if store is not None: + options["store"] = store + if temperature is not None: + options["temperature"] = temperature + if tool_choice is not None: + options["tool_choice"] = tool_choice + if top_p is not None: + options["top_p"] = top_p + if user is not None: + options["user"] = user + if conversation_id is not None: + options["conversation_id"] = conversation_id + if additional_chat_options: + options.update(additional_chat_options) + + model_name = resolve_model_name(chat_client, model_id) + _strip_unsupported_reasoning_params(options, model_name) + + default_options: ChatOptions | None = ChatOptions(**options) if options else None + + return Agent( + client=chat_client, instructions=instructions, id=id, name=name, description=description, - 
chat_message_store_factory=chat_message_store_factory, - conversation_id=conversation_id, + tools=tools, + default_options=default_options, context_providers=context_providers, middleware=middleware, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - max_tokens=max_tokens, - metadata=metadata, - model_id=model_id, - presence_penalty=presence_penalty, - response_format=response_format, - seed=seed, - stop=stop, - store=store, - temperature=temperature, - tool_choice=tool_choice, - tools=tools, - top_p=top_p, - user=user, - additional_chat_options=additional_chat_options, **kwargs, ) diff --git a/src/processor/src/libs/agent_framework/agent_framework_helper.py b/src/processor/src/libs/agent_framework/agent_framework_helper.py index 61da842a..b1bf6ba8 100644 --- a/src/processor/src/libs/agent_framework/agent_framework_helper.py +++ b/src/processor/src/libs/agent_framework/agent_framework_helper.py @@ -27,11 +27,9 @@ ) if TYPE_CHECKING: - from agent_framework.azure import ( - AzureAIAgentClient, - AzureOpenAIAssistantsClient, - AzureOpenAIChatClient, - AzureOpenAIResponsesClient, + from agent_framework.openai import ( + OpenAIChatClient, + OpenAIChatCompletionClient, ) @@ -147,7 +145,7 @@ def create_client( env_file_path: str | None = None, env_file_encoding: str | None = None, instruction_role: str | None = None, - ) -> "AzureOpenAIChatClient": + ) -> "OpenAIChatCompletionClient": pass @overload @@ -171,7 +169,7 @@ def create_client( async_client: object | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, - ) -> "AzureOpenAIAssistantsClient": + ) -> Any: pass @overload @@ -193,7 +191,7 @@ def create_client( env_file_path: str | None = None, env_file_encoding: str | None = None, instruction_role: str | None = None, - ) -> "AzureOpenAIResponsesClient": + ) -> "OpenAIChatClient": pass @overload @@ -233,7 +231,7 @@ def create_client( async_credential: object | None = None, env_file_path: str | None = None, 
env_file_encoding: str | None = None, - ) -> "AzureAIAgentClient": + ) -> Any: pass @staticmethod @@ -366,18 +364,15 @@ def create_client( "OpenAIResponsesClient is not implemented in this context." ) elif client_type == ClientType.AzureOpenAIChatCompletion: - from agent_framework.azure import AzureOpenAIChatClient + from agent_framework.openai import OpenAIChatCompletionClient - return AzureOpenAIChatClient( + return OpenAIChatCompletionClient( + model=deployment_name, api_key=api_key, - deployment_name=deployment_name, - endpoint=endpoint, + azure_endpoint=endpoint, base_url=base_url, api_version=api_version, - ad_token=ad_token, - ad_token_provider=ad_token_provider, - token_endpoint=token_endpoint, - credential=credential, + credential=credential or ad_token_provider, default_headers=default_headers, async_client=async_client, env_file_path=env_file_path, @@ -385,39 +380,20 @@ def create_client( instruction_role=instruction_role, ) elif client_type == ClientType.AzureOpenAIAssistant: - from agent_framework.azure import AzureOpenAIAssistantsClient - - return AzureOpenAIAssistantsClient( - deployment_name=deployment_name, - assistant_id=assistant_id, - assistant_name=assistant_name, - thread_id=thread_id, - api_key=api_key, - endpoint=endpoint, - base_url=base_url, - api_version=api_version, - ad_token=ad_token, - ad_token_provider=ad_token_provider, - token_endpoint=token_endpoint, - credential=credential, - default_headers=default_headers, - async_client=async_client, - env_file_path=env_file_path, - env_file_encoding=env_file_encoding, + raise NotImplementedError( + "AzureOpenAIAssistantsClient has been removed in agent-framework 1.3.0. " + "Use OpenAIChatClient with built-in tools instead." 
) elif client_type == ClientType.AzureOpenAIResponse: - from agent_framework.azure import AzureOpenAIResponsesClient + from agent_framework.openai import OpenAIChatClient - return AzureOpenAIResponsesClient( + return OpenAIChatClient( + model=deployment_name, api_key=api_key, - deployment_name=deployment_name, - endpoint=endpoint, + azure_endpoint=endpoint, base_url=base_url, api_version=api_version, - ad_token=ad_token, - ad_token_provider=ad_token_provider, - token_endpoint=token_endpoint, - credential=credential, + credential=credential or ad_token_provider, default_headers=default_headers, async_client=async_client, env_file_path=env_file_path, @@ -426,15 +402,12 @@ def create_client( ) elif client_type == ClientType.AzureOpenAIResponseWithRetry: return AzureOpenAIResponseClientWithRetry( + model=deployment_name, api_key=api_key, - deployment_name=deployment_name, - endpoint=endpoint, + azure_endpoint=endpoint, base_url=base_url, api_version=api_version, - ad_token=ad_token, - ad_token_provider=ad_token_provider, - token_endpoint=token_endpoint, - credential=credential, + credential=credential or ad_token_provider, default_headers=default_headers, async_client=async_client, env_file_path=env_file_path, @@ -443,18 +416,9 @@ def create_client( retry_config=retry_config, ) elif client_type == ClientType.AzureOpenAIAgent: - from agent_framework.azure import AzureAIAgentClient - - return AzureAIAgentClient( - project_client=project_client, - agent_id=agent_id, - agent_name=agent_name, - thread_id=thread_id, - project_endpoint=project_endpoint, - model_deployment_name=model_deployment_name, - async_credential=async_credential, - env_file_path=env_file_path, - env_file_encoding=env_file_encoding, + raise NotImplementedError( + "AzureAIAgentClient has been removed in agent-framework 1.3.0. " + "Use FoundryChatClient from agent_framework.foundry instead." 
) else: raise ValueError(f"Unsupported agent type: {client_type}") diff --git a/src/processor/src/libs/agent_framework/agent_info.py b/src/processor/src/libs/agent_framework/agent_info.py index 8eb18de3..fbd6cafe 100644 --- a/src/processor/src/libs/agent_framework/agent_info.py +++ b/src/processor/src/libs/agent_framework/agent_info.py @@ -4,7 +4,6 @@ """Pydantic model describing an agent participant with Jinja2 template rendering.""" from typing import Any, Callable, MutableMapping, Sequence -from agent_framework import ToolProtocol from jinja2 import Template from openai import BaseModel from pydantic import Field @@ -20,10 +19,10 @@ class AgentInfo(BaseModel): agent_instruction: str | None = Field(default=None) agent_framework_helper: AgentFrameworkHelper | None = Field(default=None) tools: ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] | None ) = Field(default=None) diff --git a/src/processor/src/libs/agent_framework/agent_speaking_capture.py b/src/processor/src/libs/agent_framework/agent_speaking_capture.py index 8243d755..fe5cc0ff 100644 --- a/src/processor/src/libs/agent_framework/agent_speaking_capture.py +++ b/src/processor/src/libs/agent_framework/agent_speaking_capture.py @@ -5,7 +5,7 @@ from datetime import datetime from typing import Any, Callable, Optional -from agent_framework import AgentRunContext, AgentMiddleware +from agent_framework import AgentContext, AgentMiddleware class AgentSpeakingCaptureMiddleware(AgentMiddleware): @@ -72,7 +72,7 @@ def __init__( str, list[str] ] = {} # Buffer for streaming responses - async def process(self, context: AgentRunContext, next): + async def process(self, context: AgentContext, next): """Process the agent invocation and capture the response. 
Args: diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 42f6422e..3fbd0e8b 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -10,9 +10,9 @@ import os import random from dataclasses import dataclass -from typing import Any, AsyncIterable, MutableSequence +from typing import Any, MutableSequence -from agent_framework.azure import AzureOpenAIResponsesClient +from agent_framework.openai import OpenAIChatClient from tenacity import ( AsyncRetrying, retry_if_exception, @@ -513,11 +513,11 @@ def __call__(self, retry_state) -> float: raise RuntimeError("Retry loop exhausted unexpectedly") -class AzureOpenAIResponseClientWithRetry(AzureOpenAIResponsesClient): +class AzureOpenAIResponseClientWithRetry(OpenAIChatClient): """Azure OpenAI Responses client with 429 retry at the request boundary. Retry is centralized in the client layer (not in orchestrators) by retrying the - underlying Responses calls made by `OpenAIBaseResponsesClient`. + underlying Responses calls made by `OpenAIChatClient`. 
""" def __init__( @@ -531,7 +531,7 @@ def __init__( self._context_trim_config = ContextTrimConfig.from_env() async def _inner_get_response( - self, *, messages: MutableSequence[Any], chat_options: Any, **kwargs: Any + self, *, messages: MutableSequence[Any], options: Any = None, **kwargs: Any ) -> Any: parent_inner_get_response = super( AzureOpenAIResponseClientWithRetry, self @@ -558,7 +558,7 @@ async def _inner_get_response( try: return await _retry_call( lambda: parent_inner_get_response( - messages=effective_messages, chat_options=chat_options, **kwargs + messages=effective_messages, options=options, **kwargs ), config=self._retry_config, ) @@ -606,153 +606,8 @@ async def _inner_get_response( await asyncio.sleep(trim_delay) return await _retry_call( lambda: parent_inner_get_response( - messages=trimmed, chat_options=chat_options, **kwargs + messages=trimmed, options=options, **kwargs ), config=self._retry_config, ) - async def _inner_get_streaming_response( - self, *, messages: MutableSequence[Any], chat_options: Any, **kwargs: Any - ) -> AsyncIterable[Any]: - # Conservative retry: only retries failures before the first yielded update. 
- attempts = self._retry_config.max_retries + 1 - - effective_messages: MutableSequence[Any] | list[Any] = messages - if self._context_trim_config.enabled: - approx_chars = sum(len(_estimate_message_text(m)) for m in messages) - if ( - self._context_trim_config.max_total_chars > 0 - and approx_chars > self._context_trim_config.max_total_chars - ): - effective_messages = _trim_messages( - messages, cfg=self._context_trim_config - ) - logger.warning( - "[AOAI_CTX_TRIM] pre-trimmed streaming request messages: approx_chars=%s -> %s; count=%s -> %s", - approx_chars, - sum(len(_estimate_message_text(m)) for m in effective_messages), - len(messages), - len(effective_messages), - ) - - for attempt_index in range(attempts): - stream = super( - AzureOpenAIResponseClientWithRetry, self - )._inner_get_streaming_response( - messages=effective_messages, chat_options=chat_options, **kwargs - ) - - iterator = stream.__aiter__() - try: - first = await iterator.__anext__() - - async def _tail(): - yield first - async for item in iterator: - yield item - - async for item in _tail(): - yield item - return - except StopAsyncIteration: - return - except Exception as e: - close = getattr(stream, "aclose", None) - if callable(close): - try: - await close() - except Exception: - logger.debug("Best-effort close of response stream failed", exc_info=True) - - # Progressive retry for context-length failures. - if ( - self._context_trim_config.enabled - and self._context_trim_config.retry_on_context_error - and _looks_like_context_length(e) - ): - # Make trimming progressively more aggressive on each retry - # GPT-5.1: 272K input tokens ≈ 800K chars. Scale down from 600K default. 
- scale = attempt_index + 1 - aggressive_cfg = ContextTrimConfig( - enabled=True, - max_total_chars=max( - 30_000, - self._context_trim_config.max_total_chars - scale * 100_000, - ), - max_message_chars=max( - 2_000, - self._context_trim_config.max_message_chars - scale * 8_000, - ), - keep_last_messages=max( - 4, - self._context_trim_config.keep_last_messages - scale * 8, - ), - keep_head_chars=max( - 500, - self._context_trim_config.keep_head_chars - scale * 3_000, - ), - keep_tail_chars=max( - 500, - self._context_trim_config.keep_tail_chars - scale * 1_000, - ), - keep_system_messages=True, - retry_on_context_error=True, - ) - trimmed = _trim_messages(effective_messages, cfg=aggressive_cfg) - logger.warning( - "[AOAI_CTX_TRIM_STREAM] retrying after context-length error (attempt %s); count=%s -> %s, budget=%s", - attempt_index + 1, - len(effective_messages), - len(trimmed), - aggressive_cfg.max_total_chars, - ) - effective_messages = trimmed - if attempt_index >= attempts - 1: - # No more retries available. - raise - - # Cool down before retrying — immediate retries after trimming - # tend to trigger 429s because the API hasn't recovered yet. 
- trim_delay = self._retry_config.base_delay_seconds * ( - 2**attempt_index - ) - trim_delay = min(trim_delay, self._retry_config.max_delay_seconds) - logger.info( - "[AOAI_CTX_TRIM_STREAM] sleeping %ss before retry", - round(trim_delay, 1), - ) - await asyncio.sleep(trim_delay) - continue - - if not _looks_like_rate_limit(e) or attempt_index >= attempts - 1: - if _looks_like_rate_limit(e): - logger.warning( - "[AOAI_RETRY_STREAM] giving up after %s/%s attempts; error=%s", - attempt_index + 1, - attempts, - _format_exc_brief(e) - if isinstance(e, BaseException) - else str(e), - ) - raise - - retry_after = _try_get_retry_after_seconds(e) - if retry_after is not None and retry_after >= 0: - delay = retry_after - else: - delay = self._retry_config.base_delay_seconds * (2**attempt_index) - delay = min(delay, self._retry_config.max_delay_seconds) - delay = delay + random.uniform(0.0, 0.25 * max(delay, 0.1)) - - status = getattr(e, "status_code", None) or getattr(e, "status", None) - logger.warning( - "[AOAI_RETRY_STREAM] attempt %s/%s; sleeping=%ss; retry_after=%s; status=%s; error=%s", - attempt_index + 1, - attempts, - round(float(delay), 3), - None if retry_after is None else round(float(retry_after), 3), - status, - _format_exc_brief(e) if isinstance(e, BaseException) else str(e), - ) - - await asyncio.sleep(delay) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 5cb63938..5c3fc9ab 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -22,17 +22,16 @@ from typing import Any, Awaitable, Callable, Generic, Mapping, Sequence, TypeVar from agent_framework import ( - AgentProtocol, - AgentRunUpdateEvent, - ChatAgent, - ChatMessage, + Agent, + AgentResponseUpdate, + ChatOptions, Executor, - GroupChatBuilder, - ManagerSelectionResponse, - Role, + Message, + SupportsAgentRun, 
Workflow, - WorkflowOutputEvent, + WorkflowEvent, ) +from agent_framework.orchestrations import GroupChatBuilder from mem0 import AsyncMemory from pydantic import BaseModel, ValidationError @@ -44,6 +43,17 @@ TOutput = TypeVar("TOutput", bound=BaseModel) # Output must be Pydantic model +class ManagerSelectionResponse(BaseModel): + """Coordinator selection payload parsed from JSON output.""" + + selected_participant: str | None = None + instruction: str | None = None + finish: bool | None = None + final_message: str | None = None + + model_config = {"extra": "allow"} + + @dataclass class AgentResponse: """Represents a single agent's response during workflow execution""" @@ -87,7 +97,7 @@ class OrchestrationResult(Generic[TOutput]): """Final workflow execution result with generic output type""" success: bool - conversation: list[ChatMessage] + conversation: list[Message] agent_responses: list[AgentResponse] tool_usage: dict[str, list[dict[str, Any]]] result: TOutput | None = None @@ -188,8 +198,8 @@ def __init__( self, name: str, process_id: str, - participants: Mapping[str, AgentProtocol | Executor] - | Sequence[AgentProtocol | Executor], + participants: Mapping[str, SupportsAgentRun | Executor] + | Sequence[SupportsAgentRun | Executor], memory_client: AsyncMemory, coordinator_name: str = "Coordinator", max_rounds: int = 100, @@ -225,7 +235,7 @@ def __init__( self.result_format = result_output_format # Runtime state - self.agents: dict[str, ChatAgent] = participants + self.agents: dict[str, Agent] = participants self.agent_tool_usage: dict[str, list[dict[str, Any]]] = {} self.agent_responses: list[AgentResponse] = [] self._initialized: bool = False @@ -338,7 +348,7 @@ def get_result_generator_name(self) -> str: """ return "ResultGenerator" - def _validate_sign_offs(self, conversation: list[ChatMessage]) -> tuple[bool, str]: + def _validate_sign_offs(self, conversation: list[Message]) -> tuple[bool, str]: """ Validate that all required reviewers have SIGN-OFF: PASS. 
@@ -475,7 +485,7 @@ async def run_stream( self._tool_call_emitted.clear() self._tool_call_recorded.clear() self._tool_call_index.clear() - self._conversation: list[ChatMessage] = [] # Track conversation during workflow + self._conversation: list[Message] = [] # Track conversation during workflow try: # Ensure initialized @@ -489,9 +499,9 @@ async def run_stream( group_chat_workflow = await self._build_groupchat() # Execute with streaming - conversation: list[ChatMessage] = [] + conversation: list[Message] = [] - async for event in group_chat_workflow.run_stream(task_prompt): + async for event in group_chat_workflow.run(task_prompt, stream=True): # Enforce wall-clock timeout if configured. if self.max_seconds is not None: elapsed = (datetime.now() - start_time).total_seconds() @@ -503,7 +513,7 @@ async def run_stream( termination_type="hard_timeout", ) - if isinstance(event, AgentRunUpdateEvent): + if isinstance(event, AgentResponseUpdate): await self._handle_agent_update( event, stream_callback=on_agent_response_stream, @@ -525,7 +535,8 @@ async def run_stream( # If the Coordinator requested finish=true, stop immediately. if self._termination_requested: break - elif isinstance(event, WorkflowOutputEvent): + elif event.type == "output": + event: WorkflowEvent # Complete last agent's response before finishing if self._last_executor_id and self._current_agent_response: await self._complete_agent_response( @@ -542,8 +553,8 @@ async def run_stream( self._conversation = conversation # Update instance variable # Backfill tool usage from the final conversation (more reliable than streaming updates) - # AgentRunUpdateEvent may stream text only; tool calls are represented as FunctionCallContent - # items inside ChatMessage.contents. + # AgentResponseUpdate may stream text only; tool calls are represented as FunctionCallContent + # items inside Message.contents. 
self._backfill_tool_usage_from_conversation(conversation) # Post-workflow analysis (optional) @@ -642,7 +653,7 @@ async def run_stream( async def _handle_agent_update( self, - event: AgentRunUpdateEvent, + event: AgentResponseUpdate, stream_callback: AgentResponseStreamCallback | None = None, callback: AgentResponseCallback | None = None, ) -> None: @@ -655,7 +666,7 @@ async def _handle_agent_update( 3. Trigger callback with complete response 4. Handle tool calls separately from text streaming """ - agent_name = self._normalize_executor_id(event.executor_id) + agent_name = self._normalize_executor_id(event.agent_id or "") await self._start_agent_if_needed(agent_name, stream_callback, callback) self._append_text_chunk(event) await self._process_tool_calls(event, agent_name, stream_callback) @@ -705,24 +716,23 @@ async def _start_agent_if_needed( logger.info(f"\n[AGENT] {agent_name}:", extra={"agent_name": agent_name}) - def _append_text_chunk(self, event: AgentRunUpdateEvent) -> None: + def _append_text_chunk(self, event: AgentResponseUpdate) -> None: """Append streamed text chunks to the current agent buffer.""" - if not hasattr(event.data, "text") or not event.data.text: + text_chunk = getattr(event, "text", None) + if not text_chunk: return - text_obj = event.data.text - text_chunk = getattr(text_obj, "text", text_obj) if isinstance(text_chunk, str) and text_chunk: self._current_agent_response.append(text_chunk) async def _process_tool_calls( self, - event: AgentRunUpdateEvent, + event: AgentResponseUpdate, agent_name: str, stream_callback: AgentResponseStreamCallback | None, ) -> None: """Process tool-call contents: buffer/parse args, record once, emit once.""" - tool_calls = self._extract_function_calls(getattr(event.data, "contents", None)) + tool_calls = self._extract_function_calls(event.contents) if not tool_calls: return @@ -884,7 +894,7 @@ def _extract_function_calls(self, contents: Any) -> list[dict[str, Any]]: return calls def 
_backfill_tool_usage_from_conversation( - self, conversation: list[ChatMessage] + self, conversation: list[Message] ) -> None: """Populate `agent_tool_usage` from final conversation messages. @@ -894,7 +904,7 @@ def _backfill_tool_usage_from_conversation( for msg in conversation: try: role = getattr(msg, "role", None) - if role != Role.ASSISTANT: + if role != "assistant": continue agent_name = getattr(msg, "author_name", None) or "assistant" @@ -1114,15 +1124,16 @@ async def _build_groupchat(self) -> Workflow: ] return ( - GroupChatBuilder() - .set_manager(coordinator) - .participants(participants) + GroupChatBuilder( + orchestrator_agent=coordinator, + participants=participants, + ) .build() ) async def _generate_final_result( self, - conversation: list[ChatMessage], + conversation: list[Message], result_format: type[TOutput], result_generator_name: str, ) -> TOutput: @@ -1141,7 +1152,7 @@ async def _generate_final_result( result = await result_generator.run( final_conversation, - response_format=result_format, + options=ChatOptions(response_format=result_format), ) text = result.messages[-1].text @@ -1174,7 +1185,7 @@ async def _generate_final_result( ) retry_result = await result_generator.run( retry_conversation, - response_format=result_format, + options=ChatOptions(response_format=result_format), ) retry_text = retry_result.messages[-1].text retry_json_payload = self._extract_first_json_payload(retry_text) @@ -1220,7 +1231,7 @@ def _truncate_text( def _build_result_generator_conversation( self, - conversation: Iterable[ChatMessage], + conversation: Iterable[Message], *, exclude_authors: set[str] | None, max_messages: int, @@ -1228,7 +1239,7 @@ def _build_result_generator_conversation( max_chars_per_message: int, keep_head_chars: int, keep_tail_chars: int, - ) -> list[ChatMessage]: + ) -> list[Message]: """Build a size-bounded conversation slice for the ResultGenerator. 
The raw conversation can contain extremely large tool outputs or repeated @@ -1241,7 +1252,7 @@ def _build_result_generator_conversation( """ exclude = {a.lower() for a in (exclude_authors or set())} - selected: list[ChatMessage] = [] + selected: list[Message] = [] seen_fingerprints: set[tuple[str | None, str, str]] = set() total_chars = 0 @@ -1296,9 +1307,9 @@ def _build_result_generator_conversation( # Preserve role + author_name so downstream can attribute sign-offs. selected.append( - ChatMessage( + Message( role=role, - text=truncated, + contents=[truncated], author_name=author, ) ) diff --git a/src/processor/src/libs/agent_framework/middlewares.py b/src/processor/src/libs/agent_framework/middlewares.py index a24f5b00..573ab0c2 100644 --- a/src/processor/src/libs/agent_framework/middlewares.py +++ b/src/processor/src/libs/agent_framework/middlewares.py @@ -8,13 +8,13 @@ from agent_framework import ( AgentMiddleware, - AgentRunContext, + AgentContext, ChatContext, - ChatMessage, ChatMiddleware, + Content, FunctionInvocationContext, FunctionMiddleware, - Role, + Message, ) @@ -23,8 +23,8 @@ class DebuggingMiddleware(AgentMiddleware): async def process( self, - context: AgentRunContext, - next: Callable[[AgentRunContext], Awaitable[None]], + context: AgentContext, + next: Callable[[AgentContext], Awaitable[None]], ) -> None: """Run-level debugging middleware for troubleshooting specific runs.""" print("[Debug] Debug mode enabled for this run") @@ -136,16 +136,16 @@ async def process( for i, message in enumerate(context.messages): content = message.text if message.text else str(message.contents) - print(f" Message {i + 1} ({message.role.value}): {content}") + print(f" Message {i + 1} ({message.role}): {content}") print(f"[InputObserverMiddleware] Total messages: {len(context.messages)}") # Modify user messages by creating new messages with enhanced text - modified_messages: list[ChatMessage] = [] + modified_messages: list[Message] = [] modified_count = 0 for 
message in context.messages: - if message.role == Role.USER and message.text: + if message.role == "user" and message.text: original_text = message.text updated_text = original_text @@ -155,7 +155,7 @@ async def process( f"[InputObserverMiddleware] Updated: '{original_text}' -> '{updated_text}'" ) - modified_message = ChatMessage(role=message.role, text=updated_text) + modified_message = Message(role=message.role, contents=[Content.from_text(updated_text)]) modified_messages.append(modified_message) modified_count += 1 else: diff --git a/src/processor/src/libs/agent_framework/shared_memory_context_provider.py b/src/processor/src/libs/agent_framework/shared_memory_context_provider.py index a143a88e..687c4419 100644 --- a/src/processor/src/libs/agent_framework/shared_memory_context_provider.py +++ b/src/processor/src/libs/agent_framework/shared_memory_context_provider.py @@ -18,7 +18,7 @@ from collections.abc import MutableSequence, Sequence from typing import TYPE_CHECKING -from agent_framework import ChatMessage, Context, ContextProvider +from agent_framework import AgentSession, ContextProvider, Message, SessionContext, SupportsAgentRun if TYPE_CHECKING: from libs.agent_framework.qdrant_memory_store import QdrantMemoryStore @@ -49,6 +49,11 @@ class SharedMemoryContextProvider(ContextProvider): redundant embedding calls for intermediate turns) """ + DEFAULT_CONTEXT_PROMPT = ( + "The following are relevant memories from previous migration steps. " + "Use them as additional context when formulating your response:" + ) + def __init__( self, memory_store: QdrantMemoryStore, @@ -66,6 +71,7 @@ def __init__( top_k: Number of relevant memories to retrieve per turn. score_threshold: Minimum similarity score for memory retrieval. 
""" + super().__init__(source_id=f"shared_memory_{agent_name}_{step}") self._memory_store = memory_store self._agent_name = agent_name self._step = step @@ -85,11 +91,14 @@ def __init__( break self._prior_steps = _STEP_ORDER[:step_idx] if step_idx else [] - async def invoking( + async def before_run( self, - messages: ChatMessage | MutableSequence[ChatMessage], - **kwargs, - ) -> Context: + *, + agent: SupportsAgentRun, + session: AgentSession, + context: SessionContext, + state: dict, + ) -> None: """Called before the agent's LLM call. Injects relevant shared memories. Only searches memories from PREVIOUS steps. Within the current step, @@ -97,12 +106,12 @@ async def invoking( """ # Skip if this is the first step (no prior memories exist) if not self._prior_steps: - return Context() + return # Extract query from the most recent messages - query = self._extract_query(messages) + query = self._extract_query(context.input_messages) if not query: - return Context() + return try: memories = await self._memory_store.search( @@ -116,15 +125,15 @@ async def invoking( self._agent_name, e, ) - return Context() + return if not memories: - return Context() + return # Format memories into context instructions formatted = self._format_memories(memories) if not formatted: - return Context() + return instructions = f"{self.DEFAULT_CONTEXT_PROMPT}\n\n{formatted}" @@ -136,14 +145,17 @@ async def invoking( len(instructions), ) - return Context(instructions=instructions) + if context.instructions is None: + context.instructions = [] + context.instructions.append(instructions) - async def invoked( + async def after_run( self, - request_messages: ChatMessage | Sequence[ChatMessage], - response_messages: ChatMessage | Sequence[ChatMessage] | None = None, - invoke_exception: Exception | None = None, - **kwargs, + *, + agent: SupportsAgentRun, + session: AgentSession, + context: SessionContext, + state: dict, ) -> None: """Called after the agent's LLM response. 
Buffers the response for storage. @@ -152,17 +164,19 @@ async def invoked( This means only the agent's last response per step gets stored, which is the most complete and useful summary. """ - if invoke_exception is not None: + # Extract text from response messages + response = context.response + if response is None: logger.debug( - "[MEMORY] invoked() skipped for %s — exception: %s", + "[MEMORY] after_run() skipped for %s — no response", self._agent_name, - invoke_exception, ) return - if response_messages is None: + response_messages = getattr(response, "messages", None) + if not response_messages: logger.debug( - "[MEMORY] invoked() skipped for %s — no response_messages", + "[MEMORY] after_run() skipped for %s — no response_messages", self._agent_name, ) return @@ -171,14 +185,14 @@ async def invoked( content = self._extract_text(response_messages) if not content or len(content) < MIN_CONTENT_LENGTH_TO_STORE: logger.debug( - "[MEMORY] invoked() skipped for %s — content too short (%d chars)", + "[MEMORY] after_run() skipped for %s — content too short (%d chars)", self._agent_name, len(content) if content else 0, ) return logger.info( - "[MEMORY] invoked() buffering for %s (step=%s, %d chars)", + "[MEMORY] after_run() buffering for %s (step=%s, %d chars)", self._agent_name, self._step, len(content), @@ -249,7 +263,7 @@ async def _flush_memory(self) -> None: ) def _extract_query( - self, messages: ChatMessage | MutableSequence[ChatMessage] + self, messages: Message | MutableSequence[Message] ) -> str: """Extract a search query from the input messages. 
@@ -292,8 +306,8 @@ def _format_memories(self, memories: list) -> str: return "\n".join(lines) @staticmethod - def _get_text(message: ChatMessage) -> str: - """Extract text content from a ChatMessage.""" + def _get_text(message: Message) -> str: + """Extract text content from a Message.""" if hasattr(message, "text") and message.text: return message.text if hasattr(message, "content"): @@ -302,7 +316,7 @@ def _get_text(message: ChatMessage) -> str: @staticmethod def _extract_text( - messages: ChatMessage | Sequence[ChatMessage], + messages: Message | Sequence[Message], ) -> str: """Extract text content from response message(s).""" if not isinstance(messages, (list, Sequence)) or isinstance(messages, str): diff --git a/src/processor/src/libs/base/orchestrator_base.py b/src/processor/src/libs/base/orchestrator_base.py index 9203772f..a4570c04 100644 --- a/src/processor/src/libs/base/orchestrator_base.py +++ b/src/processor/src/libs/base/orchestrator_base.py @@ -9,7 +9,7 @@ from abc import abstractmethod from typing import Any, Callable, Generic, MutableMapping, Sequence, TypeVar -from agent_framework import ChatAgent, ManagerSelectionResponse, ToolProtocol +from agent_framework import Agent from libs.agent_framework.agent_builder import AgentBuilder from libs.agent_framework.agent_framework_helper import ClientType @@ -18,6 +18,7 @@ from libs.agent_framework.groupchat_orchestrator import ( AgentResponse, AgentResponseStream, + ManagerSelectionResponse, OrchestrationResult, ) from libs.agent_framework.qdrant_memory_store import QdrantMemoryStore @@ -60,10 +61,10 @@ def is_console_summarization_enabled(self) -> bool: async def initialize(self, process_id: str): self.mcp_tools: ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ) = await self.prepare_mcp_tools() self.agentinfos = await self.prepare_agent_infos() 
@@ -130,10 +131,10 @@ async def execute( async def prepare_mcp_tools( self, ) -> ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ): pass @@ -144,8 +145,8 @@ async def prepare_agent_infos(self) -> list[AgentInfo]: async def create_agents( self, agent_infos: list[AgentInfo], process_id: str - ) -> list[ChatAgent]: - agents = dict[str, ChatAgent]() + ) -> list[Agent]: + agents = dict[str, Agent]() agent_client = await self.get_client(thread_id=process_id) # Workspace context — injected into every agent's system instructions diff --git a/src/processor/src/libs/mcp_server/MCPBlobIOTool.py b/src/processor/src/libs/mcp_server/MCPBlobIOTool.py index 40a68fe2..f821c925 100644 --- a/src/processor/src/libs/mcp_server/MCPBlobIOTool.py +++ b/src/processor/src/libs/mcp_server/MCPBlobIOTool.py @@ -22,14 +22,14 @@ from libs.mcp_server.MCPBlobIOTool import get_blob_file_mcp from libs.agent_framework.mcp_context import MCPContext - from agent_framework import ChatAgent + from agent_framework import Agent # Get the Blob Storage MCP tool blob_tool = get_blob_file_mcp() # Use with MCPContext for TaskGroup-safe management async with MCPContext(tools=[blob_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run( "Upload the file 'data.csv' to my Azure storage container 'datasets'" ) @@ -76,7 +76,7 @@ def get_blob_file_mcp() -> MCPStdioTool: blob_tool = get_blob_file_mcp() async with blob_tool: - async with ChatAgent(client, tools=[blob_tool]) as agent: + async with Agent(client, tools=[blob_tool]) as agent: result = await agent.run( "Upload 'report.pdf' to container 'documents'" ) @@ -91,7 +91,7 @@ def get_blob_file_mcp() -> MCPStdioTool: blob_tool = get_blob_file_mcp() async with MCPContext(tools=[blob_tool]) as 
mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: # List all containers containers = await agent.run("List all my blob containers") print(containers) @@ -111,13 +111,13 @@ def get_blob_file_mcp() -> MCPStdioTool: async with MCPContext(tools=[blob_tool, datetime_tool]) as mcp_ctx: # Data processing agent - async with ChatAgent(client1, tools=mcp_ctx.tools) as processor: + async with Agent(client1, tools=mcp_ctx.tools) as processor: data = await processor.run( "Download 'raw_data.csv' from 'input-container'" ) # Analysis agent - async with ChatAgent(client2, tools=mcp_ctx.tools) as analyst: + async with Agent(client2, tools=mcp_ctx.tools) as analyst: result = await analyst.run( f"Analyze the data and upload results to 'output-container'" ) @@ -137,7 +137,7 @@ def get_blob_file_mcp() -> MCPStdioTool: blob_tool = get_blob_file_mcp() async with MCPContext(tools=[blob_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run("Upload 'image.png' to 'media-container'") Note: diff --git a/src/processor/src/libs/mcp_server/MCPDatetimeTool.py b/src/processor/src/libs/mcp_server/MCPDatetimeTool.py index 83aca397..157d07a2 100644 --- a/src/processor/src/libs/mcp_server/MCPDatetimeTool.py +++ b/src/processor/src/libs/mcp_server/MCPDatetimeTool.py @@ -15,14 +15,14 @@ from libs.mcp_server.MCPDatetimeTool import get_datetime_mcp from libs.agent_framework.mcp_context import MCPContext - from agent_framework import ChatAgent + from agent_framework import Agent # Get the datetime MCP tool datetime_tool = get_datetime_mcp() # Use with MCPContext for TaskGroup-safe management async with MCPContext(tools=[datetime_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run("What time is it right 
now?") print(response) """ @@ -60,7 +60,7 @@ def get_datetime_mcp() -> MCPStdioTool: datetime_tool = get_datetime_mcp() async with datetime_tool: - async with ChatAgent(client, tools=[datetime_tool]) as agent: + async with Agent(client, tools=[datetime_tool]) as agent: result = await agent.run("What's today's date?") print(result) @@ -74,7 +74,7 @@ def get_datetime_mcp() -> MCPStdioTool: weather_tool = get_weather_mcp() async with MCPContext(tools=[datetime_tool, weather_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run( "What's the current time and what's the weather like?" ) @@ -88,10 +88,10 @@ def get_datetime_mcp() -> MCPStdioTool: async with MCPContext(tools=[datetime_tool]) as mcp_ctx: # Share tool across multiple agents - async with ChatAgent(client1, tools=mcp_ctx.tools) as agent1: + async with Agent(client1, tools=mcp_ctx.tools) as agent1: time_info = await agent1.run("Get the current time") - async with ChatAgent(client2, tools=mcp_ctx.tools) as agent2: + async with Agent(client2, tools=mcp_ctx.tools) as agent2: schedule = await agent2.run( f"Based on the time {time_info}, suggest a meeting slot" ) diff --git a/src/processor/src/libs/mcp_server/MCPMicrosoftDocs.py b/src/processor/src/libs/mcp_server/MCPMicrosoftDocs.py index d9a2ca0e..989f7d75 100644 --- a/src/processor/src/libs/mcp_server/MCPMicrosoftDocs.py +++ b/src/processor/src/libs/mcp_server/MCPMicrosoftDocs.py @@ -12,14 +12,14 @@ from libs.mcp_server.MCPMicrosoftDocs import get_microsoft_docs_mcp from libs.agent_framework.mcp_context import MCPContext - from agent_framework import ChatAgent + from agent_framework import Agent # Get the Microsoft Docs MCP tool docs_tool = get_microsoft_docs_mcp() # Use with MCPContext for TaskGroup-safe management async with MCPContext(tools=[docs_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with 
Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run("Search Microsoft Learn for Azure Functions best practices") print(response) """ @@ -47,7 +47,7 @@ def get_microsoft_docs_mcp() -> MCPStreamableHTTPTool: docs_tool = get_microsoft_docs_mcp() async with docs_tool: - async with ChatAgent(client, tools=[docs_tool]) as agent: + async with Agent(client, tools=[docs_tool]) as agent: result = await agent.run("Find documentation about Azure App Service") Advanced usage with multiple tools: @@ -60,7 +60,7 @@ def get_microsoft_docs_mcp() -> MCPStreamableHTTPTool: datetime_tool = MCPStdioTool(name="datetime", command="npx", args=["-y", "@modelcontextprotocol/server-datetime"]) async with MCPContext(tools=[docs_tool, datetime_tool]) as mcp_ctx: - async with ChatAgent(client, tools=mcp_ctx.tools) as agent: + async with Agent(client, tools=mcp_ctx.tools) as agent: response = await agent.run("What's the latest Azure Functions documentation?") Note: diff --git a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py index 93f8f2f0..1221433f 100644 --- a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py +++ b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import Any, Callable, MutableMapping, Sequence -from agent_framework import MCPStdioTool, MCPStreamableHTTPTool, ToolProtocol +from agent_framework import MCPStdioTool, MCPStreamableHTTPTool from libs.agent_framework.agent_info import AgentInfo from libs.agent_framework.groupchat_orchestrator import ( @@ -98,10 +98,10 @@ async def execute( async def prepare_mcp_tools( self, ) -> ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ): """Create and return the MCP 
tools used by analysis agents. diff --git a/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py b/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py index f1fe8b4d..3ee0992b 100644 --- a/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py +++ b/src/processor/src/steps/convert/orchestration/yaml_convert_orchestrator.py @@ -16,7 +16,6 @@ from agent_framework import ( MCPStdioTool, MCPStreamableHTTPTool, - ToolProtocol, ) from libs.agent_framework.agent_info import AgentInfo @@ -107,10 +106,10 @@ async def execute( async def prepare_mcp_tools( self, ) -> ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ): """Create and return the MCP tools used by conversion agents.""" ms_doc_mcp_tool = MCPStreamableHTTPTool( diff --git a/src/processor/src/steps/design/orchestration/design_orchestrator.py b/src/processor/src/steps/design/orchestration/design_orchestrator.py index d2dd47f0..df53ccc2 100644 --- a/src/processor/src/steps/design/orchestration/design_orchestrator.py +++ b/src/processor/src/steps/design/orchestration/design_orchestrator.py @@ -14,7 +14,6 @@ from agent_framework import ( MCPStdioTool, MCPStreamableHTTPTool, - ToolProtocol, ) from libs.agent_framework.agent_info import AgentInfo @@ -98,10 +97,10 @@ async def execute( async def prepare_mcp_tools( self, ) -> ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ): """Create and return the MCP tools used by design agents.""" # Create MCP tools (not connected yet) diff --git a/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py 
b/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py index 0aa6c443..ee87719d 100644 --- a/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py +++ b/src/processor/src/steps/documentation/orchestration/documentation_orchestrator.py @@ -18,7 +18,6 @@ from agent_framework import ( MCPStdioTool, MCPStreamableHTTPTool, - ToolProtocol, ) from libs.agent_framework.agent_info import AgentInfo @@ -112,10 +111,10 @@ async def execute( async def prepare_mcp_tools( self, ) -> ( - ToolProtocol + Any | Callable[..., Any] | MutableMapping[str, Any] - | Sequence[ToolProtocol | Callable[..., Any] | MutableMapping[str, Any]] + | Sequence[Any | Callable[..., Any] | MutableMapping[str, Any]] ): """Create and return the MCP tools used by documentation agents.""" ms_doc_mcp_tool = MCPStreamableHTTPTool( diff --git a/src/processor/src/steps/migration_processor.py b/src/processor/src/steps/migration_processor.py index 73b2954a..dfc7a227 100644 --- a/src/processor/src/steps/migration_processor.py +++ b/src/processor/src/steps/migration_processor.py @@ -33,14 +33,9 @@ from typing import Any from agent_framework import ( - ExecutorCompletedEvent, - ExecutorFailedEvent, - ExecutorInvokedEvent, Workflow, WorkflowBuilder, - WorkflowFailedEvent, - WorkflowOutputEvent, - WorkflowStartedEvent, + WorkflowEvent, ) from openai import AsyncAzureOpenAI @@ -166,30 +161,16 @@ def _init_workflow(self) -> Workflow: Workflow The built workflow ready to execute. 
""" + analysis = AnalysisExecutor(id="analysis", app_context=self.app_context) + design = DesignExecutor(id="design", app_context=self.app_context) + yaml_convert = YamlConvertExecutor(id="yaml", app_context=self.app_context) + documentation = DocumentationExecutor( + id="documentation", app_context=self.app_context + ) + workflow = ( - WorkflowBuilder() - .register_executor( - lambda: AnalysisExecutor(id="analysis", app_context=self.app_context), - name="analysis", - ) - .register_executor( - lambda: DesignExecutor(id="design", app_context=self.app_context), - name="design", - ) - .register_executor( - lambda: YamlConvertExecutor(id="yaml", app_context=self.app_context), - name="yaml", - ) - .register_executor( - lambda: DocumentationExecutor( - id="documentation", app_context=self.app_context - ), - name="documentation", - ) - .set_start_executor("analysis") - .add_edge("analysis", "design") - .add_edge("design", "yaml") - .add_edge("yaml", "documentation") + WorkflowBuilder(start_executor=analysis) + .add_chain([analysis, design, yaml_convert, documentation]) .build() ) @@ -254,7 +235,7 @@ async def _create_memory_store( async def run(self, input_data: Analysis_TaskParam) -> Any: """Run the migration workflow. - The workflow is executed via ``run_stream`` and handled as a sequence of + The workflow is executed via ``run(stream=True)`` and handled as a sequence of framework events. This method: - Initializes telemetry for the process. 
@@ -367,8 +348,9 @@ async def _generate_report_summary( "top_remediations": remediation_titles, } - async for event in self.workflow.run_stream(input_data): - if isinstance(event, WorkflowStartedEvent): + async for event in self.workflow.run(input_data, stream=True): + event: WorkflowEvent + if event.type == "started": logger.info("Workflow started (%s)", event.origin.value) report_collector.set_current_step("analysis", step_phase="start") @@ -377,16 +359,16 @@ async def _generate_report_summary( await telemetry.init_process( process_id=input_data.process_id, step="analysis", phase="start" ) - elif isinstance(event, WorkflowOutputEvent): - # WorkflowOutputEvent carries the step output (success or hard-termination). + elif event.type == "output": + # Workflow "output" event carries the step output (success or hard-termination). # Note: a None payload is an error that must be surfaced clearly. if event.data is None: report_collector.set_current_step( - event.source_executor_id or "unknown" + event.executor_id or "unknown" ) # Build a meaningful error message instead of generic "Workflow output is None" - executor_id = event.source_executor_id or "unknown" + executor_id = event.executor_id or "unknown" error_msg = f"Step '{executor_id}' completed without producing output. This may be caused by context length overflow, agent timeout, or an internal orchestration error. Check processor logs for '[AOAI_CTX_TRIM_STREAM]' or exception details." 
report_collector.record_failure( @@ -407,13 +389,13 @@ async def _generate_report_summary( await telemetry.record_failure_outcome( process_id=input_data.process_id, - failed_step=event.source_executor_id or "unknown", + failed_step=event.executor_id or "unknown", error_message=error_msg, failure_details=failure_details, execution_time_seconds=( time.perf_counter() - - step_start_perf[event.source_executor_id] - if event.source_executor_id in step_start_perf + - step_start_perf[event.executor_id] + if event.executor_id in step_start_perf else None ), ) @@ -423,7 +405,7 @@ async def _generate_report_summary( # Raise a rich exception so the queue worker reports a meaningful reason. raise WorkflowExecutorFailedException({ - "executor_id": event.source_executor_id or "unknown", + "executor_id": event.executor_id or "unknown", "error_type": "WorkflowOutputMissing", "message": error_msg, "traceback": None, @@ -477,15 +459,15 @@ async def _generate_report_summary( } report_collector.set_current_step( - event.source_executor_id or "unknown" + event.executor_id or "unknown" ) report_collector.record_failure( exception=ValueError( getattr(event.data, "reason", None) - or f"Hard terminated in {event.source_executor_id} step" + or f"Hard terminated in {event.executor_id} step" ), custom_message=getattr(event.data, "reason", None) - or f"Hard terminated in {event.source_executor_id} step", + or f"Hard terminated in {event.executor_id} step", ) failure_details: Any = ( @@ -510,14 +492,14 @@ async def _generate_report_summary( await telemetry.record_failure_outcome( process_id=input_data.process_id, - failed_step=event.source_executor_id or "unknown", + failed_step=event.executor_id or "unknown", error_message=getattr(event.data, "reason", None) - or f"Hard terminated in {event.source_executor_id} step", + or f"Hard terminated in {event.executor_id} step", failure_details=failure_details, execution_time_seconds=( time.perf_counter() - - step_start_perf[event.source_executor_id] - 
if event.source_executor_id in step_start_perf + - step_start_perf[event.executor_id] + if event.executor_id in step_start_perf else None ), ) @@ -533,21 +515,21 @@ async def _generate_report_summary( logger.info("Workflow output (%s): %s", event.origin.value, event.data) await telemetry.record_step_result( process_id=input_data.process_id, - step_name=event.source_executor_id, + step_name=event.executor_id, step_result=event.data, execution_time_seconds=( time.perf_counter() - - step_start_perf[event.source_executor_id] - if event.source_executor_id in step_start_perf + - step_start_perf[event.executor_id] + if event.executor_id in step_start_perf else None ), ) - if event.source_executor_id in step_start_perf: + if event.executor_id in step_start_perf: report_collector.mark_step_completed( - event.source_executor_id, + event.executor_id, execution_time=time.perf_counter() - - step_start_perf[event.source_executor_id], + - step_start_perf[event.executor_id], ) try: @@ -572,10 +554,10 @@ async def _generate_report_summary( ) return event.data - elif isinstance(event, ExecutorFailedEvent): + elif event.type == "executor_failed": pass # will handle in WorkflowFailedEvent - elif isinstance(event, WorkflowFailedEvent): + elif event.type == "failed": logger.error( "Executor failed (%s): %s [%s]: %s (traceback: %s)", event.origin.value, @@ -644,7 +626,7 @@ async def _generate_report_summary( # Raise a rich exception containing the full WorkflowErrorDetails payload. raise WorkflowExecutorFailedException(event.details) - elif isinstance(event, ExecutorInvokedEvent): + elif event.type == "executor_invoked": # The bug. the first executor's event fired after completing execution. 
if event.executor_id != "analysis": telemetry: TelemetryManager = ( @@ -660,7 +642,7 @@ async def _generate_report_summary( event.executor_id, event.executor_id.capitalize() ) await telemetry.transition_to_phase( - process_id=event.data.process_id, + process_id=getattr(event.data, "process_id", input_data.process_id), step=event.executor_id, phase=f"Initializing {step_display}", ) @@ -675,7 +657,7 @@ async def _generate_report_summary( # near-zero and incorrect. if event.executor_id not in step_start_perf: step_start_perf[event.executor_id] = time.perf_counter() - elif isinstance(event, ExecutorCompletedEvent): + elif event.type == "executor_completed": # print(f"Executor completed ({event.executor_id}): {event.data}") # Log shared memory stats after each step diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py b/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py index 26fcbfe5..572974a8 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py @@ -144,7 +144,7 @@ def test_chaining_returns_self_each_step(self): class TestBuild: def test_build_passes_all_state_to_chat_agent(self): chat_client = MagicMock() - with patch("libs.agent_framework.agent_builder.ChatAgent") as mock_chat: + with patch("libs.agent_framework.agent_builder.Agent") as mock_chat: agent = ( AgentBuilder(chat_client) .with_instructions("inst") @@ -158,21 +158,22 @@ def test_build_passes_all_state_to_chat_agent(self): ) assert agent is mock_chat.return_value kwargs = mock_chat.call_args.kwargs - assert kwargs["chat_client"] is chat_client + assert kwargs["client"] is chat_client assert kwargs["instructions"] == "inst" assert kwargs["id"] == "id1" assert kwargs["name"] == "name1" assert kwargs["description"] == "desc1" - assert kwargs["temperature"] == 0.3 - assert kwargs["max_tokens"] == 100 - assert kwargs["tool_choice"] == "auto" + 
default_options = kwargs["default_options"] + assert default_options["temperature"] == 0.3 + assert default_options["max_tokens"] == 100 + assert default_options["tool_choice"] == "auto" assert kwargs["extra"] == 42 class TestStaticFactories: def test_create_agent_invokes_chat_agent(self): chat_client = MagicMock() - with patch("libs.agent_framework.agent_builder.ChatAgent") as mock_chat: + with patch("libs.agent_framework.agent_builder.Agent") as mock_chat: agent = AgentBuilder.create_agent( chat_client=chat_client, instructions="i", @@ -181,10 +182,10 @@ def test_create_agent_invokes_chat_agent(self): ) assert agent is mock_chat.return_value kwargs = mock_chat.call_args.kwargs - assert kwargs["chat_client"] is chat_client + assert kwargs["client"] is chat_client assert kwargs["instructions"] == "i" assert kwargs["name"] == "n" - assert kwargs["temperature"] == 0.4 + assert kwargs["default_options"]["temperature"] == 0.4 def test_create_agent_by_agentinfo_uses_helper_and_creates_client(self): # Build a fake AgentInfo with the minimum surface used by the method @@ -206,7 +207,7 @@ def test_create_agent_by_agentinfo_uses_helper_and_creates_client(self): with patch( "libs.agent_framework.agent_builder.get_bearer_token_provider", return_value="token-provider", - ), patch("libs.agent_framework.agent_builder.ChatAgent") as mock_chat: + ), patch("libs.agent_framework.agent_builder.Agent") as mock_chat: agent = AgentBuilder.create_agent_by_agentinfo( service_id="default", agent_info=agent_info, @@ -216,11 +217,11 @@ def test_create_agent_by_agentinfo_uses_helper_and_creates_client(self): helper.settings.get_service_config.assert_called_once_with("default") helper.create_client.assert_called_once() ck = mock_chat.call_args.kwargs - assert ck["chat_client"] == "client-instance" + assert ck["client"] == "client-instance" assert ck["instructions"] == "instr" assert ck["name"] == "A" assert ck["description"] == "D" - assert ck["temperature"] == 0.2 + assert 
ck["default_options"]["temperature"] == 0.2 def test_create_agent_by_agentinfo_falls_back_to_system_prompt(self): helper = MagicMock() @@ -241,7 +242,7 @@ def test_create_agent_by_agentinfo_falls_back_to_system_prompt(self): with patch( "libs.agent_framework.agent_builder.get_bearer_token_provider", return_value="tp", - ), patch("libs.agent_framework.agent_builder.ChatAgent") as mock_chat: + ), patch("libs.agent_framework.agent_builder.Agent") as mock_chat: AgentBuilder.create_agent_by_agentinfo( service_id="default", agent_info=agent_info ) diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py b/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py index 64a8d415..578a79d5 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py @@ -91,9 +91,9 @@ def test_azure_openai_response_with_retry(self): ) assert client is mock_cls.return_value kwargs = mock_cls.call_args.kwargs - assert kwargs["endpoint"] == "https://x" - assert kwargs["deployment_name"] == "gpt-4" - assert kwargs["ad_token_provider"] == "token" + assert kwargs["azure_endpoint"] == "https://x" + assert kwargs["model"] == "gpt-4" + assert kwargs["credential"] == "token" def test_default_token_provider_when_no_credential(self): with patch( @@ -107,53 +107,58 @@ def test_default_token_provider_when_no_credential(self): endpoint="https://x", deployment_name="gpt-4", ) - assert mock_cls.call_args.kwargs["ad_token_provider"] == "default-token" + assert mock_cls.call_args.kwargs["credential"] == "default-token" def test_azure_openai_chat_completion(self): - # Patch the lazily imported module - fake_module = types.ModuleType("agent_framework.azure") - fake_module.AzureOpenAIChatClient = MagicMock(return_value="chat_client") - with patch.dict(sys.modules, {"agent_framework.azure": fake_module}): - client = 
AgentFrameworkHelper.create_client( - ClientType.AzureOpenAIChatCompletion, - endpoint="https://x", - deployment_name="gpt-4", - ad_token_provider="t", - ) + with patch( + "libs.agent_framework.agent_framework_helper.OpenAIChatCompletionClient", + create=True, + ) as mock_cls: + mock_cls.return_value = "chat_client" + with patch.dict( + "sys.modules", + {"agent_framework.openai": MagicMock(OpenAIChatCompletionClient=mock_cls)}, + ): + client = AgentFrameworkHelper.create_client( + ClientType.AzureOpenAIChatCompletion, + endpoint="https://x", + deployment_name="gpt-4", + ad_token_provider="t", + ) assert client == "chat_client" - def test_azure_openai_assistant(self): - fake_module = types.ModuleType("agent_framework.azure") - fake_module.AzureOpenAIAssistantsClient = MagicMock(return_value="asst_client") - with patch.dict(sys.modules, {"agent_framework.azure": fake_module}): - client = AgentFrameworkHelper.create_client( + def test_azure_openai_assistant_raises_not_implemented(self): + with pytest.raises(NotImplementedError): + AgentFrameworkHelper.create_client( ClientType.AzureOpenAIAssistant, endpoint="https://x", deployment_name="gpt-4", ad_token_provider="t", ) - assert client == "asst_client" def test_azure_openai_response(self): - fake_module = types.ModuleType("agent_framework.azure") - fake_module.AzureOpenAIResponsesClient = MagicMock(return_value="resp_client") - with patch.dict(sys.modules, {"agent_framework.azure": fake_module}): - client = AgentFrameworkHelper.create_client( - ClientType.AzureOpenAIResponse, - endpoint="https://x", - deployment_name="gpt-4", - ad_token_provider="t", - ) + with patch( + "libs.agent_framework.agent_framework_helper.OpenAIChatClient", + create=True, + ) as mock_cls: + mock_cls.return_value = "resp_client" + with patch.dict( + "sys.modules", + {"agent_framework.openai": MagicMock(OpenAIChatClient=mock_cls)}, + ): + client = AgentFrameworkHelper.create_client( + ClientType.AzureOpenAIResponse, + endpoint="https://x", + 
deployment_name="gpt-4", + ad_token_provider="t", + ) assert client == "resp_client" - def test_azure_openai_agent(self): - fake_module = types.ModuleType("agent_framework.azure") - fake_module.AzureAIAgentClient = MagicMock(return_value="agent_client") - with patch.dict(sys.modules, {"agent_framework.azure": fake_module}): - client = AgentFrameworkHelper.create_client( + def test_azure_openai_agent_raises_not_implemented(self): + with pytest.raises(NotImplementedError): + AgentFrameworkHelper.create_client( ClientType.AzureOpenAIAgent, project_endpoint="https://proj", model_deployment_name="gpt-4", ad_token_provider="t", ) - assert client == "agent_client" diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py index 4939049b..600869a0 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py @@ -331,29 +331,28 @@ def test_no_prefix(self): class TestAppendTextChunk: def test_no_text_attr(self): orch = _make_orch() - ev = SimpleNamespace(data=SimpleNamespace()) # no `text` attr + ev = SimpleNamespace() # no `text` attr orch._current_agent_response = [] orch._append_text_chunk(ev) # noop assert orch._current_agent_response == [] def test_falsy_text(self): orch = _make_orch() - ev = SimpleNamespace(data=SimpleNamespace(text="")) + ev = SimpleNamespace(text="") orch._current_agent_response = [] orch._append_text_chunk(ev) assert orch._current_agent_response == [] - def test_text_object_with_text_attr(self): + def test_text_string(self): orch = _make_orch() - text_obj = SimpleNamespace(text="hello") - ev = SimpleNamespace(data=SimpleNamespace(text=text_obj)) + ev = SimpleNamespace(text="hello") orch._current_agent_response = [] orch._append_text_chunk(ev) assert orch._current_agent_response 
== ["hello"] - def test_text_string(self): + def test_text_raw_string(self): orch = _make_orch() - ev = SimpleNamespace(data=SimpleNamespace(text="raw")) + ev = SimpleNamespace(text="raw") orch._current_agent_response = [] orch._append_text_chunk(ev) assert orch._current_agent_response == ["raw"] @@ -417,20 +416,20 @@ async def _bad_stream(_): class TestProcessToolCalls: def test_no_tool_calls_returns_immediately(self): orch = _make_orch() - ev = SimpleNamespace(data=SimpleNamespace(contents=None)) + ev = SimpleNamespace(contents=None) _run(orch._process_tool_calls(ev, "A", None)) def test_records_complete_dict_args(self): orch = _make_orch() item = SimpleNamespace(name="search", call_id="c1", arguments={"q": "x"}) - ev = SimpleNamespace(data=SimpleNamespace(contents=[item])) + ev = SimpleNamespace(contents=[item]) _run(orch._process_tool_calls(ev, "A", None)) assert "search" in {tc["tool_name"] for tc in orch.agent_tool_usage["A"]} def test_skips_when_already_recorded(self): orch = _make_orch() item = SimpleNamespace(name="search", call_id="c1", arguments={"q": "x"}) - ev = SimpleNamespace(data=SimpleNamespace(contents=[item])) + ev = SimpleNamespace(contents=[item]) _run(orch._process_tool_calls(ev, "A", None)) # second pass should be skipped _run(orch._process_tool_calls(ev, "A", None)) @@ -439,7 +438,7 @@ def test_skips_when_already_recorded(self): def test_skips_invalid_calls(self): orch = _make_orch() item = SimpleNamespace(name=None, call_id=None, arguments=None) - ev = SimpleNamespace(data=SimpleNamespace(contents=[item])) + ev = SimpleNamespace(contents=[item]) _run(orch._process_tool_calls(ev, "A", None)) assert orch.agent_tool_usage == {} @@ -448,13 +447,13 @@ def test_streamed_string_args_buffer_until_complete(self): # Send incomplete JSON args, then complete item1 = SimpleNamespace(name="t", call_id="c", arguments='{"q":"hel') - ev1 = SimpleNamespace(data=SimpleNamespace(contents=[item1])) + ev1 = SimpleNamespace(contents=[item1]) 
_run(orch._process_tool_calls(ev1, "A", None)) # not yet recorded assert "A" not in orch.agent_tool_usage or not orch.agent_tool_usage["A"] item2 = SimpleNamespace(name="t", call_id="c", arguments='{"q":"hello"}') - ev2 = SimpleNamespace(data=SimpleNamespace(contents=[item2])) + ev2 = SimpleNamespace(contents=[item2]) _run(orch._process_tool_calls(ev2, "A", None)) assert orch.agent_tool_usage["A"][0]["arguments"] == {"q": "hello"} @@ -601,30 +600,27 @@ def test_skips_unrelated(self): class TestBackfillToolUsage: def test_skips_non_assistant(self): - from agent_framework import Role orch = _make_orch() - msg = SimpleNamespace(role=Role.USER, contents=[]) + msg = SimpleNamespace(role="user", contents=[]) orch._backfill_tool_usage_from_conversation([msg]) assert orch.agent_tool_usage == {} def test_records_calls_from_assistant(self): - from agent_framework import Role orch = _make_orch() item = SimpleNamespace(name="t", call_id="c", arguments={"x": 1}) msg = SimpleNamespace( - role=Role.ASSISTANT, author_name="A", contents=[item] + role="assistant", author_name="A", contents=[item] ) orch._backfill_tool_usage_from_conversation([msg]) assert orch.agent_tool_usage["A"][0]["tool_name"] == "t" def test_dedup_already_recorded(self): - from agent_framework import Role orch = _make_orch() # Pre-mark this call as already recorded orch._tool_call_recorded.add(("A", "c")) item = SimpleNamespace(name="t", call_id="c", arguments={}) msg = SimpleNamespace( - role=Role.ASSISTANT, author_name="A", contents=[item] + role="assistant", author_name="A", contents=[item] ) orch._backfill_tool_usage_from_conversation([msg]) assert "A" in orch.agent_tool_usage @@ -719,16 +715,17 @@ def test_build_groupchat_invokes_builder(self): }) with patch("libs.agent_framework.groupchat_orchestrator.GroupChatBuilder") as MockBuilder: built = MagicMock() - built.set_manager.return_value = built - built.participants.return_value = built built.build.return_value = "wf" MockBuilder.return_value = built wf = 
_run(orch._build_groupchat()) assert wf == "wf" + # Check constructor args + ctor_kwargs = MockBuilder.call_args.kwargs + assert ctor_kwargs["orchestrator_agent"] == "coord" # ResultGenerator excluded from participants - kwargs = built.participants.call_args.args[0] - assert "arch" in kwargs - assert "rg" not in kwargs + participants = ctor_kwargs["participants"] + assert "arch" in participants + assert "rg" not in participants # ----------------------------------------------------------------------------- @@ -776,13 +773,12 @@ def test_tail_zero_returns_head(self): class TestBuildResultGeneratorConversation: def test_excludes_named_authors(self): - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message orch = _make_orch() msgs = [ - ChatMessage(role=Role.ASSISTANT, text="from coord", author_name="Coordinator"), - ChatMessage(role=Role.ASSISTANT, text="from architect", author_name="Architect"), + Message(role="assistant", contents=[Content.from_text("from coord")], author_name="Coordinator"), + Message(role="assistant", contents=[Content.from_text("from architect")], author_name="Architect"), ] out = orch._build_result_generator_conversation( msgs, @@ -797,14 +793,13 @@ def test_excludes_named_authors(self): assert all("Coordinator" != m.author_name for m in out) def test_dedupes_identical_payloads(self): - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message orch = _make_orch() big = "X" * 1000 msgs = [ - ChatMessage(role=Role.ASSISTANT, text=big, author_name="A"), - ChatMessage(role=Role.ASSISTANT, text=big, author_name="A"), + Message(role="assistant", contents=[Content.from_text(big)], author_name="A"), + Message(role="assistant", contents=[Content.from_text(big)], author_name="A"), ] out = orch._build_result_generator_conversation( msgs, @@ -818,12 +813,11 @@ def test_dedupes_identical_payloads(self): assert len(out) == 1 def 
test_truncates_messages_to_per_message_budget(self): - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message orch = _make_orch() msgs = [ - ChatMessage(role=Role.ASSISTANT, text="A" * 500, author_name="X"), + Message(role="assistant", contents=[Content.from_text("A" * 500)], author_name="X"), ] out = orch._build_result_generator_conversation( msgs, @@ -837,12 +831,11 @@ def test_truncates_messages_to_per_message_budget(self): assert len(out[-1].text) <= 100 def test_total_budget_enforced(self): - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message orch = _make_orch() msgs = [ - ChatMessage(role=Role.ASSISTANT, text="A" * 100, author_name=str(i)) + Message(role="assistant", contents=[Content.from_text("A" * 100)], author_name=str(i)) for i in range(20) ] out = orch._build_result_generator_conversation( @@ -858,12 +851,11 @@ def test_total_budget_enforced(self): assert total <= 200 def test_max_messages_caps_count(self): - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message orch = _make_orch() msgs = [ - ChatMessage(role=Role.ASSISTANT, text=f"m{i}", author_name=str(i)) + Message(role="assistant", contents=[Content.from_text(f"m{i}")], author_name=str(i)) for i in range(20) ] out = orch._build_result_generator_conversation( @@ -915,8 +907,7 @@ def test_unknown_tool_name(self): class TestGenerateFinalResult: def test_parses_valid_json(self): from pydantic import BaseModel - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message class Model(BaseModel): x: int @@ -927,7 +918,7 @@ class Model(BaseModel): orch = _make_orch(participants={"Coordinator": object(), "ResultGenerator": rg}, result_format=Model) out = _run( orch._generate_final_result( - conversation=[ChatMessage(role=Role.ASSISTANT, 
text="x", author_name="A")], + conversation=[Message(role="assistant", contents=[Content.from_text("x")], author_name="A")], result_format=Model, result_generator_name="ResultGenerator", ) @@ -936,8 +927,7 @@ class Model(BaseModel): def test_retry_on_validation_error(self): from pydantic import BaseModel - from agent_framework import Role - from agent_framework import ChatMessage + from agent_framework import Content, Message class Model(BaseModel): x: int @@ -950,7 +940,7 @@ class Model(BaseModel): orch = _make_orch(participants={"Coordinator": object(), "ResultGenerator": rg}, result_format=Model) out = _run( orch._generate_final_result( - conversation=[ChatMessage(role=Role.ASSISTANT, text="x", author_name="A")], + conversation=[Message(role="assistant", contents=[Content.from_text("x")], author_name="A")], result_format=Model, result_generator_name="ResultGenerator", ) @@ -968,8 +958,9 @@ class TestHandleAgentUpdate: def test_invokes_subroutines(self): orch = _make_orch() ev = SimpleNamespace( - executor_id="groupchat_agent:A", - data=SimpleNamespace(text="chunk", contents=None), + agent_id="groupchat_agent:A", + text="chunk", + contents=None, ) _run(orch._handle_agent_update(ev, None, None)) assert orch._last_executor_id == "A" diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_input_observer_middleware.py b/src/processor/src/tests/unit/libs/agent_framework/test_input_observer_middleware.py index 7556b989..d9fc0851 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_input_observer_middleware.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_input_observer_middleware.py @@ -4,7 +4,7 @@ import asyncio from types import SimpleNamespace -from agent_framework import ChatMessage, Role +from agent_framework import Content, Message from libs.agent_framework.middlewares import InputObserverMiddleware @@ -13,7 +13,7 @@ def test_input_observer_middleware_replaces_user_text_when_configured() -> None: async def _run() -> None: 
ctx = SimpleNamespace( messages=[ - ChatMessage(role=Role.USER, text="original"), + Message(role="user", contents=[Content.from_text("original")]), ] ) @@ -24,7 +24,7 @@ async def _next(_context): await mw.process(ctx, _next) - assert ctx.messages[0].role == Role.USER + assert ctx.messages[0].role == "user" assert ctx.messages[0].text == "replacement" asyncio.run(_run()) diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_middlewares_extras.py b/src/processor/src/tests/unit/libs/agent_framework/test_middlewares_extras.py index c4c32f5a..f9ed0f4e 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_middlewares_extras.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_middlewares_extras.py @@ -7,7 +7,7 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock -from agent_framework import ChatMessage, Role +from agent_framework import Content, Message from libs.agent_framework.middlewares import ( DebuggingMiddleware, @@ -86,8 +86,8 @@ class TestInputObserverMiddleware: def test_replaces_user_messages_when_replacement_set(self): from libs.agent_framework.middlewares import InputObserverMiddleware - msg_user = ChatMessage(role=Role.USER, text="orig user") - msg_assistant = ChatMessage(role=Role.ASSISTANT, text="hi") + msg_user = Message(role="user", contents=[Content.from_text("orig user")]) + msg_assistant = Message(role="assistant", contents=[Content.from_text("hi")]) ctx = MagicMock() ctx.messages = [msg_user, msg_assistant] next_fn = AsyncMock() @@ -101,7 +101,7 @@ def test_replaces_user_messages_when_replacement_set(self): def test_no_replacement_keeps_text(self): from libs.agent_framework.middlewares import InputObserverMiddleware - msg = ChatMessage(role=Role.USER, text="keep me") + msg = Message(role="user", contents=[Content.from_text("keep me")]) ctx = MagicMock() ctx.messages = [msg] mw = InputObserverMiddleware(replacement=None) diff --git 
a/src/processor/src/tests/unit/libs/agent_framework/test_shared_memory_context_provider.py b/src/processor/src/tests/unit/libs/agent_framework/test_shared_memory_context_provider.py index 1d75ee7a..2f9e1948 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_shared_memory_context_provider.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_shared_memory_context_provider.py @@ -6,6 +6,7 @@ from __future__ import annotations import asyncio +from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock from libs.agent_framework.qdrant_memory_store import MemoryEntry @@ -20,8 +21,7 @@ def _make_chat_message(text: str, role: str = "assistant") -> MagicMock: msg = MagicMock() msg.text = text msg.content = text - msg.role = MagicMock() - msg.role.value = role + msg.role = role return msg @@ -61,8 +61,18 @@ def _make_provider(store=None): ), store +def _make_session_context(input_messages=None, response=None): + """Create a fake SessionContext for testing.""" + ctx = SimpleNamespace( + input_messages=input_messages or [], + instructions=None, + response=response, + ) + return ctx + + # --------------------------------------------------------------------------- -# invoking() — Pre-LLM memory injection +# before_run() - Pre-LLM memory injection # --------------------------------------------------------------------------- @@ -74,12 +84,13 @@ async def _run(): _make_memory_entry("Azure Files for AKS", agent_name="AKS Expert"), ] messages = [_make_chat_message("How should we handle storage configuration?")] + ctx = _make_session_context(input_messages=messages) - context = await provider.invoking(messages) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) - assert context.instructions is not None - assert "GKE Filestore CSI" in context.instructions - assert "Azure Files for AKS" in context.instructions + assert ctx.instructions is not None + assert "GKE Filestore CSI" in ctx.instructions[0] 
+ assert "Azure Files for AKS" in ctx.instructions[0] store.search.assert_called_once() asyncio.run(_run()) @@ -88,9 +99,10 @@ async def _run(): def test_invoking_empty_messages_returns_empty(): async def _run(): provider, _ = _make_provider() - context = await provider.invoking([]) - assert context.instructions is None - assert context.messages == [] + ctx = _make_session_context(input_messages=[]) + + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) + assert ctx.instructions is None asyncio.run(_run()) @@ -100,9 +112,10 @@ async def _run(): provider, store = _make_provider() store.search.return_value = [] messages = [_make_chat_message("What is the overall migration plan for AKS?")] + ctx = _make_session_context(input_messages=messages) - context = await provider.invoking(messages) - assert context.instructions is None + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) + assert ctx.instructions is None asyncio.run(_run()) @@ -112,9 +125,10 @@ async def _run(): provider, store = _make_provider() store.search.side_effect = Exception("search failed") messages = [_make_chat_message("What is the networking plan for AKS?")] + ctx = _make_session_context(input_messages=messages) - context = await provider.invoking(messages) - assert context.instructions is None + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) + assert ctx.instructions is None asyncio.run(_run()) @@ -124,8 +138,9 @@ async def _run(): provider, store = _make_provider() long_text = "x" * 5000 messages = [_make_chat_message(long_text)] + ctx = _make_session_context(input_messages=messages) - await provider.invoking(messages) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) query = store.search.call_args.kwargs["query"] assert len(query) <= 2000 @@ -141,8 +156,9 @@ async def _run(): _make_chat_message("Second"), _make_chat_message("Latest question 
about storage"), ] + ctx = _make_session_context(input_messages=messages) - await provider.invoking(messages) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) query = store.search.call_args.kwargs["query"] assert "Latest question about storage" in query @@ -158,11 +174,12 @@ async def _run(): ] store.search.return_value = large_memories messages = [_make_chat_message("What storage configuration should we use for persistent volumes?")] + ctx = _make_session_context(input_messages=messages) - context = await provider.invoking(messages) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) - assert context.instructions is not None - assert len(context.instructions) <= MAX_MEMORY_CONTEXT_CHARS + 200 + assert ctx.instructions is not None + assert len(ctx.instructions[0]) <= MAX_MEMORY_CONTEXT_CHARS + 200 asyncio.run(_run()) @@ -174,11 +191,12 @@ async def _run(): _make_memory_entry("Use Premium SSD", agent_name="Chief Architect", step="design"), ] messages = [_make_chat_message("What storage class should we choose for the cluster?")] + ctx = _make_session_context(input_messages=messages) - context = await provider.invoking(messages) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) - assert "Chief Architect" in context.instructions - assert "design" in context.instructions + assert "Chief Architect" in ctx.instructions[0] + assert "design" in ctx.instructions[0] asyncio.run(_run()) @@ -188,27 +206,35 @@ async def _run(): provider, store = _make_provider() store.search.return_value = [_make_memory_entry("some memory")] single = _make_chat_message("What about networking configuration for AKS?") + ctx = _make_session_context(input_messages=[single]) - context = await provider.invoking(single) + await provider.before_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) - assert context.instructions is not None + assert ctx.instructions is not 
None store.search.assert_called_once() asyncio.run(_run()) # --------------------------------------------------------------------------- -# invoked() — Post-LLM memory storage +# after_run() - Post-LLM memory storage # --------------------------------------------------------------------------- +def _make_response_with_messages(messages): + """Create a mock response object with messages attribute.""" + resp = SimpleNamespace(messages=messages) + return resp + + def test_invoked_stores_response(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("What is the networking plan for AKS?")] - response = [_make_chat_message("We should use Azure CNI for networking configuration in the AKS cluster")] + response_msgs = [_make_chat_message("We should use Azure CNI for networking configuration in the AKS cluster")] + response = _make_response_with_messages(response_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, response) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) await provider.flush() store.add.assert_called_once() @@ -222,10 +248,10 @@ async def _run(): def test_invoked_skips_on_exception(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("Q")] - response = [_make_chat_message("A" * 100)] + # No response (simulating an exception scenario) + ctx = _make_session_context(response=None) - await provider.invoked(request, response, invoke_exception=Exception("fail")) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) store.add.assert_not_called() asyncio.run(_run()) @@ -234,9 +260,9 @@ async def _run(): def test_invoked_skips_none_response(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("Q")] + ctx = _make_session_context(response=None) - await provider.invoked(request, None) + await provider.after_run(agent=MagicMock(), session=MagicMock(), 
context=ctx, state={}) store.add.assert_not_called() asyncio.run(_run()) @@ -245,10 +271,11 @@ async def _run(): def test_invoked_skips_short_response(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("Q")] - short = [_make_chat_message("x" * (MIN_CONTENT_LENGTH_TO_STORE - 1))] + short_msgs = [_make_chat_message("x" * (MIN_CONTENT_LENGTH_TO_STORE - 1))] + response = _make_response_with_messages(short_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, short) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) store.add.assert_not_called() asyncio.run(_run()) @@ -257,10 +284,11 @@ async def _run(): def test_invoked_stores_long_response(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("Q")] - long_resp = [_make_chat_message("x" * (MIN_CONTENT_LENGTH_TO_STORE + 1))] + long_msgs = [_make_chat_message("x" * (MIN_CONTENT_LENGTH_TO_STORE + 1))] + response = _make_response_with_messages(long_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, long_resp) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) await provider.flush() store.add.assert_called_once() @@ -270,11 +298,12 @@ async def _run(): def test_invoked_increments_turn_counter(): async def _run(): provider, store = _make_provider() - request = [_make_chat_message("Q")] - response = [_make_chat_message("A" * 100)] + response_msgs = [_make_chat_message("A" * 100)] + response = _make_response_with_messages(response_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, response) - await provider.invoked(request, response) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) assert provider._turn_counter == 2 asyncio.run(_run()) @@ 
-284,10 +313,11 @@ def test_invoked_store_failure_does_not_raise(): async def _run(): provider, store = _make_provider() store.add.side_effect = Exception("store failed") - request = [_make_chat_message("Q")] - response = [_make_chat_message("A" * 100)] + response_msgs = [_make_chat_message("A" * 100)] + response = _make_response_with_messages(response_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, response) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) await provider.flush() # Should not raise asyncio.run(_run()) @@ -296,10 +326,11 @@ async def _run(): def test_invoked_with_single_message(): async def _run(): provider, store = _make_provider() - request = _make_chat_message("What is the question about networking?") - response = _make_chat_message("We should use Azure CNI Overlay for the networking configuration in AKS") + response_msgs = [_make_chat_message("We should use Azure CNI Overlay for the networking configuration in AKS")] + response = _make_response_with_messages(response_msgs) + ctx = _make_session_context(response=response) - await provider.invoked(request, response) + await provider.after_run(agent=MagicMock(), session=MagicMock(), context=ctx, state={}) await provider.flush() store.add.assert_called_once() diff --git a/src/processor/src/tests/unit/steps/test_migration_processor_run.py b/src/processor/src/tests/unit/steps/test_migration_processor_run.py index acd4ee40..1f05bb85 100644 --- a/src/processor/src/tests/unit/steps/test_migration_processor_run.py +++ b/src/processor/src/tests/unit/steps/test_migration_processor_run.py @@ -12,12 +12,7 @@ import pytest from agent_framework import ( - ExecutorCompletedEvent, - ExecutorFailedEvent, - ExecutorInvokedEvent, - WorkflowFailedEvent, - WorkflowOutputEvent, - WorkflowStartedEvent, + WorkflowEvent, ) from agent_framework._workflows._events import WorkflowErrorDetails @@ -61,12 +56,12 @@ def _make_processor(events: 
list, memory_store=None) -> MigrationProcessor: proc._telemetry = telemetry # expose for assertions - async def _stream(_input): + async def _stream(_input, **kwargs): for ev in events: yield ev workflow = MagicMock() - workflow.run_stream = _stream + workflow.run = _stream proc.workflow = workflow # Patch _create_memory_store as an AsyncMock returning the provided value. @@ -79,11 +74,11 @@ class TestRunSuccessFlow: def test_workflow_started_then_normal_output_returns_data(self): data = SimpleNamespace(is_hard_terminated=False, value="ok") events = [ - WorkflowStartedEvent(), - ExecutorInvokedEvent(executor_id="analysis", data=_make_input()), - ExecutorCompletedEvent(executor_id="analysis", data={"r": 1}), - ExecutorInvokedEvent(executor_id="design", data=_make_input()), - WorkflowOutputEvent(data=data, source_executor_id="design"), + WorkflowEvent.started(), + WorkflowEvent.executor_invoked("analysis", _make_input()), + WorkflowEvent.executor_completed("analysis", {"r": 1}), + WorkflowEvent.executor_invoked("design", _make_input()), + WorkflowEvent.output("design", data), ] proc = _make_processor(events) result = _run(proc.run(_make_input())) @@ -96,10 +91,10 @@ def test_workflow_started_then_normal_output_returns_data(self): def test_invoked_event_for_non_analysis_triggers_transition_phase(self): data = SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), + WorkflowEvent.started(), # Documentation invocation should map to "Documentation" display - ExecutorInvokedEvent(executor_id="documentation", data=_make_input()), - WorkflowOutputEvent(data=data, source_executor_id="documentation"), + WorkflowEvent.executor_invoked("documentation", _make_input()), + WorkflowEvent.output("documentation", data), ] proc = _make_processor(events) _run(proc.run(_make_input())) @@ -112,9 +107,9 @@ def test_invoked_event_for_non_analysis_triggers_transition_phase(self): def test_invoked_event_unknown_executor_uses_capitalize(self): data = 
SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), - ExecutorInvokedEvent(executor_id="custom", data=_make_input()), - WorkflowOutputEvent(data=data, source_executor_id="custom"), + WorkflowEvent.started(), + WorkflowEvent.executor_invoked("custom", _make_input()), + WorkflowEvent.output("custom", data), ] proc = _make_processor(events) _run(proc.run(_make_input())) @@ -132,8 +127,8 @@ def test_hard_terminated_returns_data_and_records_failure(self): blocking_issues=["NEED_HUMAN_REVIEW"], ) events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.output("analysis", data), ] proc = _make_processor(events) result = _run(proc.run(_make_input())) @@ -150,8 +145,8 @@ def test_hard_terminated_security_policy_collects_evidence(self): blocking_issues=["SECURITY_POLICY_VIOLATION"], ) events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.output("analysis", data), ] proc = _make_processor(events) @@ -181,8 +176,8 @@ def test_hard_terminated_security_policy_handles_collector_error(self): blocking_issues=["SECURITY_POLICY_VIOLATION"], ) events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.output("analysis", data), ] proc = _make_processor(events) with patch( @@ -198,8 +193,8 @@ def test_hard_terminated_security_policy_handles_collector_error(self): class TestRunOutputMissingFlow: def test_missing_output_raises_workflow_executor_failed_exception(self): events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=None, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.output("analysis", None), ] proc = _make_processor(events) with pytest.raises(WorkflowExecutorFailedException) as excinfo: @@ -209,8 +204,8 @@ def 
test_missing_output_raises_workflow_executor_failed_exception(self): def test_missing_output_with_none_source_uses_unknown(self): events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=None, source_executor_id=None), + WorkflowEvent.started(), + WorkflowEvent.output(None, None), ] proc = _make_processor(events) with pytest.raises(WorkflowExecutorFailedException): @@ -226,9 +221,9 @@ def test_workflow_failed_event_raises_with_details(self): executor_id="yaml", ) events = [ - WorkflowStartedEvent(), - ExecutorInvokedEvent(executor_id="yaml", data=_make_input()), - WorkflowFailedEvent(details=details), + WorkflowEvent.started(), + WorkflowEvent.executor_invoked("yaml", _make_input()), + WorkflowEvent.failed(details), ] proc = _make_processor(events) with pytest.raises(WorkflowExecutorFailedException) as excinfo: @@ -246,8 +241,8 @@ def test_workflow_failed_classifies_context_size_message(self): executor_id="design", ) events = [ - WorkflowStartedEvent(), - WorkflowFailedEvent(details=details), + WorkflowEvent.started(), + WorkflowEvent.failed(details), ] proc = _make_processor(events) with pytest.raises(WorkflowExecutorFailedException): @@ -261,23 +256,23 @@ def test_workflow_failed_classifies_context_error_type(self): executor_id="analysis", ) events = [ - WorkflowStartedEvent(), - WorkflowFailedEvent(details=details), + WorkflowEvent.started(), + WorkflowEvent.failed(details), ] proc = _make_processor(events) with pytest.raises(WorkflowExecutorFailedException): _run(proc.run(_make_input())) def test_executor_failed_event_is_silently_ignored(self): - # ExecutorFailedEvent does not raise on its own; WorkflowFailedEvent does. + # executor_failed event does not raise on its own; workflow "failed" event does. 
details = WorkflowErrorDetails( error_type="X", message="m", traceback=None, executor_id="analysis" ) data = SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), - ExecutorFailedEvent(executor_id="analysis", details=details), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.executor_failed("analysis", details), + WorkflowEvent.output("analysis", data), ] proc = _make_processor(events) result = _run(proc.run(_make_input())) @@ -288,9 +283,9 @@ class TestRunMemoryStoreLifecycle: def test_memory_store_is_registered_and_closed(self): data = SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), - ExecutorCompletedEvent(executor_id="analysis", data=None), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.executor_completed("analysis", None), + WorkflowEvent.output("analysis", data), ] memory_store = MagicMock() memory_store.get_count = AsyncMock(return_value=3) @@ -304,8 +299,8 @@ def test_memory_store_is_registered_and_closed(self): def test_memory_store_close_error_is_swallowed(self): data = SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), - WorkflowOutputEvent(data=data, source_executor_id="analysis"), + WorkflowEvent.started(), + WorkflowEvent.output("analysis", data), ] memory_store = MagicMock() memory_store.get_count = AsyncMock(side_effect=RuntimeError("x")) @@ -318,18 +313,18 @@ def test_memory_store_close_error_is_swallowed(self): def test_executor_completed_with_memory_store_logs_count(self): data = SimpleNamespace(is_hard_terminated=False) events = [ - WorkflowStartedEvent(), - ExecutorCompletedEvent( - executor_id="analysis", data={"some": "result"} + WorkflowEvent.started(), + WorkflowEvent.executor_completed( + "analysis", {"some": "result"} ), - WorkflowOutputEvent(data=data, source_executor_id="design"), + WorkflowEvent.output("design", data), ] memory_store = 
MagicMock() memory_store.get_count = AsyncMock(return_value=7) memory_store.close = AsyncMock() proc = _make_processor(events, memory_store=memory_store) _run(proc.run(_make_input())) - # get_count called at least once during ExecutorCompletedEvent and at finally + # get_count called at least once during executor_completed event and at finally assert memory_store.get_count.await_count >= 2 # record_step_result called for the executor completed event with data proc._telemetry.record_step_result.assert_any_await( diff --git a/src/processor/uv.lock b/src/processor/uv.lock index 0f3c189b..10c1b264 100644 --- a/src/processor/uv.lock +++ b/src/processor/uv.lock @@ -51,14 +51,14 @@ wheels = [ [[package]] name = "agent-framework" -version = "1.0.0b260107" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "agent-framework-core", extra = ["all"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7e/e7/5ad52075da4e586ca94fb8806b3085ac5dea8059413e413bff88c0452e88/agent_framework-1.0.0b260107.tar.gz", hash = "sha256:a2f6508a0ca1df3b7ca4e3a64e45bac8e33cdfe02cf69e9056e37e881a58aad7", size = 2898189, upload-time = "2026-01-07T23:57:48.213Z" } +sdist = { url = "https://files.pythonhosted.org/packages/68/e8/c2ee1c4dae4a86b95091969426d11361232a0c554124ba321852a6b6b9bd/agent_framework-1.3.0.tar.gz", hash = "sha256:a13423aceaf587cf28180138151d445bd2d4ce82908cef4a6fbb85fa1771bac1", size = 5509571, upload-time = "2026-05-08T00:09:16.022Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/55/ffef27526cc26bf163ccf9d58ba87bf4e677bba343a542e7b666846f744d/agent_framework-1.0.0b260107-py3-none-any.whl", hash = "sha256:080deb32bff4ef07227a4ba709798c67079ff8a2997fe7a0aed0010adc0c18cf", size = 5554, upload-time = "2026-01-07T23:57:08.433Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/050f8f8bce8c629a88197837b4beb35cb287f880789fc01923fd5938f142/agent_framework-1.3.0-py3-none-any.whl", hash = 
"sha256:baaaa932639c87be99d43333f612c3b4112d6d976f0e1e72238e42a4bd572438", size = 5684, upload-time = "2026-05-08T00:09:54.064Z" }, ] [[package]] @@ -103,31 +103,29 @@ wheels = [ ] [[package]] -name = "agent-framework-azure-ai" +name = "agent-framework-azure-ai-search" version = "1.0.0b260130" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "agent-framework-core" }, - { name = "aiohttp" }, - { name = "azure-ai-agents" }, - { name = "azure-ai-projects" }, + { name = "azure-search-documents" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/ef/69ead4fcd2c21608ce35353a507df23df51872552747f803c43d1d81f612/agent_framework_azure_ai-1.0.0b260130.tar.gz", hash = "sha256:c571275089a801f961370ba824568c8b02143b1a6bb5b1d78b97c6debdf4906f", size = 32723, upload-time = "2026-01-30T18:56:41.07Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/63/81c7853aa526f3c3667871cea14667af73323c6c53d31c34be34926a9de4/agent_framework_azure_ai_search-1.0.0b260130.tar.gz", hash = "sha256:0a622fdddd7dc0287de693f2aa6f770ec52ea8d1eaca817c4276daa08001c10b", size = 13312, upload-time = "2026-01-30T19:01:08.046Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/8f/a1467c352fed5eb6ebb9567109251cc39b5b3ebb5137a2d14c71fea51bc8/agent_framework_azure_ai-1.0.0b260130-py3-none-any.whl", hash = "sha256:87f0248fe6d4f2f4146f0a56a53527af6365d4a377dc2e3d56c37cbb9deae098", size = 38542, upload-time = "2026-01-30T19:01:12.102Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ec/ac8143dbb1af2ec510f7772d712803193a6a0ad5f36b06e7ec7121df5c80/agent_framework_azure_ai_search-1.0.0b260130-py3-none-any.whl", hash = "sha256:0278c948696d7a00193a0271074c6057b57589ff98eda5544f2eafeac051d6e9", size = 13449, upload-time = "2026-01-30T19:01:23.262Z" }, ] [[package]] -name = "agent-framework-azure-ai-search" -version = "1.0.0b260130" +name = "agent-framework-azure-cosmos" +version = "1.0.0b260507" source = { registry = "https://pypi.org/simple" 
} dependencies = [ { name = "agent-framework-core" }, - { name = "azure-search-documents" }, + { name = "azure-cosmos" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/63/81c7853aa526f3c3667871cea14667af73323c6c53d31c34be34926a9de4/agent_framework_azure_ai_search-1.0.0b260130.tar.gz", hash = "sha256:0a622fdddd7dc0287de693f2aa6f770ec52ea8d1eaca817c4276daa08001c10b", size = 13312, upload-time = "2026-01-30T19:01:08.046Z" } +sdist = { url = "https://files.pythonhosted.org/packages/27/97/fd8b045fc4eb1d213d7a91eff6e48e030fdb67da30505f46f1ed20a7aa48/agent_framework_azure_cosmos-1.0.0b260507.tar.gz", hash = "sha256:2c8ec2d5eae52b9e92fd14b4adecd5a52a900a7897589549c32852d9488112c7", size = 10984, upload-time = "2026-05-08T00:09:22.016Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/ec/ac8143dbb1af2ec510f7772d712803193a6a0ad5f36b06e7ec7121df5c80/agent_framework_azure_ai_search-1.0.0b260130-py3-none-any.whl", hash = "sha256:0278c948696d7a00193a0271074c6057b57589ff98eda5544f2eafeac051d6e9", size = 13449, upload-time = "2026-01-30T19:01:23.262Z" }, + { url = "https://files.pythonhosted.org/packages/84/b9/6ac1960dae49ecde8ea906b302abe79b66d09d4cf74f8ed3f7dd9fc6230f/agent_framework_azure_cosmos-1.0.0b260507-py3-none-any.whl", hash = "sha256:c1d7ae4a560b592d2bff9c1ec75a7910101baf8c1778443644cc8cb81c82c1a1", size = 11989, upload-time = "2026-05-08T00:09:02.858Z" }, ] [[package]] @@ -146,6 +144,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/fa/200b40db670f79f561ff1e69e9626729ceb6486af970e3489f6c3a295d76/agent_framework_azurefunctions-1.0.0b260130-py3-none-any.whl", hash = "sha256:7d529a0bad67caa38d8823462c439e97de5e1cf364c0e9a0895df5fb44996f64", size = 17788, upload-time = "2026-01-30T18:56:45.741Z" }, ] +[[package]] +name = "agent-framework-bedrock" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, + { name = "boto3" }, + { name = "botocore" 
}, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/86/0b7dd9d1c043b251ff8bd0e037a20495c82c798914db0372040625cae889/agent_framework_bedrock-1.0.0b260507.tar.gz", hash = "sha256:38953ab30f7aff651a9c85c1ceeefd2ad85fa094b3316858930f1c18dcaff2c6", size = 17467, upload-time = "2026-05-08T00:09:24.852Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/b4/fc4277a50b7a0a7cd038e4511a0215fb98ab5e394f719506e30c31854335/agent_framework_bedrock-1.0.0b260507-py3-none-any.whl", hash = "sha256:28ce485c639e467ca4fae4d5b747cd7f9438b8145ca096c658ab5c694611edcc", size = 13907, upload-time = "2026-05-08T00:09:18.84Z" }, +] + [[package]] name = "agent-framework-chatkit" version = "1.0.0b260130" @@ -159,6 +171,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9f/f1/68496e52aa36e66cf2962b8a8c6937053e2e57ad5f135b6983d705172554/agent_framework_chatkit-1.0.0b260130-py3-none-any.whl", hash = "sha256:a7814a5b222de7a0ac57fb89f4a6e534521c7e58bdc86a6465885fb9d57e63f1", size = 11712, upload-time = "2026-01-30T18:56:49.14Z" }, ] +[[package]] +name = "agent-framework-claude" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, + { name = "claude-agent-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/1a/1a1c810e7c74075a4766ac0de66e3e510e0267533baa41a089ab1eb5bf01/agent_framework_claude-1.0.0b260507.tar.gz", hash = "sha256:0daccfef8141470fd206bb8b30925a44ba42ec6fb8946934dbcefe50cfeae14c", size = 11618, upload-time = "2026-05-08T00:08:57.253Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/f8/4977b7d7f1f2ea82c396de07b04f999c58475476722836f3ed0337722495/agent_framework_claude-1.0.0b260507-py3-none-any.whl", hash = "sha256:3ebd1d391b4413512970da62eb5377099ecd66305048594ec5b65cbdf141623f", size = 11588, upload-time = "2026-05-08T00:09:00.32Z" }, +] + [[package]] name = "agent-framework-copilotstudio" version = "1.0.0b260130" @@ -174,23 
+199,17 @@ wheels = [ [[package]] name = "agent-framework-core" -version = "1.0.0b260107" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "azure-identity" }, - { name = "mcp", extra = ["ws"] }, - { name = "openai" }, { name = "opentelemetry-api" }, - { name = "opentelemetry-sdk" }, - { name = "opentelemetry-semantic-conventions-ai" }, - { name = "packaging" }, { name = "pydantic" }, - { name = "pydantic-settings" }, + { name = "python-dotenv" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9d/44/06f5d2c99dd7bdb82c2cb5cbc354b5bc6af72d1886d20eff1dff83508fae/agent_framework_core-1.0.0b260107.tar.gz", hash = "sha256:12636fb64664c6153546f0d85dafccdbe57226767c14b3f38985867389f980bb", size = 3574757, upload-time = "2026-01-07T23:57:16.113Z" } +sdist = { url = "https://files.pythonhosted.org/packages/90/59/4c212abdb93074677d643e31a3c21e33ff26a3ccc351145475cd1ffffad7/agent_framework_core-1.3.0.tar.gz", hash = "sha256:91c3659718b733f70dde6fb3626edb044733e0f7aa5f9726c9774e17fae328ef", size = 365395, upload-time = "2026-05-08T00:09:09.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/5a/8c6315a2ca119ad48340344616d4b8e77fd68e2892f82c402069a52ad647/agent_framework_core-1.0.0b260107-py3-none-any.whl", hash = "sha256:5bd119b8d30dc2d5bee1c4a5c3597d7afc808a52e4de148725c4f2d9bcc7632b", size = 5687298, upload-time = "2026-01-07T23:57:26.286Z" }, + { url = "https://files.pythonhosted.org/packages/56/f2/c4258333f2691ee10869bf72f51d423808962ccf0c195b1f893c06c348ad/agent_framework_core-1.3.0-py3-none-any.whl", hash = "sha256:b7a5baf2beb383e9042af057df79dae4fda0b836cbc8530b3b2a57a3c12bb7ac", size = 407978, upload-time = "2026-05-08T00:09:32.752Z" }, ] [package.optional-dependencies] @@ -198,18 +217,28 @@ all = [ { name = "agent-framework-a2a" }, { name = "agent-framework-ag-ui" }, { name = "agent-framework-anthropic" }, - { name = "agent-framework-azure-ai" }, { name = 
"agent-framework-azure-ai-search" }, + { name = "agent-framework-azure-cosmos" }, { name = "agent-framework-azurefunctions" }, + { name = "agent-framework-bedrock" }, { name = "agent-framework-chatkit" }, + { name = "agent-framework-claude" }, { name = "agent-framework-copilotstudio" }, { name = "agent-framework-declarative" }, { name = "agent-framework-devui" }, + { name = "agent-framework-durabletask" }, + { name = "agent-framework-foundry" }, + { name = "agent-framework-foundry-local" }, + { name = "agent-framework-github-copilot" }, + { name = "agent-framework-hyperlight", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')" }, { name = "agent-framework-lab" }, { name = "agent-framework-mem0" }, { name = "agent-framework-ollama" }, + { name = "agent-framework-openai" }, + { name = "agent-framework-orchestrations" }, { name = "agent-framework-purview" }, { name = "agent-framework-redis" }, + { name = "mcp" }, ] [[package]] @@ -256,6 +285,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/22/122ed515935926137cc3c6ca795ef01b30feb82160cfc0f29a34f9d603de/agent_framework_durabletask-1.0.0b260130-py3-none-any.whl", hash = "sha256:a46e292800d10a62ce0923efe753594ddbf0bd6d1bb6e1258380f0dbf7d0302f", size = 36357, upload-time = "2026-01-30T19:01:24.057Z" }, ] +[[package]] +name = "agent-framework-foundry" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, + { name = "agent-framework-openai" }, + { name = "azure-ai-inference" }, + { name = "azure-ai-projects" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ec/f6/8700acd779cbffd933dcb5dc878abce3e0a2f536962567665ccc49965715/agent_framework_foundry-1.3.0.tar.gz", hash = "sha256:8a4b137efa0a7000e60fb396ad90e01c271d14a52f1325f1f0a32177d944bcff", size = 32620, upload-time = 
"2026-05-08T00:09:04.274Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/53/9acf5831263d4fcd1d5b8d39af99ee430ec2710d2f9adeab5a1fe7559da0/agent_framework_foundry-1.3.0-py3-none-any.whl", hash = "sha256:49987bc01b077f6c60af33c475f9770a02b4ff6d6822aede18fc5471b46ffd41", size = 37052, upload-time = "2026-05-08T00:09:13.139Z" }, +] + +[[package]] +name = "agent-framework-foundry-local" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, + { name = "agent-framework-openai" }, + { name = "foundry-local-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/03/8f0b8a2209fd091903bbb068c4458f19c74e48d37f4fa08748d76c3f3091/agent_framework_foundry_local-1.0.0b260507.tar.gz", hash = "sha256:fc2d98ff1f98d0481544c3ad8453f2d56096203fd368d0b68f52ef6ae4c7b0a6", size = 6719, upload-time = "2026-05-08T00:09:35.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/07/1120c862714d89f40d4575a052a495f86bda0fdb4132d5c4597c7a735875/agent_framework_foundry_local-1.0.0b260507-py3-none-any.whl", hash = "sha256:515346ca7716d86c9a4110db9f5586a65c4970ac442aaa00725d27341c5825df", size = 7176, upload-time = "2026-05-08T00:09:28.74Z" }, +] + +[[package]] +name = "agent-framework-github-copilot" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, + { name = "github-copilot-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/0f/0cab3d20c84ff309f820d02e810c1fa17f1a6fc432775605e34f651955ae/agent_framework_github_copilot-1.0.0b260507.tar.gz", hash = "sha256:f8640d4a18beca67a83b833b5d23f873aa5e1d4e91423ee1923d650b7b97d06d", size = 12546, upload-time = "2026-05-08T00:08:59.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/75/c8747c30acf236daa97063763fd16e443a2734e80c5678c42e103d1b50d6/agent_framework_github_copilot-1.0.0b260507-py3-none-any.whl", 
hash = "sha256:53a5daae86824fce017f30637edd5e50675e4630da5be09bb259383713198f40", size = 12510, upload-time = "2026-05-08T00:09:42.889Z" }, +] + +[[package]] +name = "agent-framework-hyperlight" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core", marker = "python_full_version < '3.14'" }, + { name = "hyperlight-sandbox", marker = "python_full_version < '3.14'" }, + { name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'win32')" }, + { name = "hyperlight-sandbox-python-guest", marker = "python_full_version < '3.14'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/1f/52a2541d4a0bc5657ca9c2ef4f85885fb323682052da3fc1451eabafb73d/agent_framework_hyperlight-1.0.0b260507.tar.gz", hash = "sha256:845baab7439ac7b94ee53805cf3d32d0eea3b77a040d0f1b367f0a395fd8c08b", size = 19057, upload-time = "2026-05-08T00:09:56.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/d8/c2e0d3f63ea53f9897bd6c31a3d07c41c48a7b30fd7a1c2b5182fffe32ca/agent_framework_hyperlight-1.0.0b260507-py3-none-any.whl", hash = "sha256:121b464edf32f3db0e5b2891525d8937f0854bc19102a7c50b1905ff29063da7", size = 19589, upload-time = "2026-05-08T00:09:52.71Z" }, +] + [[package]] name = "agent-framework-lab" version = "1.0.0b251024" @@ -294,6 +380,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/27/23e23a1919592dcf2aaf25aa9950a7dbda77c4ba03cba8843491b9f12024/agent_framework_ollama-1.0.0b260130-py3-none-any.whl", hash = "sha256:55e4e17f226ad61e8a9dcbbcc24ab006a3480043ecb4d32c12d2444f628054d6", size = 9167, upload-time = "2026-01-30T19:01:05.647Z" }, ] +[[package]] +name = "agent-framework-openai" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"agent-framework-core" }, + { name = "openai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/54/26595b5fa394dd91a5bd434f87b1e7d781545efbf0bd8053de193f89ec63/agent_framework_openai-1.3.0.tar.gz", hash = "sha256:770828447875ee169dde8cd2f2a0343f427d856af7c83895ca12d59f8c24a7f2", size = 49146, upload-time = "2026-05-08T00:09:44.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/d8/a0e0af08123d3c2ff3f42b6976eed155536c73be4d61b898bc15cf31a38c/agent_framework_openai-1.3.0-py3-none-any.whl", hash = "sha256:1953dcb9f3e852362be84b4316ee69639313a7f119eab6ce8c88949e1f24aa4b", size = 54041, upload-time = "2026-05-08T00:09:17.744Z" }, +] + +[[package]] +name = "agent-framework-orchestrations" +version = "1.0.0b260507" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "agent-framework-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/84/1a26978d91c40f62ef472fd36d1502545bb7425b94b03765c41b322e3398/agent_framework_orchestrations-1.0.0b260507.tar.gz", hash = "sha256:3f17281a2603240e3eed26174cab6b3dca153cb18cec8380f4719e598a55013f", size = 55971, upload-time = "2026-05-08T00:09:37.058Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/dd/f2df27ba789130470311e7487d19815483f837094672408a22655b33784a/agent_framework_orchestrations-1.0.0b260507-py3-none-any.whl", hash = "sha256:396a5ed962c2a3b1f09d8fc777933397df486bdae0a5f81cf63595c4c6f102de", size = 62074, upload-time = "2026-05-08T00:09:31.24Z" }, +] + [[package]] name = "agent-framework-purview" version = "1.0.0b260130" @@ -804,6 +915,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" }, ] +[[package]] +name = "boto3" +version = "1.43.27" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7e/ad/32ac82224c571776d1119c8d2a5eafeab97bace3b4ed2870cb80d5cda140/boto3-1.43.27.tar.gz", hash = "sha256:dc0d1b47f391983d8b3047e49402d31f9aaa4d7b398d3b4ea986fe680cbea43a", size = 113143, upload-time = "2026-06-10T19:38:35.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/1b/e423f7ed0177f0cc629f9bba39f505ad4a571b1f73c51402e6000bbf453b/boto3-1.43.27-py3-none-any.whl", hash = "sha256:b3eea072c2fdbbdd8c6161f912f603be10c8ec477625926dea8b91a0842a3482", size = 140538, upload-time = "2026-06-10T19:38:34.012Z" }, +] + +[[package]] +name = "botocore" +version = "1.43.27" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/4e/db50ef135f1d9ffc85e209a124004a5829d8f12f4a7a0afdf380cb19866d/botocore-1.43.27.tar.gz", hash = "sha256:2093c316c24214e50e18640b1869513b759bb8cc48b95b004a8306cb9f0d6703", size = 15504242, upload-time = "2026-06-10T19:38:25.389Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/46/05b227b34e434b54867c2c942b0bfbbe2fe41789c18bb15ef787d03e9a56/botocore-1.43.27-py3-none-any.whl", hash = "sha256:4976544e652d5a1d8eca135da019f8e1c2d749efa2f9a31a8fb8c76f1895a40b", size = 15190293, upload-time = "2026-06-10T19:38:22.298Z" }, +] + [[package]] name = "cachetools" version = "7.1.1" @@ -982,6 +1121,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, ] +[[package]] +name = "claude-agent-sdk" +version = "0.1.48" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "mcp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/dd/2818538efd18ed4ef72d4775efa75bb36cbea0fa418eda51df85ee9c2424/claude_agent_sdk-0.1.48.tar.gz", hash = "sha256:ee294d3f02936c0b826119ffbefcf88c67731cf8c2d2cb7111ccc97f76344272", size = 87375, upload-time = "2026-03-07T00:21:37.087Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/cf/bbbdee52ee0c63c8709b0ac03ce3c1da5bdc37def5da0eca63363448744f/claude_agent_sdk-0.1.48-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5761ff1d362e0f17c2b1bfd890d1c897f0aa81091e37bbd15b7d06f05ced552d", size = 57559306, upload-time = "2026-03-07T00:21:20.011Z" }, + { url = "https://files.pythonhosted.org/packages/57/d1/2179154b88d4cf6ba1cf6a15066ee8e96257aaeb1330e625e809ba2f28eb/claude_agent_sdk-0.1.48-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:39c1307daa17e42fa8a71180bb20af8a789d72d3891fc93519ff15540badcb83", size = 73980309, upload-time = "2026-03-07T00:21:24.592Z" }, + { url = "https://files.pythonhosted.org/packages/dc/99/55b0cd3bf54a7449e744d23cf50be104e9445cf623e1ed75722112aa6264/claude_agent_sdk-0.1.48-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:543d70acba468eccfff836965a14b8ac88cf90809aeeb88431dfcea3ee9a2fa9", size = 74583686, upload-time = "2026-03-07T00:21:28.969Z" }, + { url = "https://files.pythonhosted.org/packages/c8/f6/4851bd9a238b7aadba7639eb906aca7da32a51f01563fa4488469c608b3a/claude_agent_sdk-0.1.48-py3-none-win_amd64.whl", hash = "sha256:0d37e60bd2b17efc3f927dccef080f14897ab62cd1d0d67a4abc8a0e2d4f1006", size = 74956045, upload-time = "2026-03-07T00:21:33.475Z" }, +] + [[package]] name = "click" version = "8.3.3" @@ -1242,6 +1397,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = 
"sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, ] +[[package]] +name = "foundry-local-sdk" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/6b/76a7fe8f9f4c52cc84eaa1cd1b66acddf993496d55d6ea587bf0d0854d1c/foundry_local_sdk-0.5.1-py3-none-any.whl", hash = "sha256:f3639a3666bc3a94410004a91671338910ac2e1b8094b1587cc4db0f4a7df07e", size = 14003, upload-time = "2025-11-21T05:39:58.099Z" }, +] + [[package]] name = "frozenlist" version = "1.8.0" @@ -1344,6 +1512,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/8c/dce3b1b7593858eba995b2dfdb833f872c7f863e3da92aab7128a6b11af4/furl-2.1.4-py2.py3-none-any.whl", hash = "sha256:da34d0b34e53ffe2d2e6851a7085a05d96922b5b578620a37377ff1dbeeb11c8", size = 27550, upload-time = "2025-03-09T05:36:19.928Z" }, ] +[[package]] +name = "github-copilot-sdk" +version = "1.0.0b2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dateutil" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/fe/2cb98d4b9f57f8062ea72775bde72aed1958305016753f7296398e0ceb45/github_copilot_sdk-1.0.0b2-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:1b5941d8b6e3d94d42a5bec6607a26f562e6535d5c981089d23d3d224b94601c", size = 67061619, upload-time = "2026-05-06T20:02:08.636Z" }, + { url = "https://files.pythonhosted.org/packages/57/45/76567821b2d36f81e6bca78c98d265e2762733f765fa51d69602b7f81867/github_copilot_sdk-1.0.0b2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c5b8f6a087a0cf02bb0d33976e8f8c009578d84d701a0b28d52051304791ac70", size = 63790955, upload-time = "2026-05-06T20:02:12.354Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/67/684b0da0b1207a2bdf025c22ee075d34a1736d61a4973651035d4fd4d8dc/github_copilot_sdk-1.0.0b2-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:f403638c11b82bddb81c94675fc4e8014a1bb2e86a679a39fa167dcc3ad5416a", size = 69538664, upload-time = "2026-05-06T20:02:16.363Z" }, + { url = "https://files.pythonhosted.org/packages/57/1d/80d88ecf83683535d1a16d4817f1683db3b125f52a924ebdfe9764f5e4c3/github_copilot_sdk-1.0.0b2-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:433d16bb31171fee8d3a5b70259c527f63b297e83a8f8761ae1f16f14d641f32", size = 68163648, upload-time = "2026-05-06T20:02:21.139Z" }, + { url = "https://files.pythonhosted.org/packages/32/d3/b72aa2fbb3194b50b53e8cb1484f5606a1f8eedcdb0bfb5747da52079553/github_copilot_sdk-1.0.0b2-py3-none-win_amd64.whl", hash = "sha256:a6e9782dae4c3c2ab3527b45bb5de0f61998104c10e9ff64698280eaf37ab5dd", size = 62649144, upload-time = "2026-05-06T20:02:24.953Z" }, + { url = "https://files.pythonhosted.org/packages/b6/e2/be95b8ea0ac11d1ca474e28a59284f4e395c2710734eadfb657f5de8ace2/github_copilot_sdk-1.0.0b2-py3-none-win_arm64.whl", hash = "sha256:2e97d0ce4bad67dc5929091cb429e7bbae7d4643e4908a6af256a41439000740", size = 60374365, upload-time = "2026-05-06T20:02:29.02Z" }, +] + [[package]] name = "google-api-core" version = "2.30.3" @@ -1546,6 +1731,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] +[[package]] +name = "hyperlight-sandbox" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/de/5e/14c69eac7e1c74fbd556c6f890729a3d232d32d65cd9f8cfde72c0534e61/hyperlight_sandbox-0.4.0.tar.gz", hash = 
"sha256:90d7b91d4d8e17054e282b0daed55c261392a748dafc57e6416d3184cdac910b", size = 9262, upload-time = "2026-05-02T00:00:02.866Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/e3/b8c106a274c08a30261105afa5511e0ec55960e86b2f6c51e3095e96647c/hyperlight_sandbox-0.4.0-py3-none-any.whl", hash = "sha256:7ae44d2448ed6ecadb368373c7e45eb395521e7774c86a1cbc1ef9cdfc25cd2a", size = 5723, upload-time = "2026-05-02T00:00:03.811Z" }, +] + +[[package]] +name = "hyperlight-sandbox-backend-wasm" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/e5/3cdf21594eb28de7ca1a5a1ade27e137c8f3d7ab48d65fed87a3b74c4039/hyperlight_sandbox_backend_wasm-0.4.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:ff4627950708909202ee24c6175dc41e9c05479f89393575e3de0f14e6f5a193", size = 3918189, upload-time = "2026-05-01T23:59:16.666Z" }, + { url = "https://files.pythonhosted.org/packages/5b/97/b1bb9893bbeb979d133dc542520125dcbf8394d1a2537e753118b37c7cab/hyperlight_sandbox_backend_wasm-0.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cce7dc28b9ded034a11a9a8cf7b9ffb838e29006be8d2e01646dd131ba501b73", size = 3383520, upload-time = "2026-05-01T23:59:27.261Z" }, + { url = "https://files.pythonhosted.org/packages/8c/29/deee4e31086628750f0ce1f67da1e28c613fd2df68465de130cbfe51e72d/hyperlight_sandbox_backend_wasm-0.4.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:88e194515e4784f68676b6906c98a4000f913c93172cf07981d8a977e756bbd6", size = 3917939, upload-time = "2026-05-01T23:59:14.805Z" }, + { url = "https://files.pythonhosted.org/packages/15/2a/6822aec3c04c46893406d0d6ed576dbdb4b5c1d76a0124dc220bb45b0d34/hyperlight_sandbox_backend_wasm-0.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:d1cd2269a5651ea9be1f94a3e3388f6af69e41dbc2b808c3b806481fe17ce163", size = 3383110, upload-time = "2026-05-01T23:59:23.736Z" }, +] + +[[package]] +name = "hyperlight-sandbox-python-guest" +version = "0.4.0" 
+source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/fd/816d1f3f277ff149a45da5381967aa04c22bc7702b5c14f0acfd9db2cee7/hyperlight_sandbox_python_guest-0.4.0.tar.gz", hash = "sha256:64c3c6c13fe550bf5b680fa0b965cf62bc4668084cc275c3467e3c015e6ead36", size = 21657381, upload-time = "2026-05-01T23:59:46.589Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/ba/efb9aacf993f0ac142da5beb9177b221e49dc860c6ea398de236015a52a0/hyperlight_sandbox_python_guest-0.4.0-py3-none-any.whl", hash = "sha256:0789eb794b99606288402ed3921b5e2630800a69d24117ecd9b82e816568202d", size = 21822062, upload-time = "2026-05-01T23:59:50.99Z" }, +] + [[package]] name = "identify" version = "2.6.19" @@ -1720,6 +1934,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e9/1f9ada30cef7b05e74bb06f52127e7a724976c225f46adb65c37b1dadfb6/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f00d94b281174144d6532a04b66a12cb866cbdc47c3af3bfe2973677f9861a", size = 349613, upload-time = "2026-04-10T14:28:40.066Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joserfc" version = "1.6.4" @@ -1926,11 +2149,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" }, ] -[package.optional-dependencies] -ws = [ - { name = "websockets" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -2359,19 +2577,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/eb/a6/83dc2ab6fa397ee66fba04fe2e74bdf7be3b3870005359ceb7689103c058/opentelemetry_semantic_conventions-0.62b1-py3-none-any.whl", hash = "sha256:cf506938103d331fbb78eded0d9788095f7fd59016f2bda813c3324e5a74a93c", size = 231620, upload-time = "2026-04-24T13:15:35.454Z" }, ] -[[package]] -name = "opentelemetry-semantic-conventions-ai" -version = "0.5.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-sdk" }, - { name = "opentelemetry-semantic-conventions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/24/02/10aeacc37a38a3a8fa16ff67bec1ae3bf882539f6f9efb0f70acf802ca2d/opentelemetry_semantic_conventions_ai-0.5.1.tar.gz", hash = "sha256:153906200d8c1d2f8e09bd78dbef526916023de85ac3dab35912bfafb69ff04c", size = 26533, upload-time = "2026-03-26T14:20:38.73Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/22/41fb05f1dc5fda2c468e05a41814c20859016c85117b66c8a257cae814f6/opentelemetry_semantic_conventions_ai-0.5.1-py3-none-any.whl", hash = "sha256:25aeb22bd261543b4898a73824026d96770e5351209c7d07a0b1314762b1f6e4", size = 11250, upload-time = "2026-03-26T14:20:37.108Z" }, -] - [[package]] name = "orderedmultidict" version = "1.0.2" @@ -2523,7 +2728,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "agent-framework", specifier = "==1.0.0b260107" }, + { name = "agent-framework", specifier = "==1.3.0" }, { name = "aiohttp", specifier = "==3.13.5" }, { name = "art", specifier = "==6.5" }, { name = "azure-ai-agents", 
specifier = "==1.2.0b5" }, @@ -3269,6 +3474,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, ] +[[package]] +name = "s3transfer" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/1f/12417f7f493fc45e1f9fd5d4a9b6c125cf8d2cf3f8ddbdfab3e76406e9d6/s3transfer-0.18.0.tar.gz", hash = "sha256:3760b8b7ec1315da54048b2d626276732bee4300d054d492d4e1d43e20d4ecbd", size = 160560, upload-time = "2026-05-28T19:39:09.124Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/58/a58fc997655386daa2e25784e30c288aa3e3819e401f77029ee4899fb55a/s3transfer-0.18.0-py3-none-any.whl", hash = "sha256:239c13b09e65ad0346e1be7348b8a202dcad44ac7ea7c6eb858fc881dce739b6", size = 88572, upload-time = "2026-05-28T19:39:07.999Z" }, +] + [[package]] name = "sas-cosmosdb" version = "0.1.5" From e67884a951cda7f7cd94ec9899708790042b0b29 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 12:27:56 +0530 Subject: [PATCH 03/24] Fix SharedMemoryContextProvider not iterable TypeError AgentBuilder.with_context_providers() and with_middleware() accepted single objects but passed them directly to Agent(), which expects Sequence types. Now both methods auto-wrap single items into a list. Also wrapped the call site in orchestrator_base.py for clarity. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/libs/agent_framework/agent_builder.py | 10 ++++++++-- src/processor/src/libs/base/orchestrator_base.py | 2 +- .../libs/agent_framework/test_agent_builder.py | 14 ++++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/processor/src/libs/agent_framework/agent_builder.py b/src/processor/src/libs/agent_framework/agent_builder.py index 888e2dae..65261b9b 100644 --- a/src/processor/src/libs/agent_framework/agent_builder.py +++ b/src/processor/src/libs/agent_framework/agent_builder.py @@ -266,7 +266,10 @@ def with_middleware( Returns: Self for method chaining """ - self._middleware = middleware + if isinstance(middleware, list): + self._middleware = middleware + else: + self._middleware = [middleware] return self def with_context_providers( @@ -281,7 +284,10 @@ def with_context_providers( Returns: Self for method chaining """ - self._context_providers = context_providers + if isinstance(context_providers, list): + self._context_providers = context_providers + else: + self._context_providers = [context_providers] return self def with_conversation_id(self, conversation_id: str) -> "AgentBuilder": diff --git a/src/processor/src/libs/base/orchestrator_base.py b/src/processor/src/libs/base/orchestrator_base.py index a4570c04..9e198f9f 100644 --- a/src/processor/src/libs/base/orchestrator_base.py +++ b/src/processor/src/libs/base/orchestrator_base.py @@ -209,7 +209,7 @@ async def create_agents( agent_name=agent_info.agent_name, step=self.step_name, ) - builder = builder.with_context_providers(memory_provider) + builder = builder.with_context_providers([memory_provider]) agent = builder.build() agents[agent_info.agent_name] = agent diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py b/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py index 572974a8..f1a86cb5 100644 --- 
a/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_agent_builder.py @@ -57,12 +57,22 @@ def test_with_tool_choice(self): def test_with_middleware(self): m = [MagicMock()] b = _builder().with_middleware(m) - assert b._middleware is m + assert b._middleware == m + + def test_with_middleware_single(self): + m = MagicMock() + b = _builder().with_middleware(m) + assert b._middleware == [m] def test_with_context_providers(self): cp = MagicMock() b = _builder().with_context_providers(cp) - assert b._context_providers is cp + assert b._context_providers == [cp] + + def test_with_context_providers_list(self): + cp1, cp2 = MagicMock(), MagicMock() + b = _builder().with_context_providers([cp1, cp2]) + assert b._context_providers == [cp1, cp2] def test_with_conversation_id(self): b = _builder().with_conversation_id("conv-1") From e3647936b34b30e48491b2c0624e4c74aa1dea1d Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 12:31:16 +0530 Subject: [PATCH 04/24] Remove trailing blank line in azure_openai_response_retry.py (W391) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/libs/agent_framework/azure_openai_response_retry.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 3fbd0e8b..4809162b 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -609,5 +609,4 @@ async def _inner_get_response( messages=trimmed, options=options, **kwargs ), config=self._retry_config, - ) - + ) From fa63bdfbdd5f025fab7d8db2894671d899ef5466 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 15:19:26 +0530 Subject: [PATCH 05/24] Fix async for coroutine error in retry client 
streaming path The parent OpenAIChatClient._inner_get_response is a regular def that returns ResponseStream (async iterable) when stream=True, or Awaitable when stream=False. The override was async def, which always returned a coroutine, breaking 'async for event in workflow.run(stream=True)'. Refactored to: - Regular def _inner_get_response dispatching stream vs non-stream - _non_streaming_with_retry: async coroutine with retry + context-trim - _streaming_with_retry: async generator with pre-first-chunk retry - _maybe_trim_messages: shared context-trim helper Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 183 ++++++++++++++---- 1 file changed, 149 insertions(+), 34 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 4809162b..0370f389 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -10,7 +10,7 @@ import os import random from dataclasses import dataclass -from typing import Any, MutableSequence +from typing import Any, AsyncIterable, MutableSequence from agent_framework.openai import OpenAIChatClient from tenacity import ( @@ -530,35 +530,72 @@ def __init__( self._retry_config = retry_config or RateLimitRetryConfig.from_env() self._context_trim_config = ContextTrimConfig.from_env() - async def _inner_get_response( - self, *, messages: MutableSequence[Any], options: Any = None, **kwargs: Any + def _inner_get_response( + self, *, messages: MutableSequence[Any], options: Any = None, stream: bool = False, **kwargs: Any ) -> Any: - parent_inner_get_response = super( + """Override that adds retry + context-trimming around the parent call. 
+ + Must remain a regular ``def`` (not ``async def``) because the parent + returns different types depending on *stream*: + - stream=False → Awaitable[ChatResponse] + - stream=True → ResponseStream (AsyncIterable) + """ + effective_messages = self._maybe_trim_messages(messages) + + if stream: + return self._streaming_with_retry( + effective_messages=effective_messages, + original_messages=messages, + options=options, + **kwargs, + ) + else: + return self._non_streaming_with_retry( + effective_messages=effective_messages, + original_messages=messages, + options=options, + **kwargs, + ) + + def _maybe_trim_messages( + self, messages: MutableSequence[Any] + ) -> MutableSequence[Any] | list[Any]: + """Apply pre-call context trimming if enabled and over budget.""" + if not self._context_trim_config.enabled: + return messages + approx_chars = sum(len(_estimate_message_text(m)) for m in messages) + if ( + self._context_trim_config.max_total_chars > 0 + and approx_chars > self._context_trim_config.max_total_chars + ): + trimmed = _trim_messages(messages, cfg=self._context_trim_config) + logger.warning( + "[AOAI_CTX_TRIM] pre-trimmed request messages: approx_chars=%s -> %s; count=%s -> %s", + approx_chars, + sum(len(_estimate_message_text(m)) for m in trimmed), + len(messages), + len(trimmed), + ) + return trimmed + return messages + + async def _non_streaming_with_retry( + self, + *, + effective_messages: MutableSequence[Any] | list[Any], + original_messages: MutableSequence[Any], + options: Any = None, + **kwargs: Any, + ) -> Any: + """Non-streaming path: full retry + context-trim fallback.""" + parent_inner = super( AzureOpenAIResponseClientWithRetry, self )._inner_get_response - effective_messages: MutableSequence[Any] | list[Any] = messages - if self._context_trim_config.enabled: - approx_chars = sum(len(_estimate_message_text(m)) for m in messages) - if ( - self._context_trim_config.max_total_chars > 0 - and approx_chars > self._context_trim_config.max_total_chars - 
): - effective_messages = _trim_messages( - messages, cfg=self._context_trim_config - ) - logger.warning( - "[AOAI_CTX_TRIM] pre-trimmed request messages: approx_chars=%s -> %s; count=%s -> %s", - approx_chars, - sum(len(_estimate_message_text(m)) for m in effective_messages), - len(messages), - len(effective_messages), - ) - try: return await _retry_call( - lambda: parent_inner_get_response( - messages=effective_messages, options=options, **kwargs + lambda: parent_inner( + messages=effective_messages, options=options, stream=False, **kwargs ), config=self._retry_config, ) @@ -571,7 +608,7 @@ async def _inner_get_response( raise trimmed = _trim_messages( - messages, + original_messages, cfg=ContextTrimConfig( enabled=True, max_total_chars=max( @@ -593,20 +630,98 @@ async def _inner_get_response( ) logger.warning( "[AOAI_CTX_TRIM] retrying after context-length error; count=%s -> %s", - len(messages), + len(original_messages), len(trimmed), ) - # Cool down before retrying to avoid triggering 429s immediately. 
- trim_delay = self._retry_config.base_delay_seconds - trim_delay = min(trim_delay, self._retry_config.max_delay_seconds) + trim_delay = min( + self._retry_config.base_delay_seconds, + self._retry_config.max_delay_seconds, + ) logger.info( - "[AOAI_CTX_TRIM] sleeping %ss before retry", - round(trim_delay, 1), + "[AOAI_CTX_TRIM] sleeping %ss before retry", round(trim_delay, 1) ) await asyncio.sleep(trim_delay) return await _retry_call( - lambda: parent_inner_get_response( - messages=trimmed, options=options, **kwargs + lambda: parent_inner( + messages=trimmed, options=options, stream=False, **kwargs ), config=self._retry_config, - ) + ) + + async def _streaming_with_retry( + self, + *, + effective_messages: MutableSequence[Any] | list[Any], + original_messages: MutableSequence[Any], + options: Any = None, + **kwargs: Any, + ) -> AsyncIterable[Any]: + """Streaming path: retry only before the first chunk is yielded.""" + parent_inner = super( + AzureOpenAIResponseClientWithRetry, self + )._inner_get_response + + attempts = self._retry_config.max_retries + 1 + + for attempt_index in range(attempts): + response_stream = parent_inner( + messages=effective_messages, options=options, stream=True, **kwargs + ) + + iterator = response_stream.__aiter__() + try: + first = await iterator.__anext__() + + async def _tail(): + yield first + async for item in iterator: + yield item + + async for item in _tail(): + yield item + return + except StopAsyncIteration: + return + except Exception as e: + close = getattr(response_stream, "aclose", None) + if callable(close): + try: + await close() + except Exception: + logger.debug( + "Best-effort close of response stream failed", + exc_info=True, + ) + + if not _looks_like_rate_limit(e) or attempt_index >= attempts - 1: + if _looks_like_rate_limit(e): + logger.warning( + "[AOAI_RETRY_STREAM] giving up after %s/%s attempts; error=%s", + attempt_index + 1, + attempts, + _format_exc_brief(e), + ) + raise + + retry_after = 
_try_get_retry_after_seconds(e) + if retry_after is not None and retry_after >= 0: + delay = retry_after + else: + delay = self._retry_config.base_delay_seconds * ( + 2**attempt_index + ) + delay = min(delay, self._retry_config.max_delay_seconds) + delay = delay + random.uniform(0.0, 0.25 * max(delay, 0.1)) + + status = getattr(e, "status_code", None) or getattr( + e, "status", None + ) + logger.warning( + "[AOAI_RETRY_STREAM] attempt %s/%s; sleeping=%ss; retry_after=%s; status=%s; error=%s", + attempt_index + 1, + attempts, + round(float(delay), 3), + None if retry_after is None else round(float(retry_after), 3), + status, + _format_exc_brief(e), + ) From 0f10ef03a52b1348ddaec299a3c688bb1270502c Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 15:53:19 +0530 Subject: [PATCH 06/24] Fix streaming: delegate to parent ResponseStream instead of async generator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The framework's BaseChatClient.get_response checks isinstance(result, ResponseStream) for streaming responses. Our async generator from _streaming_with_retry failed that check, causing the framework to 'await' it — which fails with 'object async_generator can't be used in await expression'. Fix: for streaming, pass through to the parent's _inner_get_response which returns a proper ResponseStream. Retry is preserved for non-streaming calls. Removed unused _streaming_with_retry method. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 93 ++----------------- 1 file changed, 9 insertions(+), 84 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 0370f389..f077532b 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -10,7 +10,7 @@ import os import random from dataclasses import dataclass -from typing import Any, AsyncIterable, MutableSequence +from typing import Any, MutableSequence from agent_framework.openai import OpenAIChatClient from tenacity import ( @@ -543,11 +543,14 @@ def _inner_get_response( effective_messages = self._maybe_trim_messages(messages) if stream: - return self._streaming_with_retry( - effective_messages=effective_messages, - original_messages=messages, - options=options, - **kwargs, + # For streaming, delegate to the parent which returns a proper + # ResponseStream. The framework checks isinstance(result, ResponseStream) + # and async generators fail that check. 
+ parent_inner = super( + AzureOpenAIResponseClientWithRetry, self + )._inner_get_response + return parent_inner( + messages=effective_messages, options=options, stream=True, **kwargs ) else: return self._non_streaming_with_retry( @@ -647,81 +650,3 @@ async def _non_streaming_with_retry( ), config=self._retry_config, ) - - async def _streaming_with_retry( - self, - *, - effective_messages: MutableSequence[Any] | list[Any], - original_messages: MutableSequence[Any], - options: Any = None, - **kwargs: Any, - ) -> AsyncIterable[Any]: - """Streaming path: retry only before the first chunk is yielded.""" - parent_inner = super( - AzureOpenAIResponseClientWithRetry, self - )._inner_get_response - - attempts = self._retry_config.max_retries + 1 - - for attempt_index in range(attempts): - response_stream = parent_inner( - messages=effective_messages, options=options, stream=True, **kwargs - ) - - iterator = response_stream.__aiter__() - try: - first = await iterator.__anext__() - - async def _tail(): - yield first - async for item in iterator: - yield item - - async for item in _tail(): - yield item - return - except StopAsyncIteration: - return - except Exception as e: - close = getattr(response_stream, "aclose", None) - if callable(close): - try: - await close() - except Exception: - logger.debug( - "Best-effort close of response stream failed", - exc_info=True, - ) - - if not _looks_like_rate_limit(e) or attempt_index >= attempts - 1: - if _looks_like_rate_limit(e): - logger.warning( - "[AOAI_RETRY_STREAM] giving up after %s/%s attempts; error=%s", - attempt_index + 1, - attempts, - _format_exc_brief(e), - ) - raise - - retry_after = _try_get_retry_after_seconds(e) - if retry_after is not None and retry_after >= 0: - delay = retry_after - else: - delay = self._retry_config.base_delay_seconds * ( - 2**attempt_index - ) - delay = min(delay, self._retry_config.max_delay_seconds) - delay = delay + random.uniform(0.0, 0.25 * max(delay, 0.1)) - - status = getattr(e, 
"status_code", None) or getattr( - e, "status", None - ) - logger.warning( - "[AOAI_RETRY_STREAM] attempt %s/%s; sleeping=%ss; retry_after=%s; status=%s; error=%s", - attempt_index + 1, - attempts, - round(float(delay), 3), - None if retry_after is None else round(float(retry_after), 3), - status, - _format_exc_brief(e), - ) From 688b136f362cf79bce321c5de412c3141c9c514f Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 17:14:59 +0530 Subject: [PATCH 07/24] Guard against empty messages after context trimming - _trim_messages: keep at least 1 message (never pop to empty) - _maybe_trim_messages: fall back to originals if trim produces empty - _non_streaming_with_retry: re-raise if aggressive trim empties list - _inner_get_response: log warning and use originals if messages empty Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index f077532b..1df1a7cc 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -391,13 +391,14 @@ def _trim_messages( def _total_chars(msgs: list[Any]) -> int: return sum(len(_estimate_message_text(x)) for x in msgs) - while combined and _total_chars(combined) > cfg.max_total_chars: + while len(combined) > 1 and _total_chars(combined) > cfg.max_total_chars: # Prefer dropping earliest non-system message. + # Never drop the last message — the model needs at least one. drop_index = 0 if cfg.keep_system_messages and system_messages: drop_index = len(system_messages) - if drop_index >= len(combined): - # If only system messages remain, truncate the last one. 
+ if drop_index >= len(combined) - 1: + # Only system messages (+ maybe 1 non-system) remain — truncate the last one. last = combined[-1] text = _estimate_message_text(last) text = _truncate_text( @@ -542,6 +543,12 @@ def _inner_get_response( """ effective_messages = self._maybe_trim_messages(messages) + if not effective_messages: + logger.warning( + "[AOAI_RETRY] empty messages list received; using original messages" + ) + effective_messages = messages + if stream: # For streaming, delegate to the parent which returns a proper # ResponseStream. The framework checks isinstance(result, ResponseStream) @@ -572,6 +579,11 @@ def _maybe_trim_messages( and approx_chars > self._context_trim_config.max_total_chars ): trimmed = _trim_messages(messages, cfg=self._context_trim_config) + if not trimmed: + logger.warning( + "[AOAI_CTX_TRIM] trimming would remove all messages; keeping originals" + ) + return messages logger.warning( "[AOAI_CTX_TRIM] pre-trimmed request messages: approx_chars=%s -> %s; count=%s -> %s", approx_chars, @@ -631,6 +643,11 @@ async def _non_streaming_with_retry( retry_on_context_error=True, ), ) + if not trimmed: + logger.warning( + "[AOAI_CTX_TRIM] aggressive trim would remove all messages; re-raising original error" + ) + raise logger.warning( "[AOAI_CTX_TRIM] retrying after context-length error; count=%s -> %s", len(original_messages), From 1d8617632fba353536960cf46a2431830ed5fa20 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 14:32:46 +0530 Subject: [PATCH 08/24] Update AZURE_OPENAI_API_VERSION from 2025-03-01-preview to v1 The Responses API requires the new v1 API endpoint. The old preview version (2025-03-01-preview) does not support the /responses endpoint, causing BadRequest 'API version not supported' errors at runtime. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- infra/main.bicep | 2 +- infra/main.json | 2 +- infra/main_custom.bicep | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index f7a08659..456e3168 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -1135,7 +1135,7 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store } { name: 'AZURE_OPENAI_API_VERSION' - value: '2025-03-01-preview' + value: 'v1' } { name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME' diff --git a/infra/main.json b/infra/main.json index 67cbf372..b34fdd30 100644 --- a/infra/main.json +++ b/infra/main.json @@ -35380,7 +35380,7 @@ }, { "name": "AZURE_OPENAI_API_VERSION", - "value": "2025-03-01-preview" + "value": "v1" }, { "name": "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep index a4588fb5..6dd44b9b 100644 --- a/infra/main_custom.bicep +++ b/infra/main_custom.bicep @@ -1101,7 +1101,7 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store } { name: 'AZURE_OPENAI_API_VERSION' - value: '2025-03-01-preview' + value: 'v1' } { name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME' From 6938b2b49e9a64bf972147dced0b0d4ce7c8c6e3 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 21:35:54 +0530 Subject: [PATCH 09/24] Revert "Update AZURE_OPENAI_API_VERSION from 2025-03-01-preview to v1" This reverts commit 1d8617632fba353536960cf46a2431830ed5fa20. 
--- infra/main.bicep | 2 +- infra/main.json | 2 +- infra/main_custom.bicep | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index 456e3168..f7a08659 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -1135,7 +1135,7 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store } { name: 'AZURE_OPENAI_API_VERSION' - value: 'v1' + value: '2025-03-01-preview' } { name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME' diff --git a/infra/main.json b/infra/main.json index b34fdd30..67cbf372 100644 --- a/infra/main.json +++ b/infra/main.json @@ -35380,7 +35380,7 @@ }, { "name": "AZURE_OPENAI_API_VERSION", - "value": "v1" + "value": "2025-03-01-preview" }, { "name": "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep index 6dd44b9b..a4588fb5 100644 --- a/infra/main_custom.bicep +++ b/infra/main_custom.bicep @@ -1101,7 +1101,7 @@ module appConfiguration 'br/public:avm/res/app-configuration/configuration-store } { name: 'AZURE_OPENAI_API_VERSION' - value: 'v1' + value: '2025-03-01-preview' } { name: 'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME' From 59618707a9f852004baadcb33f1bb10fb587216e Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 21:37:57 +0530 Subject: [PATCH 10/24] Switch default client to AzureOpenAIChatCompletionWithRetry (Chat Completions API) Adds a new AzureOpenAIChatClientWithRetry that wraps OpenAIChatCompletionClient (the /chat/completions endpoint) with the same 429-retry and context-trimming logic as the existing AzureOpenAIResponseClientWithRetry, then switches the default client registered in AgentFrameworkHelper and the per-thread client in OrchestratorBase to use it. The /chat/completions endpoint works with the existing 2025-03-01-preview Azure OpenAI API version, so the v1 API-version bump (commit 1d86176) is no longer required and is reverted in the prior commit. 
Mirrors the approach used in microsoft/content-processing-solution-accelerator#599. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/agent_framework_helper.py | 42 ++++++++- .../azure_openai_response_retry.py | 94 ++++++++++++++++++- .../src/libs/base/orchestrator_base.py | 2 +- .../test_agent_framework_helper.py | 16 ++++ 4 files changed, 151 insertions(+), 3 deletions(-) diff --git a/src/processor/src/libs/agent_framework/agent_framework_helper.py b/src/processor/src/libs/agent_framework/agent_framework_helper.py index b1bf6ba8..4c90b574 100644 --- a/src/processor/src/libs/agent_framework/agent_framework_helper.py +++ b/src/processor/src/libs/agent_framework/agent_framework_helper.py @@ -22,6 +22,7 @@ from .agent_framework_settings import AgentFrameworkSettings from .azure_openai_response_retry import ( + AzureOpenAIChatClientWithRetry, AzureOpenAIResponseClientWithRetry, RateLimitRetryConfig, ) @@ -40,6 +41,7 @@ class ClientType(Enum): OpenAIAssistant = "OpenAIAssistant" OpenAIResponse = "OpenAIResponse" AzureOpenAIChatCompletion = "AzureOpenAIChatCompletion" + AzureOpenAIChatCompletionWithRetry = "AzureOpenAIChatCompletionWithRetry" AzureOpenAIAssistant = "AzureOpenAIAssistant" AzureOpenAIResponse = "AzureOpenAIResponse" AzureOpenAIResponseWithRetry = "AzureOpenAIResponseWithRetry" @@ -92,7 +94,7 @@ def _initialize_all_clients(self, settings: AgentFrameworkSettings): continue self.ai_clients[service_id] = AgentFrameworkHelper.create_client( - client_type=ClientType.AzureOpenAIResponseWithRetry, + client_type=ClientType.AzureOpenAIChatCompletionWithRetry, endpoint=service_config.endpoint, deployment_name=service_config.chat_deployment_name, api_version=service_config.api_version, @@ -148,6 +150,29 @@ def create_client( ) -> "OpenAIChatCompletionClient": pass + @overload + @staticmethod + def create_client( + client_type: type[ClientType.AzureOpenAIChatCompletionWithRetry], + *, + api_key: str | None = None, + 
deployment_name: str | None = None, + endpoint: str | None = None, + base_url: str | None = None, + api_version: str | None = None, + ad_token: str | None = None, + ad_token_provider: object | None = None, + token_endpoint: str | None = None, + credential: object | None = None, + default_headers: dict[str, str] | None = None, + async_client: object | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + instruction_role: str | None = None, + retry_config: RateLimitRetryConfig | None = None, + ) -> AzureOpenAIChatClientWithRetry: + pass + @overload @staticmethod def create_client( @@ -379,6 +404,21 @@ def create_client( env_file_encoding=env_file_encoding, instruction_role=instruction_role, ) + elif client_type == ClientType.AzureOpenAIChatCompletionWithRetry: + return AzureOpenAIChatClientWithRetry( + model=deployment_name, + api_key=api_key, + azure_endpoint=endpoint, + base_url=base_url, + api_version=api_version, + credential=credential or ad_token_provider, + default_headers=default_headers, + async_client=async_client, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + instruction_role=instruction_role, + retry_config=retry_config, + ) elif client_type == ClientType.AzureOpenAIAssistant: raise NotImplementedError( "AzureOpenAIAssistantsClient has been removed in agent-framework 1.3.0. 
" diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 1df1a7cc..9eff88e2 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -12,7 +12,7 @@ from dataclasses import dataclass from typing import Any, MutableSequence -from agent_framework.openai import OpenAIChatClient +from agent_framework.openai import OpenAIChatClient, OpenAIChatCompletionClient from tenacity import ( AsyncRetrying, retry_if_exception, @@ -667,3 +667,95 @@ async def _non_streaming_with_retry( ), config=self._retry_config, ) + + +class AzureOpenAIChatClientWithRetry(OpenAIChatCompletionClient): + """Azure OpenAI Chat (Chat Completions) client with 429 retry at the request boundary. + + Wraps the ``/chat/completions`` endpoint used by Agent Framework by overriding + the internal ``_inner_get_response`` method. This client works with all Azure + OpenAI API versions including ``2025-03-01-preview``. + + Use this in preference to ``AzureOpenAIResponseClientWithRetry`` when the + ``/responses`` endpoint (and the ``v1`` API version it requires) is not + available in the target Azure OpenAI resource. 
+ """ + + def __init__( + self, + *args: Any, + retry_config: RateLimitRetryConfig | None = None, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self._retry_config = retry_config or RateLimitRetryConfig.from_env() + self._context_trim_config = ContextTrimConfig.from_env() + + async def _inner_get_response( + self, + *, + messages: MutableSequence[Any], + options: Any | None = None, + **kwargs: Any, + ) -> Any: + """Override that adds retry + context-trimming around the parent call.""" + parent_inner_get_response = super( + AzureOpenAIChatClientWithRetry, self + )._inner_get_response + + effective_messages: MutableSequence[Any] | list[Any] = messages + if self._context_trim_config.enabled: + approx_chars = sum(len(_estimate_message_text(m)) for m in messages) + if ( + self._context_trim_config.max_total_chars > 0 + and approx_chars > self._context_trim_config.max_total_chars + ): + effective_messages = _trim_messages( + messages, cfg=self._context_trim_config + ) + logger.warning( + "[AOAI_CTX_TRIM] pre-trimmed chat request messages: approx_chars=%s -> %s; count=%s -> %s", + approx_chars, + sum(len(_estimate_message_text(m)) for m in effective_messages), + len(messages), + len(effective_messages), + ) + + if not effective_messages: + logger.warning( + "[AOAI_RETRY] empty messages list received; using original messages" + ) + effective_messages = messages + + try: + return await _retry_call( + lambda: parent_inner_get_response( + messages=effective_messages, options=options, **kwargs + ), + config=self._retry_config, + ) + except Exception as e: + if not ( + self._context_trim_config.enabled + and self._context_trim_config.retry_on_context_error + and _looks_like_context_length(e) + ): + raise + + trimmed = _trim_messages(messages, cfg=self._context_trim_config) + if not trimmed: + logger.warning( + "[AOAI_CTX_TRIM] trim would remove all messages; re-raising original error" + ) + raise + logger.warning( + "[AOAI_CTX_TRIM] retrying chat after 
context-length error; count=%s -> %s", + len(messages), + len(trimmed), + ) + return await _retry_call( + lambda: parent_inner_get_response( + messages=trimmed, options=options, **kwargs + ), + config=self._retry_config, + ) diff --git a/src/processor/src/libs/base/orchestrator_base.py b/src/processor/src/libs/base/orchestrator_base.py index 9e198f9f..f21473e1 100644 --- a/src/processor/src/libs/base/orchestrator_base.py +++ b/src/processor/src/libs/base/orchestrator_base.py @@ -225,7 +225,7 @@ async def get_client(self, thread_id: str = None): return self._client_cache[thread_id] else: client = self.agent_framework_helper.create_client( - client_type=ClientType.AzureOpenAIResponseWithRetry, + client_type=ClientType.AzureOpenAIChatCompletionWithRetry, endpoint=self.agent_framework_helper.settings.get_service_config( "default" ).endpoint, diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py b/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py index 578a79d5..ad42b867 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_agent_framework_helper.py @@ -95,6 +95,22 @@ def test_azure_openai_response_with_retry(self): assert kwargs["model"] == "gpt-4" assert kwargs["credential"] == "token" + def test_azure_openai_chat_completion_with_retry(self): + with patch( + "libs.agent_framework.agent_framework_helper.AzureOpenAIChatClientWithRetry" + ) as mock_cls: + client = AgentFrameworkHelper.create_client( + ClientType.AzureOpenAIChatCompletionWithRetry, + endpoint="https://x", + deployment_name="gpt-4", + ad_token_provider="token", + ) + assert client is mock_cls.return_value + kwargs = mock_cls.call_args.kwargs + assert kwargs["azure_endpoint"] == "https://x" + assert kwargs["model"] == "gpt-4" + assert kwargs["credential"] == "token" + def test_default_token_provider_when_no_credential(self): with patch( 
"libs.agent_framework.agent_framework_helper.AzureOpenAIResponseClientWithRetry" From 3ab3076274bf9b94e20310b1e2e2fb3632e0bf3c Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 21:58:28 +0530 Subject: [PATCH 11/24] Fix AzureOpenAIChatClientWithRetry streaming: sync def + stream parameter OpenAIChatCompletionClient._inner_get_response is a SYNC method that returns either Awaitable[ChatResponse] (stream=False) or ResponseStream (stream=True), matching the OpenAIChatClient (Responses API) shape. The previous implementation used async def without a stream parameter, which caused the framework's streaming path to receive a coroutine instead of an AsyncIterable, raising: 'async for' requires an object with __aiter__ method, got coroutine Mirror the existing AzureOpenAIResponseClientWithRetry pattern: sync _inner_get_response that branches on stream and delegates non-streaming calls to _non_streaming_with_retry. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 136 +++++++++++++----- 1 file changed, 102 insertions(+), 34 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 9eff88e2..c3aafb75 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -691,35 +691,17 @@ def __init__( self._retry_config = retry_config or RateLimitRetryConfig.from_env() self._context_trim_config = ContextTrimConfig.from_env() - async def _inner_get_response( - self, - *, - messages: MutableSequence[Any], - options: Any | None = None, - **kwargs: Any, + def _inner_get_response( + self, *, messages: MutableSequence[Any], options: Any = None, stream: bool = False, **kwargs: Any ) -> Any: - """Override that adds retry + context-trimming around the parent call.""" - parent_inner_get_response = 
super( - AzureOpenAIChatClientWithRetry, self - )._inner_get_response + """Override that adds retry + context-trimming around the parent call. - effective_messages: MutableSequence[Any] | list[Any] = messages - if self._context_trim_config.enabled: - approx_chars = sum(len(_estimate_message_text(m)) for m in messages) - if ( - self._context_trim_config.max_total_chars > 0 - and approx_chars > self._context_trim_config.max_total_chars - ): - effective_messages = _trim_messages( - messages, cfg=self._context_trim_config - ) - logger.warning( - "[AOAI_CTX_TRIM] pre-trimmed chat request messages: approx_chars=%s -> %s; count=%s -> %s", - approx_chars, - sum(len(_estimate_message_text(m)) for m in effective_messages), - len(messages), - len(effective_messages), - ) + Must remain a regular ``def`` (not ``async def``) because the parent + returns different types depending on *stream*: + - stream=False → Awaitable[ChatResponse] + - stream=True → ResponseStream (AsyncIterable) + """ + effective_messages = self._maybe_trim_messages(messages) if not effective_messages: logger.warning( @@ -727,10 +709,68 @@ async def _inner_get_response( ) effective_messages = messages + if stream: + # For streaming, delegate to the parent which returns a proper + # ResponseStream. The framework checks isinstance(result, ResponseStream) + # and async generators fail that check. 
+ parent_inner = super( + AzureOpenAIChatClientWithRetry, self + )._inner_get_response + return parent_inner( + messages=effective_messages, options=options, stream=True, **kwargs + ) + else: + return self._non_streaming_with_retry( + effective_messages=effective_messages, + original_messages=messages, + options=options, + **kwargs, + ) + + def _maybe_trim_messages( + self, messages: MutableSequence[Any] + ) -> MutableSequence[Any] | list[Any]: + """Apply pre-call context trimming if enabled and over budget.""" + if not self._context_trim_config.enabled: + return messages + approx_chars = sum(len(_estimate_message_text(m)) for m in messages) + if ( + self._context_trim_config.max_total_chars > 0 + and approx_chars > self._context_trim_config.max_total_chars + ): + trimmed = _trim_messages(messages, cfg=self._context_trim_config) + if not trimmed: + logger.warning( + "[AOAI_CTX_TRIM] trimming would remove all messages; keeping originals" + ) + return messages + logger.warning( + "[AOAI_CTX_TRIM] pre-trimmed chat request messages: approx_chars=%s -> %s; count=%s -> %s", + approx_chars, + sum(len(_estimate_message_text(m)) for m in trimmed), + len(messages), + len(trimmed), + ) + return trimmed + return messages + + async def _non_streaming_with_retry( + self, + *, + effective_messages: MutableSequence[Any] | list[Any], + original_messages: MutableSequence[Any], + options: Any = None, + **kwargs: Any, + ) -> Any: + """Non-streaming path: full retry + context-trim fallback.""" + parent_inner = super( + AzureOpenAIChatClientWithRetry, self + )._inner_get_response + try: return await _retry_call( - lambda: parent_inner_get_response( - messages=effective_messages, options=options, **kwargs + lambda: parent_inner( + messages=effective_messages, options=options, stream=False, **kwargs ), config=self._retry_config, ) @@ -742,20 +782,48 @@ async def _inner_get_response( ): raise - trimmed = _trim_messages(messages, cfg=self._context_trim_config) + trimmed = _trim_messages( + 
original_messages, + cfg=ContextTrimConfig( + enabled=True, + max_total_chars=max( + 50_000, self._context_trim_config.max_total_chars - 80_000 + ), + max_message_chars=max( + 3_000, self._context_trim_config.max_message_chars - 6_000 + ), + keep_last_messages=max( + 6, self._context_trim_config.keep_last_messages - 12 + ), + keep_head_chars=max( + 1_000, self._context_trim_config.keep_head_chars - 4_000 + ), + keep_tail_chars=self._context_trim_config.keep_tail_chars, + keep_system_messages=True, + retry_on_context_error=True, + ), + ) if not trimmed: logger.warning( - "[AOAI_CTX_TRIM] trim would remove all messages; re-raising original error" + "[AOAI_CTX_TRIM] aggressive trim would remove all messages; re-raising original error" ) raise logger.warning( "[AOAI_CTX_TRIM] retrying chat after context-length error; count=%s -> %s", - len(messages), + len(original_messages), len(trimmed), ) + trim_delay = min( + self._retry_config.base_delay_seconds, + self._retry_config.max_delay_seconds, + ) + logger.info( + "[AOAI_CTX_TRIM] sleeping %ss before retry", round(trim_delay, 1) + ) + await asyncio.sleep(trim_delay) return await _retry_call( - lambda: parent_inner_get_response( - messages=trimmed, options=options, **kwargs + lambda: parent_inner( + messages=trimmed, options=options, stream=False, **kwargs ), config=self._retry_config, ) From 2caced57a2e78813a53d2821921cf6cad4bb0c79 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 22:18:28 +0530 Subject: [PATCH 12/24] Sanitize message author_name for OpenAI Chat Completions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI's Chat Completions endpoint validates the message `name` field against the pattern `^[^\s<|\\/>]+$`. Our agents have display names with whitespace (e.g. `Chief Architect`, `AKS Expert`), which caused a 400 BadRequest after switching the default client to `AzureOpenAIChatClientWithRetry`. 
Add `_sanitize_author_name` / `_sanitize_author_names` helpers that replace runs of disallowed characters (whitespace, `<`, `|`, `\`, `/`, `>`) with a single underscore and strip leading/trailing underscores. Names that sanitize down to an empty string are dropped entirely so the field can be omitted from the request. The sanitizer is applied inside `AzureOpenAIChatClientWithRetry._inner_get_response` after context trimming (and again after the trim-fallback retry inside `_non_streaming_with_retry`) so the wire format passes validation while in-memory `Message` objects keep their original display names for orchestration logic. Originals are never mutated — modified messages are shallow-copied before the name is rewritten. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 88 +++++++++++++++++ .../test_azure_openai_response_retry_utils.py | 95 +++++++++++++++++++ 2 files changed, 183 insertions(+) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index c3aafb75..0ca89ff0 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -6,9 +6,11 @@ from __future__ import annotations import asyncio +import copy import logging import os import random +import re from dataclasses import dataclass from typing import Any, MutableSequence @@ -265,6 +267,85 @@ def _set_message_text(message: Any, new_text: str) -> Any: return message +# OpenAI Chat Completions requires message `name` to match this pattern: +# ^[^\s<|\\/>]+$ +# Agent display names like "Chief Architect" contain spaces and are rejected. +# We replace any run of disallowed characters with a single underscore so the +# wire-format passes validation while preserving readability. 
+_OPENAI_NAME_INVALID_CHARS = re.compile(r"[\s<|\\/>]+") + + +def _sanitize_author_name(name: Any) -> Any: + """Sanitize a single author_name for OpenAI Chat Completions. + + Returns the original value when it is not a string, is empty, or is already + valid. Otherwise returns a string with disallowed characters collapsed to + underscores and surrounding underscores stripped. If the result would be + empty (e.g. name was all whitespace), returns ``None`` so the field can be + dropped entirely. + """ + if not isinstance(name, str) or not name: + return name + if not _OPENAI_NAME_INVALID_CHARS.search(name): + return name + sanitized = _OPENAI_NAME_INVALID_CHARS.sub("_", name).strip("_") + return sanitized or None + + +def _sanitize_author_names( + messages: MutableSequence[Any], +) -> MutableSequence[Any] | list[Any]: + """Return ``messages`` with each entry's author_name sanitized. + + - For dict-shaped messages, the ``name`` key is rewritten on a shallow copy + (and removed if the sanitized value would be empty). + - For ``agent_framework.Message``-like objects, ``author_name`` is rewritten + on a shallow copy so the originals (which may live in long-lived agent + state) are not mutated. + - Messages that don't need sanitization are returned unchanged. If nothing + needed sanitization the original sequence is returned as-is. + """ + out: list[Any] = [] + any_changed = False + for m in messages: + # Dict form: {"role": ..., "name": ..., "content": ...} + if isinstance(m, dict): + name = m.get("name") + if isinstance(name, str): + sanitized = _sanitize_author_name(name) + if sanitized != name: + new_m = dict(m) + if sanitized: + new_m["name"] = sanitized + else: + new_m.pop("name", None) + out.append(new_m) + any_changed = True + continue + out.append(m) + continue + + # Object form (agent_framework Message): has .author_name attribute. 
+ name = getattr(m, "author_name", None) + if isinstance(name, str): + sanitized = _sanitize_author_name(name) + if sanitized != name: + try: + new_m = copy.copy(m) + new_m.author_name = sanitized + out.append(new_m) + any_changed = True + continue + except Exception: + # Last-resort in-place fallback if copy/setattr is blocked. + try: + m.author_name = sanitized + except Exception: + pass + out.append(m) + return out if any_changed else messages + + @dataclass(frozen=True) class ContextTrimConfig: """Character-budget based context trimming. @@ -709,6 +790,11 @@ def _inner_get_response( ) effective_messages = messages + # OpenAI Chat Completions validates message `name` against ^[^\s<|\\/>]+$. + # Sanitize before sending so agent display names like "Chief Architect" + # don't trip a 400 BadRequest. Originals are shallow-copied, not mutated. + effective_messages = _sanitize_author_names(effective_messages) + if stream: # For streaming, delegate to the parent which returns a proper # ResponseStream. The framework checks isinstance(result, ResponseStream) @@ -813,6 +899,8 @@ async def _non_streaming_with_retry( len(original_messages), len(trimmed), ) + # Re-sanitize names on the freshly-trimmed messages before retry. 
+ trimmed = _sanitize_author_names(trimmed) trim_delay = min( self._retry_config.base_delay_seconds, self._retry_config.max_delay_seconds, diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py b/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py index aba664fa..7236d192 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py @@ -7,6 +7,8 @@ RateLimitRetryConfig, _looks_like_context_length, _looks_like_rate_limit, + _sanitize_author_name, + _sanitize_author_names, _trim_messages, _truncate_text, ) @@ -85,3 +87,96 @@ def test_trim_messages_keeps_system_and_tails_and_truncates_long_messages() -> N # The last message is intentionally never truncated (agent needs full context). assert len(trimmed[1]["content"]) <= 50 assert len(trimmed[2]["content"]) == 100 + + +# --------------------------------------------------------------------------- +# author_name sanitization (Chat Completions name pattern: ^[^\s<|\\/>]+$) +# --------------------------------------------------------------------------- + + +def test_sanitize_author_name_passthrough_for_valid_names() -> None: + assert _sanitize_author_name("Coordinator") == "Coordinator" + assert _sanitize_author_name("ResultGenerator") == "ResultGenerator" + assert _sanitize_author_name("agent-1_2.x") == "agent-1_2.x" + + +def test_sanitize_author_name_replaces_whitespace_and_specials() -> None: + assert _sanitize_author_name("Chief Architect") == "Chief_Architect" + assert _sanitize_author_name("AKS Expert") == "AKS_Expert" + # Tabs/newlines collapse to a single underscore. + assert _sanitize_author_name("a\tb\nc") == "a_b_c" + # Each disallowed char in the pattern is replaced. 
+ assert _sanitize_author_name("foo/bar\\baz|qux<x>y") == "foo_bar_baz_qux_x_y" + + +def test_sanitize_author_name_handles_edge_cases() -> None: + assert _sanitize_author_name(None) is None + assert _sanitize_author_name("") == "" + assert _sanitize_author_name(123) == 123 + # All-invalid input collapses to empty -> None (so callers drop the field). + assert _sanitize_author_name(" ") is None + # Leading/trailing underscores from sanitization are stripped. + assert _sanitize_author_name(" Chief Architect ") == "Chief_Architect" + + +def test_sanitize_author_names_dict_messages_shallow_copy() -> None: + original = [ + {"role": "system", "content": "sys"}, + {"role": "assistant", "name": "Chief Architect", "content": "hi"}, + {"role": "user", "name": "Coordinator", "content": "ok"}, + ] + out = _sanitize_author_names(original) + + # New list when changes happened. + assert out is not original + # Originals untouched. + assert original[1]["name"] == "Chief Architect" + # Unchanged messages share identity with originals (shallow copy only when needed). + assert out[0] is original[0] + assert out[2] is original[2] + # Changed message is a new dict with sanitized name. + assert out[1] is not original[1] + assert out[1]["name"] == "Chief_Architect" + assert out[1]["content"] == "hi" + + +def test_sanitize_author_names_dict_messages_drops_empty_name() -> None: + original = [ + {"role": "assistant", "name": " ", "content": "hello"}, + ] + out = _sanitize_author_names(original) + assert "name" not in out[0] + assert out[0]["content"] == "hello" + + +def test_sanitize_author_names_returns_input_when_nothing_changes() -> None: + original = [ + {"role": "system", "content": "sys"}, + {"role": "assistant", "name": "Coordinator", "content": "hi"}, + ] + out = _sanitize_author_names(original) + # Same sequence object returned to avoid pointless copies. 
+ assert out is original + + +def test_sanitize_author_names_object_messages_shallow_copy() -> None: + class _Msg: + def __init__(self, role: str, author_name: str | None, content: str) -> None: + self.role = role + self.author_name = author_name + self.content = content + + m1 = _Msg("assistant", "Chief Architect", "hi") + m2 = _Msg("assistant", "Coordinator", "ok") + original = [m1, m2] + + out = _sanitize_author_names(original) + + # Original object untouched. + assert m1.author_name == "Chief Architect" + # Changed message replaced with a shallow copy carrying sanitized name. + assert out[0] is not m1 + assert out[0].author_name == "Chief_Architect" + assert out[0].content == "hi" + # Unchanged message is the same instance. + assert out[1] is m2 From 7f8a04b62623458d78c191ff8a9ec9fa45610983 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 22:41:55 +0530 Subject: [PATCH 13/24] Demote empty-messages warning to debug The [AOAI_RETRY] empty messages list received warning fired on every turn in group-chat orchestration when the same speaker was selected twice in a row, flooding logs and giving the false impression of an error. This pattern is by design in agent-framework's GroupChatOrchestrator: _broadcast_messages_to_participants excludes the source executor, so when the orchestrator routes back to the same agent, its message cache is empty. The framework already emits its own "AgentExecutor ... Running agent with empty message cache" warning for this case. The actual API call is not empty -- the parent OpenAIChatCompletionClient._prepare_options prepends the agent's system instructions from options["instructions"] before sending. So demoting our duplicate warning to DEBUG removes the noise without hiding any real failure. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 0ca89ff0..21bf2005 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -625,8 +625,12 @@ def _inner_get_response( effective_messages = self._maybe_trim_messages(messages) if not effective_messages: - logger.warning( - "[AOAI_RETRY] empty messages list received; using original messages" + # Empty inputs occur legitimately in group-chat orchestration when the + # same speaker is selected twice in a row (the orchestrator's broadcast + # excludes the source). The parent client's `_prepare_options` still + # prepends the agent's system instructions, so the API call has content. + logger.debug( + "[AOAI_RETRY] empty messages list received; relying on options.instructions" ) effective_messages = messages @@ -785,8 +789,12 @@ def _inner_get_response( effective_messages = self._maybe_trim_messages(messages) if not effective_messages: - logger.warning( - "[AOAI_RETRY] empty messages list received; using original messages" + # Empty inputs occur legitimately in group-chat orchestration when the + # same speaker is selected twice in a row (the orchestrator's broadcast + # excludes the source). The parent client's `_prepare_options` still + # prepends the agent's system instructions, so the API call has content. 
+ logger.debug( + "[AOAI_RETRY] empty messages list received; relying on options.instructions" ) effective_messages = messages From 6d371fd42e3ac1e11fdc1b94e783dcfc2d9360cb Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 22:55:51 +0530 Subject: [PATCH 14/24] Fix Coordinator anti-loop: accept framework's response schema The agent_framework_orchestrations.GroupChatBuilder forces the Coordinator's response_format to AgentOrchestrationOutput (strict schema with fields next_speaker/reason/terminate). Our prompt asks for selected_participant/instruction/finish, but strict structured output overrides the prompt's field names. Without aliases, ManagerSelectionResponse.model_validate() silently succeeded with all fields = None (extra=allow), which disabled: - The 3-strike loop-detection streak counter (line 1019-1054) - Coordinator-driven termination on finish=true (line 1065) - _agent_invoked_at[selected] elapsed-time tracking (line 1098) Use Pydantic AliasChoices so the model accepts BOTH naming conventions, restoring anti-loop and termination logic. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 5c3fc9ab..856575e9 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -33,7 +33,7 @@ ) from agent_framework.orchestrations import GroupChatBuilder from mem0 import AsyncMemory -from pydantic import BaseModel, ValidationError +from pydantic import AliasChoices, BaseModel, Field, ValidationError logger = logging.getLogger(__name__) @@ -44,14 +44,37 @@ class ManagerSelectionResponse(BaseModel): - """Coordinator selection payload parsed from JSON output.""" + """Coordinator selection payload parsed from JSON output. + + The Coordinator prompt instructs the model to emit fields named + ``selected_participant`` / ``instruction`` / ``finish``. However, the + underlying ``agent_framework_orchestrations.GroupChatBuilder`` forces the + Coordinator's response_format to ``AgentOrchestrationOutput`` (strict + schema with fields ``next_speaker`` / ``reason`` / ``terminate``). With + strict structured output, the model always emits the framework's field + names regardless of the prompt. + + We use Pydantic ``AliasChoices`` so this model accepts BOTH naming + conventions transparently. Without these aliases, parsing silently + succeeds (``extra=allow``) but every field ends up ``None``, disabling + loop detection and Coordinator-driven termination. 
+ """ - selected_participant: str | None = None - instruction: str | None = None - finish: bool | None = None + selected_participant: str | None = Field( + default=None, + validation_alias=AliasChoices("selected_participant", "next_speaker"), + ) + instruction: str | None = Field( + default=None, + validation_alias=AliasChoices("instruction", "reason"), + ) + finish: bool | None = Field( + default=None, + validation_alias=AliasChoices("finish", "terminate"), + ) final_message: str | None = None - model_config = {"extra": "allow"} + model_config = {"extra": "allow", "populate_by_name": True} @dataclass From d5da788b0e17c38b8d7dda3b86c9b8637749827f Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 23:19:10 +0530 Subject: [PATCH 15/24] Sanitize message name at OpenAI wire format (defense in depth) Production still hits 400 BadRequest on messages[N].name even though _inner_get_response runs _sanitize_author_names on incoming Messages. The framework's _prepare_options/_prepare_messages_for_openai layer or agent-internal compaction can materialize messages with author_name set AFTER our early sanitization, leaving the dict 'name' field unsanitized on the wire. Override _prepare_messages_for_openai (the parent method that builds the final OpenAI dict payload) to sanitize each dict's 'name' field as a last-mile pass. This is the single chokepoint guaranteed to be on every Chat Completions request, regardless of upstream message-construction path. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure_openai_response_retry.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py index 21bf2005..b597afac 100644 --- a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py +++ b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py @@ -801,6 +801,12 @@ def _inner_get_response( # OpenAI Chat Completions validates message `name` against ^[^\s<|\\/>]+$. # Sanitize before sending so agent display names like "Chief Architect" # don't trip a 400 BadRequest. Originals are shallow-copied, not mutated. + # NOTE: this is a defense-in-depth pass on ``Message.author_name``. + # The authoritative sanitization happens in ``_prepare_messages_for_openai`` + # below, which sanitizes the FINAL dict ``name`` field right before the + # request is sent — catching any name that slips in via framework-internal + # message construction (e.g. compaction, memory context providers, + # orchestrator-injected messages) that bypasses this early pass. effective_messages = _sanitize_author_names(effective_messages) if stream: @@ -821,6 +827,47 @@ def _inner_get_response( **kwargs, ) + def _prepare_messages_for_openai(self, chat_messages, *args: Any, **kwargs: Any): # type: ignore[override] + """Sanitize message ``name`` fields after framework conversion to wire format. + + The parent ``_prepare_messages_for_openai`` walks ``Message`` objects and + builds the OpenAI dict payload (``{"role": ..., "name": ..., "content": ...}``). + The ``name`` field is copied from ``Message.author_name`` and is validated + by the OpenAI Chat Completions API against ``^[^\\s<|\\\\/>]+$``. + + We override here as a final, authoritative sanitization point. 
Even though + ``_inner_get_response`` already sanitizes ``Message.author_name``, names + can still reach this layer unsanitized from: + + * ``OpenAIChatCompletionClient._prepare_options`` calling + ``prepend_instructions_to_messages`` (which does not set author_name, but + downstream callers may add named messages). + * ``ChatAgent`` / memory context providers materializing messages with + ``author_name`` set inside the agent run loop, after the client receives + the original sequence. + * Any framework-internal compaction or message-rewriting path that + constructs new ``Message`` objects. + + Sanitizing the dict output is the single chokepoint guaranteed to be + on every Chat Completions request, regardless of how the messages were + assembled upstream. + """ + result = super()._prepare_messages_for_openai(chat_messages, *args, **kwargs) + for msg in result: + if not isinstance(msg, dict): + continue + name = msg.get("name") + if not isinstance(name, str): + continue + sanitized = _sanitize_author_name(name) + if sanitized == name: + continue + if sanitized: + msg["name"] = sanitized + else: + msg.pop("name", None) + return result + def _maybe_trim_messages( self, messages: MutableSequence[Any] ) -> MutableSequence[Any] | list[Any]: From 0da531fdfd92d9c9afe42b0670e9fe81b51bc543 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 23:42:04 +0530 Subject: [PATCH 16/24] Fix loop detection: don't count looped-on agent's runs as progress When Coordinator keeps picking the same agent A and A keeps running, A's own completions were bumping _progress_counter. Loop detection compares the counter snapshot taken at the previous identical Coordinator pick against the current value; if it changed, the streak was reset to 1. So the 3-strike threshold was never reached and the Coordinator->A->A pattern ran until max_rounds.
Now we only treat a non-Coordinator completion as 'progress' when the completing agent is different from the agent the Coordinator is currently latching onto (_last_coordinator_selection[0]). A different agent stepping in still resets the streak; A repeating itself does not. Adds two regression tests covering both cases. Also updates an existing termination test whose name described 'other agent makes progress' but actually used the same agent, hard-coding the buggy semantics. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 21 ++++- .../test_groupchat_orchestrator_internals.py | 86 +++++++++++++++++++ ...test_groupchat_orchestrator_termination.py | 4 +- 3 files changed, 108 insertions(+), 3 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 856575e9..5d847554 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -1018,8 +1018,27 @@ async def _complete_agent_response( # Mark progress on any non-Coordinator completion. This is used to ensure loop # detection only triggers when the Coordinator is repeating itself *and* the # rest of the conversation is not advancing. + # + # IMPORTANT: we must NOT count the looped-on agent's own runs as "progress". + # If we did, then the pattern "Coordinator picks A -> A runs -> Coordinator + # picks A -> A runs -> ..." would keep bumping the progress counter, which + # would reset the loop-detection streak on every check, and the streak would + # never grow past 1. The loop would then never be detected. + # + # Real progress means a DIFFERENT agent ran since the last identical Coordinator + # selection. So we only increment when the completing agent is not the one the + # Coordinator is currently latching onto. 
if agent_name != self.coordinator_name: - self._progress_counter += 1 + last_selected = ( + self._last_coordinator_selection[0] + if self._last_coordinator_selection + else None + ) + if ( + last_selected is None + or agent_name.lower() != last_selected.lower() + ): + self._progress_counter += 1 # Detect manager termination signal (finish=true) from Coordinator. # NOTE: The underlying GroupChatBuilder does not automatically stop on finish, diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py index 600869a0..27b7ac41 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py @@ -700,6 +700,92 @@ def _select(participant: str, instruction: str = "do"): assert orch._forced_termination_requested is True + def test_loop_breaker_triggered_when_looped_agent_runs_between_selections( + self, + ): + """Regression: when Coordinator keeps picking the same agent, that agent's + own runs MUST NOT count as progress, or the streak resets and the loop + never breaks. + """ + orch = _make_orch() + orch._conversation = [] + + def _select(participant: str, instruction: str = "do"): + orch._current_agent_response = [ + json.dumps( + { + "selected_participant": participant, + "instruction": instruction, + "finish": False, + "final_message": "", + } + ) + ] + orch._current_agent_start_time = datetime.now() + + def _agent_runs(name: str, text: str = "ok"): + orch._current_agent_response = [text] + orch._current_agent_start_time = datetime.now() + + # Simulate production sequence: Coordinator picks A, then A runs, + # then Coordinator picks A again, then A runs, etc. 
+ _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("A") + _run(orch._complete_agent_response("A", None)) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("A") + _run(orch._complete_agent_response("A", None)) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + + assert orch._forced_termination_requested is True + + def test_loop_breaker_resets_when_different_agent_responds(self): + """If a different agent responds between identical Coordinator selections, + treat that as real progress and reset the streak. + """ + orch = _make_orch() + orch._conversation = [] + + def _select(participant: str, instruction: str = "do"): + orch._current_agent_response = [ + json.dumps( + { + "selected_participant": participant, + "instruction": instruction, + "finish": False, + "final_message": "", + } + ) + ] + orch._current_agent_start_time = datetime.now() + + def _agent_runs(name: str, text: str = "ok"): + orch._current_agent_response = [text] + orch._current_agent_start_time = datetime.now() + + # Sequence: A, A, B, A, A (a different agent B interrupts -> streak resets) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("A") + _run(orch._complete_agent_response("A", None)) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("B") + _run(orch._complete_agent_response("B", None)) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("A") + _run(orch._complete_agent_response("A", None)) + _select("A") + _run(orch._complete_agent_response("Coordinator", None)) + + # Only 2 consecutive A selections without progress (one streak of 2 + # before B reset it, one streak of 2 after). Loop NOT detected. 
+ assert orch._forced_termination_requested is False + # ----------------------------------------------------------------------------- # _build_groupchat diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py index dc7f124d..36a0c00b 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py @@ -113,9 +113,9 @@ def _agent_reply(text: str = "ok"): _coordinator_select("Chief Architect") await orch._complete_agent_response("Coordinator", callback=None) - # 2) The participant responds (progress). + # 2) A DIFFERENT participant responds (real progress, not the looped-on one). _agent_reply("progress") - await orch._complete_agent_response("Chief Architect", callback=None) + await orch._complete_agent_response("AKS Expert", callback=None) # 3) Coordinator repeats the same selection twice. _coordinator_select("Chief Architect") From 9a85f004c374eecd7d7c37f8e0f1ca6eeb25dbe9 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Fri, 12 Jun 2026 23:50:16 +0530 Subject: [PATCH 17/24] Add agent_description for Analysis participants so Coordinator can route by capability The Coordinator's valid_participants block was a bullet list of names only, so the LLM had no per-agent capability signal. Combined with a Coordinator prompt that names 'Chief Architect' frequently across phases 0/1/4/5/6, the model latched onto Chief Architect repeatedly and the conversation looped on the same agent. This change populates agent_description on every Analysis participant (Chief Architect, AKS Expert, and the platform experts in platform_registry.json) and renders each description into the Coordinator's valid_participants list. 
The descriptions are also passed through AgentBuilder.create_agent_by_agentinfo's existing description= argument, so the framework's Agent.description field is no longer always None. Scope: Analysis step only. design/yaml/documentation orchestrators are left for a follow-up after this change is validated in production. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../orchestration/analysis_orchestrator.py | 30 +++++++++++++++++-- .../orchestration/platform_registry.json | 18 +++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py index 1221433f..4705d240 100644 --- a/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py +++ b/src/processor/src/steps/analysis/orchestration/analysis_orchestrator.py @@ -157,8 +157,13 @@ async def prepare_agent_infos(self) -> list[AgentInfo]: prompt_path = agent_dir / "agents" / prompt_file instruction = self.read_prompt_file(str(prompt_path)) + expert_description = expert.get("description") + if not isinstance(expert_description, str) or not expert_description.strip(): + expert_description = None + expert_info = AgentInfo( agent_name=agent_name, + agent_description=expert_description, agent_instruction=instruction, tools=self.mcp_tools, ) @@ -172,6 +177,12 @@ async def prepare_agent_infos(self) -> list[AgentInfo]: aks_instruction = self.read_prompt_file(agent_dir / "agents/prompt_aks.txt") aks_agent_info = AgentInfo( agent_name="AKS Expert", + agent_description=( + "Tool-free reviewer for Azure AKS migration readiness. Reviews the " + "latest Evidence Pack and produces SIGN-OFF: PASS/FAIL on AKS-side " + "concerns. Select only after the Chief Architect has posted an Evidence " + "Pack; required reviewer when present." 
+ ), agent_instruction=aks_instruction, tools=self.mcp_tools, ) @@ -188,6 +199,13 @@ async def prepare_agent_infos(self) -> list[AgentInfo]: chief_architect_agent_info = AgentInfo( agent_name="Chief Architect", + agent_description=( + "Lead orchestrator of the analysis and the ONLY participant with blob " + "tools. Performs all file I/O (listing, reading, writing " + "analysis_result.md) and authors / refreshes the Evidence Pack. Select " + "to run hard-termination triage, to do or redo any file work, or to " + "post a fresh Evidence Pack after reviewers ask for one." + ), agent_instruction=architect_instruction, tools=self.mcp_tools, ) @@ -208,10 +226,18 @@ async def prepare_agent_infos(self) -> list[AgentInfo]: tools=self.mcp_tools[2], # Blob IO tool only ) - # Render coordinator prompt with the current participant list. + # Render coordinator prompt with the current participant list. Include each + # participant's description so the Coordinator can route by capability rather + # than only by name (a name-only list biases the LLM toward whichever name + # appears most often elsewhere in the prompt, e.g. "Chief Architect"). 
participant_names = [ai.agent_name for ai in agent_infos] valid_participants_block = "\n".join([ - f'- "{name}"' for name in participant_names + ( + f'- "{ai.agent_name}": {ai.agent_description}' + if ai.agent_description + else f'- "{ai.agent_name}"' + ) + for ai in agent_infos ]) coordinator_agent_info.render( **self.task_param.model_dump(), diff --git a/src/processor/src/steps/analysis/orchestration/platform_registry.json b/src/processor/src/steps/analysis/orchestration/platform_registry.json index 342f10b6..a0342846 100644 --- a/src/processor/src/steps/analysis/orchestration/platform_registry.json +++ b/src/processor/src/steps/analysis/orchestration/platform_registry.json @@ -6,7 +6,8 @@ "agent_name": "EKS Expert", "prompt_file": "prompt_eks.txt", "category": "managed_cloud", - "signals": ["eks.amazonaws.com", "aws", "arn:aws", "kops", "eksctl"] + "signals": ["eks.amazonaws.com", "aws", "arn:aws", "kops", "eksctl"], + "description": "Tool-free reviewer specialized in Amazon EKS (AWS) source-side concerns. Provides EKS-specific analysis of the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is EKS." }, { "id": "gke", @@ -19,7 +20,8 @@ "anthos", "gcr.io", "artifactregistry" - ] + ], + "description": "Tool-free reviewer specialized in Google GKE source-side concerns. Provides GKE-specific analysis of the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is GKE." }, { "id": "openshift", @@ -32,21 +34,24 @@ "ocp", "security.openshift.io", "config.openshift.io" - ] + ], + "description": "Tool-free reviewer for Red Hat OpenShift (OCP) source-side concerns. Reviews the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is OpenShift." 
}, { "id": "rancher", "agent_name": "Rancher Expert", "prompt_file": "prompt_rancher.txt", "category": "enterprise_distro", - "signals": ["cattle.io", "rancher", "rke", "rke2", "k3s"] + "signals": ["cattle.io", "rancher", "rke", "rke2", "k3s"], + "description": "Tool-free reviewer for Rancher / RKE / RKE2 / K3s source-side concerns. Reviews the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is Rancher." }, { "id": "tanzu", "agent_name": "Tanzu Expert", "prompt_file": "prompt_tanzu.txt", "category": "enterprise_distro", - "signals": ["tanzu", "vmware", "tkg", "tkgs", "pinniped", "antrea"] + "signals": ["tanzu", "vmware", "tkg", "tkgs", "pinniped", "antrea"], + "description": "Tool-free reviewer for VMware Tanzu (TKG / TKGS) source-side concerns. Reviews the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is Tanzu." }, { "id": "onprem", @@ -70,7 +75,8 @@ "ceph", "nfs", "f5" - ] + ], + "description": "Tool-free reviewer for self-managed / on-premises Kubernetes (kubeadm, k3s, Talos, bare-metal, vSphere) source-side concerns. Reviews the Evidence Pack and produces SIGN-OFF: PASS/FAIL. Select only after the Chief Architect has posted an Evidence Pack; required only if the source platform is on-prem." } ] } From 76e9c17c86c124cd4bfddf83bf7deae9ed1a5fef Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 00:05:45 +0530 Subject: [PATCH 18/24] Fix loop detection: key on agent name only, not (agent, instruction) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Production run after the previous progress-counter fix (0da531f) STILL showed Chief Architect picked 6+ consecutive times. Root cause: the loop detection key was (agent, instruction_text). 
The LLM-driven Coordinator varies its instruction on every pick ('list source blobs', 'read xyz.yaml', 'save analysis_result.md') while latching onto the same agent — so every selection_key was unique, the streak reset to 1 on every pick, and the 3-strike threshold was never reached. Change: track only the agent name (lower-cased). The progress counter (now correct after 0da531f) already encodes 'no DIFFERENT agent ran in between', so 3 consecutive picks of the same agent with no other-agent progress is a strong, low-false-positive loop signal. Adds a regression test that replays the production sequence (same agent, three different instruction strings) and verifies forced termination fires. The earlier tests for exact-match repeats and for B-resets-the- streak continue to pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 50 ++++++++++++------- .../test_groupchat_orchestrator_internals.py | 47 +++++++++++++++++ 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 5d847554..9d833684 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -295,9 +295,16 @@ def __init__( self._forced_termination_reason: str | None = None self._forced_termination_type: str | None = None - # Loop detection for Coordinator selections (participant + instruction) - self._last_coordinator_selection: tuple[str, str] | None = None + # Loop detection for Coordinator selections. + # We track the *agent the Coordinator most recently picked* (lower-cased name) + # rather than (agent, instruction) tuples, because in practice the LLM-driven + # Coordinator varies the instruction text while looping on the same agent. 
+ # A streak counts how many consecutive Coordinator picks landed on the same + # agent without any *other* agent running in between (see _progress_counter + # bookkeeping in _complete_agent_response). + self._last_coordinator_selection: str | None = None self._coordinator_selection_streak: int = 0 + # Diagnostic history of recent (agent, instruction) selections. self._recent_coordinator_selections: deque[tuple[str, str]] = deque(maxlen=10) # Progress counter used to avoid false-positive loop detection. @@ -1029,14 +1036,10 @@ async def _complete_agent_response( # selection. So we only increment when the completing agent is not the one the # Coordinator is currently latching onto. if agent_name != self.coordinator_name: - last_selected = ( - self._last_coordinator_selection[0] - if self._last_coordinator_selection - else None - ) + last_selected = self._last_coordinator_selection if ( last_selected is None - or agent_name.lower() != last_selected.lower() + or agent_name.lower() != last_selected ): self._progress_counter += 1 @@ -1058,17 +1061,27 @@ async def _complete_agent_response( # measures from Coordinator selection -> response completion. selected = getattr(manager_response, "selected_participant", None) - # Loop detection: same selection+instruction repeated. + # Loop detection: same agent picked repeatedly with no other agent + # making progress in between. We deliberately key on the agent name + # alone (not on the instruction text) because the LLM-driven + # Coordinator often varies its instruction text while still looping + # on the same agent ("re-list", "read xyz.yaml", "save analysis_result.md" + # all sent to the same Chief Architect over and over). The + # _progress_counter (incremented in _complete_agent_response only when + # a DIFFERENT agent runs) is what tells us whether anything else + # actually happened in between.
if ( isinstance(selected, str) and selected and selected.lower() != "none" ): - selection_key = (selected, str(manager_instruction or "")) - self._recent_coordinator_selections.append(selection_key) - if selection_key == self._last_coordinator_selection: - # If any other agent responded since the last identical selection, - # treat that as progress and reset the streak. + selected_key = selected.lower() + self._recent_coordinator_selections.append( + (selected, str(manager_instruction or "")) + ) + if selected_key == self._last_coordinator_selection: + # Same agent again. If any other agent ran since the last + # identical pick, treat that as progress and reset the streak. if ( self._progress_counter != self._last_coordinator_selection_progress @@ -1080,17 +1093,20 @@ async def _complete_agent_response( else: self._coordinator_selection_streak += 1 else: - self._last_coordinator_selection = selection_key + self._last_coordinator_selection = selected_key self._coordinator_selection_streak = 1 self._last_coordinator_selection_progress = ( self._progress_counter ) - # If the Coordinator repeats the exact same ask 3 times, break. + # If the Coordinator picks the same agent 3 times in a row + # without any other agent running in between, break out. 
if self._coordinator_selection_streak >= 3: self._request_forced_termination( reason=( - f"Loop detected: Coordinator repeated the same selection to '{selected}' {self._coordinator_selection_streak} times with no progress" + f"Loop detected: Coordinator selected '{selected}' " + f"{self._coordinator_selection_streak} consecutive " + f"times with no other agent making progress in between" ), termination_type="hard_timeout", ) diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py index 27b7ac41..5a1121c8 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py @@ -786,6 +786,53 @@ def _agent_runs(name: str, text: str = "ok"): # before B reset it, one streak of 2 after). Loop NOT detected. assert orch._forced_termination_requested is False + def test_loop_breaker_triggered_when_same_agent_picked_with_varying_instructions( + self, + ): + """Regression for production: the LLM-driven Coordinator was looping on + Chief Architect but varying its instruction text on every pick + ('re-list', 'read xyz', 'save analysis_result.md'). The loop detector + must key on the AGENT NAME only — not on (agent, instruction) — or the + streak resets on every pick and the loop is never caught. 
+ """ + orch = _make_orch() + orch._conversation = [] + + def _select(participant: str, instruction: str = "do"): + orch._current_agent_response = [ + json.dumps( + { + "selected_participant": participant, + "instruction": instruction, + "finish": False, + "final_message": "", + } + ) + ] + orch._current_agent_start_time = datetime.now() + + def _agent_runs(name: str, text: str = "ok"): + orch._current_agent_response = [text] + orch._current_agent_start_time = datetime.now() + + # Each Coordinator pick targets the same agent but with a DIFFERENT + # instruction. With the old (agent, instruction) tuple key this never + # tripped the breaker. + _select("Chief Architect", instruction="list source blobs") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("Chief Architect") + _run(orch._complete_agent_response("Chief Architect", None)) + + _select("Chief Architect", instruction="read source files") + _run(orch._complete_agent_response("Coordinator", None)) + _agent_runs("Chief Architect") + _run(orch._complete_agent_response("Chief Architect", None)) + + _select("Chief Architect", instruction="save analysis_result.md") + _run(orch._complete_agent_response("Coordinator", None)) + + assert orch._forced_termination_requested is True + # ----------------------------------------------------------------------------- # _build_groupchat From 4de1e284845e9a82e7ecbead9400cd9d277a40b9 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 00:45:04 +0530 Subject: [PATCH 19/24] fix(workflow): reject incoherent ResultGenerator output across all steps Production deployment of the agent-framework 1.3.0 upgrade surfaced a crash chain: Analysis "succeeded" with a self-contradictory result (result=True, is_hard_terminated=False, output=None), Design then crashed at `task_param.output.process_id`. The root cause is the ResultGenerator returning an empty shell when participants never produced useful content. 
Fixes: * groupchat_orchestrator.run_stream now validates ResultGenerator output before constructing OrchestrationResult. If the result is not hard terminated but carries no `output` / `termination_output` payload, the orchestrator now reports success=False with a descriptive error. This is generic across all four step models (Analysis uses `output`; Design/Convert/Documentation use `termination_output`). * All four step executors gained a defense-in-depth guard that raises a clear `Executor failed: produced no Output. Reason: ...` exception when the same incoherent shape is observed. This stops the broken value at the boundary instead of propagating it downstream. * groupchat_orchestrator silent `except Exception: pass` around Coordinator JSON parsing replaced with `logger.debug(... exc_info=...)` so loop-detection failures become visible during debugging instead of being swallowed. Tests: * Updated each executor's existing soft-completion test to provide a valid output (previous setup encoded the broken shape we now reject). * Added a new guard test per executor asserting the new exception fires for the incoherent (success=True + output=None + not hard-terminated) shape. * Full unit suite: 829 passed (was 825; +4 new guard tests). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 55 ++++++++++- .../analysis/workflow/analysis_executor.py | 7 ++ .../convert/workflow/yaml_convert_executor.py | 7 ++ .../steps/design/workflow/design_executor.py | 7 ++ .../workflow/documentation_executor.py | 7 ++ .../steps/analysis/test_analysis_executor.py | 99 ++++++++++++++++++- .../convert/test_yaml_convert_executor.py | 99 ++++++++++++++++++- .../unit/steps/design/test_design_executor.py | 61 +++++++++++- .../test_documentation_executor.py | 76 +++++++++++++- 9 files changed, 409 insertions(+), 9 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 9d833684..05034ddd 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -643,17 +643,48 @@ async def run_stream( f"[RESULT] Skipping result generation - result_format: {result_format}, agent exists: {result_generator_name in self.agents}" ) + # Validate that ResultGenerator produced a coherent output. The LLM can + # sometimes return is_hard_terminated=False with output=None ("success + # but no actual output"), which causes downstream steps to crash with + # NoneType errors. Treat such self-contradictory results as failures so + # the workflow surfaces a clear error rather than propagating an empty + # shell to the next step. + generated_error: str | None = None + if final_analysis is not None and not bool( + getattr(final_analysis, "is_hard_terminated", False) + ): + # Step result models use either ``output`` (Analysis) or + # ``termination_output`` (Design, Convert, Documentation). Treat + # both equivalently: if neither holds a non-None payload, the + # ResultGenerator returned an incoherent shell. 
+ has_output_attr = hasattr(final_analysis, "output") or hasattr( + final_analysis, "termination_output" + ) + payload = getattr(final_analysis, "output", None) or getattr( + final_analysis, "termination_output", None + ) + if has_output_attr and payload is None: + reason = ( + getattr(final_analysis, "reason", "") or "" + ) + generated_error = ( + "ResultGenerator produced incoherent output: " + "is_hard_terminated=False but output=None. " + f"Reason from result: {reason}" + ) + logger.error("[RESULT] %s", generated_error) + # Calculate execution time execution_time = (datetime.now() - start_time).total_seconds() # Build result result = OrchestrationResult[TOutput]( - success=True, + success=generated_error is None, conversation=conversation, agent_responses=self.agent_responses, tool_usage=self.agent_tool_usage, result=final_analysis, - error=None, + error=generated_error, execution_time_seconds=execution_time, ) @@ -1154,9 +1185,23 @@ async def _complete_agent_response( ): # Record invocation time for non-termination coordinator selections self._agent_invoked_at[selected] = completed_at - except Exception: - # If the Coordinator didn't emit valid JSON, ignore. - pass + except Exception as exc: + # If the Coordinator didn't emit valid JSON we silently drop + # loop-detection and termination handling for this turn. Log at + # debug so the silence is visible if loop detection ever appears + # to misfire (previously this was a bare ``pass`` which made the + # failure invisible). + preview = ( + complete_message[:200] + if isinstance(complete_message, str) + else str(type(complete_message)) + ) + logger.debug( + "Coordinator JSON parse failed; skipping loop detection for " + "this turn. 
Raw message preview: %r", + preview, + exc_info=exc, + ) # Invoke callback with complete response if callback: diff --git a/src/processor/src/steps/analysis/workflow/analysis_executor.py b/src/processor/src/steps/analysis/workflow/analysis_executor.py index 924a9289..999ae725 100644 --- a/src/processor/src/steps/analysis/workflow/analysis_executor.py +++ b/src/processor/src/steps/analysis/workflow/analysis_executor.py @@ -65,6 +65,13 @@ async def handle_execute( error_msg = result.error or "Analysis orchestration failed with no output" raise Exception(f"AnalysisExecutor failed: {error_msg}") + if not result.result.is_hard_terminated and result.result.output is None: + reason = result.result.reason or "" + raise Exception( + "AnalysisExecutor failed: orchestration reported success but produced " + f"no AnalysisOutput. Reason: {reason}" + ) + if result.result: if not result.result.is_hard_terminated: await ctx.send_message(result.result) diff --git a/src/processor/src/steps/convert/workflow/yaml_convert_executor.py b/src/processor/src/steps/convert/workflow/yaml_convert_executor.py index 7a9e283f..195de773 100644 --- a/src/processor/src/steps/convert/workflow/yaml_convert_executor.py +++ b/src/processor/src/steps/convert/workflow/yaml_convert_executor.py @@ -45,6 +45,13 @@ async def handle_execute( ) raise Exception(f"YamlConvertExecutor failed: {error_msg}") + if not result.result.is_hard_terminated and result.result.termination_output is None: + reason = result.result.reason or "" + raise Exception( + "YamlConvertExecutor failed: orchestration reported success but " + f"produced no YAML conversion output. 
Reason: {reason}" + ) + if result.result: if not result.result.is_hard_terminated: await ctx.send_message(result.result) diff --git a/src/processor/src/steps/design/workflow/design_executor.py b/src/processor/src/steps/design/workflow/design_executor.py index 986fed12..b70ed75c 100644 --- a/src/processor/src/steps/design/workflow/design_executor.py +++ b/src/processor/src/steps/design/workflow/design_executor.py @@ -42,6 +42,13 @@ async def handle_execute( error_msg = result.error or "Design orchestration failed with no output" raise Exception(f"DesignExecutor failed: {error_msg}") + if not result.result.is_hard_terminated and result.result.termination_output is None: + reason = result.result.reason or "" + raise Exception( + "DesignExecutor failed: orchestration reported success but produced " + f"no DesignOutput. Reason: {reason}" + ) + if result.result: if not result.result.is_hard_terminated: await ctx.send_message(result.result) diff --git a/src/processor/src/steps/documentation/workflow/documentation_executor.py b/src/processor/src/steps/documentation/workflow/documentation_executor.py index 9347b13f..64070201 100644 --- a/src/processor/src/steps/documentation/workflow/documentation_executor.py +++ b/src/processor/src/steps/documentation/workflow/documentation_executor.py @@ -47,4 +47,11 @@ async def handle_execute( ) raise Exception(f"DocumentationExecutor failed: {error_msg}") + if not result.result.is_hard_terminated and result.result.termination_output is None: + reason = result.result.reason or "" + raise Exception( + "DocumentationExecutor failed: orchestration reported success but " + f"produced no DocumentationOutput. 
Reason: {reason}" + ) + await ctx.yield_output(result.result) diff --git a/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py b/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py index 1c2d1298..c3d7613f 100644 --- a/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py +++ b/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py @@ -6,11 +6,47 @@ import asyncio from libs.agent_framework.groupchat_orchestrator import OrchestrationResult -from steps.analysis.models.step_output import Analysis_BooleanExtendedResult +from steps.analysis.models.step_output import ( + AnalysisOutput, + Analysis_BooleanExtendedResult, + ComplexityAnalysis, + FileType, + MigrationReadiness, +) from steps.analysis.models.step_param import Analysis_TaskParam from steps.analysis.workflow.analysis_executor import AnalysisExecutor +def _make_analysis_output(process_id: str) -> AnalysisOutput: + return AnalysisOutput( + process_id=process_id, + platform_detected="EKS", + confidence_score="95%", + files_discovered=[ + FileType( + filename="app.yaml", + type="Deployment", + complexity="Low", + azure_mapping="AKS Deployment", + ) + ], + complexity_analysis=ComplexityAnalysis( + network_complexity="Low", + security_complexity="Low", + storage_complexity="Low", + compute_complexity="Low", + ), + migration_readiness=MigrationReadiness( + overall_score="A", + concerns=[], + recommendations=[], + ), + summary="ok", + expert_insights=[], + analysis_file="analysis.md", + ) + + class _FakeTelemetry: def __init__(self): self.transitions: list[tuple[str, str, str]] = [] @@ -59,6 +95,7 @@ async def execute(self, task_param=None): result=True, is_hard_terminated=False, process_id=task_param.process_id, + output=_make_analysis_output(task_param.process_id), ), ) @@ -143,3 +180,63 @@ async def execute(self, task_param=None): assert isinstance(ctx.yielded[0], Analysis_BooleanExtendedResult) asyncio.run(_run()) + + +def 
test_analysis_executor_raises_when_soft_completion_has_no_output(monkeypatch): + """Soft completion with output=None is incoherent: AnalysisExecutor must raise. + + This guards against ResultGenerator returning a self-contradictory shell + (success=True, is_hard_terminated=False, output=None) which would otherwise + propagate to Design and crash there with `NoneType.process_id`. + """ + async def _run(): + import pytest + + telemetry = _FakeTelemetry() + app_context = _FakeAppContext(telemetry) + ctx = _FakeCtx() + + class _FakeOrchestrator: + def __init__(self, _app_context): + pass + + async def execute(self, task_param=None): + return OrchestrationResult( + success=True, + conversation=[], + agent_responses=[], + tool_usage={}, + result=Analysis_BooleanExtendedResult( + result=True, + is_hard_terminated=False, + process_id=task_param.process_id, + reason="agents never produced output", + ), + ) + + monkeypatch.setattr( + "steps.analysis.workflow.analysis_executor.text2art", + lambda _s: "ART", + raising=False, + ) + monkeypatch.setattr( + "steps.analysis.workflow.analysis_executor.AnalysisOrchestrator", + _FakeOrchestrator, + ) + + executor = AnalysisExecutor(id="analysis", app_context=app_context) + message = Analysis_TaskParam( + process_id="p1", + container_name="c1", + source_file_folder="p1/source", + workspace_file_folder="p1/workspace", + output_file_folder="p1/output", + ) + + with pytest.raises(Exception, match="produced no AnalysisOutput"): + await executor.handle_execute(message, ctx) # type: ignore[arg-type] + + assert len(ctx.sent) == 0 + assert len(ctx.yielded) == 0 + + asyncio.run(_run()) diff --git a/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py b/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py index d1957289..2a12d3b7 100644 --- a/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py +++ b/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py @@ -6,11 +6,63 
@@ import asyncio from libs.agent_framework.groupchat_orchestrator import OrchestrationResult -from steps.convert.models.step_output import Yaml_ExtendedBooleanResult +from steps.convert.models.step_output import ( + ConvertedFile, + ConversionMetrics, + ConversionQuality, + DimensionalAnalysis, + MultiDimensionalAnalysis, + YamlOutput, + Yaml_ExtendedBooleanResult, +) from steps.convert.workflow.yaml_convert_executor import YamlConvertExecutor from steps.design.models.step_output import Design_ExtendedBooleanResult +def _make_yaml_output() -> YamlOutput: + dim = DimensionalAnalysis( + complexity="Low", + converted_components=["pod"], + azure_optimizations="none", + concerns=[], + success_rate="100%", + ) + return YamlOutput( + converted_files=[ + ConvertedFile( + source_file="a.yaml", + converted_file="a-azure.yaml", + conversion_status="Success", + accuracy_rating="100%", + concerns=[], + azure_enhancements=[], + ) + ], + multi_dimensional_analysis=MultiDimensionalAnalysis( + network_analysis=dim, + security_analysis=dim, + storage_analysis=dim, + compute_analysis=dim, + ), + overall_conversion_metrics=ConversionMetrics( + total_files=1, + successful_conversions=1, + failed_conversions=0, + overall_accuracy="100%", + azure_compatibility="100%", + ), + conversion_quality=ConversionQuality( + azure_best_practices="ok", + security_hardening="ok", + performance_optimization="ok", + production_readiness="ok", + ), + summary="ok", + expert_insights=[], + conversion_report_file="report.md", + ) + + class _FakeTelemetry: def __init__(self): self.transitions: list[tuple[str, str, str]] = [] @@ -59,6 +111,7 @@ async def execute(self, task_param=None): result=True, is_hard_terminated=False, process_id=task_param.process_id, + termination_output=_make_yaml_output(), ), ) @@ -118,3 +171,47 @@ async def execute(self, task_param=None): assert isinstance(ctx.yielded[0], Yaml_ExtendedBooleanResult) asyncio.run(_run()) + + +def 
test_yaml_convert_executor_raises_when_soft_completion_has_no_output(monkeypatch): + """Soft completion with termination_output=None is incoherent: must raise.""" + async def _run(): + import pytest + + telemetry = _FakeTelemetry() + app_context = _FakeAppContext(telemetry) + ctx = _FakeCtx() + + class _FakeOrchestrator: + def __init__(self, _app_context): + pass + + async def execute(self, task_param=None): + return OrchestrationResult( + success=True, + conversation=[], + agent_responses=[], + tool_usage={}, + result=Yaml_ExtendedBooleanResult( + result=True, + is_hard_terminated=False, + process_id=task_param.process_id, + reason="agents never produced output", + ), + ) + + monkeypatch.setattr( + "steps.convert.workflow.yaml_convert_executor.YamlConvertOrchestrator", + _FakeOrchestrator, + ) + + executor = YamlConvertExecutor(id="yaml", app_context=app_context) + message = Design_ExtendedBooleanResult(process_id="p1") + + with pytest.raises(Exception, match="produced no YAML conversion output"): + await executor.handle_execute(message, ctx) # type: ignore[arg-type] + + assert len(ctx.sent) == 0 + assert len(ctx.yielded) == 0 + + asyncio.run(_run()) diff --git a/src/processor/src/tests/unit/steps/design/test_design_executor.py b/src/processor/src/tests/unit/steps/design/test_design_executor.py index c03b93b0..f782a794 100644 --- a/src/processor/src/tests/unit/steps/design/test_design_executor.py +++ b/src/processor/src/tests/unit/steps/design/test_design_executor.py @@ -7,10 +7,24 @@ from libs.agent_framework.groupchat_orchestrator import OrchestrationResult from steps.analysis.models.step_output import Analysis_BooleanExtendedResult -from steps.design.models.step_output import Design_ExtendedBooleanResult +from steps.design.models.step_output import ( + DesignOutput, + Design_ExtendedBooleanResult, + OutputFile, +) from steps.design.workflow.design_executor import DesignExecutor +def _make_design_output() -> DesignOutput: + return DesignOutput( + 
result="Success", + summary="ok", + azure_services=["AKS"], + architecture_decisions=["use managed identity"], + outputs=[OutputFile(file="design.md", description="design doc")], + ) + + class _FakeTelemetry: def __init__(self): self.transitions: list[tuple[str, str, str]] = [] @@ -59,6 +73,7 @@ async def execute(self, task_param=None): result=True, is_hard_terminated=False, process_id=task_param.process_id, + termination_output=_make_design_output(), ), ) @@ -118,3 +133,47 @@ async def execute(self, task_param=None): assert isinstance(ctx.yielded[0], Design_ExtendedBooleanResult) asyncio.run(_run()) + + +def test_design_executor_raises_when_soft_completion_has_no_output(monkeypatch): + """Soft completion with termination_output=None is incoherent: must raise.""" + async def _run(): + import pytest + + telemetry = _FakeTelemetry() + app_context = _FakeAppContext(telemetry) + ctx = _FakeCtx() + + class _FakeOrchestrator: + def __init__(self, _app_context): + pass + + async def execute(self, task_param=None): + return OrchestrationResult( + success=True, + conversation=[], + agent_responses=[], + tool_usage={}, + result=Design_ExtendedBooleanResult( + result=True, + is_hard_terminated=False, + process_id=task_param.process_id, + reason="agents never produced output", + ), + ) + + monkeypatch.setattr( + "steps.design.workflow.design_executor.DesignOrchestrator", + _FakeOrchestrator, + ) + + executor = DesignExecutor(id="design", app_context=app_context) + message = Analysis_BooleanExtendedResult(process_id="p1") + + with pytest.raises(Exception, match="produced no DesignOutput"): + await executor.handle_execute(message, ctx) # type: ignore[arg-type] + + assert len(ctx.sent) == 0 + assert len(ctx.yielded) == 0 + + asyncio.run(_run()) diff --git a/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py b/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py index 9d9ff63e..29f0c95c 100644 --- 
a/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py +++ b/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py @@ -7,10 +7,40 @@ from libs.agent_framework.groupchat_orchestrator import OrchestrationResult from steps.convert.models.step_output import Yaml_ExtendedBooleanResult -from steps.documentation.models.step_output import Documentation_ExtendedBooleanResult +from steps.documentation.models.step_output import ( + AggregatedResults, + DocumentationOutput, + Documentation_ExtendedBooleanResult, + ExpertCollaboration, + GeneratedFilesCollection, + ProcessMetrics, +) from steps.documentation.workflow.documentation_executor import DocumentationExecutor +def _make_documentation_output() -> DocumentationOutput: + return DocumentationOutput( + aggregated_results=AggregatedResults( + executive_summary="ok", + total_files_processed=1, + overall_success_rate="100%", + platform_detected="EKS", + conversion_accuracy="100%", + documentation_completeness="100%", + enterprise_readiness="ok", + ), + generated_files=GeneratedFilesCollection(), + expert_collaboration=ExpertCollaboration(), + process_metrics=ProcessMetrics( + platform_detected="EKS", + conversion_accuracy="100%", + documentation_completeness="100%", + enterprise_readiness="ok", + ), + summary="ok", + ) + + class _FakeTelemetry: def __init__(self): self.transitions: list[tuple[str, str, str]] = [] @@ -54,6 +84,7 @@ async def execute(self, task_param=None): result=Documentation_ExtendedBooleanResult( result=True, process_id=task_param.process_id, + termination_output=_make_documentation_output(), ), ) @@ -71,3 +102,46 @@ async def execute(self, task_param=None): assert isinstance(ctx.yielded[0], Documentation_ExtendedBooleanResult) asyncio.run(_run()) + + +def test_documentation_executor_raises_when_soft_completion_has_no_output(monkeypatch): + """Soft completion with termination_output=None is incoherent: must raise.""" + async def _run(): + import pytest + + 
telemetry = _FakeTelemetry() + app_context = _FakeAppContext(telemetry) + ctx = _FakeCtx() + + class _FakeOrchestrator: + def __init__(self, _app_context): + pass + + async def execute(self, task_param=None): + return OrchestrationResult( + success=True, + conversation=[], + agent_responses=[], + tool_usage={}, + result=Documentation_ExtendedBooleanResult( + result=True, + is_hard_terminated=False, + process_id=task_param.process_id, + reason="agents never produced output", + ), + ) + + monkeypatch.setattr( + "steps.documentation.workflow.documentation_executor.DocumentationOrchestrator", + _FakeOrchestrator, + ) + + executor = DocumentationExecutor(id="documentation", app_context=app_context) + message = Yaml_ExtendedBooleanResult(process_id="p1") + + with pytest.raises(Exception, match="produced no DocumentationOutput"): + await executor.handle_execute(message, ctx) # type: ignore[arg-type] + + assert len(ctx.yielded) == 0 + + asyncio.run(_run()) From f74629ba53c8722c4e3b04738389c9cd8b9ecb2f Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 14:04:09 +0530 Subject: [PATCH 20/24] fix(groupchat): resolve agent identity via author_name for af 1.3.0 Root cause of the "Runner did not converge after 100 iterations" production failure (and the Chief-Architect-only loop that preceded it): agent-framework 1.3.0 changed how AgentResponseUpdate is constructed. `map_chat_to_agent_update` (_types.py:2825-2837) now only sets `author_name` and leaves `agent_id` as None. Our orchestrator was reading `event.agent_id` exclusively, so every streaming update resolved to `agent_name=""`. That silently broke: * Loop detection (line 1080 `if agent_name == self.coordinator_name` never matched, so the streak counter never advanced and the 3x same-agent guard never fired). Production looped 100x on Chief Architect with zero detection. * Coordinator termination signal extraction (`finish=true`, `instruction=complete`, blocking instructions) - same gated block. 
* Manager-instruction parsing for the next participant. The [MEMORY] logs continued to show real agent names ("Chief Architect") because `SharedMemoryContextProvider` reads the name from the agent's own context, not from the workflow event - which is why the regression was invisible from logs alone. Fix: in `_handle_agent_update`, prefer `event.author_name` (which IS populated by 1.3.0's `map_chat_to_agent_update`) and fall back to `agent_id` only when author_name is missing, for backwards compat with older event shapes. Use `getattr` defensively so existing tests that construct SimpleNamespace events without author_name still work. Tests: * test_handle_agent_update_resolves_coordinator_via_author_name_when_agent_id_is_none - asserts the identity resolution itself * test_loop_detection_fires_on_3_consecutive_coordinator_selections_via_handle_agent_update - end-to-end through the production code path: 3 identical Coordinator selections via _handle_agent_update must trip _forced_termination * Both tests verified to FAIL without the fix (intentionally reverted to confirm) and PASS with the fix * Full suite: 831 passed (was 829, +2 regression tests) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 13 ++- ...test_groupchat_orchestrator_termination.py | 106 ++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 05034ddd..b92ddd88 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -727,7 +727,18 @@ async def _handle_agent_update( 3. Trigger callback with complete response 4. 
Handle tool calls separately from text streaming """ - agent_name = self._normalize_executor_id(event.agent_id or "") + # NOTE: In agent-framework 1.3.0, ``AgentResponseUpdate.agent_id`` is no + # longer populated by ``map_chat_to_agent_update`` (only ``author_name`` + # is set, from the agent's name). Reading ``event.agent_id`` alone + # silently yielded an empty string, which made every downstream identity + # check (loop detection, coordinator termination signal extraction, + # manager-instruction parsing) silently no-op. Prefer ``author_name`` + # and fall back to ``agent_id`` only for older shapes. Use ``getattr`` + # so older event types without ``author_name`` still work. + author_name = getattr(event, "author_name", None) + agent_name = author_name or self._normalize_executor_id( + getattr(event, "agent_id", None) or "" + ) await self._start_agent_if_needed(agent_name, stream_callback, callback) self._append_text_chunk(event) await self._process_tool_calls(event, agent_name, stream_callback) diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py index 36a0c00b..5d98a0d0 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py @@ -127,3 +127,109 @@ def _agent_reply(text: str = "ok"): assert orch._forced_termination_requested is False asyncio.run(_run()) + + +@dataclass +class _AgentResponseUpdateStub: + """Mimics the agent-framework 1.3.0 AgentResponseUpdate shape. + + Only the fields actually read by ``_handle_agent_update`` / + ``_normalize_executor_id`` matter. In 1.3.0 ``agent_id`` is no longer + populated by ``map_chat_to_agent_update`` - only ``author_name`` is set. + This stub reproduces that shape. 
+ """ + + author_name: str | None = None + agent_id: str | None = None + contents: list = None # type: ignore[assignment] + + def __post_init__(self): + if self.contents is None: + self.contents = [] + + +def test_handle_agent_update_resolves_coordinator_via_author_name_when_agent_id_is_none(): + """Regression guard for agent-framework 1.3.0. + + In 1.3.0 ``AgentResponseUpdate.agent_id`` is ``None`` because + ``map_chat_to_agent_update`` only sets ``author_name``. Reading + ``event.agent_id`` alone silently produced an empty string, so + ``agent_name == self.coordinator_name`` never matched and loop + detection / coordinator termination signal extraction silently + no-opped. The orchestrator must treat ``author_name`` as the + authoritative source. + """ + + async def _run(): + orch = _make_orchestrator() + + event = _AgentResponseUpdateStub( + author_name="Coordinator", + agent_id=None, + ) + + # No-op tool/text processing: we only care about agent identity. + await orch._handle_agent_update(event, stream_callback=None, callback=None) # type: ignore[arg-type] + + assert orch._last_executor_id == "Coordinator", ( + "author_name must be used to identify the agent; otherwise " + "_last_executor_id stays empty and downstream coordinator " + "checks silently fail." + ) + + asyncio.run(_run()) + + +def test_loop_detection_fires_on_3_consecutive_coordinator_selections_via_handle_agent_update(): + """End-to-end check: feeding 3 identical Coordinator selections through + ``_handle_agent_update`` (the path used in production) must trigger the + loop-detection forced termination. This is the path that was silently + broken in the 1.3.0 regression. 
+ """ + + async def _run(): + orch = _make_orchestrator() + orch._conversation = [] + + coordinator_json = json.dumps( + { + "selected_participant": "Chief Architect", + "instruction": "re-list", + "finish": False, + "final_message": "", + } + ) + + # Simulate three consecutive Coordinator turns, each emitting the + # same selection. Between each Coordinator turn we drive an update + # from a non-Coordinator agent so the orchestrator's "agent switch" + # logic completes the previous Coordinator response (which is what + # actually runs loop-detection at line 1080). + for _ in range(3): + # Coordinator emits its selection as a streaming chunk. + await orch._handle_agent_update( + _AgentResponseUpdateStub(author_name="Coordinator"), + stream_callback=None, + callback=None, + ) # type: ignore[arg-type] + orch._current_agent_response = [coordinator_json] + + # Then Chief Architect emits a chunk: the agent switch closes + # out the Coordinator response and runs loop detection. + await orch._handle_agent_update( + _AgentResponseUpdateStub(author_name="Chief Architect"), + stream_callback=None, + callback=None, + ) # type: ignore[arg-type] + orch._current_agent_response = ["ack"] + + # Closing the final Chief Architect response keeps state consistent. + await orch._complete_agent_response("Chief Architect", callback=None) + + assert orch._forced_termination_requested is True, ( + "Loop detection failed to fire after 3 identical Coordinator " + "selections via _handle_agent_update; agent identity resolution " + "is broken." + ) + + asyncio.run(_run()) From 42902ab23491133d4b6c5857b0e5fca13e372191 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 14:33:34 +0530 Subject: [PATCH 21/24] fix(groupchat): route WorkflowEvent payloads + enforce framework max_rounds for af 1.3.0 In agent-framework 1.3.0, `workflow.run(stream=True)` only yields `WorkflowEvent` instances. `AgentResponseUpdate` is wrapped inside `event.data` for `type=="output"` events. 
The two types are unrelated (verified by MRO), so the previous `isinstance(event, AgentResponseUpdate)` gate from the b260107 era was permanently dead in 1.3.0. As a result every orchestrator-side safety guard inside that branch silently no-opped: * per-agent loop detection * Coordinator finish=true detection * max_rounds enforcement * streaming callback dispatch * manager-instruction extraction That is why production runs hit the framework's own 100-iteration runner cap as `RuntimeError("Runner did not converge after 100 iterations")` even after the recent identity-resolution patch (which only touched code that never executed). Three coordinated fixes: 1. Replace the dead `isinstance(event, AgentResponseUpdate)` gate with `isinstance(event, WorkflowEvent) and event.type == "output"` and inspect `event.data` / `event.executor_id` to distinguish per-participant streaming chunks (executor_id matches one of self.agents and data is AgentResponseUpdate) from the framework orchestrator's final output (list[Message] or custom result object). 2. Add `executor_id` parameter to `_handle_agent_update` so identity resolves from the WorkflowEvent wrapper's executor_id (always populated from `AgentExecutor.id` = the agent's name) first, then falls back to `event.author_name`, then legacy `event.agent_id`. Matches the approach already used by Content Processing Solution. 3. Pass `max_rounds=self.max_rounds` and `intermediate_outputs=True` to `GroupChatBuilder`: - `max_rounds` gives the framework itself a clean termination ceiling so even if our orchestrator-side guards miss, the workflow halts cleanly instead of crashing at the runner's 100-iteration cap. - `intermediate_outputs=True` is required for each participant's `yield_output(AgentResponseUpdate)` call to surface as a workflow `output` event. Without this, only the orchestrator's final yield reaches our streaming loop and the per-agent guards above never run.
Tests: * Existing termination/loop-detection tests still pass (handler now has 3-tier identity resolution with backward-compat for `author_name`). * Added `test_handle_agent_update_prefers_executor_id_over_author_name` to lock in the new precedence. * Added `test_handle_agent_update_strips_executor_id_prefix` to cover the `groupchat_agent:Coordinator` framework prefix. * Full suite: 833 passed (was 831; +2 new tests). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 99 +++++++++++++------ ...test_groupchat_orchestrator_termination.py | 70 ++++++++++++- 2 files changed, 135 insertions(+), 34 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index b92ddd88..94079d93 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -543,9 +543,32 @@ async def run_stream( termination_type="hard_timeout", ) - if isinstance(event, AgentResponseUpdate): + # In agent-framework 1.3.0, ``workflow.run(stream=True)`` yields + # only ``WorkflowEvent`` instances; ``AgentResponseUpdate`` is + # wrapped inside ``WorkflowEvent.data`` for ``type=="output"`` + # events. The previous ``isinstance(event, AgentResponseUpdate)`` + # check from the b260107 era is permanently dead in 1.3.0 + # because the two types are unrelated. We now dispatch on + # ``WorkflowEvent.type`` and inspect ``event.data`` / + # ``event.executor_id`` to route per-participant streaming + # chunks vs the orchestrator's final output. + if not isinstance(event, WorkflowEvent) or event.type != "output": + continue + + data = event.data + src_executor = self._normalize_executor_id(event.executor_id or "") + + # Per-participant streaming chunk. 
Requires + # ``intermediate_outputs=True`` on the GroupChatBuilder so the + # underlying executors' ``yield_output(AgentResponseUpdate)`` + # calls surface as workflow events rather than being swallowed. + if ( + isinstance(data, AgentResponseUpdate) + and src_executor in self.agents + ): await self._handle_agent_update( - event, + data, + executor_id=event.executor_id, stream_callback=on_agent_response_stream, callback=on_agent_response, ) @@ -565,22 +588,23 @@ async def run_stream( # If the Coordinator requested finish=true, stop immediately. if self._termination_requested: break - elif event.type == "output": - event: WorkflowEvent - # Complete last agent's response before finishing - if self._last_executor_id and self._current_agent_response: - await self._complete_agent_response( - self._last_executor_id, on_agent_response - ) - # Extract final conversation from output - if isinstance(event.data, list): - conversation = event.data - self._conversation = conversation # Update instance variable - else: - # Handle custom result objects with conversation attribute - conversation = getattr(event.data, "conversation", []) - self._conversation = conversation # Update instance variable + continue + + # Final orchestrator output: complete any buffered agent + # response and capture the conversation. 
+ if self._last_executor_id and self._current_agent_response: + await self._complete_agent_response( + self._last_executor_id, on_agent_response + ) + + if isinstance(data, list): + conversation = data + self._conversation = conversation # Update instance variable + else: + # Handle custom result objects with conversation attribute + conversation = getattr(data, "conversation", []) + self._conversation = conversation # Update instance variable # Backfill tool usage from the final conversation (more reliable than streaming updates) # AgentResponseUpdate may stream text only; tool calls are represented as FunctionCallContent @@ -715,6 +739,7 @@ async def run_stream( async def _handle_agent_update( self, event: AgentResponseUpdate, + executor_id: str | None = None, stream_callback: AgentResponseStreamCallback | None = None, callback: AgentResponseCallback | None = None, ) -> None: @@ -726,19 +751,21 @@ async def _handle_agent_update( 2. On agent switch, complete previous agent's response 3. Trigger callback with complete response 4. Handle tool calls separately from text streaming + + Agent identity resolution priority: + 1. ``executor_id`` from the wrapping ``WorkflowEvent`` (always + populated by the workflow runner from ``AgentExecutor.id`` which + is the agent's name). This is the primary source in 1.3.0. + 2. ``event.author_name`` (set by 1.3.0's ``map_chat_to_agent_update``). + 3. ``event.agent_id`` (legacy; not populated in 1.3.0). """ - # NOTE: In agent-framework 1.3.0, ``AgentResponseUpdate.agent_id`` is no - # longer populated by ``map_chat_to_agent_update`` (only ``author_name`` - # is set, from the agent's name). Reading ``event.agent_id`` alone - # silently yielded an empty string, which made every downstream identity - # check (loop detection, coordinator termination signal extraction, - # manager-instruction parsing) silently no-op. Prefer ``author_name`` - # and fall back to ``agent_id`` only for older shapes. 
Use ``getattr`` - # so older event types without ``author_name`` still work. - author_name = getattr(event, "author_name", None) - agent_name = author_name or self._normalize_executor_id( - getattr(event, "agent_id", None) or "" - ) + if executor_id: + agent_name = self._normalize_executor_id(executor_id) + else: + author_name = getattr(event, "author_name", None) + agent_name = author_name or self._normalize_executor_id( + getattr(event, "agent_id", None) or "" + ) await self._start_agent_if_needed(agent_name, stream_callback, callback) self._append_text_chunk(event) await self._process_tool_calls(event, agent_name, stream_callback) @@ -1237,10 +1264,24 @@ async def _build_groupchat(self) -> Workflow: and name != self.get_result_generator_name() ] + # ``max_rounds`` is enforced at the framework level so the workflow + # halts cleanly even if our orchestrator-side guards miss an event + # shape. Without this, the framework's default behavior is "continue + # indefinitely" (see GroupChatBuilder docstring) until the workflow + # runner hits its own 100-iteration cap and raises + # ``RuntimeError("Runner did not converge after 100 iterations")``. + # + # ``intermediate_outputs=True`` surfaces each participant's + # ``yield_output(AgentResponseUpdate)`` call as a workflow ``output`` + # event. Without this, only the orchestrator's final yield reaches + # our streaming loop, which means per-agent loop detection, finish + # signal extraction, and streaming callbacks all silently no-op. 
return ( GroupChatBuilder( orchestrator_agent=coordinator, participants=participants, + max_rounds=self.max_rounds, + intermediate_outputs=True, ) .build() ) diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py index 5d98a0d0..73eb7e6b 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py @@ -227,9 +227,69 @@ async def _run(): await orch._complete_agent_response("Chief Architect", callback=None) assert orch._forced_termination_requested is True, ( - "Loop detection failed to fire after 3 identical Coordinator " - "selections via _handle_agent_update; agent identity resolution " - "is broken." - ) - + "Loop detection failed to fire after 3 identical Coordinator " + "selections via _handle_agent_update; agent identity resolution " + "is broken." + ) + + asyncio.run(_run()) + + +def test_handle_agent_update_prefers_executor_id_over_author_name(): + """In agent-framework 1.3.0, the workflow runner always wraps payloads in + a ``WorkflowEvent`` whose ``executor_id`` is the ``AgentExecutor.id`` + (= the agent's name). This is the most reliable identity source - more + reliable than ``author_name`` which may differ if the agent runtime + rewrites the chat author. The handler must prefer ``executor_id`` when + provided. + """ + + async def _run(): + orch = _make_orchestrator() + + # author_name disagrees with the framework executor_id on purpose. 
+ event = _AgentResponseUpdateStub( + author_name="SomethingElse", + agent_id=None, + ) + + await orch._handle_agent_update( + event, + executor_id="Coordinator", + stream_callback=None, + callback=None, + ) # type: ignore[arg-type] + + assert orch._last_executor_id == "Coordinator", ( + "executor_id from the WorkflowEvent wrapper must take precedence " + "over event.author_name; otherwise downstream coordinator checks " + "may resolve to the wrong agent." + ) + + asyncio.run(_run()) + + +def test_handle_agent_update_strips_executor_id_prefix(): + """``GroupChatBuilder`` may wrap executor ids with a + ``groupchat_agent:Coordinator`` prefix. ``_normalize_executor_id`` must + strip it so the agent name compares cleanly against ``coordinator_name``. + """ + + async def _run(): + orch = _make_orchestrator() + + event = _AgentResponseUpdateStub(author_name=None, agent_id=None) + + await orch._handle_agent_update( + event, + executor_id="groupchat_agent:Coordinator", + stream_callback=None, + callback=None, + ) # type: ignore[arg-type] + + assert orch._last_executor_id == "Coordinator", ( + "_normalize_executor_id must strip the framework prefix so " + "agent identity matches the configured coordinator_name." + ) + asyncio.run(_run()) From 7a0212fdaa6594dc8383327e95cf9661c7e200f5 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 15:36:22 +0530 Subject: [PATCH 22/24] fix(groupchat): detect participant loops via executor_completed events In agent-framework 1.3.0 the GroupChat orchestrator agent (Coordinator) is invoked directly inside the framework's internal _invoke_agent_helper (agent_framework_orchestrations/_group_chat.py:484) rather than through an AgentExecutor. The Coordinator therefore never surfaces as a workflow event, which makes our existing Coordinator-JSON-based loop detector in _complete_agent_response permanently dead in 1.3.0. 
Symptom in production: workflow loops with the Coordinator latched onto the same participant (e.g., Chief Architect repeatedly asked to produce an Evidence Pack that never satisfies the next reviewer). The loop runs until the framework's max_rounds ceiling fires (~17 min at default 100) instead of being caught early. Fix: * Track participant turn completions from WorkflowEvent.executor_completed, the one observable signal that does NOT depend on Coordinator visibility (participants ARE wrapped in AgentExecutor and so do emit these events). * Force-terminate (hard_loop) after 3 consecutive completions of the same participant. * Force-terminate (hard_timeout) when total participant completions reach max_rounds; independent of len(agent_responses) which only grows on agent switch and so can never reach max_rounds during a same-participant loop. * Flush per-participant streaming buffer on each executor_completed so back-to-back same-agent turns produce one AgentResponse per turn instead of accumulating across turns. * Move forced-termination break check to top of the streaming loop so any branch (timeout, participant loop, Coordinator finish=true) takes effect on the very next event rather than waiting for the next output event. Adds 3 regression tests covering the streak trigger, the alternation reset, and the round-budget enforcement. 836 tests pass (833 -> 836). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 163 +++++++++++++++++- ...test_groupchat_orchestrator_termination.py | 110 ++++++++++++ 2 files changed, 264 insertions(+), 9 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 94079d93..3185f6eb 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -313,6 +313,33 @@ def __init__( # Snapshot of progress_counter at the time we last saw _last_coordinator_selection. self._last_coordinator_selection_progress: int = 0 + # Per-participant turn tracking driven by ``WorkflowEvent.executor_completed``. + # + # In agent-framework 1.3.0 the GroupChat orchestrator agent (the + # Coordinator) is invoked directly inside the framework's internal + # ``_invoke_agent_helper`` (see + # ``agent_framework_orchestrations/_group_chat.py:484``). It is NOT + # wrapped in an ``AgentExecutor`` and therefore never surfaces as a + # workflow event - which makes the Coordinator-JSON-based loop + # detection in ``_complete_agent_response`` permanently dead in 1.3.0. + # + # The only observable "the conversation is moving" pulse we have is + # ``executor_completed`` events for the *participants* (which DO go + # through ``AgentExecutor``). We track: + # - the most recently completed participant, + # - the streak of consecutive completions of that participant, + # - the total number of participant turns, + # and use these for two safety nets in the streaming loop: + # * 3+ consecutive same-participant turns => hard_loop termination + # * total turns >= ``max_rounds`` => hard_timeout termination + # (independent of ``len(self.agent_responses)`` which only grows on + # agent switch and so cannot reach ``max_rounds`` during a same- + # participant loop). 
+ self._participant_completions_total: int = 0 + self._last_completed_participant: str | None = None + self._participant_completion_streak: int = 0 + self._participant_consecutive_loop_threshold: int = 3 + def _request_forced_termination( self, *, reason: str, termination_type: str ) -> None: @@ -543,6 +570,15 @@ async def run_stream( termination_type="hard_timeout", ) + # Honor any pending termination request at the *top* of each + # iteration so that branches which set the flags (timeout, + # participant loop detection, Coordinator finish=true) take + # effect immediately on the next event - rather than being + # gated on the next ``output`` event arriving (which during a + # slow loop can be many seconds away). + if self._forced_termination_requested or self._termination_requested: + break + # In agent-framework 1.3.0, ``workflow.run(stream=True)`` yields # only ``WorkflowEvent`` instances; ``AgentResponseUpdate`` is # wrapped inside ``WorkflowEvent.data`` for ``type=="output"`` @@ -552,7 +588,46 @@ async def run_stream( # ``WorkflowEvent.type`` and inspect ``event.data`` / # ``event.executor_id`` to route per-participant streaming # chunks vs the orchestrator's final output. - if not isinstance(event, WorkflowEvent) or event.type != "output": + if not isinstance(event, WorkflowEvent): + continue + + # Participant turn completion. Used for loop / max_rounds + # safety nets that work even when the Coordinator is + # invisible to the streaming loop (which it is in 1.3.0 - + # the Coordinator runs inside the framework's internal + # ``_invoke_agent_helper`` and never surfaces as an executor + # event). See ``_track_participant_completion`` for details. 
+ if event.type == "executor_completed": + src_executor = self._normalize_executor_id( + event.executor_id or "" + ) + if ( + src_executor in self.agents + and src_executor != self.coordinator_name + and src_executor != self.get_result_generator_name() + ): + # Flush this participant's streaming buffer into a + # discrete per-turn ``AgentResponse`` before we track + # the completion. Without this, when the framework's + # Coordinator picks the same participant back-to-back + # (the loop pattern we're trying to detect), + # ``_start_agent_if_needed`` sees no agent switch on + # the NEXT turn's chunks and the buffer would grow + # across turns - producing one merged response rather + # than one response per turn. + if ( + self._last_executor_id == src_executor + and self._current_agent_response + ): + await self._complete_agent_response( + src_executor, on_agent_response + ) + self._current_agent_response = [] + self._last_executor_id = None + self._track_participant_completion(src_executor) + continue + + if event.type != "output": continue data = event.data @@ -573,7 +648,12 @@ async def run_stream( callback=on_agent_response, ) - # Enforce max rounds as a safety guard. + # Secondary max_rounds safety net based on agent switches. + # The primary check lives in ``_track_participant_completion`` + # (driven by ``executor_completed`` events) and works even + # when the same agent runs back-to-back. This switch-based + # check is kept as defense-in-depth for sessions with + # normal alternation. if self.max_rounds and len(self.agent_responses) >= self.max_rounds: self._request_forced_termination( reason=( @@ -582,13 +662,9 @@ async def run_stream( termination_type="hard_timeout", ) - if self._forced_termination_requested: - break - - # If the Coordinator requested finish=true, stop immediately. 
- if self._termination_requested: - break - + # Termination flags are honored at the top of the next + # iteration so any branch can request termination + # uniformly without duplicating break logic here. continue # Final orchestrator output: complete any buffered agent @@ -777,6 +853,75 @@ def _normalize_executor_id(self, executor_id: str) -> str: """ return executor_id.split(":")[-1] + def _track_participant_completion(self, src_executor: str) -> None: + """Track a participant turn completion for loop / max_rounds detection. + + Called from the streaming loop on every ``WorkflowEvent.type == + "executor_completed"`` event whose ``executor_id`` matches one of our + registered non-Coordinator, non-ResultGenerator participants. + + Why this exists (agent-framework 1.3.0 design constraint): + The framework's ``GroupChatBuilder.orchestrator_agent`` (our + Coordinator) is invoked directly via ``self._agent.run(...)`` + inside ``agent_framework_orchestrations/_group_chat.py:484``. It + is NOT wrapped in an ``AgentExecutor`` and therefore never + surfaces as a workflow event. Our existing Coordinator-JSON-based + loop detector in ``_complete_agent_response`` (lines ~1118-1181) + is consequently permanently dead in 1.3.0. We need an independent + loop signal that does NOT rely on Coordinator visibility. + + Two safety nets enforced here: + + 1. Same-participant streak (``_participant_consecutive_loop_threshold``, + default 3): if the Coordinator keeps selecting the same participant + (e.g., the Chief Architect latched on producing an Evidence Pack + that never satisfies the next reviewer), 3+ consecutive completions + of the same participant force-terminate with ``hard_loop``. + + 2. Total round budget: each participant turn counts as one round. + Once total completions reach ``self.max_rounds`` the workflow + force-terminates with ``hard_timeout``. 
This is independent of + ``len(self.agent_responses)`` (which only grows on agent switch + via ``_start_agent_if_needed`` and therefore cannot reach + ``max_rounds`` during a same-participant loop). + """ + if src_executor == self._last_completed_participant: + self._participant_completion_streak += 1 + else: + self._last_completed_participant = src_executor + self._participant_completion_streak = 1 + self._participant_completions_total += 1 + + if ( + self._participant_completion_streak + >= self._participant_consecutive_loop_threshold + ): + self._request_forced_termination( + reason=( + f"Loop detected: participant '{src_executor}' completed " + f"{self._participant_completion_streak} consecutive turns " + "with no other participant in between (Coordinator is " + "stuck on the same selection; in agent-framework 1.3.0 " + "the Coordinator runs inside the framework and is " + "invisible to the streaming loop, so we infer this from " + "executor_completed events)" + ), + termination_type="hard_loop", + ) + return + + if ( + self.max_rounds + and self._participant_completions_total >= self.max_rounds + ): + self._request_forced_termination( + reason=( + f"Workflow exceeded max_rounds={self.max_rounds} " + "participant turns; terminating to avoid infinite loop" + ), + termination_type="hard_timeout", + ) + async def _start_agent_if_needed( self, agent_name: str, diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py index 73eb7e6b..2bd88d65 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py @@ -293,3 +293,113 @@ async def _run(): ) asyncio.run(_run()) + + +def test_participant_completion_streak_triggers_forced_termination(): + """In agent-framework 1.3.0 the GroupChat 
orchestrator agent (Coordinator) + is invoked directly inside the framework's ``_invoke_agent_helper`` and + is NOT wrapped in an ``AgentExecutor``, so it never surfaces as a + workflow event. The Coordinator-JSON loop detector in + ``_complete_agent_response`` is therefore permanently dead in 1.3.0. + + The only observable loop signal we have is consecutive + ``executor_completed`` events for the same participant. After + ``_participant_consecutive_loop_threshold`` (default 3) same-participant + completions, the orchestrator must force-terminate with ``hard_loop`` + so the workflow halts cleanly instead of running until the framework's + own max_rounds ceiling (which at default 100 is ~17 min). + """ + + async def _run(): + orch = _make_orchestrator() + # Register a participant so the tracker recognizes it. + orch.agents = {"Coordinator": object(), "Chief Architect": object()} + + for _ in range(3): + orch._track_participant_completion("Chief Architect") + + assert orch._forced_termination_requested is True, ( + "Three consecutive completions of the same participant must " + "trigger the participant-streak loop breaker; otherwise the " + "Chief-Architect-only loop observed in production (with the " + "Coordinator invisible to our streaming loop in 1.3.0) can " + "never be detected and the workflow runs until the framework's " + "own max_rounds ceiling fires." + ) + assert orch._forced_termination_type == "hard_loop" + assert "Chief Architect" in (orch._forced_termination_reason or "") + assert "3 consecutive" in (orch._forced_termination_reason or "") + + asyncio.run(_run()) + + +def test_participant_completion_streak_resets_on_different_participant(): + """If a different participant runs in between, the same-participant + streak counter resets. This prevents false-positive loop detection + when participants alternate normally. 
+ """ + + async def _run(): + orch = _make_orchestrator() + orch.agents = { + "Coordinator": object(), + "Chief Architect": object(), + "AKS Expert": object(), + } + + orch._track_participant_completion("Chief Architect") + orch._track_participant_completion("Chief Architect") + # A different participant runs -> streak resets. + orch._track_participant_completion("AKS Expert") + orch._track_participant_completion("Chief Architect") + orch._track_participant_completion("Chief Architect") # streak=2 only + + assert orch._forced_termination_requested is False, ( + "Alternating participants must not trigger the loop breaker; " + "the streak should reset whenever a different participant runs." + ) + assert orch._participant_completion_streak == 2 + assert orch._last_completed_participant == "Chief Architect" + + asyncio.run(_run()) + + +def test_participant_completions_total_enforces_max_rounds_under_alternation(): + """``max_rounds`` must be enforced from the per-participant total count + (which grows on EVERY completion) - not from ``len(agent_responses)`` + (which only grows on agent switch in ``_start_agent_if_needed`` and + therefore can never reach ``max_rounds`` during a same-agent loop). + + This test exercises the alternation case where the streak detector + never fires, ensuring the round-budget guard still halts the workflow. + """ + + async def _run(): + orch = GroupChatOrchestrator( + name="t", + process_id="p1", + participants={ + "Coordinator": object(), + "A": object(), + "B": object(), + }, + memory_client=None, + coordinator_name="Coordinator", + max_rounds=4, + result_output_format=None, + ) + + # Alternate A and B to keep the streak below threshold. + orch._track_participant_completion("A") + orch._track_participant_completion("B") + orch._track_participant_completion("A") + # Streak detector hasn't fired yet (max streak = 1 because of perfect + # alternation). The 4th turn must trip the max_rounds budget. 
+ assert orch._forced_termination_requested is False + orch._track_participant_completion("B") + + assert orch._forced_termination_requested is True + assert orch._forced_termination_type == "hard_timeout" + assert "max_rounds=4" in (orch._forced_termination_reason or "") + + asyncio.run(_run()) From 03387c45df1fd5f0cc6cac68c0e06ecf9d28556d Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 15:52:36 +0530 Subject: [PATCH 23/24] Revert "fix(groupchat): detect participant loops via executor_completed events" This reverts commit 7a0212fdaa6594dc8383327e95cf9661c7e200f5. --- .../agent_framework/groupchat_orchestrator.py | 163 +----------------- ...test_groupchat_orchestrator_termination.py | 110 ------------ 2 files changed, 9 insertions(+), 264 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 3185f6eb..94079d93 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -313,33 +313,6 @@ def __init__( # Snapshot of progress_counter at the time we last saw _last_coordinator_selection. self._last_coordinator_selection_progress: int = 0 - # Per-participant turn tracking driven by ``WorkflowEvent.executor_completed``. - # - # In agent-framework 1.3.0 the GroupChat orchestrator agent (the - # Coordinator) is invoked directly inside the framework's internal - # ``_invoke_agent_helper`` (see - # ``agent_framework_orchestrations/_group_chat.py:484``). It is NOT - # wrapped in an ``AgentExecutor`` and therefore never surfaces as a - # workflow event - which makes the Coordinator-JSON-based loop - # detection in ``_complete_agent_response`` permanently dead in 1.3.0. - # - # The only observable "the conversation is moving" pulse we have is - # ``executor_completed`` events for the *participants* (which DO go - # through ``AgentExecutor``). 
We track: - # - the most recently completed participant, - # - the streak of consecutive completions of that participant, - # - the total number of participant turns, - # and use these for two safety nets in the streaming loop: - # * 3+ consecutive same-participant turns => hard_loop termination - # * total turns >= ``max_rounds`` => hard_timeout termination - # (independent of ``len(self.agent_responses)`` which only grows on - # agent switch and so cannot reach ``max_rounds`` during a same- - # participant loop). - self._participant_completions_total: int = 0 - self._last_completed_participant: str | None = None - self._participant_completion_streak: int = 0 - self._participant_consecutive_loop_threshold: int = 3 - def _request_forced_termination( self, *, reason: str, termination_type: str ) -> None: @@ -570,15 +543,6 @@ async def run_stream( termination_type="hard_timeout", ) - # Honor any pending termination request at the *top* of each - # iteration so that branches which set the flags (timeout, - # participant loop detection, Coordinator finish=true) take - # effect immediately on the next event - rather than being - # gated on the next ``output`` event arriving (which during a - # slow loop can be many seconds away). - if self._forced_termination_requested or self._termination_requested: - break - # In agent-framework 1.3.0, ``workflow.run(stream=True)`` yields # only ``WorkflowEvent`` instances; ``AgentResponseUpdate`` is # wrapped inside ``WorkflowEvent.data`` for ``type=="output"`` @@ -588,46 +552,7 @@ async def run_stream( # ``WorkflowEvent.type`` and inspect ``event.data`` / # ``event.executor_id`` to route per-participant streaming # chunks vs the orchestrator's final output. - if not isinstance(event, WorkflowEvent): - continue - - # Participant turn completion. 
Used for loop / max_rounds - # safety nets that work even when the Coordinator is - # invisible to the streaming loop (which it is in 1.3.0 - - # the Coordinator runs inside the framework's internal - # ``_invoke_agent_helper`` and never surfaces as an executor - # event). See ``_track_participant_completion`` for details. - if event.type == "executor_completed": - src_executor = self._normalize_executor_id( - event.executor_id or "" - ) - if ( - src_executor in self.agents - and src_executor != self.coordinator_name - and src_executor != self.get_result_generator_name() - ): - # Flush this participant's streaming buffer into a - # discrete per-turn ``AgentResponse`` before we track - # the completion. Without this, when the framework's - # Coordinator picks the same participant back-to-back - # (the loop pattern we're trying to detect), - # ``_start_agent_if_needed`` sees no agent switch on - # the NEXT turn's chunks and the buffer would grow - # across turns - producing one merged response rather - # than one response per turn. - if ( - self._last_executor_id == src_executor - and self._current_agent_response - ): - await self._complete_agent_response( - src_executor, on_agent_response - ) - self._current_agent_response = [] - self._last_executor_id = None - self._track_participant_completion(src_executor) - continue - - if event.type != "output": + if not isinstance(event, WorkflowEvent) or event.type != "output": continue data = event.data @@ -648,12 +573,7 @@ async def run_stream( callback=on_agent_response, ) - # Secondary max_rounds safety net based on agent switches. - # The primary check lives in ``_track_participant_completion`` - # (driven by ``executor_completed`` events) and works even - # when the same agent runs back-to-back. This switch-based - # check is kept as defense-in-depth for sessions with - # normal alternation. + # Enforce max rounds as a safety guard. 
if self.max_rounds and len(self.agent_responses) >= self.max_rounds: self._request_forced_termination( reason=( @@ -662,9 +582,13 @@ async def run_stream( termination_type="hard_timeout", ) - # Termination flags are honored at the top of the next - # iteration so any branch can request termination - # uniformly without duplicating break logic here. + if self._forced_termination_requested: + break + + # If the Coordinator requested finish=true, stop immediately. + if self._termination_requested: + break + continue # Final orchestrator output: complete any buffered agent @@ -853,75 +777,6 @@ def _normalize_executor_id(self, executor_id: str) -> str: """ return executor_id.split(":")[-1] - def _track_participant_completion(self, src_executor: str) -> None: - """Track a participant turn completion for loop / max_rounds detection. - - Called from the streaming loop on every ``WorkflowEvent.type == - "executor_completed"`` event whose ``executor_id`` matches one of our - registered non-Coordinator, non-ResultGenerator participants. - - Why this exists (agent-framework 1.3.0 design constraint): - The framework's ``GroupChatBuilder.orchestrator_agent`` (our - Coordinator) is invoked directly via ``self._agent.run(...)`` - inside ``agent_framework_orchestrations/_group_chat.py:484``. It - is NOT wrapped in an ``AgentExecutor`` and therefore never - surfaces as a workflow event. Our existing Coordinator-JSON-based - loop detector in ``_complete_agent_response`` (lines ~1118-1181) - is consequently permanently dead in 1.3.0. We need an independent - loop signal that does NOT rely on Coordinator visibility. - - Two safety nets enforced here: - - 1. 
Same-participant streak (``_participant_consecutive_loop_threshold``, - default 3): if the Coordinator keeps selecting the same participant - (e.g., the Chief Architect latched on producing an Evidence Pack - that never satisfies the next reviewer), 3+ consecutive completions - of the same participant force-terminate with ``hard_loop``. - - 2. Total round budget: each participant turn counts as one round. - Once total completions reach ``self.max_rounds`` the workflow - force-terminates with ``hard_timeout``. This is independent of - ``len(self.agent_responses)`` (which only grows on agent switch - via ``_start_agent_if_needed`` and therefore cannot reach - ``max_rounds`` during a same-participant loop). - """ - if src_executor == self._last_completed_participant: - self._participant_completion_streak += 1 - else: - self._last_completed_participant = src_executor - self._participant_completion_streak = 1 - self._participant_completions_total += 1 - - if ( - self._participant_completion_streak - >= self._participant_consecutive_loop_threshold - ): - self._request_forced_termination( - reason=( - f"Loop detected: participant '{src_executor}' completed " - f"{self._participant_completion_streak} consecutive turns " - "with no other participant in between (Coordinator is " - "stuck on the same selection; in agent-framework 1.3.0 " - "the Coordinator runs inside the framework and is " - "invisible to the streaming loop, so we infer this from " - "executor_completed events)" - ), - termination_type="hard_loop", - ) - return - - if ( - self.max_rounds - and self._participant_completions_total >= self.max_rounds - ): - self._request_forced_termination( - reason=( - f"Workflow exceeded max_rounds={self.max_rounds} " - "participant turns; terminating to avoid infinite loop" - ), - termination_type="hard_timeout", - ) - async def _start_agent_if_needed( self, agent_name: str, diff --git 
a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py index 2bd88d65..73eb7e6b 100644 --- a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py +++ b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py @@ -293,113 +293,3 @@ async def _run(): ) asyncio.run(_run()) - - -def test_participant_completion_streak_triggers_forced_termination(): - """In agent-framework 1.3.0 the GroupChat orchestrator agent (Coordinator) - is invoked directly inside the framework's ``_invoke_agent_helper`` and - is NOT wrapped in an ``AgentExecutor``, so it never surfaces as a - workflow event. The Coordinator-JSON loop detector in - ``_complete_agent_response`` is therefore permanently dead in 1.3.0. - - The only observable loop signal we have is consecutive - ``executor_completed`` events for the same participant. After - ``_participant_consecutive_loop_threshold`` (default 3) same-participant - completions, the orchestrator must force-terminate with ``hard_loop`` - so the workflow halts cleanly instead of running until the framework's - own max_rounds ceiling (which at default 100 is ~17 min). - """ - - async def _run(): - orch = _make_orchestrator() - # Register a participant so the tracker recognizes it. - orch.agents = {"Coordinator": object(), "Chief Architect": object()} - - for _ in range(3): - orch._track_participant_completion("Chief Architect") - - assert orch._forced_termination_requested is True, ( - "Three consecutive completions of the same participant must " - "trigger the participant-streak loop breaker; otherwise the " - "Chief-Architect-only loop observed in production (with the " - "Coordinator invisible to our streaming loop in 1.3.0) can " - "never be detected and the workflow runs until the framework's " - "own max_rounds ceiling fires." 
- ) - assert orch._forced_termination_type == "hard_loop" - assert "Chief Architect" in (orch._forced_termination_reason or "") - assert "3 consecutive" in (orch._forced_termination_reason or "") - - asyncio.run(_run()) - - -def test_participant_completion_streak_resets_on_different_participant(): - """If a different participant runs in between, the same-participant - streak counter resets. This prevents false-positive loop detection - when participants alternate normally. - """ - - async def _run(): - orch = _make_orchestrator() - orch.agents = { - "Coordinator": object(), - "Chief Architect": object(), - "AKS Expert": object(), - } - - orch._track_participant_completion("Chief Architect") - orch._track_participant_completion("Chief Architect") - # A different participant runs -> streak resets. - orch._track_participant_completion("AKS Expert") - orch._track_participant_completion("Chief Architect") - orch._track_participant_completion("Chief Architect") # streak=2 only - - assert orch._forced_termination_requested is False, ( - "Alternating participants must not trigger the loop breaker; " - "the streak should reset whenever a different participant runs." - ) - assert orch._participant_completion_streak == 2 - assert orch._last_completed_participant == "Chief Architect" - - asyncio.run(_run()) - - -def test_participant_completions_total_enforces_max_rounds_under_alternation(): - """``max_rounds`` must be enforced from the per-participant total count - (which grows on EVERY completion) - not from ``len(agent_responses)`` - (which only grows on agent switch in ``_start_agent_if_needed`` and - therefore can never reach ``max_rounds`` during a same-agent loop). - - This test exercises the alternation case where the streak detector - never fires, ensuring the round-budget guard still halts the workflow. 
- """ - - async def _run(): - orch = GroupChatOrchestrator( - name="t", - process_id="p1", - participants={ - "Coordinator": object(), - "A": object(), - "B": object(), - }, - memory_client=None, - coordinator_name="Coordinator", - max_rounds=4, - result_output_format=None, - ) - - # Alternate A and B to keep the streak below threshold. - orch._track_participant_completion("A") - orch._track_participant_completion("B") - orch._track_participant_completion("A") - # Streak detector hasn't fired yet (max streak = 1 because of perfect - # alternation). The 4th turn must trip the max_rounds budget. - assert orch._forced_termination_requested is False - orch._track_participant_completion("B") - - assert orch._forced_termination_requested is True - assert orch._forced_termination_type == "hard_timeout" - assert "max_rounds=4" in (orch._forced_termination_reason or "") - - asyncio.run(_run()) From 14d52f26638030c90ef2a9640dd7958867a9840e Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Sat, 13 Jun 2026 16:09:27 +0530 Subject: [PATCH 24/24] chore(logging): suppress harmless empty-message-cache warning and drop dead code * Add a narrow logging.Filter on agent_framework._workflows._agent_executor that drops only the 'Running agent with empty message cache' message. This warning fires by design in GroupChat orchestration when the orchestrator routes back to the same speaker (broadcast cache is empty because _broadcast_messages_to_participants excludes the source executor). The framework's parent client prepends system instructions before the LLM call, so the API request still has content. Other warnings/errors from the same logger remain visible. * Remove three lines of commented-out duplicate callback invocation in groupchat_orchestrator._complete_agent_response. The live callback handler is in the block directly above; the commented block was refactor debris. No behavioural change. All 833 tests pass. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agent_framework/groupchat_orchestrator.py | 4 --- src/processor/src/utils/logging_utils.py | 30 +++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py index 94079d93..90a0f914 100644 --- a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py +++ b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py @@ -1250,10 +1250,6 @@ async def _complete_agent_response( "on_agent_response callback failed (agent=%s)", agent_name ) - # # Invoke callback - # if callback: - # await callback(response) - async def _build_groupchat(self) -> Workflow: """Build the GroupChat Orchestrator workflow""" coordinator = self.agents[self.coordinator_name] diff --git a/src/processor/src/utils/logging_utils.py b/src/processor/src/utils/logging_utils.py index 29da2226..568ed950 100644 --- a/src/processor/src/utils/logging_utils.py +++ b/src/processor/src/utils/logging_utils.py @@ -22,6 +22,29 @@ from azure.core.exceptions import HttpResponseError +class _EmptyMessageCacheFilter(logging.Filter): + """Suppress the harmless ``empty message cache`` warning emitted by + ``agent_framework._workflows._agent_executor``. + + This warning fires by design in GroupChat orchestration when the orchestrator + routes back to the same speaker (its broadcast cache is empty because + ``_broadcast_messages_to_participants`` excludes the source executor). The + framework's parent client prepends the agent's system instructions before + calling the LLM, so the API call still has content. The warning is pure noise. + + The filter is intentionally narrow: it matches only the exact message and + leaves every other warning/error from the same logger visible. 
+ """ + + _MARKER = "Running agent with empty message cache" + + def filter(self, record: logging.LogRecord) -> bool: # noqa: D401 + try: + return self._MARKER not in record.getMessage() + except Exception: + return True + + def configure_application_logging(debug_mode: bool = False): """ Comprehensive logging configuration with third-party suppression. @@ -120,6 +143,13 @@ def configure_application_logging(debug_mode: bool = False): for logger_name in always_warning_loggers: logging.getLogger(logger_name).setLevel(logging.WARNING) + # Suppress only the harmless "Running agent with empty message cache" warning + # emitted by agent_framework's GroupChat orchestration. Real warnings/errors + # from the same logger are still surfaced. + _executor_logger = logging.getLogger("agent_framework._workflows._agent_executor") + if not any(isinstance(f, _EmptyMessageCacheFilter) for f in _executor_logger.filters): + _executor_logger.addFilter(_EmptyMessageCacheFilter()) + # Set environment variables to suppress verbose output at the source os.environ.setdefault("HTTPX_LOG_LEVEL", "WARNING") os.environ.setdefault("AZURE_CORE_ENABLE_HTTP_LOGGER", "false")