fix(groupchat): resolve agent identity via author_name for af 1.3.0

Prachig-Microsoft · Copilot · Prachig-Microsoft · commit f74629ba53c8 · 2026-06-13T14:04:09.000+05:30
Root cause of the "Runner did not converge after 100 iterations"
production failure (and the Chief-Architect-only loop that preceded it):
agent-framework 1.3.0 changed how AgentResponseUpdate is constructed.
`map_chat_to_agent_update` (_types.py:2825-2837) now only sets
`author_name` and leaves `agent_id` as None.

Our orchestrator was reading `event.agent_id` exclusively, so every
streaming update resolved to `agent_name=""`. That silently broke:

  * Loop detection (line 1080 `if agent_name == self.coordinator_name`
    never matched, so the streak counter never advanced and the 3x
    same-agent guard never fired). Production looped 100x on Chief
    Architect with zero detection.
  * Coordinator termination signal extraction (`finish=true`,
    `instruction=complete`, blocking instructions) - same gated block.
  * Manager-instruction parsing for the next participant.

The [MEMORY] logs continued to show real agent names ("Chief Architect")
because `SharedMemoryContextProvider` reads the name from the agent's
own context, not from the workflow event - which is why the regression
was invisible from logs alone.

Fix: in `_handle_agent_update`, prefer `event.author_name` (which IS
populated by 1.3.0's `map_chat_to_agent_update`) and fall back to
`agent_id` only when author_name is missing, for backwards compat with
older event shapes. Use `getattr` defensively so existing tests that
construct SimpleNamespace events without author_name still work.

Tests:

* test_handle_agent_update_resolves_coordinator_via_author_name_when_agent_id_is_none
  - asserts the identity resolution itself
* test_loop_detection_fires_on_3_consecutive_coordinator_selections_via_handle_agent_update
  - end-to-end through the production code path: 3 identical Coordinator
    selections via _handle_agent_update must trip _forced_termination
* Both tests verified to FAIL without the fix (intentionally reverted to
  confirm) and PASS with the fix
* Full suite: 831 passed (was 829, +2 regression tests)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py
@@ -727,7 +727,18 @@ async def _handle_agent_update(
         3. Trigger callback with complete response
         4. Handle tool calls separately from text streaming
         """
-        agent_name = self._normalize_executor_id(event.agent_id or "")
+        # NOTE: In agent-framework 1.3.0, ``AgentResponseUpdate.agent_id`` is no
+        # longer populated by ``map_chat_to_agent_update`` (only ``author_name``
+        # is set, from the agent's name). Reading ``event.agent_id`` alone
+        # silently yielded an empty string, which made every downstream identity
+        # check (loop detection, coordinator termination signal extraction,
+        # manager-instruction parsing) silently no-op. Prefer ``author_name``
+        # and fall back to ``agent_id`` only for older shapes. Use ``getattr``
+        # so older event types without ``author_name`` still work.
+        author_name = getattr(event, "author_name", None)
+        agent_name = author_name or self._normalize_executor_id(
+            getattr(event, "agent_id", None) or ""
+        )
         await self._start_agent_if_needed(agent_name, stream_callback, callback)
         self._append_text_chunk(event)
         await self._process_tool_calls(event, agent_name, stream_callback)
diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_termination.py
@@ -127,3 +127,109 @@ def _agent_reply(text: str = "ok"):
         assert orch._forced_termination_requested is False
 
     asyncio.run(_run())
+
+
+@dataclass
+class _AgentResponseUpdateStub:
+    """Mimics the agent-framework 1.3.0 AgentResponseUpdate shape.
+
+    Only the fields actually read by ``_handle_agent_update`` /
+    ``_normalize_executor_id`` matter. In 1.3.0 ``agent_id`` is no longer
+    populated by ``map_chat_to_agent_update`` - only ``author_name`` is set.
+    This stub reproduces that shape.
+    """
+
+    author_name: str | None = None
+    agent_id: str | None = None
+    contents: list = None  # type: ignore[assignment]
+
+    def __post_init__(self):
+        if self.contents is None:
+            self.contents = []
+
+
+def test_handle_agent_update_resolves_coordinator_via_author_name_when_agent_id_is_none():
+    """Regression guard for agent-framework 1.3.0.
+
+    In 1.3.0 ``AgentResponseUpdate.agent_id`` is ``None`` because
+    ``map_chat_to_agent_update`` only sets ``author_name``. Reading
+    ``event.agent_id`` alone silently produced an empty string, so
+    ``agent_name == self.coordinator_name`` never matched and loop
+    detection / coordinator termination signal extraction silently
+    no-opped. The orchestrator must treat ``author_name`` as the
+    authoritative source.
+    """
+
+    async def _run():
+        orch = _make_orchestrator()
+
+        event = _AgentResponseUpdateStub(
+            author_name="Coordinator",
+            agent_id=None,
+        )
+
+        # No-op tool/text processing: we only care about agent identity.
+        await orch._handle_agent_update(event, stream_callback=None, callback=None)  # type: ignore[arg-type]
+
+        assert orch._last_executor_id == "Coordinator", (
+            "author_name must be used to identify the agent; otherwise "
+            "_last_executor_id stays empty and downstream coordinator "
+            "checks silently fail."
+        )
+
+    asyncio.run(_run())
+
+
+def test_loop_detection_fires_on_3_consecutive_coordinator_selections_via_handle_agent_update():
+    """End-to-end check: feeding 3 identical Coordinator selections through
+    ``_handle_agent_update`` (the path used in production) must trigger the
+    loop-detection forced termination. This is the path that was silently
+    broken in the 1.3.0 regression.
+    """
+
+    async def _run():
+        orch = _make_orchestrator()
+        orch._conversation = []
+
+        coordinator_json = json.dumps(
+            {
+                "selected_participant": "Chief Architect",
+                "instruction": "re-list",
+                "finish": False,
+                "final_message": "",
+            }
+        )
+
+        # Simulate three consecutive Coordinator turns, each emitting the
+        # same selection. Between each Coordinator turn we drive an update
+        # from a non-Coordinator agent so the orchestrator's "agent switch"
+        # logic completes the previous Coordinator response (which is what
+        # actually runs the ``agent_name == self.coordinator_name`` loop-detection check).
+        for _ in range(3):
+            # Coordinator emits its selection as a streaming chunk.
+            await orch._handle_agent_update(
+                _AgentResponseUpdateStub(author_name="Coordinator"),
+                stream_callback=None,
+                callback=None,
+            )  # type: ignore[arg-type]
+            orch._current_agent_response = [coordinator_json]
+
+            # Then Chief Architect emits a chunk: the agent switch closes
+            # out the Coordinator response and runs loop detection.
+            await orch._handle_agent_update(
+                _AgentResponseUpdateStub(author_name="Chief Architect"),
+                stream_callback=None,
+                callback=None,
+            )  # type: ignore[arg-type]
+            orch._current_agent_response = ["ack"]
+
+        # Closing the final Chief Architect response keeps state consistent.
+        await orch._complete_agent_response("Chief Architect", callback=None)
+
+        assert orch._forced_termination_requested is True, (
+            "Loop detection failed to fire after 3 identical Coordinator "
+            "selections via _handle_agent_update; agent identity resolution "
+            "is broken."
+        )
+
+    asyncio.run(_run())