Fix loop detection: key on agent name only, not (agent, instruction)

Prachig-Microsoft · Copilot · Prachig-Microsoft · commit 76e9c17c86c1 · 2026-06-13T00:05:45.000+05:30
Production run after the previous progress-counter fix (0da531f) STILL showed Chief Architect being picked 6+ consecutive times. Root cause: the loop detection key was (agent, instruction_text). The LLM-driven Coordinator varies its instruction on every pick ('list source blobs', 'read xyz.yaml', 'save analysis_result.md') while latching onto the same agent — so every selection_key was unique, the streak reset to 1 on every pick, and the 3-strike threshold was never reached. Change: track only the agent name (lower-cased). The progress counter (now correct after 0da531f) already encodes 'no DIFFERENT agent ran in between', so 3 consecutive picks of the same agent with no other-agent progress is a strong, low-false-positive loop signal. Adds a regression test that replays the production sequence (same agent, three different instruction strings) and verifies forced termination fires. The earlier tests for exact-match repeats and for B-resets-the-streak continue to pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py
@@ -295,9 +295,16 @@ def __init__(
         self._forced_termination_reason: str | None = None
         self._forced_termination_type: str | None = None
 
-        # Loop detection for Coordinator selections (participant + instruction)
-        self._last_coordinator_selection: tuple[str, str] | None = None
+        # Loop detection for Coordinator selections.
+        # We track the *agent the Coordinator most recently picked* (lower-cased name)
+        # rather than (agent, instruction) tuples, because in practice the LLM-driven
+        # Coordinator varies the instruction text while looping on the same agent.
+        # A streak counts how many consecutive Coordinator picks landed on the same
+        # agent without any *other* agent running in between (see _progress_counter
+        # bookkeeping in _complete_agent_response).
+        self._last_coordinator_selection: str | None = None
         self._coordinator_selection_streak: int = 0
+        # Diagnostic history of recent (agent, instruction) selections.
         self._recent_coordinator_selections: deque[tuple[str, str]] = deque(maxlen=10)
 
         # Progress counter used to avoid false-positive loop detection.
@@ -1029,14 +1036,10 @@ async def _complete_agent_response(
         # selection. So we only increment when the completing agent is not the one the
         # Coordinator is currently latching onto.
         if agent_name != self.coordinator_name:
-            last_selected = (
-                self._last_coordinator_selection[0]
-                if self._last_coordinator_selection
-                else None
-            )
+            last_selected = self._last_coordinator_selection
             if (
                 last_selected is None
-                or agent_name.lower() != last_selected.lower()
+                or agent_name.lower() != last_selected
             ):
                 self._progress_counter += 1
 
@@ -1058,17 +1061,27 @@ async def _complete_agent_response(
                 # measures from Coordinator selection -> response completion.
                 selected = getattr(manager_response, "selected_participant", None)
 
-                # Loop detection: same selection+instruction repeated.
+                # Loop detection: same agent picked repeatedly with no other agent
+                # making progress in between. We deliberately key on the agent name
+                # alone (not on the instruction text) because the LLM-driven
+                # Coordinator often varies its instruction text while still looping
+                # on the same agent ("re-list", "read xyz.yaml", "save analysis_result.md"
+                # all sent to the same Chief Architect over and over). The
+                # _progress_counter (incremented in _complete_agent_response only when
+                # a DIFFERENT agent runs) is what tells us whether anything else
+                # actually happened in between.
                 if (
                     isinstance(selected, str)
                     and selected
                     and selected.lower() != "none"
                 ):
-                    selection_key = (selected, str(manager_instruction or ""))
-                    self._recent_coordinator_selections.append(selection_key)
-                    if selection_key == self._last_coordinator_selection:
-                        # If any other agent responded since the last identical selection,
-                        # treat that as progress and reset the streak.
+                    selected_key = selected.lower()
+                    self._recent_coordinator_selections.append(
+                        (selected, str(manager_instruction or ""))
+                    )
+                    if selected_key == self._last_coordinator_selection:
+                        # Same agent again. If any other agent ran since the last
+                        # identical pick, treat that as progress and reset the streak.
                         if (
                             self._progress_counter
                             != self._last_coordinator_selection_progress
@@ -1080,17 +1093,20 @@ async def _complete_agent_response(
                         else:
                             self._coordinator_selection_streak += 1
                     else:
-                        self._last_coordinator_selection = selection_key
+                        self._last_coordinator_selection = selected_key
                         self._coordinator_selection_streak = 1
                         self._last_coordinator_selection_progress = (
                             self._progress_counter
                         )
 
-                    # If the Coordinator repeats the exact same ask 3 times, break.
+                    # If the Coordinator picks the same agent 3 times in a row
+                    # without any other agent running in between, break out.
                     if self._coordinator_selection_streak >= 3:
                         self._request_forced_termination(
                             reason=(
-                                f"Loop detected: Coordinator repeated the same selection to '{selected}' {self._coordinator_selection_streak} times with no progress"
+                                f"Loop detected: Coordinator selected '{selected}' "
+                                f"{self._coordinator_selection_streak} consecutive "
+                                f"times with no other agent making progress in between"
                             ),
                             termination_type="hard_timeout",
                         )
diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py b/src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py
@@ -786,6 +786,53 @@ def _agent_runs(name: str, text: str = "ok"):
         # before B reset it, one streak of 2 after). Loop NOT detected.
         assert orch._forced_termination_requested is False
 
+    def test_loop_breaker_triggered_when_same_agent_picked_with_varying_instructions(
+        self,
+    ):
+        """Regression for production: the LLM-driven Coordinator was looping on
+        Chief Architect but varying its instruction text on every pick
+        ('re-list', 'read xyz', 'save analysis_result.md'). The loop detector
+        must key on the AGENT NAME only — not on (agent, instruction) — or the
+        streak resets on every pick and the loop is never caught.
+        """
+        orch = _make_orch()
+        orch._conversation = []
+
+        def _select(participant: str, instruction: str = "do"):
+            orch._current_agent_response = [
+                json.dumps(
+                    {
+                        "selected_participant": participant,
+                        "instruction": instruction,
+                        "finish": False,
+                        "final_message": "",
+                    }
+                )
+            ]
+            orch._current_agent_start_time = datetime.now()
+
+        def _agent_runs(name: str, text: str = "ok"):
+            orch._current_agent_response = [text]
+            orch._current_agent_start_time = datetime.now()
+
+        # Each Coordinator pick targets the same agent but with a DIFFERENT
+        # instruction. With the old (agent, instruction) tuple key this never
+        # tripped the breaker.
+        _select("Chief Architect", instruction="list source blobs")
+        _run(orch._complete_agent_response("Coordinator", None))
+        _agent_runs("Chief Architect")
+        _run(orch._complete_agent_response("Chief Architect", None))
+
+        _select("Chief Architect", instruction="read source files")
+        _run(orch._complete_agent_response("Coordinator", None))
+        _agent_runs("Chief Architect")
+        _run(orch._complete_agent_response("Chief Architect", None))
+
+        _select("Chief Architect", instruction="save analysis_result.md")
+        _run(orch._complete_agent_response("Coordinator", None))
+
+        assert orch._forced_termination_requested is True
+
 
 # -----------------------------------------------------------------------------
 # _build_groupchat