Skip to content

Commit 76e9c17

Browse files
Fix loop detection: key on agent name only, not (agent, instruction)
Production run after the previous progress-counter fix (0da531f) STILL showed Chief Architect picked 6+ consecutive times. Root cause: the loop detection key was (agent, instruction_text). The LLM-driven Coordinator varies its instruction on every pick ('list source blobs', 'read xyz.yaml', 'save analysis_result.md') while latching onto the same agent — so every selection_key was unique, the streak reset to 1 on every pick, and the 3-strike threshold was never reached. Change: track only the agent name (lower-cased). The progress counter (now correct after 0da531f) already encodes 'no DIFFERENT agent ran in between', so 3 consecutive picks of the same agent with no other-agent progress is a strong, low-false-positive loop signal. Adds a regression test that replays the production sequence (same agent, three different instruction strings) and verifies forced termination fires. The earlier tests for exact-match repeats and for B-resets-the-streak continue to pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9a85f00 commit 76e9c17

2 files changed

Lines changed: 80 additions & 17 deletions

File tree

src/processor/src/libs/agent_framework/groupchat_orchestrator.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,16 @@ def __init__(
295295
self._forced_termination_reason: str | None = None
296296
self._forced_termination_type: str | None = None
297297

298-
# Loop detection for Coordinator selections (participant + instruction)
299-
self._last_coordinator_selection: tuple[str, str] | None = None
298+
# Loop detection for Coordinator selections.
299+
# We track the *agent the Coordinator most recently picked* (lower-cased name)
300+
# rather than (agent, instruction) tuples, because in practice the LLM-driven
301+
# Coordinator varies the instruction text while looping on the same agent.
302+
# A streak counts how many consecutive Coordinator picks landed on the same
303+
# agent without any *other* agent running in between (see _progress_counter
304+
# bookkeeping in _complete_agent_response).
305+
self._last_coordinator_selection: str | None = None
300306
self._coordinator_selection_streak: int = 0
307+
# Diagnostic history of recent (agent, instruction) selections.
301308
self._recent_coordinator_selections: deque[tuple[str, str]] = deque(maxlen=10)
302309

303310
# Progress counter used to avoid false-positive loop detection.
@@ -1029,14 +1036,10 @@ async def _complete_agent_response(
10291036
# selection. So we only increment when the completing agent is not the one the
10301037
# Coordinator is currently latching onto.
10311038
if agent_name != self.coordinator_name:
1032-
last_selected = (
1033-
self._last_coordinator_selection[0]
1034-
if self._last_coordinator_selection
1035-
else None
1036-
)
1039+
last_selected = self._last_coordinator_selection
10371040
if (
10381041
last_selected is None
1039-
or agent_name.lower() != last_selected.lower()
1042+
or agent_name.lower() != last_selected
10401043
):
10411044
self._progress_counter += 1
10421045

@@ -1058,17 +1061,27 @@ async def _complete_agent_response(
10581061
# measures from Coordinator selection -> response completion.
10591062
selected = getattr(manager_response, "selected_participant", None)
10601063

1061-
# Loop detection: same selection+instruction repeated.
1064+
# Loop detection: same agent picked repeatedly with no other agent
1065+
# making progress in between. We deliberately key on the agent name
1066+
# alone (not on the instruction text) because the LLM-driven
1067+
# Coordinator often varies its instruction text while still looping
1068+
# on the same agent ("re-list", "read xyz.yaml", "save analysis_result.md"
1069+
# all sent to the same Chief Architect over and over). The
1070+
# _progress_counter (incremented in _complete_agent_response only when
1071+
# a DIFFERENT agent runs) is what tells us whether anything else
1072+
# actually happened in between.
10621073
if (
10631074
isinstance(selected, str)
10641075
and selected
10651076
and selected.lower() != "none"
10661077
):
1067-
selection_key = (selected, str(manager_instruction or ""))
1068-
self._recent_coordinator_selections.append(selection_key)
1069-
if selection_key == self._last_coordinator_selection:
1070-
# If any other agent responded since the last identical selection,
1071-
# treat that as progress and reset the streak.
1078+
selected_key = selected.lower()
1079+
self._recent_coordinator_selections.append(
1080+
(selected, str(manager_instruction or ""))
1081+
)
1082+
if selected_key == self._last_coordinator_selection:
1083+
# Same agent again. If any other agent ran since the last
1084+
# identical pick, treat that as progress and reset the streak.
10721085
if (
10731086
self._progress_counter
10741087
!= self._last_coordinator_selection_progress
@@ -1080,17 +1093,20 @@ async def _complete_agent_response(
10801093
else:
10811094
self._coordinator_selection_streak += 1
10821095
else:
1083-
self._last_coordinator_selection = selection_key
1096+
self._last_coordinator_selection = selected_key
10841097
self._coordinator_selection_streak = 1
10851098
self._last_coordinator_selection_progress = (
10861099
self._progress_counter
10871100
)
10881101

1089-
# If the Coordinator repeats the exact same ask 3 times, break.
1102+
# If the Coordinator picks the same agent 3 times in a row
1103+
# without any other agent running in between, break out.
10901104
if self._coordinator_selection_streak >= 3:
10911105
self._request_forced_termination(
10921106
reason=(
1093-
f"Loop detected: Coordinator repeated the same selection to '{selected}' {self._coordinator_selection_streak} times with no progress"
1107+
f"Loop detected: Coordinator selected '{selected}' "
1108+
f"{self._coordinator_selection_streak} consecutive "
1109+
f"times with no other agent making progress in between"
10941110
),
10951111
termination_type="hard_timeout",
10961112
)

src/processor/src/tests/unit/libs/agent_framework/test_groupchat_orchestrator_internals.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,53 @@ def _agent_runs(name: str, text: str = "ok"):
786786
# before B reset it, one streak of 2 after). Loop NOT detected.
787787
assert orch._forced_termination_requested is False
788788

789+
def test_loop_breaker_triggered_when_same_agent_picked_with_varying_instructions(
790+
self,
791+
):
792+
"""Regression for production: the LLM-driven Coordinator was looping on
793+
Chief Architect but varying its instruction text on every pick
794+
('re-list', 'read xyz', 'save analysis_result.md'). The loop detector
795+
must key on the AGENT NAME only — not on (agent, instruction) — or the
796+
streak resets on every pick and the loop is never caught.
797+
"""
798+
orch = _make_orch()
799+
orch._conversation = []
800+
801+
def _select(participant: str, instruction: str = "do"):
802+
orch._current_agent_response = [
803+
json.dumps(
804+
{
805+
"selected_participant": participant,
806+
"instruction": instruction,
807+
"finish": False,
808+
"final_message": "",
809+
}
810+
)
811+
]
812+
orch._current_agent_start_time = datetime.now()
813+
814+
def _agent_runs(name: str, text: str = "ok"):
815+
orch._current_agent_response = [text]
816+
orch._current_agent_start_time = datetime.now()
817+
818+
# Each Coordinator pick targets the same agent but with a DIFFERENT
819+
# instruction. With the old (agent, instruction) tuple key this never
820+
# tripped the breaker.
821+
_select("Chief Architect", instruction="list source blobs")
822+
_run(orch._complete_agent_response("Coordinator", None))
823+
_agent_runs("Chief Architect")
824+
_run(orch._complete_agent_response("Chief Architect", None))
825+
826+
_select("Chief Architect", instruction="read source files")
827+
_run(orch._complete_agent_response("Coordinator", None))
828+
_agent_runs("Chief Architect")
829+
_run(orch._complete_agent_response("Chief Architect", None))
830+
831+
_select("Chief Architect", instruction="save analysis_result.md")
832+
_run(orch._complete_agent_response("Coordinator", None))
833+
834+
assert orch._forced_termination_requested is True
835+
789836

790837
# -----------------------------------------------------------------------------
791838
# _build_groupchat

0 commit comments

Comments
 (0)