Update behavioral monitor sample to use TaskResult history

TimesAndPlaces · TimesAndPlaces · commit d0a33dada895 · 2026-03-29T18:51:20.000+03:00
diff --git a/python/samples/agentchat_behavioral_monitor/README.md b/python/samples/agentchat_behavioral_monitor/README.md
@@ -55,14 +55,17 @@ monitor = BehavioralMonitor(
     min_messages=6,
 )
 
-# Check after each conversation step
-result = monitor.check(agent.chat_messages[recipient])
+history = []
+
+# Check after each public AgentChat run
+task_result = await assistant_agent.run(task="Use jwt and bcrypt for auth")
+result = monitor.observe_result(history, task_result)
 if result["drift_detected"]:
-    # Re-inject context or trigger memory recall
     print("Drift at turn", result["turn"], "ghost:", result["ghost_terms"])
 
-# Or patch an agent to auto-check after every reply
-monitor.patch_agent(assistant_agent)
+# Later runs keep extending the same external history
+task_result = await assistant_agent.run(task="Now add a profile endpoint")
+result = monitor.observe_result(history, task_result)
 ```
 
 ## Parameters
diff --git a/python/samples/agentchat_behavioral_monitor/main.py b/python/samples/agentchat_behavioral_monitor/main.py
@@ -7,20 +7,24 @@
 to detect that drift by measuring Ghost Consistency Score (CCS): the fraction
 of vocabulary from the earliest turns still present in the most recent turns.
 
+The integration path here stays on the public AgentChat surface:
+collect `TaskResult.messages` from `AssistantAgent.run()` / `run_stream()`,
+append them to an external history list, and score that history after each run.
+
 Usage:
     python main.py
 
 Requires:
     pip install autogen-agentchat
 
-Optional (for richer semantic distance):
-    pip install sentence-transformers
+Optional:
+    Use with `AssistantAgent.run()` / `run_stream()` by passing an external
+    history list and each resulting `TaskResult` into `observe_result()`.
 """
 
 from __future__ import annotations
 
 import re
-import sys
 from collections import Counter
 from typing import Any, Dict, List, Optional, Sequence
 
@@ -155,40 +159,15 @@ def check(self, messages: Sequence[Any]) -> Dict[str, Any]:
 
         return result
 
-    def patch_agent(self, agent: Any) -> None:
+    def observe_result(self, history: List[Any], task_result: Any) -> Dict[str, Any]:
         """
-        Monkey-patch an AssistantAgent so that every reply automatically
-        runs a consistency check. Prints a warning when drift is detected.
+        Append the messages of one AgentChat task result to `history`, then score the whole history.
+
+        This works with the current public `AssistantAgent.run()` /
+        `run_stream()` API instead of monkey-patching private internals.
         """
-        original = agent.generate_reply
-        monitor = self
-
-        def _patched(*args, **kwargs):
-            reply = original(*args, **kwargs)
-            msgs = []
-            for attr in ("chat_messages", "_oai_messages", "messages"):
-                candidate = getattr(agent, attr, None)
-                if candidate is None:
-                    continue
-                if isinstance(candidate, dict):
-                    for v in candidate.values():
-                        msgs = list(v)
-                        break
-                elif isinstance(candidate, list):
-                    msgs = list(candidate)
-                if msgs:
-                    break
-            if msgs:
-                r = monitor.check(msgs)
-                if r["drift_detected"]:
-                    print(
-                        f"[BehavioralMonitor] \u26a0 drift at turn {r['turn']}: "
-                        f"CCS={r['ccs']:.3f}, ghost={r['ghost_terms']}",
-                        file=sys.stderr,
-                    )
-            return reply
-
-        agent.generate_reply = _patched
+        history.extend(getattr(task_result, "messages", []))
+        return self.check(history)
 
 
 # ---------------------------------------------------------------------------