fix: ensure LLM stats tracking is accurate by including completed subagents (#441)

bearsyankees · web-flow · commit 15c95718e600 · 2026-04-13T00:09:13.000-04:00
diff --git a/strix/agents/base_agent.py b/strix/agents/base_agent.py
@@ -134,7 +134,8 @@ def _add_to_agents_graph(self) -> None:
         }
         agents_graph_actions._agent_graph["nodes"][self.state.agent_id] = node
 
-        agents_graph_actions._agent_instances[self.state.agent_id] = self
+        with agents_graph_actions._agent_llm_stats_lock:
+            agents_graph_actions._agent_instances[self.state.agent_id] = self
         agents_graph_actions._agent_states[self.state.agent_id] = self.state
 
         if self.state.parent_id:
diff --git a/strix/telemetry/tracer.py b/strix/telemetry/tracer.py
@@ -799,17 +799,25 @@ def get_real_tool_count(self) -> int:
         )
 
     def get_total_llm_stats(self) -> dict[str, Any]:
-        from strix.tools.agents_graph.agents_graph_actions import _agent_instances
+        from strix.tools.agents_graph.agents_graph_actions import (
+            _agent_instances,
+            _completed_agent_llm_totals,
+            _agent_llm_stats_lock,
+        )
+
+        with _agent_llm_stats_lock:
+            completed_totals = dict(_completed_agent_llm_totals)
+            active_agents = list(_agent_instances.values())
 
         total_stats = {
-            "input_tokens": 0,
-            "output_tokens": 0,
-            "cached_tokens": 0,
-            "cost": 0.0,
-            "requests": 0,
+            "input_tokens": int(completed_totals.get("input_tokens", 0) or 0),
+            "output_tokens": int(completed_totals.get("output_tokens", 0) or 0),
+            "cached_tokens": int(completed_totals.get("cached_tokens", 0) or 0),
+            "cost": float(completed_totals.get("cost", 0.0) or 0.0),
+            "requests": int(completed_totals.get("requests", 0) or 0),
         }
 
-        for agent_instance in _agent_instances.values():
+        for agent_instance in active_agents:
             if hasattr(agent_instance, "llm") and hasattr(agent_instance.llm, "_total_stats"):
                 agent_stats = agent_instance.llm._total_stats
                 total_stats["input_tokens"] += agent_stats.input_tokens
diff --git a/strix/tools/agents_graph/agents_graph_actions.py b/strix/tools/agents_graph/agents_graph_actions.py
@@ -19,9 +19,55 @@
 
 _agent_instances: dict[str, Any] = {}
 
+_agent_llm_stats_lock = threading.Lock()
+
+
+def _empty_llm_stats_totals() -> dict[str, int | float]:
+    return {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "cached_tokens": 0,
+        "cost": 0.0,
+        "requests": 0,
+    }
+
+
+_completed_agent_llm_totals: dict[str, int | float] = _empty_llm_stats_totals()
+
 _agent_states: dict[str, Any] = {}
 
 
+def _snapshot_agent_llm_stats(agent: Any) -> dict[str, int | float] | None:
+    if not hasattr(agent, "llm") or not hasattr(agent.llm, "_total_stats"):
+        return None
+
+    stats = agent.llm._total_stats
+    return {
+        "input_tokens": stats.input_tokens,
+        "output_tokens": stats.output_tokens,
+        "cached_tokens": stats.cached_tokens,
+        "cost": stats.cost,
+        "requests": stats.requests,
+    }
+
+
+def _finalize_agent_llm_stats(agent_id: str, agent: Any) -> None:
+    stats = _snapshot_agent_llm_stats(agent)
+    with _agent_llm_stats_lock:
+        if stats is not None:
+            _completed_agent_llm_totals["input_tokens"] += int(stats["input_tokens"])
+            _completed_agent_llm_totals["output_tokens"] += int(stats["output_tokens"])
+            _completed_agent_llm_totals["cached_tokens"] += int(stats["cached_tokens"])
+            _completed_agent_llm_totals["cost"] += float(stats["cost"])
+            _completed_agent_llm_totals["requests"] += int(stats["requests"])
+
+            node = _agent_graph["nodes"].get(agent_id)
+            if node is not None:
+                node["llm_stats"] = stats
+
+        _agent_instances.pop(agent_id, None)
+
+
 def _is_whitebox_agent(agent_id: str) -> bool:
     agent = _agent_instances.get(agent_id)
     return bool(getattr(getattr(agent, "llm_config", None), "is_whitebox", False))
@@ -237,7 +283,7 @@ def _run_agent_in_thread(
         _agent_graph["nodes"][state.agent_id]["finished_at"] = datetime.now(UTC).isoformat()
         _agent_graph["nodes"][state.agent_id]["result"] = {"error": str(e)}
         _running_agents.pop(state.agent_id, None)
-        _agent_instances.pop(state.agent_id, None)
+        _finalize_agent_llm_stats(state.agent_id, agent)
         raise
     else:
         if state.stop_requested:
@@ -247,7 +293,7 @@ def _run_agent_in_thread(
         _agent_graph["nodes"][state.agent_id]["finished_at"] = datetime.now(UTC).isoformat()
         _agent_graph["nodes"][state.agent_id]["result"] = result
         _running_agents.pop(state.agent_id, None)
-        _agent_instances.pop(state.agent_id, None)
+        _finalize_agent_llm_stats(state.agent_id, agent)
 
         return {"result": result}
 
@@ -418,7 +464,8 @@ def create_agent(
         if inherit_context:
             inherited_messages = agent_state.get_conversation_history()
 
-        _agent_instances[state.agent_id] = agent
+        with _agent_llm_stats_lock:
+            _agent_instances[state.agent_id] = agent
 
         thread = threading.Thread(
             target=_run_agent_in_thread,
diff --git a/tests/telemetry/test_tracer.py b/tests/telemetry/test_tracer.py
@@ -10,6 +10,7 @@
 from strix.telemetry import tracer as tracer_module
 from strix.telemetry import utils as telemetry_utils
 from strix.telemetry.tracer import Tracer, set_global_tracer
+from strix.tools.agents_graph import agents_graph_actions
 
 
 def _load_events(events_path: Path) -> list[dict[str, Any]]:
@@ -255,6 +256,75 @@ def test_events_with_agent_id_include_agent_name(monkeypatch, tmp_path) -> None:
     assert chat_event["actor"]["agent_name"] == "Root Agent"
 
 
+def test_get_total_llm_stats_includes_completed_subagents(monkeypatch, tmp_path) -> None:
+    monkeypatch.chdir(tmp_path)
+
+    class DummyStats:
+        def __init__(
+            self,
+            *,
+            input_tokens: int,
+            output_tokens: int,
+            cached_tokens: int,
+            cost: float,
+            requests: int,
+        ) -> None:
+            self.input_tokens = input_tokens
+            self.output_tokens = output_tokens
+            self.cached_tokens = cached_tokens
+            self.cost = cost
+            self.requests = requests
+
+    class DummyLLM:
+        def __init__(self, stats: DummyStats) -> None:
+            self._total_stats = stats
+
+    class DummyAgent:
+        def __init__(self, stats: DummyStats) -> None:
+            self.llm = DummyLLM(stats)
+
+    tracer = Tracer("cost-rollup")
+    set_global_tracer(tracer)
+
+    monkeypatch.setattr(
+        agents_graph_actions,
+        "_agent_instances",
+        {
+            "root-agent": DummyAgent(
+                DummyStats(
+                    input_tokens=1_000,
+                    output_tokens=250,
+                    cached_tokens=100,
+                    cost=0.12345,
+                    requests=2,
+                )
+            )
+        },
+    )
+    monkeypatch.setattr(
+        agents_graph_actions,
+        "_completed_agent_llm_totals",
+        {
+            "input_tokens": 2_000,
+            "output_tokens": 500,
+            "cached_tokens": 400,
+            "cost": 0.54321,
+            "requests": 3,
+        },
+    )
+
+    stats = tracer.get_total_llm_stats()
+
+    assert stats["total"] == {
+        "input_tokens": 3_000,
+        "output_tokens": 750,
+        "cached_tokens": 500,
+        "cost": 0.6667,
+        "requests": 5,
+    }
+    assert stats["total_tokens"] == 3_750
+
+
 def test_run_metadata_is_only_on_run_lifecycle_events(monkeypatch, tmp_path) -> None:
     monkeypatch.chdir(tmp_path)
 
diff --git a/tests/tools/test_agents_graph_whitebox.py b/tests/tools/test_agents_graph_whitebox.py
@@ -5,16 +5,24 @@
 from strix.tools.agents_graph import agents_graph_actions
 
 
-def test_create_agent_inherits_parent_whitebox_flag(monkeypatch) -> None:
-    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
-
+def _reset_agent_graph_state() -> None:
     agents_graph_actions._agent_graph["nodes"].clear()
     agents_graph_actions._agent_graph["edges"].clear()
     agents_graph_actions._agent_messages.clear()
     agents_graph_actions._running_agents.clear()
     agents_graph_actions._agent_instances.clear()
+    agents_graph_actions._completed_agent_llm_totals.clear()
+    agents_graph_actions._completed_agent_llm_totals.update(
+        agents_graph_actions._empty_llm_stats_totals()
+    )
     agents_graph_actions._agent_states.clear()
 
+
+def test_create_agent_inherits_parent_whitebox_flag(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+
+    _reset_agent_graph_state()
+
     parent_id = "parent-agent"
     parent_llm = LLMConfig(timeout=123, scan_mode="standard", is_whitebox=True)
     agents_graph_actions._agent_instances[parent_id] = SimpleNamespace(
@@ -66,12 +74,7 @@ def start(self) -> None:
 def test_delegation_prompt_includes_wiki_memory_instruction_in_whitebox(monkeypatch) -> None:
     monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
 
-    agents_graph_actions._agent_graph["nodes"].clear()
-    agents_graph_actions._agent_graph["edges"].clear()
-    agents_graph_actions._agent_messages.clear()
-    agents_graph_actions._running_agents.clear()
-    agents_graph_actions._agent_instances.clear()
-    agents_graph_actions._agent_states.clear()
+    _reset_agent_graph_state()
 
     parent_id = "parent-1"
     child_id = "child-1"
@@ -116,12 +119,7 @@ async def agent_loop(self, _task: str) -> dict[str, bool]:
 def test_agent_finish_appends_wiki_update_for_whitebox(monkeypatch) -> None:
     monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
 
-    agents_graph_actions._agent_graph["nodes"].clear()
-    agents_graph_actions._agent_graph["edges"].clear()
-    agents_graph_actions._agent_messages.clear()
-    agents_graph_actions._running_agents.clear()
-    agents_graph_actions._agent_instances.clear()
-    agents_graph_actions._agent_states.clear()
+    _reset_agent_graph_state()
 
     parent_id = "parent-2"
     child_id = "child-2"
@@ -192,12 +190,7 @@ def fake_append_note_content(note_id: str, delta: str):
 def test_run_agent_in_thread_injects_shared_wiki_context_in_whitebox(monkeypatch) -> None:
     monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
 
-    agents_graph_actions._agent_graph["nodes"].clear()
-    agents_graph_actions._agent_graph["edges"].clear()
-    agents_graph_actions._agent_messages.clear()
-    agents_graph_actions._running_agents.clear()
-    agents_graph_actions._agent_instances.clear()
-    agents_graph_actions._agent_states.clear()
+    _reset_agent_graph_state()
 
     parent_id = "parent-3"
     child_id = "child-3"

Original file line number	Diff line number	Diff line change
`@@ -134,7 +134,8 @@ def _add_to_agents_graph(self) -> None:`
`134`	`134`	`}`
`135`	`135`	`agents_graph_actions._agent_graph["nodes"][self.state.agent_id] = node`
`136`	`136`
`137`		`- agents_graph_actions._agent_instances[self.state.agent_id] = self`
	`137`	`+ with agents_graph_actions._agent_llm_stats_lock:`
	`138`	`+     agents_graph_actions._agent_instances[self.state.agent_id] = self`
`138`	`139`	`agents_graph_actions._agent_states[self.state.agent_id] = self.state`
`139`	`140`
`140`	`141`	`if self.state.parent_id:`