fix(hooks): persist tool stats from short-lived hook processes

JeremyDev87 · JeremyDev87 · commit c823cf41def4 · 2026-04-12T09:30:50.000+09:00
PostToolUse hooks run in fresh Python processes that record exactly
one tool call before exiting. With flush_interval=10, that single
record_tool_call() never reached disk, leaving the Stop hook summary
permanently stuck at "[CB] Xm | 0 tools | 0 errors" no matter how
many tools were used in the session.

- post-tool-use.py: explicit stats.flush() right after record for
  immediate visibility (statusline, Stop summary).
- stats.py: track every live SessionStats in a module-level WeakSet
  and flush them via a single atexit handler. WeakSet keeps GC
  unchanged and avoids the per-instance handler leak that a naive
  atexit.register(self.method) would cause.
- finalize() drops the instance from the WeakSet so the post-finalize
  flush sweep does not resurrect the cleaned-up file.
- Module docstring documents the short-lived process pattern.
- Regression tests cover: WeakSet membership, persistence via the
  module flush handler, finalize() removal, and the no-handler-leak
  invariant (20 instances must not register 20 handlers).
diff --git a/packages/claude-code-plugin/hooks/lib/stats.py b/packages/claude-code-plugin/hooks/lib/stats.py
@@ -2,10 +2,25 @@
 
 Tracks tool call count, tool names, errors, and session duration.
 Uses fcntl.flock() for file-level locking on every IO operation.
+
+Short-lived hook processes (e.g. PostToolUse) typically create a
+SessionStats, call record_tool_call() once, then exit. Because the
+default flush_interval is 10, that single call would never reach disk.
+Two safety nets cover this:
+
+1. Callers in short-lived processes SHOULD call ``flush()`` explicitly
+   right after recording, for immediate visibility (statusline, Stop
+   hook summary, etc.).
+2. As a fallback, every live SessionStats instance is tracked in a
+   module-level WeakSet and flushed once at interpreter exit by a single
+   atexit handler. This avoids leaking a per-instance atexit handler and
+   keeps GC behavior unchanged.
 """
+import atexit
 import json
 import os
 import time
+import weakref
 from typing import Any, Dict, List, Optional
 
 try:
@@ -17,8 +32,38 @@
 DEFAULT_DATA_DIR = os.path.join(os.path.expanduser("~"), ".codingbuddy")
 
 
+# Track every live SessionStats instance with weak references so they
+# can be flushed at interpreter exit without preventing garbage
+# collection. Using a WeakSet means dead instances are removed
+# automatically; using a single module-level atexit handler avoids the
+# per-instance handler leak that would otherwise accumulate when many
+# SessionStats are created in one process.
+_live_instances: "weakref.WeakSet[SessionStats]" = weakref.WeakSet()
+
+
+def _flush_all_pending() -> None:
+    """Flush every live SessionStats instance.
+
+    Registered as a single atexit handler at module load time. Iterates
+    a snapshot of the WeakSet so concurrent GC during iteration is safe.
+    All exceptions are swallowed because atexit handlers must not raise
+    during interpreter shutdown.
+    """
+    for inst in list(_live_instances):
+        try:
+            inst.flush()
+        except Exception:
+            pass
+
+
+atexit.register(_flush_all_pending)
+
+
 class SessionStats:
-    """Track operational metrics for a Claude Code session."""
+    """Track operational metrics for a Claude Code session.
+
+    See module docstring for the short-lived process pattern.
+    """
 
     def __init__(self, session_id: str, data_dir: Optional[str] = None, flush_interval: int = 10):
         """Initialize stats tracker.
@@ -67,6 +112,13 @@ def __init__(self, session_id: str, data_dir: Optional[str] = None, flush_interv
         self._mem_tool_names: Dict[str, int] = {}
         self._mem_hook_timings: Dict[str, List[float]] = {}
 
+        # Add to the module-level WeakSet so the single atexit handler
+        # can flush this instance on interpreter exit. WeakSet does not
+        # prevent garbage collection; once the caller drops its last
+        # reference (and finalize() has cleared mem state), the instance
+        # disappears from the set automatically.
+        _live_instances.add(self)
+
     def record_hook_timing(self, hook_name: str, elapsed_ms: float) -> None:
         """Record a hook execution timing in memory.
 
@@ -205,6 +257,10 @@ def finalize(self) -> Dict[str, Any]:
         except OSError:
             pass
 
+        # Remove from the live set so the atexit flush does not resurrect
+        # the file we just removed.
+        _live_instances.discard(self)
+
         return data
 
     @staticmethod
diff --git a/packages/claude-code-plugin/hooks/post-tool-use.py b/packages/claude-code-plugin/hooks/post-tool-use.py
@@ -34,6 +34,13 @@ def handle_post_tool_use(data: dict):
         stats = SessionStats(session_id=session_id)
         tool_name = data.get("tool_name", "unknown")
         stats.record_tool_call(tool_name, success=True)
+        # PostToolUse runs in a fresh short-lived process per call. The
+        # default flush_interval (10) means a single record_tool_call would
+        # never reach disk. Flush immediately so subsequent reads (statusline,
+        # Stop hook summary) see accurate counts. atexit in SessionStats is a
+        # safety net, but explicit flush keeps the disk state observable
+        # right after this hook exits.
+        stats.flush()
     except Exception:
         pass  # Never block tool execution
 
diff --git a/packages/claude-code-plugin/tests/test_stats.py b/packages/claude-code-plugin/tests/test_stats.py
@@ -207,6 +207,93 @@ def test_format_summary_uses_memory_data(self, data_dir):
         assert "Bash:2" in summary
 
 
+class TestShortLivedProcess:
+    """Regression tests for short-lived hook process pattern.
+
+    Each hook invocation (e.g. PostToolUse) is a fresh Python process that
+    creates one SessionStats, calls record_tool_call() exactly once, then
+    exits. With flush_interval > 1 and no explicit flush, the in-memory
+    increment was lost — leaving disk counters stuck at 0 for the entire
+    session. This produced statusline/stop-summary output like
+    `[CB] Xm | 0 tools | 0 errors` even after many tool calls.
+
+    The library now tracks every live SessionStats in a module-level
+    WeakSet and flushes them via a single atexit handler. These tests
+    drive that handler directly (rather than calling
+    ``atexit._run_exitfuncs()``) so they do not interfere with other
+    tests' atexit registrations.
+    """
+
+    def test_instance_added_to_live_set(self, data_dir):
+        """Newly constructed SessionStats must be tracked for atexit flush."""
+        from stats import _live_instances
+
+        s = SessionStats(
+            session_id="weakset-test", data_dir=data_dir, flush_interval=10
+        )
+        assert s in _live_instances
+
+    def test_record_persists_via_module_atexit_handler(self, data_dir):
+        """Records should reach disk via _flush_all_pending() when caller forgets."""
+        from stats import _flush_all_pending
+
+        s1 = SessionStats(
+            session_id="exit-flush-test", data_dir=data_dir, flush_interval=10
+        )
+        s1.record_tool_call("Bash")
+        # Caller does NOT call flush() — simulates short-lived hook process.
+        # Drive the same code path the atexit handler would run.
+        _flush_all_pending()
+
+        # Fresh instance reads from disk only — must see the recorded call.
+        s2 = SessionStats(
+            session_id="exit-flush-test", data_dir=data_dir, flush_interval=10
+        )
+        on_disk = s2._locked_read()
+        assert on_disk["tool_count"] == 1
+        assert on_disk["tool_names"]["Bash"] == 1
+
+    def test_finalize_removes_instance_from_live_set(self, data_dir):
+        """finalize() removes the file AND drops the instance from the live set."""
+        from stats import _flush_all_pending, _live_instances
+
+        s = SessionStats(
+            session_id="finalize-weakset", data_dir=data_dir, flush_interval=10
+        )
+        s.record_tool_call("Bash")
+        s.finalize()
+        stats_file = os.path.join(data_dir, "finalize-weakset.json")
+        assert not os.path.exists(stats_file)
+        assert s not in _live_instances
+
+        # Subsequent flush sweeps must NOT recreate the file
+        _flush_all_pending()
+        assert not os.path.exists(stats_file)
+
+    def test_only_one_atexit_handler_regardless_of_instance_count(self, data_dir):
+        """Library must register a single atexit handler, not one per instance."""
+        import atexit
+
+        # Snapshot atexit handler count, create many instances, snapshot again.
+        # The implementation uses a module-level WeakSet + single handler,
+        # so the count should not grow with instance creation.
+        before = len(getattr(atexit, "_exithandlers", []))
+        instances = [
+            SessionStats(
+                session_id=f"leak-test-{i}",
+                data_dir=data_dir,
+                flush_interval=10,
+            )
+            for i in range(20)
+        ]
+        after = len(getattr(atexit, "_exithandlers", []))
+        # Allow for at most the single module-level registration that may
+        # have happened on first import in this test session.
+        assert after - before <= 1
+        # Keep references alive until the assertion runs
+        assert len(instances) == 20
+
+
 class TestHookTimingIntegration:
     """Tests for hook timing integration in SessionStats (#945)."""