# Source: codingbuddy/packages/claude-code-plugin/tests/test_stats.py at c823cf41def4a2eaeee5a211cc7774f54c980dc2 · JeremyDev87/codingbuddy · GitHub
"""Tests for SessionStats — operational statistics tracker (#825)."""
import json
import os
import sys
import time
import pytest

# Ensure hooks/lib is on path.
# The directory is resolved relative to this file: <parent of tests dir>/hooks/lib.
# It is prepended (index 0) only when not already present, so repeated imports
# of this module do not add duplicate sys.path entries.
_tests_dir = os.path.dirname(os.path.abspath(__file__))
_lib_dir = os.path.join(os.path.dirname(_tests_dir), "hooks", "lib")
if _lib_dir not in sys.path:
    sys.path.insert(0, _lib_dir)

# Must stay below the sys.path adjustment above, which is what makes the
# ``stats`` module under hooks/lib importable.
from stats import SessionStats


@pytest.fixture
def data_dir(tmp_path):
    """Create an empty ``stats`` directory under ``tmp_path``.

    Returns:
        The directory path as a plain ``str``.
    """
    stats_root = tmp_path.joinpath("stats")
    stats_root.mkdir()
    return str(stats_root)


@pytest.fixture
def stats(data_dir):
    """Build a ``SessionStats`` with id ``test-session`` stored in ``data_dir``."""
    session = SessionStats(session_id="test-session", data_dir=data_dir)
    return session


class TestInit:
    """Checks on what constructing a SessionStats leaves on the filesystem."""

    def test_creates_data_dir_with_correct_permissions(self, tmp_path):
        """A not-yet-existing data_dir must exist afterwards with mode 0o700."""
        target = str(tmp_path / "new_stats_dir")
        session = SessionStats(session_id="s1", data_dir=target)
        assert os.path.isdir(target)
        # Mask off the file-type bits so only the permission bits are compared.
        permission_bits = os.stat(target).st_mode & 0o777
        assert permission_bits == 0o700

    def test_creates_stats_file(self, stats, data_dir):
        """The ``stats`` fixture's session must have ``<session_id>.json`` on disk."""
        assert os.path.isfile(os.path.join(data_dir, "test-session.json"))

    def test_uses_env_data_dir(self, tmp_path, monkeypatch):
        """With no data_dir argument, CLAUDE_PLUGIN_DATA names the directory."""
        env_target = str(tmp_path / "env_dir")
        monkeypatch.setenv("CLAUDE_PLUGIN_DATA", env_target)
        session = SessionStats(session_id="env-test")
        assert os.path.isdir(env_target)

    def test_default_data_dir(self, monkeypatch):
        """With neither argument nor env var, ``~/.codingbuddy`` must exist."""
        # NOTE(review): this exercises the real home directory rather than a
        # temp location — confirm that is acceptable for the test environment.
        monkeypatch.delenv("CLAUDE_PLUGIN_DATA", raising=False)
        session = SessionStats(session_id="default-test")
        home = os.path.expanduser("~")
        assert os.path.isdir(os.path.join(home, ".codingbuddy"))

    def test_chmod_failure_graceful(self, tmp_path, monkeypatch):
        """chmod failure should not crash initialization."""
        target = str(tmp_path / "restricted_dir")
        os.makedirs(target, mode=0o700)

        def _deny_chmod(*_args, **_kwargs):
            raise PermissionError("Operation not permitted")

        # Every os.chmod call made during construction now raises.
        monkeypatch.setattr(os, "chmod", _deny_chmod)
        session = SessionStats(session_id="perm-test", data_dir=target)
        assert os.path.isdir(target)


class TestRecordToolCall:
    """Counters reported by finalize() after record_tool_call() invocations."""

    def test_increments_count(self, stats):
        """Two recorded calls must yield ``tool_count == 2``."""
        for tool in ("Bash", "Edit"):
            stats.record_tool_call(tool)
        summary = stats.finalize()
        assert summary["tool_count"] == 2

    def test_tracks_tool_names(self, stats):
        """Per-tool tallies must match how often each name was recorded."""
        for tool in ("Bash", "Bash", "Edit"):
            stats.record_tool_call(tool)
        per_tool = stats.finalize()["tool_names"]
        assert per_tool["Bash"] == 2
        assert per_tool["Edit"] == 1

    def test_tracks_errors(self, stats):
        """Only calls recorded with ``success=False`` count as errors."""
        calls = (("Bash", True), ("Bash", False), ("Edit", False))
        for tool, succeeded in calls:
            stats.record_tool_call(tool, success=succeeded)
        summary = stats.finalize()
        assert summary["error_count"] == 2

    def test_success_does_not_increment_errors(self, stats):
        """A single successful call must leave ``error_count`` at zero."""
        stats.record_tool_call("Bash", success=True)
        assert stats.finalize()["error_count"] == 0


class TestDuration:
    """Elapsed-time reporting in the finalize() result."""

    def test_tracks_session_duration(self, stats):
        """After a 50 ms pause, ``duration_seconds`` must be at least 0.04."""
        time.sleep(0.05)
        elapsed = stats.finalize()["duration_seconds"]
        # Threshold sits slightly below the sleep to leave a margin.
        assert elapsed >= 0.04


class TestFormatSummary:
    """Text produced by format_summary()."""

    def test_format_summary_structure(self, stats):
        """Summary starts with ``[CB]`` and mentions totals and per-tool counts."""
        for tool in ("Bash", "Bash", "Edit"):
            stats.record_tool_call(tool)
        text = stats.format_summary()
        assert text.startswith("[CB]")
        for fragment in ("3 tools", "0 errors", "Bash:2", "Edit:1"):
            assert fragment in text

    def test_format_summary_with_errors(self, stats):
        """One failed call must surface as ``1 error`` in the summary."""
        stats.record_tool_call("Bash", success=False)
        text = stats.format_summary()
        assert "1 error" in text


class TestRoundtrip:
    """Persistence of counters between two instances sharing a session id."""

    def test_data_persists_across_instances(self, data_dir):
        """A second instance must see what the first one flushed."""
        writer = SessionStats(session_id="rt-test", data_dir=data_dir)
        for tool in ("Bash", "Edit"):
            writer.record_tool_call(tool)
        # Explicit flush so the data is on disk before the next instance reads it.
        writer.flush()

        # Same session id, fresh object.
        reader = SessionStats(session_id="rt-test", data_dir=data_dir)
        reader.record_tool_call("Read")
        totals = reader.finalize()
        assert totals["tool_count"] == 3
        per_tool = totals["tool_names"]
        assert per_tool["Bash"] == 1
        assert per_tool["Read"] == 1


class TestLocking:
    """Many back-to-back records on one instance (sequential, single thread)."""

    def test_concurrent_writes_dont_corrupt(self, data_dir):
        """Multiple rapid writes should not corrupt the file."""
        session = SessionStats(session_id="lock-test", data_dir=data_dir)
        # 50 calls cycling through five tool names: Tool0 .. Tool4.
        tool_sequence = [f"Tool{index % 5}" for index in range(50)]
        for tool in tool_sequence:
            session.record_tool_call(tool)
        totals = session.finalize()
        assert totals["tool_count"] == 50


class TestInMemoryAccumulation:
    """Tests for in-memory accumulation with periodic flush (#931)."""

    @staticmethod
    def _read_disk(session):
        """Return the JSON currently stored in ``session.stats_file``."""
        with open(session.stats_file, "r") as handle:
            return json.load(handle)

    def test_record_does_not_write_immediately(self, data_dir):
        """record_tool_call should NOT write to disk on every call."""
        session = SessionStats(
            session_id="lazy-test", data_dir=data_dir, flush_interval=10
        )
        # Baseline: whatever count the file holds right after construction.
        baseline = self._read_disk(session).get("tool_count", 0)

        session.record_tool_call("Bash")
        # One call is below the flush interval, so the file must be unchanged.
        persisted = self._read_disk(session)
        assert persisted["tool_count"] == baseline

    def test_flush_writes_accumulated_data(self, data_dir):
        """flush() should persist all accumulated stats to disk."""
        session = SessionStats(
            session_id="flush-test", data_dir=data_dir, flush_interval=100
        )
        for tool in ("Bash", "Edit", "Read"):
            session.record_tool_call(tool)
        session.flush()

        persisted = self._read_disk(session)
        assert persisted["tool_count"] == 3
        assert persisted["tool_names"]["Bash"] == 1
        assert persisted["tool_names"]["Edit"] == 1

    def test_auto_flush_at_interval(self, data_dir):
        """Should auto-flush when flush_interval calls are reached."""
        session = SessionStats(
            session_id="auto-flush", data_dir=data_dir, flush_interval=3
        )
        for tool in ("Bash", "Edit"):
            session.record_tool_call(tool)
        # Two calls with an interval of three: nothing on disk yet.
        before_threshold = self._read_disk(session)
        assert before_threshold["tool_count"] == 0

        # The third call reaches the interval and must trigger the write.
        session.record_tool_call("Read")
        after_threshold = self._read_disk(session)
        assert after_threshold["tool_count"] == 3

    def test_finalize_flushes_pending(self, data_dir):
        """finalize() should flush pending stats before returning."""
        session = SessionStats(
            session_id="fin-flush", data_dir=data_dir, flush_interval=100
        )
        for tool in ("Bash", "Edit"):
            session.record_tool_call(tool)
        totals = session.finalize()
        assert totals["tool_count"] == 2

    def test_format_summary_uses_memory_data(self, data_dir):
        """format_summary() should reflect in-memory state, not just disk."""
        session = SessionStats(
            session_id="mem-summary", data_dir=data_dir, flush_interval=100
        )
        for _ in range(2):
            session.record_tool_call("Bash")
        text = session.format_summary()
        assert "2 tools" in text
        assert "Bash:2" in text


class TestShortLivedProcess:
    """Regression tests for short-lived hook process pattern.

    Each hook invocation (e.g. PostToolUse) is a fresh Python process that
    creates one SessionStats, calls record_tool_call() exactly once, then
    exits. With flush_interval > 1 and no explicit flush, the in-memory
    increment was lost — leaving disk counters stuck at 0 for the entire
    session. This produced statusline/stop-summary output like
    `[CB] Xm | 0 tools | 0 errors` even after many tool calls.

    The library now tracks every live SessionStats in a module-level
    WeakSet and flushes them via a single atexit handler. These tests
    drive that handler directly (rather than calling
    ``atexit._run_exitfuncs()``) so they do not interfere with other
    tests' atexit registrations.
    """

    def test_instance_added_to_live_set(self, data_dir):
        """Newly constructed SessionStats must be tracked for atexit flush."""
        from stats import _live_instances

        s = SessionStats(
            session_id="weakset-test", data_dir=data_dir, flush_interval=10
        )
        assert s in _live_instances

    def test_record_persists_via_module_atexit_handler(self, data_dir):
        """Records should reach disk via _flush_all_pending() when caller forgets."""
        from stats import _flush_all_pending

        s1 = SessionStats(
            session_id="exit-flush-test", data_dir=data_dir, flush_interval=10
        )
        s1.record_tool_call("Bash")
        # Caller does NOT call flush() — simulates short-lived hook process.
        # Drive the same code path the atexit handler would run.
        _flush_all_pending()

        # Fresh instance reads from disk only — must see the recorded call.
        s2 = SessionStats(
            session_id="exit-flush-test", data_dir=data_dir, flush_interval=10
        )
        on_disk = s2._locked_read()
        assert on_disk["tool_count"] == 1
        assert on_disk["tool_names"]["Bash"] == 1

    def test_finalize_removes_instance_from_live_set(self, data_dir):
        """finalize() removes the file AND drops the instance from the live set."""
        from stats import _flush_all_pending, _live_instances

        s = SessionStats(
            session_id="finalize-weakset", data_dir=data_dir, flush_interval=10
        )
        s.record_tool_call("Bash")
        s.finalize()
        stats_file = os.path.join(data_dir, "finalize-weakset.json")
        assert not os.path.exists(stats_file)
        assert s not in _live_instances

        # Subsequent flush sweeps must NOT recreate the file
        _flush_all_pending()
        assert not os.path.exists(stats_file)

    def test_only_one_atexit_handler_regardless_of_instance_count(self, data_dir):
        """Library must register a single atexit handler, not one per instance."""
        import atexit

        def registered_handler_count():
            """Return how many atexit callbacks are currently registered.

            ``atexit._exithandlers`` is a Python 2 attribute; on Python 3 it
            does not exist, so reading it with a ``[]`` default always gave 0
            and made this test unable to fail. CPython 3 exposes the count
            via ``atexit._ncallbacks()``. The legacy attribute is kept as a
            fallback, and the test is skipped when neither is available so it
            never passes vacuously.
            """
            ncallbacks = getattr(atexit, "_ncallbacks", None)
            if ncallbacks is not None:
                return ncallbacks()
            legacy_handlers = getattr(atexit, "_exithandlers", None)
            if legacy_handlers is not None:
                return len(legacy_handlers)
            pytest.skip("atexit exposes no way to count registered handlers")

        # Snapshot atexit handler count, create many instances, snapshot again.
        # The implementation uses a module-level WeakSet + single handler,
        # so the count should not grow with instance creation.
        before = registered_handler_count()
        instances = [
            SessionStats(
                session_id=f"leak-test-{i}",
                data_dir=data_dir,
                flush_interval=10,
            )
            for i in range(20)
        ]
        after = registered_handler_count()
        # Allow for at most the single module-level registration that may
        # have happened on first import in this test session.
        assert after - before <= 1
        # Keep references alive until the assertion runs
        assert len(instances) == 20


class TestHookTimingIntegration:
    """Tests for hook timing integration in SessionStats (#945)."""

    def test_hook_timings_field_initialized(self, stats):
        """SessionStats should have hook_timings field in stored data."""
        stored = stats._locked_read()
        assert "hook_timings" in stored
        assert stored["hook_timings"] == {}

    def test_record_hook_timing_stores_data(self, stats):
        """record_hook_timing() should store timing in hook_timings."""
        samples = (
            ("PreToolUse", 150.0),
            ("PreToolUse", 200.0),
            ("PostToolUse", 50.0),
        )
        for hook_name, elapsed_ms in samples:
            stats.record_hook_timing(hook_name, elapsed_ms)
        stats.flush()
        timings = stats._locked_read()["hook_timings"]
        # Values are expected per hook, in the order they were recorded.
        assert timings["PreToolUse"] == [150.0, 200.0]
        assert timings["PostToolUse"] == [50.0]

    def test_finalize_includes_hook_timing_stats(self, stats):
        """finalize() should include computed hook timing statistics."""
        for elapsed_ms in (100.0, 200.0, 300.0):
            stats.record_hook_timing("PreToolUse", elapsed_ms)
        totals = stats.finalize()
        assert "hook_timing_stats" in totals
        pre_tool = totals["hook_timing_stats"]["PreToolUse"]
        assert pre_tool["count"] == 3
        assert pre_tool["avg_ms"] == pytest.approx(200.0, abs=0.01)
        assert pre_tool["max_ms"] == pytest.approx(300.0, abs=0.01)

    def test_format_summary_includes_timing_report(self, stats):
        """format_summary() should include hook timing info when present."""
        stats.record_hook_timing("PreToolUse", 8500.0)
        text = stats.format_summary()
        assert "PreToolUse" in text

    def test_format_summary_no_timing_when_empty(self, stats):
        """format_summary() should not include timing section when no timings."""
        text = stats.format_summary()
        assert "⏱" not in text

class TestCleanup:
    """Removal of stats files by cleanup_stale() and finalize()."""

    def test_cleanup_stale_removes_old_files(self, data_dir):
        """An old-dated file is deleted while a just-created session's file stays."""
        # Hand-written stats file whose started_at lies 100000 s in the past.
        stale_file = os.path.join(data_dir, "old-session.json")
        with open(stale_file, "w") as handle:
            json.dump({"started_at": time.time() - 100000}, handle)

        # A session created now, which must survive the sweep.
        fresh = SessionStats(session_id="fresh", data_dir=data_dir)

        SessionStats.cleanup_stale(data_dir, max_age_hours=1)

        fresh_file = os.path.join(data_dir, "fresh.json")
        assert not os.path.exists(stale_file)
        assert os.path.exists(fresh_file)

    def test_cleanup_stale_keeps_recent(self, data_dir):
        """A session created just now must survive a 24-hour cleanup window."""
        recent = SessionStats(session_id="recent", data_dir=data_dir)
        SessionStats.cleanup_stale(data_dir, max_age_hours=24)
        recent_file = os.path.join(data_dir, "recent.json")
        assert os.path.exists(recent_file)

    def test_finalize_cleans_up_file(self, stats, data_dir):
        """After finalize(), the session's JSON file must no longer exist."""
        stats.record_tool_call("Bash")
        stats.finalize()
        assert not os.path.exists(os.path.join(data_dir, "test-session.json"))