-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathsession_end.py
More file actions
945 lines (823 loc) · 39.8 KB
/
Copy pathsession_end.py
File metadata and controls
945 lines (823 loc) · 39.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
#!/usr/bin/env python3
"""
Location: pact-plugin/hooks/session_end.py
Summary: SessionEnd hook that writes a session_end journal event and performs
session directory cleanup.
Used by: hooks.json SessionEnd hook
Actions:
1. Write session_end event to the session journal
2. Detect open PRs that were not paused (append warning to journal)
3. Clean up stale session directories using a dual TTL (30 days active, 180 days paused)
Purely observational — no destructive operations on project files. Session
directory cleanup is best-effort and never blocks session termination.
Input: JSON from stdin with session context
Output: None (SessionEnd hooks cannot inject context)
"""
from __future__ import annotations
import json
import os
import re
import shutil
import sys
import time
from pathlib import Path
# Add hooks directory to path for shared package imports
_hooks_dir = Path(__file__).parent
if str(_hooks_dir) not in sys.path:
sys.path.insert(0, str(_hooks_dir))
from shared.error_output import hook_error_json
from shared import check_pr_state
import shared.pact_context as pact_context
from shared.pact_context import get_project_dir, get_session_dir, get_session_id, get_team_name
from shared.session_journal import (
append_event,
make_event,
read_events,
read_last_event_from,
)
from shared.session_state import is_safe_path_component
from shared.session_registry import get_registry_path as _get_registry_path
from shared.paths import get_claude_config_dir
from shared.task_utils import get_task_list
# Pre-serialized JSON payload (`{"suppressOutput": true}`). Used to suppress
# the false "hook error" display in the Claude Code UI on bare exit paths.
_SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True})
def get_project_slug() -> str:
    """
    Return the project slug for the current session.

    The slug is the final path component of the directory reported by
    `get_project_dir()`.

    Returns:
        The basename of the project directory, or an empty string when no
        project directory is available.
    """
    project_dir = get_project_dir()
    return Path(project_dir).name if project_dir else ""
def _pr_number_from_tasks(tasks: list[dict]):
    """
    Return the first PR number discoverable in task metadata, or None.

    For each task, an explicit `metadata["pr_number"]` (any non-None value)
    wins. Otherwise every string value in `metadata["handoff"]` is searched
    for a GitHub pull-request URL and the captured number (as a string) is
    returned.
    """
    for task in tasks:
        metadata = task.get("metadata") or {}
        explicit = metadata.get("pr_number")
        if explicit is not None:
            return explicit
        handoff = metadata.get("handoff") or {}
        for value in handoff.values():
            if not isinstance(value, str):
                continue
            found = re.search(r'github\.com/[^/]+/[^/]+/pull/(\d+)', value)
            if found:
                return found.group(1)
    return None


def check_unpaused_pr(
    tasks: list[dict] | None,
    project_slug: str,
) -> str | None:
    """
    Safety net: detect a PR that was dispatched but never paused.

    Detection-only. The checks run cheapest-first and each one can silence
    the warning:

    1. A `session_consolidated` journal event exists (wrap-up / pause already
       consolidated memory in this session; Fix B, #453).
    2. The most recent `session_paused` event is at-or-after the most recent
       `review_dispatch` event, or a pause exists with no dispatch at all.
       An older pause does NOT cover a PR dispatched after it
       (pause -> resume -> new PR -> quit).
    3. No PR number can be found, either on the latest `review_dispatch`
       event or (fallback) in task metadata / handoff URLs.
    4. The live PR state reported by `check_pr_state` is MERGED or CLOSED
       (Fix A, #453). Any other answer, including the empty string, keeps
       the warning, which is the conservative choice when the state cannot
       be determined.

    Args:
        tasks: List of task dicts from get_task_list(), or None.
        project_slug: Project identifier for the session directory.

    Returns:
        Warning string if an unpaused PR is detected, otherwise None.
    """
    if not project_slug:
        return None

    # Structural consolidation signal: nothing to warn about if memory
    # consolidation already ran in this session.
    if read_events("session_consolidated"):
        return None

    pauses = read_events("session_paused")
    dispatches = read_events("review_dispatch")

    if pauses:
        if not dispatches:
            return None  # Paused, no PRs at all — safe.
        last_pause_ts = pauses[-1].get("ts", "")
        last_dispatch_ts = dispatches[-1].get("ts", "")
        # `>=` biases toward "paused" (silence) when both events share the
        # same timestamp.
        if last_pause_ts >= last_dispatch_ts:
            return None  # Most recent PR was paused; safe.
        # Otherwise the newest PR was dispatched after the last pause.

    # Prefer the PR number recorded on the newest review_dispatch event.
    pr_number = dispatches[-1].get("pr_number") if dispatches else None

    # Fallback for PRs not tracked through review-workflow journal events.
    if not pr_number and tasks:
        pr_number = _pr_number_from_tasks(tasks)

    if not pr_number:
        return None

    # Last line of defense: a PR that is already merged or closed needs no
    # warning. Reached only after every cheaper signal fell through.
    if check_pr_state(pr_number) in ("MERGED", "CLOSED"):
        return None

    return (
        f"Session ended without memory consolidation. "
        f"PR #{pr_number} may still be open but pause-mode was not run. "
        f"Run /PACT:pause or /PACT:wrap-up in next session."
    )
# Regex for validating UUID-format directory names (session IDs).
# Anchored with `\Z` (strict end-of-string) rather than `$`: in Python `re`,
# `$` matches at end-of-string OR immediately before a trailing newline, so
# `deadbeef-dead-beef-dead-beefdeadbeef\n` would pass a `$` anchor and
# re-enter the skip-set / reaper allowlist as a crafted name. `\Z` rejects
# trailing newlines and is the stricter anchor.
_UUID_PATTERN = re.compile(
    r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z'
)

# Regex for validating PACT team directory names. Intentionally LOOSER than
# what `generate_team_name` in shared/pact_context.py actually emits: the
# producer emits `pact-` + `secrets.token_hex(4)` (8 lowercase hex chars, no
# internal hyphens) or the session-id-prefix fallback (`pact-` + 8 hex
# chars). This regex accepts any `pact-`-prefixed lowercase-hex-and-hyphen
# shape so the reaper tolerates future drift in the producer (e.g. a naming
# scheme that introduces internal hyphens) without silently reaping a live
# team dir.
#
# Non-matching entries in ~/.claude/teams/ belong to other tooling and MUST
# NOT be reaped by cleanup_old_teams, even if they are stale by mtime: the
# reaper treats ~/.claude/teams/ as shared space, not PACT-owned space.
# `\Z` is a strict end-of-string anchor (unlike `$`, it does not match
# before a trailing newline).
_TEAM_NAME_PATTERN = re.compile(r'^pact-[a-f0-9-]+\Z')

# Default TTL (days) for active (non-paused) session directory cleanup.
# 30 days balances disk usage (~50KB × 30 sessions = ~1.5MB) against
# cross-session recovery value.
_SESSION_MAX_AGE_DAYS = 30

# Extended TTL (days) for paused session directories. Paused state is
# in-progress user work that has not been consolidated to memory or merged,
# so it gets a longer TTL than active sessions to protect the pause→resume
# workflow across long gaps. The extended TTL is protection, not permanent
# retention — paused sessions still age out past this threshold.
_PAUSED_SESSION_MAX_AGE_DAYS = 180

# TTL (days) for checkpoint files under ~/.claude/pact-refresh/*.json.
# 7 days matches the prior refresh/constants.py CHECKPOINT_MAX_AGE_DAYS
# value. This cleanup is primarily a one-time sweep for existing
# deployments: with precompact_refresh.py removed (#413), no new
# checkpoints are written, so the directory asymptotically empties.
_CHECKPOINT_MAX_AGE_DAYS = 7
def _is_paused_session(session_dir: str) -> bool:
    """
    Report whether the session has ever recorded a `session_paused` event.

    This is a pure existence predicate; no timestamps are compared against
    `session_end` events. A session that was paused and later ended still
    counts as paused, and the caller (`cleanup_old_sessions`) then applies
    the extended paused TTL (`_PAUSED_SESSION_MAX_AGE_DAYS`).

    Keeping the predicate free of timestamp logic avoids two data-loss
    scenarios of an earlier timestamp-comparison form:
    - pause→quit race: `/PACT:pause` writes `session_paused`, then quitting
      fires `session_end` shortly after, so "end at-or-after pause" would
      wrongly classify the session as active.
    - equal-timestamp tie: journal timestamps have 1-second precision, so
      pause and end events can carry identical `ts` values.

    Fail-open: when `read_last_event_from` yields None (e.g. missing, empty,
    or unreadable journal), the session is treated as not paused and the
    caller applies the standard active-session TTL.

    Args:
        session_dir: Absolute path to the session directory.

    Returns:
        True iff a `session_paused` event exists in the session's journal.
    """
    last_pause = read_last_event_from(session_dir, "session_paused")
    return last_pause is not None
def cleanup_old_sessions(
    project_slug: str,
    current_session_id: str,
    sessions_dir: str | None = None,
    max_age_days: int = _SESSION_MAX_AGE_DAYS,
    paused_max_age_days: int = _PAUSED_SESSION_MAX_AGE_DAYS,
) -> None:
    """
    Remove stale session directories, applying a dual TTL.

    Each candidate session directory is checked against a TTL selected per
    entry: paused sessions (those whose journal contains any
    `session_paused` event) use the extended `paused_max_age_days`
    threshold (default 180 days), while active sessions use `max_age_days`
    (default 30 days). The extended threshold protects in-progress user
    work across the pause→resume workflow without retaining paused state
    forever.

    Best-effort cleanup: filesystem errors (OSError) are swallowed, both
    per entry and for the sweep as a whole. Skips symlinks, non-directories,
    entries whose name is not UUID-shaped, and the current session's
    directory.

    Args:
        project_slug: Project identifier (basename of project_dir).
        current_session_id: Current session's UUID (never deleted).
        sessions_dir: Override for base directory (testing).
        max_age_days: TTL for active sessions in days (default: 30).
        paused_max_age_days: TTL for paused sessions in days (default: 180).
            Exposed as a kwarg so tests can inject smaller values for
            boundary verification; production call sites use the default.
    """
    if not project_slug or not current_session_id:
        return

    if sessions_dir is None:
        sessions_dir = str(get_claude_config_dir() / "pact-sessions")
    slug_dir = Path(sessions_dir) / project_slug

    try:
        # The existence probe lives INSIDE the try: Path.exists() can
        # propagate OSErrors other than "not found" (e.g. a permission
        # error on a parent directory), which would otherwise escape this
        # best-effort function.
        if not slug_dir.exists():
            return

        for entry in slug_dir.iterdir():
            # Skip symlinks (live or dangling). is_symlink uses lstat
            # semantics and is checked before is_dir (which follows
            # symlinks), so a planted link can neither pin a target alive
            # nor leak mtime information about it.
            if entry.is_symlink():
                continue
            if not entry.is_dir():
                continue
            if not _UUID_PATTERN.match(entry.name):
                continue
            if entry.name == current_session_id:
                continue

            try:
                age_days = (time.time() - entry.stat().st_mtime) / 86400
                # Select TTL per entry: paused sessions get the extended
                # threshold; active sessions get the standard one.
                threshold = (
                    paused_max_age_days
                    if _is_paused_session(str(entry))
                    else max_age_days
                )
                if age_days > threshold:
                    shutil.rmtree(entry, ignore_errors=True)
            except OSError:
                continue
    except OSError:
        pass
def _dir_max_child_mtime(entry: Path, glob: str = "*.json") -> float | None:
    """
    Return the newest mtime among the children of `entry` matching `glob`.

    Shared age probe for the reapers:
    - the tasks reaper passes `glob="*.json"`, consulting only the
      per-task `{id}.json` files;
    - the teams reaper passes `glob="*"`, consulting every direct child
      (config.json, member subdirectories, any other sidecar).

    Why children rather than the directory itself: rewriting an existing
    file in place does not bump the parent directory's mtime — that only
    changes when entries are created, unlinked, or renamed. The newest
    child mtime is therefore the tighter bound on "when was anything in
    here last touched".

    Child mtimes are read with `lstat()` (the link's own mtime, never the
    target's), so a planted symlink child cannot pin the directory's
    effective age to an arbitrary file.

    Outcomes:
    - a positive child mtime was observed: return the maximum;
    - no child matched (or enumeration failed before yielding any):
      fall back to the directory's own `lstat().st_mtime`, so stale empty
      directories still age out;
    - children were seen but none yielded a usable mtime, or the fallback
      `lstat()` itself failed: return `None`, meaning "age cannot be
      determined". Callers MUST skip such entries instead of reaping.

    Never raises OSError; every filesystem probe is guarded.

    Args:
        entry: Directory to probe.
        glob: Glob pattern selecting which children to consult.

    Returns:
        Newest child mtime, or the directory's own mtime when no child
        matched, or `None` when the age cannot be determined.
    """
    newest = 0.0
    children_seen = 0

    try:
        for child in entry.glob(glob):
            children_seen += 1
            try:
                child_mtime = child.lstat().st_mtime
            except OSError:
                # Unreadable child: keep scanning the remaining ones.
                continue
            if child_mtime > newest:
                newest = child_mtime
    except OSError:
        # Enumeration failed part-way; use whatever was gathered so far.
        pass

    if newest > 0.0:
        return newest

    # Children exist but none could be observed: report "unknown" rather
    # than falling back to the parent mtime and risking a false reap.
    if children_seen:
        return None

    try:
        # lstat (not stat) keeps the helper safe even if a caller forgets
        # its own is_symlink guard.
        return entry.lstat().st_mtime
    except OSError:
        # Neither children nor the directory itself can be observed.
        return None
def cleanup_old_teams(
    current_team_name: str,
    teams_base_dir: str | None = None,
    max_age_days: int = _SESSION_MAX_AGE_DAYS,
) -> tuple[int, int]:
    """
    Remove stale team directories under ~/.claude/teams/ (issue #412 Fix B).

    Three defense layers:
    1. Name-pattern gate — only directories whose name matches
       `_TEAM_NAME_PATTERN` (a `pact-` prefix followed by lowercase hex
       digits and hyphens, strictly anchored at both ends) are candidates.
       Directories created by other tooling are out of scope:
       `~/.claude/teams/` is shared space, not PACT-owned space.
    2. Current-team skip — the entry matching `current_team_name`
       (compared case-insensitively) is never reaped.
    3. Fail-closed on empty `current_team_name` — returns (0, 0) without
       reaping anything, as a guard against a call-site bug.

    Age is probed via `_dir_max_child_mtime(entry, glob="*")` rather than
    the directory's own mtime: an in-place rewrite of a child (e.g.
    `config.json`) does not bump the parent's mtime, so walking all
    children gives a tighter bound on "when was this team dir last
    touched".

    Best-effort: OSError is swallowed per entry and for the sweep as a
    whole.

    Args:
        current_team_name: Current session's team_name from
            pact_context.get_team_name(). MUST be non-empty.
        teams_base_dir: Override for base directory (testing). Defaults
            to ~/.claude/teams.
        max_age_days: TTL in days (default: 30).

    Returns:
        (reaped, skipped). `reaped` counts directories the TTL predicate
        selected and passed to `shutil.rmtree(..., ignore_errors=True)`;
        because errors are ignored it is attempted deletions, NOT verified
        deletions. `skipped` counts entries whose age could not be
        determined (probe returned `None` or raised OSError).
    """
    if not current_team_name:
        return 0, 0

    if teams_base_dir is None:
        teams_base_dir = str(get_claude_config_dir() / "teams")
    base = Path(teams_base_dir)

    reaped = 0
    skipped = 0
    try:
        # The existence probe lives INSIDE the try: Path.exists() can
        # propagate OSErrors other than "not found" (e.g. a permission
        # error on a parent directory), which would otherwise escape this
        # best-effort function.
        if not base.exists():
            return 0, 0

        for entry in base.iterdir():
            # Skip symlinks (live or dangling). is_symlink uses lstat
            # semantics and is checked before is_dir (which follows
            # symlinks), so a planted link can neither pin a target alive
            # nor leak mtime information about it.
            if entry.is_symlink():
                continue
            if not entry.is_dir():
                continue
            # Name-shape gate: only touch PACT-shaped team dirs.
            if not _TEAM_NAME_PATTERN.match(entry.name):
                continue
            # Case-insensitive compare so casing drift in either the
            # on-disk name or the supplied name can never cause the
            # current session's dir to be reaped.
            if entry.name.lower() == current_team_name.lower():
                continue

            try:
                mtime = _dir_max_child_mtime(entry, glob="*")
                # `None` means "age cannot be determined" — count as
                # skipped and never fall through to the age calculation.
                if mtime is None:
                    skipped += 1
                    continue
                age_days = (time.time() - mtime) / 86400
                if age_days > max_age_days:
                    shutil.rmtree(entry, ignore_errors=True)
                    reaped += 1
            except OSError:
                skipped += 1
                continue
    except OSError:
        pass
    return reaped, skipped
def cleanup_old_tasks(
    skip_names: set[str],
    tasks_base_dir: str | None = None,
    max_age_days: int = _SESSION_MAX_AGE_DAYS,
) -> tuple[int, int]:
    """
    Remove stale task subdirectories under ~/.claude/tasks/ (issue #412 Fix B).

    Skips every entry whose name is in `skip_names`. Fails closed: returns
    (0, 0) if `skip_names` is empty or contains only blank strings, because
    running the reaper without a usable skip key could delete the current
    session's task directory.

    Per-entry age is probed via `_dir_max_child_mtime(entry, glob="*.json")`
    rather than the directory's own mtime, because rewriting an individual
    `{id}.json` file in place does not bump the parent directory's mtime.

    Best-effort: OSError is swallowed per entry and for the sweep as a
    whole.

    Args:
        skip_names: Set of current-session names to preserve. Must
            contain at least one non-blank entry. Caller assembles
            {team_name, task_list_id, session_id} filtering empties.
        tasks_base_dir: Override for base directory (testing). Defaults
            to ~/.claude/tasks.
        max_age_days: TTL in days (default: 30).

    Returns:
        (reaped, skipped). `reaped` counts directories passed to
        `shutil.rmtree(..., ignore_errors=True)` (attempted deletions, not
        verified deletions). `skipped` counts entries whose age could not
        be determined (probe returned `None` or raised OSError).
    """
    # Fail closed when there is no non-blank name to protect.
    if not any(skip_names):
        return 0, 0

    if tasks_base_dir is None:
        tasks_base_dir = str(get_claude_config_dir() / "tasks")
    base = Path(tasks_base_dir)

    reaped = 0
    skipped = 0
    try:
        # The existence probe lives INSIDE the try: Path.exists() can
        # propagate OSErrors other than "not found" (e.g. a permission
        # error on a parent directory), which would otherwise escape this
        # best-effort function.
        if not base.exists():
            return 0, 0

        for entry in base.iterdir():
            # Skip symlinks (live or dangling). is_symlink uses lstat
            # semantics and is checked before is_dir (which follows
            # symlinks), so a planted link can neither pin a target alive
            # nor leak mtime information about it.
            if entry.is_symlink():
                continue
            if not entry.is_dir():
                continue
            if entry.name in skip_names:
                continue

            try:
                mtime = _dir_max_child_mtime(entry, glob="*.json")
                # `None` means "age cannot be determined": skip rather
                # than risk a false reap.
                if mtime is None:
                    skipped += 1
                    continue
                age_days = (time.time() - mtime) / 86400
                if age_days > max_age_days:
                    shutil.rmtree(entry, ignore_errors=True)
                    reaped += 1
            except OSError:
                skipped += 1
                continue
    except OSError:
        pass
    return reaped, skipped
def _assemble_tasks_skip_set(
    team_name: str,
    task_list_id: str,
    session_id: str,
) -> set[str]:
    """
    Build the skip-set handed to `cleanup_old_tasks`.

    Three keys can address a `~/.claude/tasks/{name}/` directory, and all
    three pass through the same `is_safe_path_component` allowlist so no
    channel is trusted more than another:
    - `team_name` — the PACT team name (from pact_context.get_team_name()).
    - `task_list_id` — the `CLAUDE_CODE_TASK_LIST_ID` environment value,
      which is user-controlled.
    - `session_id` — the session id, used as the bare fallback key.

    A value that fails the allowlist is silently dropped. The skip-set is
    additive, so dropping an untrusted value only means relying on the keys
    that did pass, which is safer than trusting it as a path key. Empty
    strings are removed as well, so callers need not pre-filter.

    Takes only primitives and returns a deterministic set, which keeps it
    directly unit-testable without a session context.

    Args:
        team_name: Raw team_name from pact_context. May be empty.
        task_list_id: Raw CLAUDE_CODE_TASK_LIST_ID env var. May be empty.
        session_id: Raw session_id from pact_context. May be empty.

    Returns:
        The surviving names. A non-empty result means the tasks reaper is
        safe to run; an empty result means every channel was blank or
        rejected and the reaper must NOT run (fail-closed).
    """
    candidates = (team_name, task_list_id, session_id)
    skip_names = {name for name in candidates if is_safe_path_component(name)}
    skip_names.discard("")
    return skip_names
def _is_safe_team_segment(team: str) -> bool:
    """Return True iff ``team`` can be used as a single path component under
    the teams root without raising or escaping it.

    The ``@team`` half of a registry value is self-asserted, so it may be
    garbled or adversarial. Rejected values:

    - empty;
    - the traversal segments ``.`` and ``..``;
    - anything containing a C0 control character (including NUL) or DEL;
    - anything containing a path separator (``/`` or ``\\``).

    Never raises for string input.
    """
    if not team or team in (".", ".."):
        return False
    for ch in team:
        code = ord(ch)
        if code < 0x20 or code == 0x7f:
            return False
        if ch == "/" or ch == "\\":
            return False
    return True
def _prune_registry_dead_teams(
    registry_path: Path | None = None,
    teams_dir: Path | None = None,
) -> int:
    """Prune self-registration registry lines whose ``@team`` is no longer a
    live team directory under the teams root.

    The registry is a JSON-lines file. A line is KEPT only when it parses to
    an object whose ``"value"`` is a string containing ``@``, the part after
    the first ``@`` is a safe single path segment (``_is_safe_team_segment``),
    and ``teams_dir / team`` is an existing directory. Everything else
    (lines for reaped teams, malformed lines, lines with no ``@``, unsafe
    team segments) is dropped. Blank lines are discarded without being
    counted.

    Best-effort: never raises on bad data or filesystem errors.
    - An absent, symlinked, unreadable, or non-UTF-8 registry returns 0 and
      is left untouched. ``UnicodeDecodeError`` is a ``ValueError``, not an
      ``OSError``, so it is caught explicitly.
    - The team segment is validated BEFORE any path is built, so a garbled
      value (NUL byte, separator, ``..``) can neither raise nor escape the
      teams root.
    - The rewrite opens with ``O_NOFOLLOW`` where available, so a planted
      symlink at the path cannot redirect the write, and resets the mode to
      0o600 where ``os.fchmod`` is available.

    Args:
        registry_path: the registry file. Defaults to the shared
            get_registry_path().
        teams_dir: the live-teams root. Defaults to ~/.claude/teams.

    Returns:
        Number of lines pruned (0 if the file is absent or unreadable,
        nothing was stale, or the rewrite failed).
    """
    if registry_path is None:
        registry_path = _get_registry_path()
    if teams_dir is None:
        teams_dir = get_claude_config_dir() / "teams"

    try:
        if not registry_path.exists() or registry_path.is_symlink():
            return 0
        raw = registry_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError from a corrupt registry: a
        # file we cannot decode is left as-is rather than crashing the hook.
        return 0

    kept_lines: list[str] = []
    pruned = 0
    for line in raw.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        keep = False
        try:
            obj = json.loads(stripped)
            value = obj.get("value") if isinstance(obj, dict) else None
            if isinstance(value, str) and "@" in value:
                team = value.partition("@")[2]
                # Validate the self-asserted team BEFORE building a path;
                # is_dir() stays inside this try so an unexpected path
                # error drops the line instead of raising.
                if _is_safe_team_segment(team) and (teams_dir / team).is_dir():
                    keep = True
        except (ValueError, OSError):
            keep = False  # malformed line / path error → drop, never raise
        if keep:
            kept_lines.append(stripped)
        else:
            pruned += 1

    if pruned == 0:
        return 0  # nothing stale → leave the file untouched (no needless rewrite)

    try:
        nofollow = getattr(os, "O_NOFOLLOW", 0)
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | nofollow
        fd = os.open(str(registry_path), flags, 0o600)
        try:
            # O_CREAT's mode arg is a no-op when the file already exists, so
            # set 0o600 explicitly to preserve the permission on rewrite.
            # os.fchmod is not available on every platform; when missing,
            # skip it rather than raise AttributeError out of the prune.
            fchmod = getattr(os, "fchmod", None)
            if fchmod is not None:
                fchmod(fd, 0o600)
            payload = ("\n".join(kept_lines) + "\n") if kept_lines else ""
            os.write(fd, payload.encode("utf-8"))
        finally:
            os.close(fd)
    except OSError:
        return 0  # write race / symlink (ELOOP) → leave as-is, never raise
    return pruned
def _cleanup_old_checkpoints(
    checkpoint_dir: Path | None = None,
    max_age_days: int = _CHECKPOINT_MAX_AGE_DAYS,
) -> int:
    """
    Remove checkpoint files older than max_age_days from ~/.claude/pact-refresh/.

    Post-#413, the precompact_refresh.py hook that wrote these files is deleted,
    so this cleanup is primarily a one-time sweep for existing deployments —
    a directory that never gets written to will eventually empty.

    Best-effort: never raises on filesystem errors. Swallows per-file OSError
    (handles races), and any OSError from the directory existence probe or
    the glob itself (hook-fail-open invariant).

    Args:
        checkpoint_dir: Directory containing checkpoint files. Defaults to
            ~/.claude/pact-refresh. Accepts override for testing.
        max_age_days: TTL for checkpoint files, in days. Defaults to
            _CHECKPOINT_MAX_AGE_DAYS.

    Returns:
        Number of files cleaned up (0 if the directory is absent or cannot
        be probed).
    """
    if checkpoint_dir is None:
        checkpoint_dir = get_claude_config_dir() / "pact-refresh"

    # Files whose own mtime is strictly older than this timestamp are removed.
    cutoff_time = time.time() - max_age_days * 24 * 60 * 60
    cleaned = 0

    try:
        # The existence probe lives INSIDE the try: Path.exists() can
        # propagate OSError (for example PermissionError on an unsearchable
        # parent directory) rather than returning False, and that must not
        # escape a function whose contract is "never raises".
        if not checkpoint_dir.exists():
            return 0

        for checkpoint_file in checkpoint_dir.glob("*.json"):
            # Skip symlinks (live or dangling). `is_symlink()` uses lstat
            # semantics, so it short-circuits before any follow-semantic
            # probe. Prevents a planted link from driving the TTL decision
            # off the target's mtime, or from being unlinked on the
            # strength of the target's age.
            if checkpoint_file.is_symlink():
                continue
            try:
                # lstat (not stat): even with the symlink guard above,
                # lstat is the correct probe for the entry's own mtime
                # should that guard ever be removed or bypassed.
                mtime = checkpoint_file.lstat().st_mtime
                if mtime < cutoff_time:
                    checkpoint_file.unlink()
                    cleaned += 1
            except OSError:
                # Per-file failure (e.g. the file vanished between glob and
                # lstat/unlink): skip it and keep sweeping.
                pass
    except OSError:
        # Directory probe or listing failed: report what was cleaned so far.
        pass

    return cleaned
def main():
# SessionEnd hook entry point.
#
# Reads the hook payload as JSON from stdin (a payload that fails JSON
# decoding is treated as {}), initialises pact_context from it, and then,
# in order:
#   1. runs the un-paused-PR safety check and journals a `session_end`
#      event (carrying `warning=` only when the check returned one);
#   2. calls the session / teams / registry / tasks cleanup helpers;
#   3. journals a `cleanup_summary` event with the reaper counters;
#   4. sweeps old pact-refresh checkpoint files.
# Both exit paths call sys.exit(0): the normal path after printing
# _SUPPRESS_OUTPUT, the failure path (any Exception raised by the steps
# above) after printing a warning to stderr and hook_error_json(...) to
# stdout.
try:
try:
input_data = json.load(sys.stdin)
except json.JSONDecodeError:
# stdin did not contain valid JSON: continue with an empty payload.
input_data = {}
pact_context.init(input_data)
project_slug = get_project_slug()
# NOTE(review): `session_dir` is not read again in this function —
# confirm whether the call is kept for a side effect or is dead code.
session_dir = get_session_dir()
current_session_id = get_session_id()
# Safety-net: warn if open PR detected but pause-mode wasn't run.
# Returns a warning string (or None) so we can emit a single
# session_end event with an optional `warning=` field.
tasks = get_task_list()
warning = check_unpaused_pr(
tasks=tasks,
project_slug=project_slug,
)
# Write a single session_end event to the journal (best-effort).
# Wrapped in its own try/except so a journal failure does not skip
# the cleanup steps that follow.
try:
# Only pass `warning=` when there is one, so the event carries no
# empty/None warning field.
event_kwargs = {"warning": warning} if warning else {}
append_event(make_event("session_end", **event_kwargs))
except Exception as e:
print(f"Hook warning (session_end journal): {e}", file=sys.stderr)
# Clean up stale session directories (dual TTL: 30d active, 180d paused)
cleanup_old_sessions(
project_slug=project_slug,
current_session_id=current_session_id,
)
# Clean up stale ~/.claude/teams/ and ~/.claude/tasks/ (#412 Fix B).
# Callsite short-circuit on empty team_name is the belt-and-suspenders
# layer around the internal fail-closed guard.
current_team_name = get_team_name()
# Registry cleanup: SessionEnd also prunes the self-registration
# registry (~/.claude/pact-sessions/.teammate-registry.jsonl), dropping
# lines whose @team no longer has a live directory under ~/.claude/teams/.
# The registry grows one line per teammate per session; last-wins-on-read
# keeps stale lines harmless to correctness, but they accumulate, so the
# prune (after the teams reaper, so reaped teams are already gone) keeps
# the file bounded. Best-effort + fail-safe: a missing file / write race
# is swallowed and never blocks session termination. (Run AFTER
# cleanup_old_teams so a team reaped this run is also pruned here.)
#
# Counters (reaped, skipped) and ran-flags start in the "reaper did not
# run" state; they are overwritten only when the matching reaper is
# actually called below.
teams_r, teams_s = 0, 0
tasks_r, tasks_s = 0, 0
teams_reaper_ran = False
tasks_reaper_ran = False
if current_team_name:
teams_r, teams_s = cleanup_old_teams(
current_team_name=current_team_name,
)
teams_reaper_ran = True
# Registry prune described in the comment block above the counters; its
# return value is not recorded in the cleanup_summary event.
_prune_registry_dead_teams()
# Assemble skip-set via the module-level helper — see
# `_assemble_tasks_skip_set` for the full rationale on the three
# platform-key channels and the positive-regex allowlist. The
# helper takes only primitives so it's directly unit-testable
# without mocking the session context.
skip_names = _assemble_tasks_skip_set(
team_name=current_team_name,
task_list_id=os.environ.get("CLAUDE_CODE_TASK_LIST_ID", ""),
session_id=current_session_id or "",
)
# An empty skip-set short-circuits the tasks reaper at the callsite.
if skip_names:
tasks_r, tasks_s = cleanup_old_tasks(
skip_names=skip_names,
)
tasks_reaper_ran = True
# Best-effort audit record for the reapers. A journal write
# failure does not undo the cleanup that already happened.
# `teams_ran`/`tasks_ran` discriminate "reaper executed and
# found nothing" (True, 0/0) from "reaper short-circuited at
# callsite" (False, 0/0) per side — otherwise the two states
# are indistinguishable in the journal. Cycle-8 replaces the
# older single `reaper_ran` bool with per-reaper bools so an
# auditor can tell WHICH side short-circuited. Likewise
# `teams_ttl_days`/`tasks_ttl_days` replace the single
# `ttl_days` — currently both are reported as `_SESSION_MAX_AGE_DAYS`
# but the split future-proofs against TTL divergence (e.g. if
# the tasks reaper ever gets a dual-TTL like cleanup_old_sessions).
try:
append_event(make_event(
"cleanup_summary",
teams_reaped=teams_r,
teams_skipped=teams_s,
tasks_reaped=tasks_r,
tasks_skipped=tasks_s,
teams_ttl_days=_SESSION_MAX_AGE_DAYS,
tasks_ttl_days=_SESSION_MAX_AGE_DAYS,
teams_ran=teams_reaper_ran,
tasks_ran=tasks_reaper_ran,
))
except Exception as e:
print(f"Hook warning (cleanup_summary journal): {e}", file=sys.stderr)
# Clean up stale pact-refresh checkpoint files (7-day TTL).
# Post-#413, these accumulate only in legacy deployments.
_cleanup_old_checkpoints()
print(_SUPPRESS_OUTPUT)
# SystemExit is not an Exception subclass, so this exit is not
# intercepted by the `except Exception` handler below.
sys.exit(0)
except Exception as e:
# Fail-open: report the problem on stderr and as hook JSON on stdout,
# but still exit 0.
print(f"Hook warning (session_end): {e}", file=sys.stderr)
print(hook_error_json("session_end", e))
sys.exit(0)
# Run the hook only when this file is executed as a script, not when it is
# imported (e.g. by tests that call the helpers directly).
if __name__ == "__main__":
main()