Skip to content

Commit dc78f1f

Browse files
committed
fix(hud): session self-heal & stale state detection (Wave 1-B)
Resolves the bug where statusLine renders leftover fields from a prior session (e.g. sessionId="manual-fix", currentMode="ACT" when user is in PLAN mode) instead of the current state. New lib/hud_session.py module: - detect_stale_session(): 4-signal staleness check (empty / repair marker / stdin mismatch / age > 4h) - heal_stale_state(): soft reset, returns copy, doesn't write disk - reset_stale_session(): durable variant for session-start boot - SESSION_STALE_SECONDS constant (4 hours) codingbuddy-hud.py::main() heals state before rendering so every statusLine render sees a valid snapshot. 24 new tests in test_hud_session.py cover all 4 staleness signals, heal semantics (clear / preserve / non-mutation / empty state), and reset_stale_session edge cases (fresh noop, marker healed, missing file silent, malformed silent). 179/179 tests pass (Golden Rule 133 + Wave 0 22 + Wave 1-B 24). Closes #1468 Part of #1464 (Wave 0 statusbar refactor)
1 parent de622cc commit dc78f1f

3 files changed

Lines changed: 428 additions & 16 deletions

File tree

packages/claude-code-plugin/hooks/codingbuddy-hud.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,21 @@ def main():
480480
state_file = os.environ.get("CODINGBUDDY_HUD_STATE_FILE", DEFAULT_STATE_FILE)
481481
hud_state = read_state(state_file)
482482

483+
# Wave 1-B: self-heal stale state (e.g. manual-fix marker,
484+
# old timestamp, stdin session mismatch) before rendering so
485+
# the HUD never shows leftover fields from a prior session.
486+
try:
487+
from hud_session import detect_stale_session, heal_stale_state
488+
stdin_session_id = (
489+
stdin_data.get("session_id") if stdin_data else ""
490+
) or ""
491+
if detect_stale_session(
492+
hud_state, stdin_session_id=stdin_session_id
493+
):
494+
hud_state = heal_stale_state(hud_state)
495+
except Exception:
496+
pass # never block rendering on self-heal failure
497+
483498
env_agent = os.environ.get("CODINGBUDDY_ACTIVE_AGENT", "")
484499

485500
output = format_status_line(stdin_data, hud_state, env_agent)
Lines changed: 151 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,155 @@
1-
"""Session self-heal and stale state detection (#1326).
1+
"""Session self-heal and stale state detection (#1326, Wave 1-B).
22
3-
Wave 0 skeleton — reserved for **Wave 1-B**.
3+
Addresses the bug where ``hud-state.json`` retains stale fields from
4+
a previous session (e.g., ``sessionId="manual-fix"``, ``version="5.2.0"``)
5+
and the statusLine renders them as if they were current. This was the
6+
root cause of the bug report: "현재 PLAN 모드인데 ACT로 되어 있고" (roughly: "it is currently PLAN mode, but it shows ACT").
47
5-
Planned contents (Wave 1-B owner fills):
6-
* ``detect_stale_session(state: dict, *, now: datetime | None = None) -> bool``
7-
* ``reset_stale_session(state_file: str) -> None``
8-
* ``SESSION_STALE_SECONDS`` constant
8+
When Claude Code invokes statusLine, stdin carries the real session
9+
ID. If it does not match ``hud_state.sessionId``, the leftover state
10+
is a snapshot from a different session (or a manual edit) and must
11+
be healed before rendering. Additionally, any state older than
12+
``SESSION_STALE_SECONDS`` is treated as stale even without a stdin
13+
mismatch so abandoned sessions do not bleed into fresh ones.
914
10-
The current monolith embeds no session self-heal logic; Wave 1-B will
11-
introduce both the helpers and their call site in
12-
``codingbuddy-hud.format_status_line`` (or its Wave 1-D successor in
13-
``hud_layout``). This module exists as a placeholder so Wave 1-B can
14-
commit to its own sub-branch without racing other Wave workers to
15-
create the file.
15+
Healing is a *soft reset*: the cleared fields (currentMode, version,
16+
activeAgent, phase, focus, blockerCount) are overwritten in memory
17+
but the file on disk is not touched — that is the responsibility of
18+
``session-start.py`` or an explicit ``reset_stale_session()`` call.
1619
"""
20+
from __future__ import annotations
21+
22+
from datetime import datetime, timezone
23+
from typing import Any, Dict, Optional
24+
25+
# A session older than this is considered stale even when the session
# ID matches. Four hours covers lunch breaks and short meetings but
# catches overnight leftovers and manual edits from yesterday.
SESSION_STALE_SECONDS = 4 * 60 * 60  # 4 hours

# sessionId values that indicate a not-really-a-session state. Any
# match triggers an immediate heal regardless of other signals.
_REPAIR_MARKERS = frozenset({"", "manual-fix", "unknown", "none"})


def _as_utc(moment: datetime) -> datetime:
    """Return ``moment`` unchanged when tz-aware, otherwise tagged as UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


def detect_stale_session(
    state: Dict[str, Any],
    *,
    now: Optional[datetime] = None,
    stdin_session_id: str = "",
) -> bool:
    """Return True if ``state`` should be healed before rendering.

    An empty ``state`` is never stale (there is nothing to heal), so it
    returns False. Otherwise any one of the following marks it stale:

    1. ``state["sessionId"]`` is a repair marker (``""``,
       ``"manual-fix"``, ``"unknown"``, ``"none"``) or missing.
    2. ``stdin_session_id`` is non-empty and differs from
       ``state["sessionId"]``.
    3. The state's timestamp is older than
       :data:`SESSION_STALE_SECONDS`, or cannot be parsed. The
       timestamp is ``updatedAt`` when present, falling back to
       ``sessionStartTimestamp``. When neither is set the age check
       is skipped.

    Naive timestamps (both the stored one and ``now``) are interpreted
    as UTC, so mixing a naive clock override with an aware stored
    timestamp compares correctly instead of being reported as stale.
    A trailing ``Z`` designator is accepted on every Python version.

    Args:
        state: HUD state dict to inspect.
        now: Optional clock override for deterministic age tests.
            Defaults to ``datetime.now(timezone.utc)``.
        stdin_session_id: The current session id read from stdin.
            Empty string means "not available — skip mismatch check".

    Returns:
        True when the state is stale, False otherwise.
    """
    if not state:
        return False

    session_id = state.get("sessionId", "") or ""

    # Repair marker check
    if session_id in _REPAIR_MARKERS:
        return True

    # stdin mismatch check
    if stdin_session_id and session_id != stdin_session_id:
        return True

    # Age check — prefer `updatedAt` so long active sessions do not
    # falsely flag stale after SESSION_STALE_SECONDS. Fall back to
    # `sessionStartTimestamp` when `updatedAt` is absent.
    ts = state.get("updatedAt", "") or state.get("sessionStartTimestamp", "")
    if not ts:
        return False

    try:
        # `datetime.fromisoformat` only understands a trailing "Z" from
        # Python 3.11 on; normalise it so older interpreters do not
        # misreport a valid UTC timestamp as unparseable.
        if isinstance(ts, str) and ts[-1:] in ("Z", "z"):
            ts = ts[:-1] + "+00:00"
        recorded = _as_utc(datetime.fromisoformat(ts))
    except (ValueError, TypeError):
        # Unparseable (or non-string) timestamp => definitely stale
        return True

    # Normalise the clock as well: subtracting a naive datetime from an
    # aware one raises TypeError, which must not be mistaken for a bad
    # stored timestamp.
    current = _as_utc(now) if now is not None else datetime.now(timezone.utc)
    return (current - recorded).total_seconds() > SESSION_STALE_SECONDS
93+
94+
95+
def heal_stale_state(state: Dict[str, Any]) -> Dict[str, Any]:
    """Build a healed copy of ``state`` with per-session fields reset.

    The input dict is left untouched and nothing is written to disk;
    the result is a shallow copy meant to be handed straight to the
    renderer. Durable healing is left to :func:`reset_stale_session`
    or to the next session boot.

    Fields forced to a safe default:

    - ``currentMode``  → ``None``
    - ``version``      → ``""``
    - ``activeAgent``  → ``None``
    - ``phase``        → ``"ready"``
    - ``focus``        → ``None``
    - ``blockerCount`` → ``0``

    Everything else (``sessionId``, ``sessionStartTimestamp``, and any
    other key) is carried over unchanged.
    """
    safe_defaults: Dict[str, Any] = {
        "currentMode": None,
        "version": "",
        "activeAgent": None,
        "phase": "ready",
        "focus": None,
        "blockerCount": 0,
    }
    snapshot: Dict[str, Any] = dict(state)
    snapshot.update(safe_defaults)
    return snapshot
127+
128+
129+
def reset_stale_session(state_file: str) -> None:
    """Durably heal ``state_file`` when its contents are stale.

    Loads the state via ``hud_state.read_hud_state`` (without default
    filling), checks it with :func:`detect_stale_session`, and when it
    is stale writes the healed field values back through
    ``hud_state.update_hud_state``. A fresh state is left alone.

    This is best-effort by design: any exception (missing module,
    missing or malformed file, write failure) is swallowed so the
    caller is never blocked.
    """
    healed_fields = (
        "currentMode",
        "version",
        "activeAgent",
        "phase",
        "focus",
        "blockerCount",
    )
    try:
        from hud_state import read_hud_state, update_hud_state

        on_disk = read_hud_state(state_file, fill_defaults=False)
        if detect_stale_session(on_disk):
            healed = heal_stale_state(on_disk)
            # update_hud_state merges kwargs — only pass the fields we healed
            changes = {name: healed[name] for name in healed_fields}
            update_hud_state(state_file=state_file, **changes)
    except Exception:
        pass

0 commit comments

Comments
 (0)