|
1 | | -"""Session self-heal and stale state detection (#1326). |
| 1 | +"""Session self-heal and stale state detection (#1326, Wave 1-B). |
2 | 2 |
|
3 | | -Wave 0 skeleton — reserved for **Wave 1-B**. |
| 3 | +Addresses the bug where ``hud-state.json`` retains stale fields from |
| 4 | +a previous session (e.g., ``sessionId="manual-fix"``, ``version="5.2.0"``) |
| 5 | +and the statusLine renders them as if they were current. This was the |
| 6 | +root cause of the bug report: "현재 PLAN 모드인데 ACT로 되어 있고" ("it is currently PLAN mode, but it shows ACT"). |
4 | 7 |
|
5 | | -Planned contents (Wave 1-B owner fills): |
6 | | - * ``detect_stale_session(state: dict, *, now: datetime | None = None) -> bool`` |
7 | | - * ``reset_stale_session(state_file: str) -> None`` |
8 | | - * ``SESSION_STALE_SECONDS`` constant |
| 8 | +When Claude Code invokes statusLine, stdin carries the real session |
| 9 | +ID. If it does not match ``hud_state.sessionId``, the leftover state |
| 10 | +is a snapshot from a different session (or a manual edit) and must |
| 11 | +be healed before rendering. Additionally, any state older than |
| 12 | +``SESSION_STALE_SECONDS`` is treated as stale even without a stdin |
| 13 | +mismatch so abandoned sessions do not bleed into fresh ones. |
9 | 14 |
|
10 | | -The current monolith embeds no session self-heal logic; Wave 1-B will |
11 | | -introduce both the helpers and their call site in |
12 | | -``codingbuddy-hud.format_status_line`` (or its Wave 1-D successor in |
13 | | -``hud_layout``). This module exists as a placeholder so Wave 1-B can |
14 | | -commit to its own sub-branch without racing other Wave workers to |
15 | | -create the file. |
| 15 | +Healing is a *soft reset*: the cleared fields (currentMode, version, |
| 16 | +activeAgent, phase, focus, blockerCount) are overwritten in memory |
| 17 | +but the file on disk is not touched — that is the responsibility of |
| 18 | +``session-start.py`` or an explicit ``reset_stale_session()`` call. |
16 | 19 | """ |
| 20 | +from __future__ import annotations |
| 21 | + |
| 22 | +from datetime import datetime, timezone |
| 23 | +from typing import Any, Dict, Optional |
| 24 | + |
# Maximum age, in seconds, of the state's freshness timestamp before the
# state is considered stale even when the session ID matches. Four hours
# covers lunch breaks and short meetings but catches overnight leftovers
# and manual edits from yesterday.
SESSION_STALE_SECONDS = 4 * 60 * 60  # 4 hours

# sessionId values that indicate a not-really-a-session state. Any
# match triggers an immediate heal regardless of other signals.
_REPAIR_MARKERS = frozenset({"", "manual-fix", "unknown", "none"})


def detect_stale_session(
    state: Dict[str, Any],
    *,
    now: Optional[datetime] = None,
    stdin_session_id: str = "",
) -> bool:
    """Return True if ``state`` should be healed before rendering.

    An empty (or ``None``) ``state`` has nothing to heal and always
    returns False. Otherwise any one of the following marks it stale:

    1. ``state["sessionId"]`` is missing, falsy, or a repair marker
       (``""``, ``"manual-fix"``, ``"unknown"``, ``"none"``).
    2. ``stdin_session_id`` is non-empty and differs from
       ``state["sessionId"]``.
    3. The freshness timestamp -- ``updatedAt`` when present, otherwise
       ``sessionStartTimestamp`` -- is more than
       :data:`SESSION_STALE_SECONDS` old, or cannot be parsed.

    When neither timestamp key holds a truthy value the age check is
    skipped. A timestamp in the future yields a negative age and is
    therefore not stale.

    Args:
        state: HUD state dict to inspect.
        now: Optional clock override for deterministic age tests.
            Defaults to ``datetime.now(timezone.utc)``. A naive value
            is interpreted as UTC, matching how naive timestamps in
            ``state`` are treated.
        stdin_session_id: The caller's current session id. Empty
            string means "not available -- skip the mismatch check".

    Returns:
        True when the state is stale, False otherwise.
    """
    if not state:
        return False

    session_id = state.get("sessionId", "") or ""

    # (1) Repair marker check. Only strings can be markers; the
    # isinstance guard keeps an unhashable value (e.g. a list from a
    # hand-edited file) from raising inside the frozenset lookup.
    if isinstance(session_id, str) and session_id in _REPAIR_MARKERS:
        return True

    # (2) stdin mismatch check
    if stdin_session_id and session_id != stdin_session_id:
        return True

    # (3) Age check -- prefer `updatedAt` so long active sessions are
    # not flagged stale merely because they started long ago; fall
    # back to `sessionStartTimestamp` when `updatedAt` is absent.
    ts = state.get("updatedAt", "") or state.get("sessionStartTimestamp", "")
    if not ts:
        return False

    try:
        # ``datetime.fromisoformat`` rejects a trailing "Z" before
        # Python 3.11; rewrite it as an explicit UTC offset so such
        # timestamps parse on every supported interpreter.
        if ts.endswith("Z"):
            ts = ts[:-1] + "+00:00"
        stamp = datetime.fromisoformat(ts)
    except (AttributeError, ValueError, TypeError):
        # Non-string or unparseable timestamp => definitely stale
        return True

    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=timezone.utc)

    current = now if now is not None else datetime.now(timezone.utc)
    # Subtracting naive from aware datetimes raises TypeError, which
    # previously surfaced as a false "stale"; normalise to UTC first.
    if current.tzinfo is None:
        current = current.replace(tzinfo=timezone.utc)

    return (current - stamp).total_seconds() > SESSION_STALE_SECONDS
| 93 | + |
| 94 | + |
def heal_stale_state(state: Dict[str, Any]) -> Dict[str, Any]:
    """Return a *new* state dict with ephemeral fields reset to defaults.

    Does **not** mutate the input and does **not** write to disk. The
    copy is shallow: only the top-level keys listed below are replaced,
    every other key/value is carried over unchanged.

    Reset fields:

    - ``currentMode``  -> ``None``
    - ``version``      -> ``""``
    - ``activeAgent``  -> ``None``
    - ``phase``        -> ``"ready"``
    - ``focus``        -> ``None``
    - ``blockerCount`` -> ``0``

    NOTE(review): the original notes say the statusLine renders
    ``currentMode=None`` as "Ready" and that an empty ``version`` falls
    back to plugin.json; that lives in the renderer -- confirm there.

    Preserved fields:

    - ``sessionId`` and ``sessionStartTimestamp`` (kept for debugging
      and audit)
    - Any other field not listed above

    Args:
        state: The HUD state to heal. ``None`` or an empty dict is
            accepted and yields a dict containing only the defaults.

    Returns:
        A new dict; the caller's ``state`` object is left untouched.
    """
    # ``state or {}`` lets a missing state heal to pure defaults rather
    # than raising TypeError inside ``dict(None)``.
    healed: Dict[str, Any] = dict(state or {})
    healed.update(
        currentMode=None,
        version="",
        activeAgent=None,
        phase="ready",
        focus=None,
        blockerCount=0,
    )
    return healed
| 127 | + |
| 128 | + |
def reset_stale_session(state_file: str) -> None:
    """Persist a healed copy of ``state_file`` to disk, best-effort.

    Reads the current state with ``hud_state.read_hud_state``, runs
    :func:`detect_stale_session` on it (without a stdin session id, so
    only the repair-marker and age checks apply), and if it is stale
    writes the healed field values via ``hud_state.update_hud_state``.

    Intended for call sites that need durable healing (e.g., session
    boot). Never raises: any failure -- including ``hud_state`` being
    unimportable -- is swallowed so the caller is never blocked. The
    swallowed exception is recorded at DEBUG level so the failure can
    still be diagnosed when debug logging is enabled.

    Args:
        state_file: Path of the HUD state file, passed through to the
            ``hud_state`` helpers.
    """
    try:
        # Imported lazily and inside the ``try`` so an import failure
        # is treated like any other best-effort failure.
        from hud_state import read_hud_state, update_hud_state

        current = read_hud_state(state_file, fill_defaults=False)
        if not detect_stale_session(current):
            return
        healed = heal_stale_state(current)
        # update_hud_state merges kwargs (per the original author's
        # note) -- only pass the fields we healed.
        update_hud_state(
            state_file=state_file,
            currentMode=healed["currentMode"],
            version=healed["version"],
            activeAgent=healed["activeAgent"],
            phase=healed["phase"],
            focus=healed["focus"],
            blockerCount=healed["blockerCount"],
        )
    except Exception:
        # Deliberate best-effort: never propagate, but leave a trace.
        # DEBUG records are dropped under the default logging config,
        # so nothing is emitted unless debugging is switched on.
        import logging

        logging.getLogger(__name__).debug(
            "reset_stale_session(%r) skipped after error",
            state_file,
            exc_info=True,
        )
0 commit comments