From 792976d0432cfc64acc19a27c1ed3d8afa2b25cd Mon Sep 17 00:00:00 2001 From: JeremyDev87 Date: Sun, 5 Apr 2026 17:49:41 +0900 Subject: [PATCH] fix(plugin): replace misleading Cache:% status-bar metric with raw cache token display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Cache:XX% segment derived from context_window.current_usage only reflects the most recent API call, not session-wide cache efficiency. Users frequently misread it as cumulative cache hit rate. Replace compute_cache_hit_rate() with format_cache_segment() that renders raw token values (e.g. ♻2k/3.5k) with the following semantics: - numerator = cache_read_input_tokens - denominator = input_tokens + cache_creation_input_tokens + cache_read_input_tokens - values represent the latest API call, not session totals Also add format_compact_tokens() helper for k-suffix compact rendering (532 → 532, 1000 → 1k, 1500 → 1.5k, 128000 → 128k). Safe fallback: when current_usage is missing/null/zero, the cache segment is omitted entirely so the status line still renders without a broken slot. Test coverage (#1354): - format_cache_segment: 7 cases covering empty, null, input-only, partial, full, large-value k-format, and no-percent regression - format_status_line integration: 3 cases locking in the new output contract and guarding against Cache:% regression Closes #1355 Closes #1354 --- .../hooks/codingbuddy-hud.py | 58 +++++++-- packages/claude-code-plugin/tests/test_hud.py | 110 ++++++++++++++++-- 2 files changed, 145 insertions(+), 23 deletions(-) diff --git a/packages/claude-code-plugin/hooks/codingbuddy-hud.py b/packages/claude-code-plugin/hooks/codingbuddy-hud.py index 6dbe654d..aee7a302 100644 --- a/packages/claude-code-plugin/hooks/codingbuddy-hud.py +++ b/packages/claude-code-plugin/hooks/codingbuddy-hud.py @@ -131,20 +131,53 @@ def estimate_cost(model_id: str, context_window: dict) -> float: return input_cost + cache_write_cost + cache_read_cost + output_cost -def compute_cache_hit_rate(context_window: dict) -> float: - """Compute cache hit rate as percentage (0-100).""" - usage = context_window.get("current_usage", {}) +def format_compact_tokens(n: int) -> str: + """Format token count compactly for status-bar display. + + - < 1000 → raw integer (e.g. `532`) + - >= 1000 → `Nk` with one decimal trimmed of trailing `.0` (e.g. `1.5k`, `128k`) + """ + try: + value = int(n) + except (TypeError, ValueError): + return "0" + if value < 1000: + return str(value) + k = value / 1000.0 + # Trim trailing .0 for whole thousands + if k == int(k): + return f"{int(k)}k" + return f"{k:.1f}k" + + +def format_cache_segment(context_window: dict) -> str: + """Render the cache segment as raw tokens from the latest API call. + + IMPORTANT: `context_window.current_usage` from Claude Code stdin reflects + **only the most recent API call**, not cumulative session cache usage. + This helper therefore renders raw token counts (numerator/denominator) + rather than a percentage, which users tend to misread as session-wide + cache efficiency (#1355, #1356). + + Numerator = `cache_read_input_tokens` + Denominator = `input_tokens + cache_creation_input_tokens + cache_read_input_tokens` + + Returns an empty string when usage data is missing so the caller can + omit the segment entirely from the status line. + """ + usage = context_window.get("current_usage") if context_window else None if not usage: - return 0.0 + return "" - input_tokens = usage.get("input_tokens", 0) - cache_write = usage.get("cache_creation_input_tokens", 0) - cache_read = usage.get("cache_read_input_tokens", 0) + input_tokens = usage.get("input_tokens", 0) or 0 + cache_write = usage.get("cache_creation_input_tokens", 0) or 0 + cache_read = usage.get("cache_read_input_tokens", 0) or 0 total = input_tokens + cache_write + cache_read if total == 0: - return 0.0 - return (cache_read / total) * 100 + return "" + + return f"\u267b{format_compact_tokens(cache_read)}/{format_compact_tokens(total)}" def get_health(ctx_pct: float) -> str: @@ -378,7 +411,7 @@ def format_status_line( model_id, display_name = resolve_model_label(stdin_data) cost, is_exact = resolve_cost(stdin_data, model_id, ctx_window) duration = resolve_duration(stdin_data, hud_state) - cache = compute_cache_hit_rate(ctx_window) + cache_segment = format_cache_segment(ctx_window) agent = resolve_agent(stdin_data, hud_state, active_agent) cost_prefix = "$" if is_exact else "~$" @@ -390,9 +423,10 @@ def format_status_line( f"{mode_label} {health}", duration, f"{cost_prefix}{cost:.2f}", - f"Cache:{cache:.0f}%", - f"Ctx:{ctx_pct:.0f}%", ] + if cache_segment: + segments.append(cache_segment) + segments.append(f"Ctx:{ctx_pct:.0f}%") rl = format_rate_limits(stdin_data) if rl: diff --git a/packages/claude-code-plugin/tests/test_hud.py b/packages/claude-code-plugin/tests/test_hud.py index 9870492f..320ee4d3 100644 --- a/packages/claude-code-plugin/tests/test_hud.py +++ b/packages/claude-code-plugin/tests/test_hud.py @@ -96,34 +96,122 @@ def test_cache_reduces_cost(self): assert cost_with < cost_no -class TestCacheHitRate: - def test_no_cache(self): - assert hud.compute_cache_hit_rate({}) == 0.0 +class TestFormatCacheSegment: + """Tests for the raw cache token display (#1355). - def test_zero_tokens(self): + The status-bar cache segment reflects the most recent API call only, + not session-wide cache efficiency. It must render raw tokens instead + of a percentage to avoid misleading users. + """ + + def test_no_context_window(self): + """Empty context window → safe fallback (empty string).""" + assert hud.format_cache_segment({}) == "" + + def test_null_current_usage(self): + """current_usage missing → safe fallback (empty string).""" + assert hud.format_cache_segment({"current_usage": None}) == "" + assert hud.format_cache_segment({"current_usage": {}}) == "" + + def test_input_tokens_only_no_cache_read(self): + """input_tokens > 0 with no cache read → 0/total.""" ctx = {"current_usage": { - "input_tokens": 0, + "input_tokens": 1000, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0, }} - assert hud.compute_cache_hit_rate(ctx) == 0.0 + result = hud.format_cache_segment(ctx) + assert "0/1k" in result - def test_partial_cache(self): + def test_partial_cache_read(self): + """Partial cache read → raw numerator/denominator.""" ctx = {"current_usage": { "input_tokens": 500, "cache_creation_input_tokens": 200, "cache_read_input_tokens": 800, }} - rate = hud.compute_cache_hit_rate(ctx) - assert 53 < rate < 54 # 800/1500 = 53.3% + result = hud.format_cache_segment(ctx) + # numerator=800, denominator=500+200+800=1500 + assert "800" in result + assert "1500" in result or "1.5k" in result - def test_full_cache(self): + def test_full_cache_read_shows_raw_not_100pct(self): + """Full cache read → shows raw tokens, NOT `100%`.""" ctx = {"current_usage": { "input_tokens": 0, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 1000, }} - assert hud.compute_cache_hit_rate(ctx) == 100.0 + result = hud.format_cache_segment(ctx) + assert "100%" not in result + assert "1k/1k" in result + + def test_large_values_use_k_format(self): + """Large values compact as `Nk`.""" + ctx = {"current_usage": { + "input_tokens": 50000, + "cache_creation_input_tokens": 78000, + "cache_read_input_tokens": 128000, + }} + result = hud.format_cache_segment(ctx) + # numerator=128000 → 128k, denominator=256000 → 256k + assert "128k" in result + assert "256k" in result + + def test_regression_no_percent_in_output(self): + """REGRESSION: Cache segment must never render `%`.""" + ctx = {"current_usage": { + "input_tokens": 500, + "cache_creation_input_tokens": 200, + "cache_read_input_tokens": 800, + }} + result = hud.format_cache_segment(ctx) + assert "%" not in result + + +class TestFormatStatusLineCacheSegment: + """Integration: final status-line output includes raw cache segment (#1354).""" + + _NO_PLUGINS = "/tmp/_nonexistent_plugins_.json" + + def test_status_line_no_longer_contains_cache_percent(self): + """REGRESSION: `Cache:XX%` must never appear in format_status_line output.""" + stdin = { + "context_window": { + "used_percentage": 45, + "current_usage": { + "input_tokens": 1000, + "cache_creation_input_tokens": 500, + "cache_read_input_tokens": 2000, + }, + }, + } + result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS) + assert "Cache:" not in result + assert "%" in result # Ctx:45% is still a percentage — only cache changes + + def test_status_line_contains_raw_cache_tokens(self): + """format_status_line renders raw cache token segment.""" + stdin = { + "context_window": { + "used_percentage": 45, + "current_usage": { + "input_tokens": 1000, + "cache_creation_input_tokens": 500, + "cache_read_input_tokens": 2000, + }, + }, + } + result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS) + # cache_read=2000 → 2k, total=3500 → 3.5k + assert "2k/3.5k" in result + + def test_status_line_hides_cache_when_usage_absent(self): + """Missing current_usage → cache segment is hidden, status line still renders.""" + stdin = {"context_window": {"used_percentage": 10}} + result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS) + assert "Cache:" not in result + assert "Ctx:10%" in result # other segments still present class TestHealth: