Skip to content

Commit 792976d

Browse files
committed
fix(plugin): replace misleading Cache:% status-bar metric with raw cache token display
The `Cache:XX%` segment is derived from `context_window.current_usage`, which reflects only the most recent API call, not session-wide cache efficiency. Users frequently misread it as a cumulative cache hit rate.

Replace `compute_cache_hit_rate()` with `format_cache_segment()`, which renders raw token values (e.g. ♻2k/3.5k) with the following semantics:
- numerator = cache_read_input_tokens
- denominator = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
- values represent the latest API call, not session totals

Also add a `format_compact_tokens()` helper for k-suffix compact rendering (532 → 532, 1000 → 1k, 1500 → 1.5k, 128000 → 128k).

Safe fallback: when current_usage is missing, null, or zero, the cache segment is omitted entirely so the status line still renders without a broken slot.

Test coverage (#1354):
- format_cache_segment: 7 cases covering empty, null, input-only, partial, full, large-value k-format, and no-percent regression
- format_status_line integration: 3 cases locking in the new output contract and guarding against Cache:% regression

Closes #1355
Closes #1354
1 parent 8078a7a commit 792976d

2 files changed

Lines changed: 145 additions & 23 deletions

File tree

packages/claude-code-plugin/hooks/codingbuddy-hud.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -131,20 +131,53 @@ def estimate_cost(model_id: str, context_window: dict) -> float:
131131
return input_cost + cache_write_cost + cache_read_cost + output_cost
132132

133133

134-
def compute_cache_hit_rate(context_window: dict) -> float:
135-
"""Compute cache hit rate as percentage (0-100)."""
136-
usage = context_window.get("current_usage", {})
134+
def format_compact_tokens(n: int) -> str:
    """Format token count compactly for status-bar display.

    - < 1000 → raw integer (e.g. `532`)
    - >= 1000 → `Nk` with one decimal trimmed of trailing `.0` (e.g. `1.5k`, `128k`)

    Values that cannot be converted to an integer (``None``, non-numeric
    strings, NaN, infinity) render as ``"0"`` so the status line never breaks.
    """
    try:
        value = int(n)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); NaN raises ValueError.
        return "0"
    if value < 1000:
        return str(value)
    # Round to one decimal FIRST, then trim a trailing ".0". Checking for a
    # whole number before rounding would leave values such as 1999 rendered
    # as "2.0k" instead of "2k".
    scaled = f"{value / 1000:.1f}"
    if scaled.endswith(".0"):
        scaled = scaled[:-2]
    return f"{scaled}k"
151+
152+
153+
def format_cache_segment(context_window: dict) -> str:
154+
"""Render the cache segment as raw tokens from the latest API call.
155+
156+
IMPORTANT: `context_window.current_usage` from Claude Code stdin reflects
157+
**only the most recent API call**, not cumulative session cache usage.
158+
This helper therefore renders raw token counts (numerator/denominator)
159+
rather than a percentage, which users tend to misread as session-wide
160+
cache efficiency (#1355, #1356).
161+
162+
Numerator = `cache_read_input_tokens`
163+
Denominator = `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`
164+
165+
Returns an empty string when usage data is missing so the caller can
166+
omit the segment entirely from the status line.
167+
"""
168+
usage = context_window.get("current_usage") if context_window else None
137169
if not usage:
138-
return 0.0
170+
return ""
139171

140-
input_tokens = usage.get("input_tokens", 0)
141-
cache_write = usage.get("cache_creation_input_tokens", 0)
142-
cache_read = usage.get("cache_read_input_tokens", 0)
172+
input_tokens = usage.get("input_tokens", 0) or 0
173+
cache_write = usage.get("cache_creation_input_tokens", 0) or 0
174+
cache_read = usage.get("cache_read_input_tokens", 0) or 0
143175
total = input_tokens + cache_write + cache_read
144176

145177
if total == 0:
146-
return 0.0
147-
return (cache_read / total) * 100
178+
return ""
179+
180+
return f"\u267b{format_compact_tokens(cache_read)}/{format_compact_tokens(total)}"
148181

149182

150183
def get_health(ctx_pct: float) -> str:
@@ -378,7 +411,7 @@ def format_status_line(
378411
model_id, display_name = resolve_model_label(stdin_data)
379412
cost, is_exact = resolve_cost(stdin_data, model_id, ctx_window)
380413
duration = resolve_duration(stdin_data, hud_state)
381-
cache = compute_cache_hit_rate(ctx_window)
414+
cache_segment = format_cache_segment(ctx_window)
382415
agent = resolve_agent(stdin_data, hud_state, active_agent)
383416

384417
cost_prefix = "$" if is_exact else "~$"
@@ -390,9 +423,10 @@ def format_status_line(
390423
f"{mode_label} {health}",
391424
duration,
392425
f"{cost_prefix}{cost:.2f}",
393-
f"Cache:{cache:.0f}%",
394-
f"Ctx:{ctx_pct:.0f}%",
395426
]
427+
if cache_segment:
428+
segments.append(cache_segment)
429+
segments.append(f"Ctx:{ctx_pct:.0f}%")
396430

397431
rl = format_rate_limits(stdin_data)
398432
if rl:

packages/claude-code-plugin/tests/test_hud.py

Lines changed: 99 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,34 +96,122 @@ def test_cache_reduces_cost(self):
9696
assert cost_with < cost_no
9797

9898

99-
class TestCacheHitRate:
100-
def test_no_cache(self):
101-
assert hud.compute_cache_hit_rate({}) == 0.0
99+
class TestFormatCacheSegment:
100+
"""Tests for the raw cache token display (#1355).
102101
103-
def test_zero_tokens(self):
102+
The status-bar cache segment reflects the most recent API call only,
103+
not session-wide cache efficiency. It must render raw tokens instead
104+
of a percentage to avoid misleading users.
105+
"""
106+
107+
def test_no_context_window(self):
108+
"""Empty context window → safe fallback (empty string)."""
109+
assert hud.format_cache_segment({}) == ""
110+
111+
def test_null_current_usage(self):
112+
"""current_usage missing → safe fallback (empty string)."""
113+
assert hud.format_cache_segment({"current_usage": None}) == ""
114+
assert hud.format_cache_segment({"current_usage": {}}) == ""
115+
116+
def test_input_tokens_only_no_cache_read(self):
117+
"""input_tokens > 0 with no cache read → 0/total."""
104118
ctx = {"current_usage": {
105-
"input_tokens": 0,
119+
"input_tokens": 1000,
106120
"cache_creation_input_tokens": 0,
107121
"cache_read_input_tokens": 0,
108122
}}
109-
assert hud.compute_cache_hit_rate(ctx) == 0.0
123+
result = hud.format_cache_segment(ctx)
124+
assert "0/1k" in result
110125

111-
def test_partial_cache(self):
126+
def test_partial_cache_read(self):
127+
"""Partial cache read → raw numerator/denominator."""
112128
ctx = {"current_usage": {
113129
"input_tokens": 500,
114130
"cache_creation_input_tokens": 200,
115131
"cache_read_input_tokens": 800,
116132
}}
117-
rate = hud.compute_cache_hit_rate(ctx)
118-
assert 53 < rate < 54 # 800/1500 = 53.3%
133+
result = hud.format_cache_segment(ctx)
134+
# numerator=800, denominator=500+200+800=1500
135+
assert "800" in result
136+
assert "1500" in result or "1.5k" in result
119137

120-
def test_full_cache(self):
138+
def test_full_cache_read_shows_raw_not_100pct(self):
139+
"""Full cache read → shows raw tokens, NOT `100%`."""
121140
ctx = {"current_usage": {
122141
"input_tokens": 0,
123142
"cache_creation_input_tokens": 0,
124143
"cache_read_input_tokens": 1000,
125144
}}
126-
assert hud.compute_cache_hit_rate(ctx) == 100.0
145+
result = hud.format_cache_segment(ctx)
146+
assert "100%" not in result
147+
assert "1k/1k" in result
148+
149+
def test_large_values_use_k_format(self):
150+
"""Large values compact as `Nk`."""
151+
ctx = {"current_usage": {
152+
"input_tokens": 50000,
153+
"cache_creation_input_tokens": 78000,
154+
"cache_read_input_tokens": 128000,
155+
}}
156+
result = hud.format_cache_segment(ctx)
157+
# numerator=128000 → 128k, denominator=256000 → 256k
158+
assert "128k" in result
159+
assert "256k" in result
160+
161+
def test_regression_no_percent_in_output(self):
162+
"""REGRESSION: Cache segment must never render `%`."""
163+
ctx = {"current_usage": {
164+
"input_tokens": 500,
165+
"cache_creation_input_tokens": 200,
166+
"cache_read_input_tokens": 800,
167+
}}
168+
result = hud.format_cache_segment(ctx)
169+
assert "%" not in result
170+
171+
172+
class TestFormatStatusLineCacheSegment:
173+
"""Integration: final status-line output includes raw cache segment (#1354)."""
174+
175+
_NO_PLUGINS = "/tmp/_nonexistent_plugins_.json"
176+
177+
def test_status_line_no_longer_contains_cache_percent(self):
178+
"""REGRESSION: `Cache:XX%` must never appear in format_status_line output."""
179+
stdin = {
180+
"context_window": {
181+
"used_percentage": 45,
182+
"current_usage": {
183+
"input_tokens": 1000,
184+
"cache_creation_input_tokens": 500,
185+
"cache_read_input_tokens": 2000,
186+
},
187+
},
188+
}
189+
result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
190+
assert "Cache:" not in result
191+
assert "%" in result # Ctx:45% is still a percentage — only cache changes
192+
193+
def test_status_line_contains_raw_cache_tokens(self):
194+
"""format_status_line renders raw cache token segment."""
195+
stdin = {
196+
"context_window": {
197+
"used_percentage": 45,
198+
"current_usage": {
199+
"input_tokens": 1000,
200+
"cache_creation_input_tokens": 500,
201+
"cache_read_input_tokens": 2000,
202+
},
203+
},
204+
}
205+
result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
206+
# cache_read=2000 → 2k, total=3500 → 3.5k
207+
assert "2k/3.5k" in result
208+
209+
def test_status_line_hides_cache_when_usage_absent(self):
210+
"""Missing current_usage → cache segment is hidden, status line still renders."""
211+
stdin = {"context_window": {"used_percentage": 10}}
212+
result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
213+
assert "Cache:" not in result
214+
assert "Ctx:10%" in result # other segments still present
127215

128216

129217
class TestHealth:

0 commit comments

Comments
 (0)