Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions packages/claude-code-plugin/hooks/codingbuddy-hud.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,20 +131,53 @@ def estimate_cost(model_id: str, context_window: dict) -> float:
return input_cost + cache_write_cost + cache_read_cost + output_cost


def compute_cache_hit_rate(context_window: dict) -> float:
"""Compute cache hit rate as percentage (0-100)."""
usage = context_window.get("current_usage", {})
def format_compact_tokens(n: int) -> str:
    """Format token count compactly for status-bar display.

    - < 1000 → raw integer (e.g. `532`)
    - >= 1000 → `Nk` with at most one decimal; a trailing `.0` is trimmed
      *after* rounding (e.g. `1.5k`, `128k`, and `1999` → `2k`)

    Args:
        n: Token count. Anything `int()` accepts is tolerated.

    Returns:
        The compact label, or `"0"` when `n` cannot be converted to an
        integer (None, non-numeric strings, NaN, infinities).
    """
    try:
        value = int(n)
    except (TypeError, ValueError, OverflowError):
        # int(float("inf")) raises OverflowError; treat it like other junk.
        return "0"
    if value < 1000:
        return str(value)
    # Round first, then trim. Checking the unrounded quotient for wholeness
    # lets values such as 1999 (1.999 → "2.0") leak out as "2.0k".
    scaled = f"{value / 1000:.1f}"
    if scaled.endswith(".0"):
        scaled = scaled[:-2]
    return f"{scaled}k"


def format_cache_segment(context_window: dict) -> str:
    """Render the cache segment as raw tokens from the latest API call.

    IMPORTANT: `context_window.current_usage` from Claude Code stdin reflects
    **only the most recent API call**, not cumulative session cache usage.
    This helper therefore renders raw token counts (numerator/denominator)
    rather than a percentage, which users tend to misread as session-wide
    cache efficiency (#1355, #1356).

    Numerator   = `cache_read_input_tokens`
    Denominator = `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`

    Args:
        context_window: The `context_window` object from stdin. May be empty,
            None, or malformed; none of those raise.

    Returns:
        The rendered segment (recycle glyph + `read/total`), or an empty
        string when usage data is missing, malformed, or sums to zero, so the
        caller can omit the segment entirely from the status line.
    """
    usage = (
        context_window.get("current_usage")
        if isinstance(context_window, dict)
        else None
    )
    # A non-dict payload (string, list, ...) is treated like missing data
    # instead of blowing up on `.get`.
    if not usage or not isinstance(usage, dict):
        return ""

    def _count(key: str) -> int:
        # Missing, null, or non-numeric fields count as zero so one bad
        # field cannot take down the whole status line.
        try:
            return int(usage.get(key) or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    input_tokens = _count("input_tokens")
    cache_write = _count("cache_creation_input_tokens")
    cache_read = _count("cache_read_input_tokens")
    total = input_tokens + cache_write + cache_read

    if total == 0:
        return ""

    return f"\u267b{format_compact_tokens(cache_read)}/{format_compact_tokens(total)}"


def get_health(ctx_pct: float) -> str:
Expand Down Expand Up @@ -378,7 +411,7 @@ def format_status_line(
model_id, display_name = resolve_model_label(stdin_data)
cost, is_exact = resolve_cost(stdin_data, model_id, ctx_window)
duration = resolve_duration(stdin_data, hud_state)
cache = compute_cache_hit_rate(ctx_window)
cache_segment = format_cache_segment(ctx_window)
agent = resolve_agent(stdin_data, hud_state, active_agent)

cost_prefix = "$" if is_exact else "~$"
Expand All @@ -390,9 +423,10 @@ def format_status_line(
f"{mode_label} {health}",
duration,
f"{cost_prefix}{cost:.2f}",
f"Cache:{cache:.0f}%",
f"Ctx:{ctx_pct:.0f}%",
]
if cache_segment:
segments.append(cache_segment)
segments.append(f"Ctx:{ctx_pct:.0f}%")

rl = format_rate_limits(stdin_data)
if rl:
Expand Down
110 changes: 99 additions & 11 deletions packages/claude-code-plugin/tests/test_hud.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,34 +96,122 @@ def test_cache_reduces_cost(self):
assert cost_with < cost_no


class TestCacheHitRate:
def test_no_cache(self):
assert hud.compute_cache_hit_rate({}) == 0.0
class TestFormatCacheSegment:
    """Tests for the raw cache token display (#1355).

    The status-bar cache segment reflects the most recent API call only,
    not session-wide cache efficiency. It must render raw tokens instead
    of a percentage to avoid misleading users.

    Expected values are pinned to the exact rendered segment (recycle glyph
    + `read/total`) so that a wrong numerator, denominator, or separator
    cannot slip past a loose substring check.
    """

    def test_no_context_window(self):
        """Empty context window → safe fallback (empty string)."""
        assert hud.format_cache_segment({}) == ""

    def test_null_current_usage(self):
        """current_usage missing → safe fallback (empty string)."""
        assert hud.format_cache_segment({"current_usage": None}) == ""
        assert hud.format_cache_segment({"current_usage": {}}) == ""

    def test_input_tokens_only_no_cache_read(self):
        """input_tokens > 0 with no cache read → 0/total."""
        ctx = {"current_usage": {
            "input_tokens": 1000,
            "cache_creation_input_tokens": 0,
            "cache_read_input_tokens": 0,
        }}
        assert hud.format_cache_segment(ctx) == "\u267b0/1k"

    def test_partial_cache_read(self):
        """Partial cache read → raw numerator/denominator."""
        ctx = {"current_usage": {
            "input_tokens": 500,
            "cache_creation_input_tokens": 200,
            "cache_read_input_tokens": 800,
        }}
        # numerator=800, denominator=500+200+800=1500 → 1.5k
        assert hud.format_cache_segment(ctx) == "\u267b800/1.5k"

    def test_full_cache_read_shows_raw_not_100pct(self):
        """Full cache read → shows raw tokens, NOT `100%`."""
        ctx = {"current_usage": {
            "input_tokens": 0,
            "cache_creation_input_tokens": 0,
            "cache_read_input_tokens": 1000,
        }}
        result = hud.format_cache_segment(ctx)
        assert "100%" not in result
        assert result == "\u267b1k/1k"

    def test_large_values_use_k_format(self):
        """Large values compact as `Nk`."""
        ctx = {"current_usage": {
            "input_tokens": 50000,
            "cache_creation_input_tokens": 78000,
            "cache_read_input_tokens": 128000,
        }}
        # numerator=128000 → 128k, denominator=256000 → 256k
        assert hud.format_cache_segment(ctx) == "\u267b128k/256k"

    def test_regression_no_percent_in_output(self):
        """REGRESSION: Cache segment must never render `%`."""
        ctx = {"current_usage": {
            "input_tokens": 500,
            "cache_creation_input_tokens": 200,
            "cache_read_input_tokens": 800,
        }}
        result = hud.format_cache_segment(ctx)
        assert "%" not in result


class TestFormatStatusLineCacheSegment:
    """Integration: final status-line output includes raw cache segment (#1354)."""

    # Path that is expected not to exist, passed as `plugins_file` to every
    # `format_status_line` call in this class.
    _NO_PLUGINS = "/tmp/_nonexistent_plugins_.json"

    def test_status_line_no_longer_contains_cache_percent(self):
        """REGRESSION: `Cache:XX%` must never appear in format_status_line output."""
        stdin = {
            "context_window": {
                "used_percentage": 45,
                "current_usage": {
                    "input_tokens": 1000,
                    "cache_creation_input_tokens": 500,
                    "cache_read_input_tokens": 2000,
                },
            },
        }
        result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
        assert "Cache:" not in result
        assert "%" in result  # Ctx:45% is still a percentage — only cache changes

    def test_status_line_contains_raw_cache_tokens(self):
        """format_status_line renders raw cache token segment."""
        stdin = {
            "context_window": {
                "used_percentage": 45,
                "current_usage": {
                    "input_tokens": 1000,
                    "cache_creation_input_tokens": 500,
                    "cache_read_input_tokens": 2000,
                },
            },
        }
        result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
        # cache_read=2000 → 2k, total=3500 → 3.5k; include the glyph so the
        # match cannot be satisfied by some unrelated segment.
        assert "\u267b2k/3.5k" in result

    def test_status_line_hides_cache_when_usage_absent(self):
        """Missing current_usage → cache segment is hidden, status line still renders."""
        stdin = {"context_window": {"used_percentage": 10}}
        result = hud.format_status_line(stdin, {}, plugins_file=self._NO_PLUGINS)
        assert "Cache:" not in result
        # "Cache:" is never emitted by the new format, so the check above
        # cannot fail on its own; the glyph is what actually marks the
        # segment. NOTE(review): assumes no other segment uses the recycle
        # glyph — confirm against format_status_line.
        assert "\u267b" not in result
        assert "Ctx:10%" in result  # other segments still present


class TestHealth:
Expand Down
Loading