Skip to content

Commit 05afaeb

Browse files
TheTom and claude committed
experiments/rht-k-sweep: final screencap polish
Two reviewer tweaks:

1. Tighten column whitespace so percent values sit visibly inside their own cell ("+141.7% " with trailing pad), not dangling next to the following column. Eliminates any ambiguity at the % label.

2. "Application to this KV-cache setup fails" -> "Extra-RHT application fails for this KV-cache setup". Even more precise: the theorem itself does not fail; the prescription of adding extra RHTs fails on this workload/codebook.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 60c3e20 commit 05afaeb

1 file changed

Lines changed: 11 additions & 8 deletions

File tree

experiments/rht_k_sweep/screencap.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,22 +128,25 @@ def dpct(new, old):
128128

129129

130130
print(f"{B}Result{R}")
131-
print(f" {'k_extra':<10}{'post-kurt':<14}{'KS':<10}{'MSE':<14}{'Δ MSE %':<11}{'KL mean':<14}{'Δ KL %':<11}{'catastrophic':<14}")
132-
print(f" {DIM}{'-'*98}{R}")
133-
def pct(v): # format as e.g. "+141.7%" right-padded
131+
def pct(v):
134132
return f"{v:+.1f}%"
135133

136-
print(f" {GREEN}{'0 baseline':<10}{R} {kurt0:<+12.3f}{kspos0:<10.3f}{m0:<14.3e}{'—':<11}{kl0.mean():<14.3e}{'—':<11}{GREEN}{c0:<14.1%}{R}")
137-
print(f" {RED}{'1 +1 RHT':<10}{R} {kurt1:<+12.3f}{kspos1:<10.3f}{m1:<14.3e}{RED}{pct(dpct(m1,m0)):<11}{R}{kl1.mean():<14.3e}{RED}{pct(dpct(kl1.mean(),kl0.mean())):<11}{R}{RED}{B}{c1:<14.1%}{R}")
138-
print(f" {RED}{'2 +2 RHT':<10}{R} {kurt2:<+12.3f}{kspos2:<10.3f}{m2:<14.3e}{RED}{pct(dpct(m2,m0)):<11}{R}{kl2.mean():<14.3e}{RED}{pct(dpct(kl2.mean(),kl0.mean())):<11}{R}{RED}{c2:<14.1%}{R}")
134+
# Column widths chosen so % values sit visibly inside their own cell with
135+
# clear whitespace before the next column. Easier to read at screencap zoom.
136+
print(f" {'k_extra':<13}{'post-kurt':<13}{'KS':<10}{'MSE':<14}{'Δ MSE %':<13}{'KL mean':<14}{'Δ KL %':<13}{'catastrophic':<14}")
137+
print(f" {DIM}{'-'*104}{R}")
138+
print(f" {GREEN}{'0 baseline':<13}{R}{kurt0:<+13.3f}{kspos0:<10.3f}{m0:<14.3e}{'—':<13}{kl0.mean():<14.3e}{'—':<13}{GREEN}{c0:<14.1%}{R}")
139+
print(f" {RED}{'1 +1 RHT':<13}{R}{kurt1:<+13.3f}{kspos1:<10.3f}{m1:<14.3e}{RED}{pct(dpct(m1,m0)):<13}{R}{kl1.mean():<14.3e}{RED}{pct(dpct(kl1.mean(),kl0.mean())):<13}{R}{RED}{B}{c1:<14.1%}{R}")
140+
print(f" {RED}{'2 +2 RHT':<13}{R}{kurt2:<+13.3f}{kspos2:<10.3f}{m2:<14.3e}{RED}{pct(dpct(m2,m0)):<13}{R}{kl2.mean():<14.3e}{RED}{pct(dpct(kl2.mean(),kl0.mean())):<13}{R}{RED}{c2:<14.1%}{R}")
139141
print()
140142
print(f" {B}{RED}catastrophic rate: {c0:.1%}{c1:.1%}{R} {DIM}(per-query KL > 1.10 × baseline median){R}")
141143
print()
142144
print(f"{B}Mechanism{R}")
143145
print(f" Consistent with theorem direction: marginal moves toward Gaussian/URR target")
144146
print(f" (kurt {kurt0:+.2f}{kurt1:+.2f}, KS {kspos0:.3f}{kspos1:.3f}).")
145-
print(f" Application to this KV-cache setup fails: production turbo4 centroids extend to")
146-
print(f" ±0.174 ≈ ±2σ, matching the real post-WHT K shape: bounded / sub-Gaussian.")
147+
print(f" Extra-RHT application fails for this KV-cache setup: production turbo4")
148+
print(f" centroids extend to ±0.174 ≈ ±2σ, matching the real post-WHT K shape:")
149+
print(f" bounded / sub-Gaussian.")
147150
print(f" +RHT Gaussianizes the marginal → mass past ±2σ → saturation at the codebook")
148151
print(f" extreme → 100% catastrophic on the attention-softmax KL proxy.")
149152
print()

0 commit comments

Comments
 (0)