Skip to content

Commit 1e3d7a2

Browse files
TheTom and claude
committed
experiments/rht-k-sweep: tweet-screencap script
One-shot script that prints the production-codebook result in a visually-formatted terminal output for X attachment. ANSI colors: green baseline row, red +1/+2 RHT rows, bold-red emphasis on the 25.0% -> 100.0% catastrophic rate line. Self-contained, runs in ~5s on M5 Max. No GPU, no model load. Matches production_codebook.py numbers exactly. python3 experiments/rht_k_sweep/screencap.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 21c1fc7 commit 1e3d7a2

1 file changed

Lines changed: 147 additions & 0 deletions

File tree

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
"""Clean terminal output for screenshot. Production codebook only,
2+
formatted for visual impact."""
3+
4+
import numpy as np
5+
from scipy import linalg, stats
6+
7+
# ANSI SGR escape sequences used to colour and weight the terminal output.
def _sgr(code):
    """Return the ANSI escape sequence selecting graphic rendition ``code``."""
    return f"\033[{code}m"


R = _sgr(0)        # reset all attributes
B = _sgr(1)        # bold
RED = _sgr(91)     # bright red foreground
GREEN = _sgr(92)   # bright green foreground
YELLOW = _sgr(93)  # bright yellow foreground
DIM = _sgr(2)      # faint
CYAN = _sgr(96)    # bright cyan foreground
15+
16+
# Experiment dimensions.
d = 128          # columns of the key matrix (and length of each query)
T = 512          # rows of the key matrix
n_trials = 5     # repetitions per configuration
n_queries = 64   # query vectors drawn per trial
# Target per-element standard deviation, i.e. variance 1/d.
sigma = 1.0 / np.sqrt(d)
18+
19+
# Production turbo4 codebook (ggml/src/ggml-cuda/turbo-quant.cuh).
# Sixteen ascending levels, symmetric about zero; kept as explicit literals
# so they can be compared against the CUDA header verbatim.
_PROD_NEGATIVE = (
    -0.173926, -0.117195, -0.089527, -0.068756,
    -0.051262, -0.035597, -0.020989, -0.006938,
)
_PROD_POSITIVE = (
    0.006938, 0.020989, 0.035597, 0.051262,
    0.068756, 0.089527, 0.117195, 0.173926,
)
PROD = np.array(_PROD_NEGATIVE + _PROD_POSITIVE)
26+
27+
28+
def gen_K(T, d, rng, nf):
    """Draw a synthetic ``(T, d)`` matrix of random signs plus Gaussian noise.

    Args:
        T: number of rows.
        d: number of columns.
        rng: random generator exposing ``choice`` and ``normal``.
        nf: standard deviation of the additive Gaussian noise.

    Returns:
        The sign-plus-noise matrix rescaled so that its overall standard
        deviation equals the module-level ``sigma``.
    """
    shape = (T, d)
    # Draw order matters for reproducibility: signs first, then noise.
    signs = rng.choice([-1.0, 1.0], size=shape)
    jitter = rng.normal(0.0, nf, size=shape)
    mixture = signs + jitter
    scale = sigma / np.sqrt(np.var(mixture))
    return mixture * scale
33+
34+
35+
# Hadamard matrix of order d, scaled by 1/sqrt(d).
H = np.divide(linalg.hadamard(d), np.sqrt(d))
36+
37+
38+
def apply_rht(X, k, seed):
    """Apply ``k`` successive randomized Hadamard transforms to ``X``.

    Each round multiplies the columns of the running result by a fresh random
    ±1 vector of length ``d`` and then right-multiplies by ``H.T``.

    Args:
        X: input matrix; it is copied, not modified.
        k: number of rounds to apply.
        seed: seed for the generator that draws the sign vectors.

    Returns:
        A tuple ``(Y, ss)`` of the transformed matrix and the list of sign
        vectors in the order they were applied.
    """
    rng = np.random.default_rng(seed)
    sign_vectors = []
    out = X.copy()
    for _ in range(k):
        flips = rng.choice([-1.0, 1.0], size=d).astype(np.float64)
        sign_vectors.append(flips)
        out = (out * flips) @ H.T
    return out, sign_vectors
46+
47+
48+
def invert_rht(Y, ss):
    """Undo a sequence of randomized Hadamard transforms.

    Args:
        Y: transformed matrix; it is copied, not modified.
        ss: sign vectors in the order they were originally applied.

    Returns:
        The matrix obtained by walking ``ss`` backwards, right-multiplying by
        ``H`` and then multiplying by the sign vector at each step.
    """
    restored = Y.copy()
    # Last transform applied is the first one undone.
    for flips in ss[::-1]:
        restored = (restored @ H) * flips
    return restored
53+
54+
55+
def quantize_prod(X, cb):
    """Quantize each row of ``X`` to the nearest codebook level after L2 normalisation.

    Args:
        X: 2-D array; every row is normalised by its own L2 norm.
        cb: 1-D array of codebook levels.

    Returns:
        Array shaped like ``X`` in which every normalised element has been
        replaced by its closest level in ``cb`` and multiplied back by the
        row norm.
    """
    row_norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Floor the norm so all-zero rows do not divide by zero.
    row_norms = np.maximum(row_norms, 1e-12)
    unit = (X / row_norms).reshape(-1, 1)
    nearest = np.abs(unit - cb[None, :]).argmin(axis=1)
    return cb[nearest].reshape(X.shape) * row_norms
60+
61+
62+
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so large inputs
    do not overflow.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)
66+
67+
68+
def attn_kl(K_ref, K_test, Q):
    """Per-query KL divergence between two softmax attention distributions.

    Args:
        K_ref: reference keys, defining distribution ``p``.
        K_test: test keys, defining distribution ``q``.
        Q: queries, one per row.

    Returns:
        One value per query: ``sum(p * (log p - log q))`` over the last axis,
        where both distributions are ``softmax(Q @ K.T / sqrt(d))``.
    """
    scale = np.sqrt(d)
    eps = 1e-12  # keeps the logarithms finite when a probability underflows
    p = softmax(Q @ K_ref.T / scale)
    q = softmax(Q @ K_test.T / scale)
    log_ratio = np.log(p + eps) - np.log(q + eps)
    return (p * log_ratio).sum(axis=-1)
74+
75+
76+
# Source keys: fixed seed 42 and noise level 0.38, tuned to the §3 layer-0 stats.
K_orig = gen_K(T, d, np.random.default_rng(42), 0.38)
_K_orig_flat = K_orig.flatten()
# Excess kurtosis and KS statistic against N(0, sigma) of the source keys.
k0 = stats.kurtosis(_K_orig_flat)
ks0 = stats.kstest(_K_orig_flat, 'norm', args=(0, sigma))[0]
79+
80+
81+
def run(k_extra):
    """Quantize ``K_orig`` after ``k_extra`` extra RHT rounds and score the result.

    For each of ``n_trials`` trials the keys are transformed (or just copied
    when ``k_extra`` is 0), quantized with the ``PROD`` codebook, transformed
    back, and compared with ``K_orig`` by mean squared error and by per-query
    attention KL on freshly drawn Gaussian queries.

    Args:
        k_extra: number of RHT rounds applied before quantization.

    Returns:
        A tuple ``(mse, kl, kurt, ks)``: the mean MSE over trials, the
        concatenated per-query KL values of all trials, and the kurtosis and
        KS statistic of the transformed keys measured on the first trial only.
    """
    mse_per_trial = []
    kl_per_trial = []
    first_kurt = None
    first_ks = None
    for trial in range(n_trials):
        # Distinct seed per (configuration, trial) pair.
        seed = 2000 + 100 * (k_extra + 1) + trial
        if k_extra != 0:
            rotated, signs = apply_rht(K_orig, k_extra, seed)
        else:
            rotated, signs = K_orig.copy(), []
        if trial == 0:
            flat = rotated.flatten()
            first_kurt = stats.kurtosis(flat)
            first_ks = stats.kstest(flat, 'norm', args=(0, sigma))[0]
        quantized = quantize_prod(rotated, PROD)
        restored = invert_rht(quantized, signs) if signs else quantized
        mse_per_trial.append(np.mean((restored - K_orig) ** 2))
        # Queries come from their own generator, offset from the trial seed.
        query_rng = np.random.default_rng(seed + 50000)
        queries = query_rng.normal(0.0, sigma, size=(n_queries, d))
        kl_per_trial.append(attn_kl(K_orig, restored, queries))
    return (
        float(np.mean(mse_per_trial)),
        np.concatenate(kl_per_trial),
        first_kurt,
        first_ks,
    )
100+
101+
102+
# Title and test-setup section; an empty entry prints a blank line.
_banner_lines = (
    "",
    f"{B}{CYAN}Basat 2026 (arxiv:2605.06014v1) — RHT-count prescription for KV cache{R}",
    f"{DIM}claim: more RHTs → Gaussian-marginal-recovery → better quantization{R}",
    f"{DIM}application cited: TurboQuant KV-cache compression{R}",
    "",
    f"{B}Test setup{R}",
    f" source K : sub-Gaussian, kurt={k0:+.3f}, KS-vs-N(0,1/d)={ks0:.3f}",
    f" {DIM}(matched to §3 layer-0 K of why-mse-fails-for-kv-quantization){R}",
    " codebook : production TURBO_CENTROIDS_4BIT, 16 levels, ±0.174",
    f" {DIM}(from ggml/src/ggml-cuda/turbo-quant.cuh — ships in fork){R}",
    " block norm : per-128-element L2 (QK_TURBO4 = 128 = head_dim)",
    f" attn KL proxy : softmax(Q K^T / √d), Q ~ N(0, 1/d), {n_queries*n_trials} queries",
    "",
)
for _line in _banner_lines:
    print(_line)
115+
116+
# Baseline (no extra RHT) followed by one and two extra RHT rounds.
(
    (m0, kl0, kurt0, kspos0),
    (m1, kl1, kurt1, kspos1),
    (m2, kl2, kurt2, kspos2),
) = (run(k) for k in (0, 1, 2))

# A query counts as catastrophic when its KL reaches 1.10x the baseline median.
base_med = float(np.median(kl0))
_catastrophic_threshold = 1.10 * base_med
c0, c1, c2 = (
    float(np.mean(kl >= _catastrophic_threshold)) for kl in (kl0, kl1, kl2)
)
124+
125+
126+
def dpct(new, old):
    """Return the percentage change of ``new`` relative to ``old``."""
    ratio = new / old
    return (ratio - 1) * 100
128+
129+
130+
# Result table. Column widths after the single leading space are
# 10 / 14 / 10 / 14 / 10 / 14 / 10 / 14 = 96, matching the dashed rule.
print(f"{B}Result{R}")
print(f" {'k_extra':<10}{'post-kurt':<14}{'KS':<10}{'MSE':<14}{'Δ MSE':<10}{'KL mean':<14}{'Δ KL':<10}{'catastrophic':<14}")
print(f" {DIM}{'-'*96}{R}")
# In the data rows the kurtosis field is one spacer plus 13 characters so it
# fills the same 14 columns as the 'post-kurt' heading; a narrower field
# shifts every later column one character left of its heading.
print(f" {GREEN}{'0 baseline':<10}{R} {kurt0:<+13.3f}{kspos0:<10.3f}{m0:<14.3e}{'—':<10}{kl0.mean():<14.3e}{'—':<10}{GREEN}{c0:<14.1%}{R}")
print(f" {RED}{'1 +1 RHT':<10}{R} {kurt1:<+13.3f}{kspos1:<10.3f}{m1:<14.3e}{RED}{dpct(m1,m0):<+10.1f}{R}{kl1.mean():<14.3e}{RED}{dpct(kl1.mean(),kl0.mean()):<+10.1f}{R}{RED}{B}{c1:<14.1%}{R}")
print(f" {RED}{'2 +2 RHT':<10}{R} {kurt2:<+13.3f}{kspos2:<10.3f}{m2:<14.3e}{RED}{dpct(m2,m0):<+10.1f}{R}{kl2.mean():<14.3e}{RED}{dpct(kl2.mean(),kl0.mean()):<+10.1f}{R}{RED}{c2:<14.1%}{R}")
print()
# Headline line: baseline rate and +1 RHT rate, separated by an arrow so the
# two percentages do not run together.
print(f" {B}{RED}catastrophic rate: {c0:.1%} → {c1:.1%}{R} {DIM}(per-query KL > 1.10 × baseline median){R}")
print()
# Narrative explanation of the result.
print(f"{B}Mechanism{R}")
print(f" Theorem holds: kurt drifts from {kurt0:+.2f} to {kurt1:+.2f} after 1 extra RHT, as proven.")
print(f" Application fails: production turbo4 centroids extend to ±0.174 ≈ ±2σ because")
print(f" real K post-WHT is bounded sub-Gaussian. +RHT Gaussianizes K → mass past ±2σ →")
print(f" saturation at the codebook extreme → 100% catastrophic.")
print()
# Reproduction pointers.
print(f"{DIM}repro: experiments/rht_k_sweep/screencap.py (pure numpy/scipy, ~5s, no GPU){R}")
print(f"{DIM}branch: experiment/rht-k-sweep on TheTom/llama-cpp-turboquant fork (local){R}")
print()

0 commit comments

Comments
 (0)