#!/bin/bash
# KV Cache Quality Benchmark — reproducible verification
#
# Checks whether the turbo_kv_4b / turbo_kv_3b / turbo_kv_1b KV cache types
# produce greedy (temperature 0) output that is byte-identical to the
# uniform_4b baseline, then prints speed and KV-memory figures per type.
#
# Run: bash bench/kv_quality_bench.sh <model.tqm>
#
# Requirements: built tq_run binary in build/
#
# Exit status: number of comparisons that diverged (0 = all identical,
# capped at 255). 1 is also returned when the binary or model is missing or
# a tq_run invocation fails.

set -euo pipefail

readonly MODEL="${1:-model.tqm}"
readonly TQ_RUN="./build/tq_run"
readonly THREADS=6
readonly RESULTS_DIR="bench/kv_quality_results"
readonly TOKENS_PER_PROMPT=100

# Every candidate type is compared against the baseline type.
readonly BASELINE_KV="uniform_4b"
readonly CANDIDATE_KVS=(turbo_kv_4b turbo_kv_3b turbo_kv_1b)
readonly ALL_KVS=("$BASELINE_KV" "${CANDIDATE_KVS[@]}")

# Test prompts covering diverse capabilities
readonly PROMPTS=(
  "1+1="
  "The capital of France is"
  "The capital of Japan is"
  "Water boils at"
  "The sun rises in the"
  "Write a Python function to reverse a string:"
  "If a train travels 60 miles in 1 hour, how far does it travel in 3 hours?"
  "Explain how a computer works to a 5-year-old child."
  "List the planets in our solar system:"
  "Once upon a time, in a faraway land,"
)

readonly RULE="============================================================"

# Scratch file holding the raw output of the current tq_run invocation.
RAW_LOG=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# EXIT trap: remove the scratch file.
cleanup() {
  if [[ -n "$RAW_LOG" ]]; then
    rm -f "$RAW_LOG"
  fi
}

#######################################
# Run tq_run once and store the text between the '---' marker lines.
# Globals:   TQ_RUN, MODEL, THREADS, TOKENS_PER_PROMPT, RAW_LOG (read)
# Arguments: $1 - prompt, $2 - KV cache type, $3 - output file
# Outputs:   writes the extracted text to $3; diagnostics to stderr
# Returns:   0 on success; exits 1 if tq_run fails or prints no marker
#######################################
generate_output() {
  local prompt=$1 kv=$2 outfile=$3

  # stderr is merged in because the original pipeline did so; whether the
  # markers are printed on stdout or stderr is not visible from this script.
  if ! "$TQ_RUN" "$MODEL" -p "$prompt" -j "$THREADS" \
      -n "$TOKENS_PER_PROMPT" -T 0.0 -k "$kv" > "$RAW_LOG" 2>&1; then
    tail -n 5 "$RAW_LOG" >&2
    die "Error: $TQ_RUN failed (kv=$kv, prompt: $prompt)"
  fi

  # Without a marker the extraction below yields an empty file for every
  # type, and empty files would all compare as identical.
  if ! grep -q -e '^---$' "$RAW_LOG"; then
    die "Error: no '---' marker in $TQ_RUN output (kv=$kv, prompt: $prompt)"
  fi

  # Keep the marker-delimited range, then drop the marker lines themselves.
  sed -n '/^---$/,/^---$/p' "$RAW_LOG" | tail -n +2 | sed '$d' > "$outfile"
}

#######################################
# Classify how a candidate output relates to the baseline output.
# Arguments: $1 - baseline file, $2 - candidate file
# Outputs:   MATCH, PREFIX (cmp reported no differing byte, e.g. one file
#            is a prefix of the other) or DIFF@<n>B (first differing byte)
# Returns:   always 0
#######################################
compare_outputs() {
  local baseline=$1 candidate=$2 offset

  if cmp -s "$baseline" "$candidate"; then
    printf 'MATCH\n'
    return 0
  fi

  # GNU cmp prints "byte N", POSIX/BSD cmp prints "char N"; accept both.
  # cmp exits non-zero here by design, so keep errexit/pipefail out of it.
  offset=$(cmp "$baseline" "$candidate" 2>/dev/null \
    | sed -nE '1s/.* (byte|char) ([0-9]+),.*/\2/p') || true

  if [[ -z "$offset" ]]; then
    printf 'PREFIX\n'
  else
    printf 'DIFF@%sB\n' "$offset"
  fi
}

#######################################
# Entry point: generate outputs, compare them, benchmark speed, summarize.
# Globals:   all configuration constants (read), RAW_LOG (written)
# Outputs:   report on stdout, per-run files and summary.csv in RESULTS_DIR
#######################################
main() {
  local total_tests=${#PROMPTS[@]}
  local total_comparisons=$(( total_tests * ${#CANDIDATE_KVS[@]} ))
  local pass=0 diverged=0
  local idx prompt kv label row output speed per_token ratio
  local dq='"'
  local csv="$RESULTS_DIR/summary.csv"
  local -a csv_rows=()

  if [[ ! -f "$TQ_RUN" ]]; then
    die "Error: $TQ_RUN not found. Build first: cmake --build build"
  fi
  if [[ ! -f "$MODEL" ]]; then
    printf '%s\n' "Error: Model not found: $MODEL" >&2
    die "Usage: bash bench/kv_quality_bench.sh <model.tqm>"
  fi

  mkdir -p "$RESULTS_DIR"
  RAW_LOG=$(mktemp "${TMPDIR:-/tmp}/kv_quality_bench.XXXXXX")
  trap cleanup EXIT

  echo "$RULE"
  echo " TurboQuant KV Cache Quality Benchmark"
  echo "$RULE"
  echo ""
  echo "Model:     $MODEL"
  echo "Threads:   $THREADS"
  echo "Tokens:    $TOKENS_PER_PROMPT per prompt"
  echo "Prompts:   $total_tests"
  echo "KV types:  ${ALL_KVS[*]}"
  echo "Mode:      greedy (temperature=0)"
  echo ""
  echo "$RULE"
  echo ""

  # Phase 1: generate outputs for every prompt / KV type combination.
  echo "[Phase 1] Generating outputs..."
  for idx in "${!PROMPTS[@]}"; do
    prompt=${PROMPTS[$idx]}
    printf '  [%2d/%d] %s\n' "$(( idx + 1 ))" "$total_tests" "$prompt"
    for kv in "${ALL_KVS[@]}"; do
      generate_output "$prompt" "$kv" "$RESULTS_DIR/p${idx}_${kv}.txt"
    done
  done

  echo ""
  echo "[Phase 2] Comparing outputs..."
  echo ""

  # Phase 2: compare every candidate type against the baseline. The CSV
  # rows are recorded here so the files are compared only once.
  printf '%-45s %-12s %-12s %-12s\n' "Prompt" "vs 4b" "vs 3b" "vs 1b"
  printf '%-45s %-12s %-12s %-12s\n' "-----" "------" "------" "------"

  for idx in "${!PROMPTS[@]}"; do
    prompt=${PROMPTS[$idx]}
    # CSV-quote the prompt: wrap in quotes and double any embedded quote.
    row="$idx,$dq${prompt//$dq/$dq$dq}$dq"

    printf '%-45s' "${prompt:0:42}"
    for kv in "${CANDIDATE_KVS[@]}"; do
      label=$(compare_outputs \
        "$RESULTS_DIR/p${idx}_${BASELINE_KV}.txt" \
        "$RESULTS_DIR/p${idx}_${kv}.txt")
      printf ' %-12s' "$label"
      if [[ "$label" == "MATCH" ]]; then
        pass=$(( pass + 1 ))
        row="$row,MATCH"
      else
        diverged=$(( diverged + 1 ))
        row="$row,DIFF"
      fi
    done
    printf '\n'
    csv_rows+=("$row")
  done

  echo ""
  echo "$RULE"

  # Phase 3: speed benchmark.
  echo ""
  echo "[Phase 3] Speed benchmark ($TOKENS_PER_PROMPT tokens)..."
  echo ""
  printf '%-15s %10s %12s %15s\n' "KV Type" "tok/s" "KV/token" "Compression"
  printf '%-15s %10s %12s %15s\n' "-------" "-----" "--------" "-----------"

  for kv in "${ALL_KVS[@]}"; do
    output=$("$TQ_RUN" "$MODEL" -p "Hello world, this is a test." \
      -j "$THREADS" -n "$TOKENS_PER_PROMPT" -T 0.0 -k "$kv" -M 2>&1) \
      || die "Error: $TQ_RUN failed during speed benchmark (kv=$kv)"

    # A missing metric must not abort the run: grep exits 1 on no match,
    # which errexit/pipefail would otherwise turn into a silent exit.
    speed=$(grep 'tok/s' <<<"$output" | tail -n 1 \
      | grep -o '[0-9]*\.[0-9]* tok/s' | head -n 1) || true
    per_token=$(grep 'Per-token KV' <<<"$output" | head -n 1 \
      | grep -o '[0-9]*\.[0-9]* KB' | head -n 1) || true
    ratio=$(grep 'Compression' <<<"$output" \
      | grep -o '[0-9]*\.[0-9]*x' | head -n 1) || true

    printf '%-15s %10s %12s %15s\n' \
      "$kv" "${speed:-n/a}" "${per_token:-n/a}" "${ratio:-n/a}"
  done

  echo ""
  echo "$RULE"
  echo ""

  # Summary
  echo "  Quality: $pass/$total_comparisons byte-identical matches"
  if (( diverged > 0 )); then
    echo "  WARNING: $diverged divergences detected!"
    echo "  Check $RESULTS_DIR/ for details."
  else
    echo "  ALL OUTPUTS BYTE-IDENTICAL across all KV types."
  fi
  echo ""
  echo "  Results saved to: $RESULTS_DIR/"
  echo ""

  # Write CSV summary
  {
    printf '%s\n' "prompt_idx,prompt,uniform_4b_vs_turbo_4b,uniform_4b_vs_turbo_3b,uniform_4b_vs_turbo_1b"
    printf '%s\n' "${csv_rows[@]}"
  } > "$csv"
  echo "  CSV: $csv"

  # Exit statuses wrap modulo 256; cap so a large count can never read as 0.
  if (( diverged > 255 )); then
    diverged=255
  fi
  exit "$diverged"
}

main "$@"