|
| 1 | +#!/usr/bin/env bash |
| 2 | +# |
| 3 | +# perf-bench.sh — A/B performance benchmark for cburn. |
| 4 | +# |
| 5 | +# Builds two binaries: |
| 6 | +# baseline — from the current git HEAD (or a ref you specify) |
| 7 | +# candidate — from your working tree (uncommitted changes and all) |
| 8 | +# |
| 9 | +# For each cache mode (normal, --no-output-cache, --no-cache), it first |
| 10 | +# checks that both binaries produce identical output, then benchmarks |
| 11 | +# them head-to-head with hyperfine. |
| 12 | +# |
| 13 | +# Usage: |
| 14 | +# ./perf-bench.sh # 50 warmup, 100 runs |
| 15 | +# ./perf-bench.sh --warmup 5 --runs 20 # quick iteration |
| 16 | +# ./perf-bench.sh --providers "claude codex" # just those two |
| 17 | +# ./perf-bench.sh --label "simd-json" # tag the results |
| 18 | +# ./perf-bench.sh --baseline-ref v1.2.0 # compare against a tag |
| 19 | + |
set -euo pipefail

# Absolute directory containing this script; every other path is derived from it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ── defaults ──────────────────────────────────────────────────────────
WARMUP=50                 # passed to hyperfine as --warmup
RUNS=100                  # passed to hyperfine as --runs
PROVIDERS="all claude codex opencode cursor copilot pi"   # space-separated; benchmarked one by one
PERIOD="30days"           # forwarded to `report --period`
LABEL=""                  # optional tag appended to result file names
BASELINE_REF="HEAD"       # git ref the baseline binary is built from
SKIP_BUILD=0              # 1 = reuse binaries left by a previous run

# ── parse args ────────────────────────────────────────────────────────

# die <message...> — print the message on stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# need_value <flag> <remaining-argc> — abort with a readable message when a
# value-taking flag is the last word on the command line. Without this check
# `set -u` kills the script with a bare "$2: unbound variable".
need_value() {
  [ "$2" -ge 2 ] || die "error: $1 requires a value"
}

# parse_args <args...> — fill the globals above from the command line.
# Exits 0 after printing help, exits 1 on an unknown flag, a missing value,
# or a non-numeric --warmup / --runs.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --warmup)       need_value "$1" $#; WARMUP="$2";       shift 2 ;;
      --runs)         need_value "$1" $#; RUNS="$2";         shift 2 ;;
      --providers)    need_value "$1" $#; PROVIDERS="$2";    shift 2 ;;
      --period)       need_value "$1" $#; PERIOD="$2";       shift 2 ;;
      --label)        need_value "$1" $#; LABEL="$2";        shift 2 ;;
      --baseline-ref) need_value "$1" $#; BASELINE_REF="$2"; shift 2 ;;
      --skip-build)   SKIP_BUILD=1; shift ;;
      -h|--help)
        # Print the header comment (line 3 up to the first blank line) with
        # the leading "# " stripped. The ';' before '}' is required by BSD
        # sed (macOS); GNU sed accepts both forms.
        sed -n '3,/^$/{ s/^# //; s/^#//; p; }' "$0"
        exit 0 ;;
      *) echo "unknown flag: $1" >&2; exit 1 ;;
    esac
  done

  # Fail fast: hyperfine would reject these anyway, but only after both
  # release builds have already been paid for.
  case "$WARMUP" in
    ''|*[!0-9]*) die "error: --warmup expects a non-negative integer, got '$WARMUP'" ;;
  esac
  case "$RUNS" in
    ''|*[!0-9]*) die "error: --runs expects a non-negative integer, got '$RUNS'" ;;
  esac
}

parse_args "$@"
| 49 | + |
# ── preflight ─────────────────────────────────────────────────────────
# Verify every external tool up front so a missing one is reported before
# the (slow) release builds instead of aborting the run half-way through.
if ! command -v hyperfine >/dev/null; then
  echo "error: hyperfine is required (brew install hyperfine)" >&2
  exit 1
fi

# git resolves the refs and manages the baseline worktree; python3 turns
# hyperfine's JSON export into the per-provider summary line.
for _required_tool in git python3; do
  if ! command -v "$_required_tool" >/dev/null; then
    echo "error: $_required_tool is required" >&2
    exit 1
  fi
done
unset _required_tool

# cargo is only needed when the binaries are actually (re)built.
if [ "${SKIP_BUILD:-0}" -eq 0 ] && ! command -v cargo >/dev/null; then
  echo "error: cargo is required to build the binaries (or pass --skip-build)" >&2
  exit 1
fi
| 55 | + |
# ── paths ─────────────────────────────────────────────────────────────
# Everything the benchmark produces at build time lives under target/bench.
BENCH_DIR="$SCRIPT_DIR/target/bench"
mkdir -p "$BENCH_DIR"

BASELINE_BIN="$BENCH_DIR/cburn-baseline"
CANDIDATE_BIN="$BENCH_DIR/cburn-candidate"

# Temporary git worktree in which the baseline ref is checked out and built.
WORKTREE_DIR="$BENCH_DIR/_worktree_baseline"

# ── git info ──────────────────────────────────────────────────────────
HEAD_SHA="$(git -C "$SCRIPT_DIR" rev-parse --short HEAD)"

# Resolve the baseline ref to a commit up front. `^{commit}` peels annotated
# tags so the reported SHA is the commit that gets built, and --verify turns
# a typo in --baseline-ref into a readable error instead of a raw git failure.
if ! BASELINE_SHA="$(git -C "$SCRIPT_DIR" rev-parse --short --verify --quiet "${BASELINE_REF}^{commit}")"; then
  echo "error: baseline ref '${BASELINE_REF}' does not resolve to a commit" >&2
  exit 1
fi

# Compare the working tree against HEAD rather than against the index, so
# staged-but-uncommitted changes also mark the candidate as dirty (a plain
# `git diff --quiet` only sees unstaged changes). Untracked files are not
# considered.
DIRTY=""
if ! git -C "$SCRIPT_DIR" diff --quiet HEAD -- 2>/dev/null; then
  DIRTY=" (dirty)"
fi
| 72 | + |
# ── build both binaries ──────────────────────────────────────────────
if [ "$SKIP_BUILD" -eq 0 ]; then
  echo "Building two binaries for A/B comparison"
  echo "========================================="
  echo

  # bin_size <file> — human-readable size; xargs trims surrounding whitespace.
  bin_size() {
    du -h "$1" | cut -f1 | xargs
  }

  # Remove the baseline worktree and its registration in the repository.
  # `worktree remove` fails when the directory is not (or no longer) a valid
  # worktree, so fall back to rm -rf; the prune afterwards drops any stale
  # registration, which would otherwise make the next `worktree add` fail
  # with "missing but already registered worktree".
  remove_baseline_worktree() {
    git -C "$SCRIPT_DIR" worktree remove --force "$WORKTREE_DIR" 2>/dev/null \
      || rm -rf -- "${WORKTREE_DIR:?}"
    git -C "$SCRIPT_DIR" worktree prune 2>/dev/null || true
  }

  # --- candidate: build from working tree ---
  echo "[candidate] building from working tree (${HEAD_SHA}${DIRTY})..."
  # pipefail is inherited by the subshell, so a failed cargo build still
  # aborts the script even though its output is trimmed by tail.
  (cd "$SCRIPT_DIR" && cargo build --release 2>&1 | tail -3)
  cp "$SCRIPT_DIR/target/release/cburn" "$CANDIDATE_BIN"
  echo " -> $CANDIDATE_BIN ($(bin_size "$CANDIDATE_BIN"))"
  echo

  # --- baseline: build from BASELINE_REF via git worktree ---
  echo "[baseline] building from ${BASELINE_REF} (${BASELINE_SHA})..."

  # Clean up anything a previous (possibly interrupted) run left behind, and
  # make sure this run cleans up after itself even when the build fails.
  remove_baseline_worktree
  trap remove_baseline_worktree EXIT

  # Keep git quiet on success, but show its message when the checkout fails
  # instead of exiting without any explanation.
  if ! worktree_log="$(git -C "$SCRIPT_DIR" worktree add --detach "$WORKTREE_DIR" "$BASELINE_REF" 2>&1)"; then
    echo "error: could not create baseline worktree at $WORKTREE_DIR" >&2
    printf '%s\n' "$worktree_log" >&2
    exit 1
  fi
  unset worktree_log

  (cd "$WORKTREE_DIR" && cargo build --release 2>&1 | tail -3)
  cp "$WORKTREE_DIR/target/release/cburn" "$BASELINE_BIN"
  echo " -> $BASELINE_BIN ($(bin_size "$BASELINE_BIN"))"

  # clean up worktree
  trap - EXIT
  remove_baseline_worktree
  echo
else
  echo "Skipping build, reusing existing binaries..."
  for _bench_bin in "$BASELINE_BIN" "$CANDIDATE_BIN"; do
    if [ ! -x "$_bench_bin" ]; then
      echo "error: $_bench_bin not found" >&2
      exit 1
    fi
  done
  unset _bench_bin
  echo
fi
| 108 | + |
# ── output setup ──────────────────────────────────────────────────────
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
RESULTS_DIR="$SCRIPT_DIR/.notes/bench"
mkdir -p "$RESULTS_DIR"

# The label becomes part of file and directory names, so anything outside
# [A-Za-z0-9._-] is replaced by '_'. Otherwise a label containing '/' would
# point OUT_FILE into a directory that does not exist. $LABEL itself is left
# untouched for display in the report header.
SUFFIX=""
if [ -n "$LABEL" ]; then
  SUFFIX="-${LABEL//[^A-Za-z0-9._-]/_}"
fi
OUT_FILE="$RESULTS_DIR/perf-${TIMESTAMP}${SUFFIX}.txt"
JSON_DIR="$RESULTS_DIR/json-${TIMESTAMP}${SUFFIX}"
mkdir -p "$JSON_DIR"

# Command that deletes the two cursor cache files. It is later eval'ed and
# handed to hyperfine as --prepare, so the paths are shell-quoted (%q) to
# survive word splitting when $HOME contains spaces. For ordinary paths the
# resulting string is unchanged.
printf -v CURSOR_CACHE_WIPE 'rm -f %q %q' \
  "$HOME/.cache/codeburn/cursor-results.json" \
  "$HOME/.cache/codeburn/cursor-full-cache.json"

# ── correctness tracking ─────────────────────────────────────────────
OUTPUT_MISMATCHES=0
DIFF_DIR="$BENCH_DIR/diffs"
# Start from an empty diff directory; ${…:?} refuses to run rm -rf on an
# empty path.
rm -rf -- "${DIFF_DIR:?}"
mkdir -p "$DIFF_DIR"
| 127 | + |
# ── header ────────────────────────────────────────────────────────────
# Describe the run (refs, settings, machine) on the terminal and start a
# fresh results file with the same text.
{
  printf 'A/B Performance Benchmark %s\n' "$(date '+%Y-%m-%d %H:%M')"
  printf '\n'
  printf ' baseline: %s (%s)\n' "$BASELINE_REF" "$BASELINE_SHA"
  printf ' candidate: working tree (%s%s)\n' "$HEAD_SHA" "$DIRTY"
  if [ -n "$LABEL" ]; then
    printf ' label: %s\n' "$LABEL"
  fi
  printf ' warmup: %s\n' "$WARMUP"
  printf ' runs: %s\n' "$RUNS"
  printf ' period: %s\n' "$PERIOD"
  printf ' providers: %s\n' "$PROVIDERS"
  # Core count: try sysctl first, then nproc, else print '?'.
  core_count="$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo '?')"
  printf ' machine: %s, %s cores\n' "$(uname -ms)" "$core_count"
  printf '\n'
} | tee "$OUT_FILE"
| 142 | + |
# ── verify + bench runner ────────────────────────────────────────────
# run_ab <mode_name> <extra_flags> <prepare_cmd>
#
# For every provider in $PROVIDERS:
#   1. run both binaries once with CODEBURN_STATIC_OUTPUT=1 and compare their
#      stdout and exit status (a difference increments OUTPUT_MISMATCHES and,
#      for stdout, saves a unified diff under $DIFF_DIR);
#   2. benchmark baseline vs candidate with hyperfine, exporting JSON into
#      $JSON_DIR;
#   3. append a one-line summary computed from that JSON.
#
# Arguments:
#   mode_name   - heading for this mode; also used (sanitised) in file names
#   extra_flags - whitespace-separated flags inserted after `report` (may be "")
#   prepare_cmd - shell command run before the correctness check and passed to
#                 hyperfine as --prepare (may be "")
# Globals read:    PROVIDERS PERIOD WARMUP RUNS BASELINE_BIN CANDIDATE_BIN
#                  OUT_FILE JSON_DIR DIFF_DIR
# Globals written: OUTPUT_MISMATCHES
# Returns: hyperfine's exit status if a benchmark fails, otherwise 0.
run_ab() {
  local mode_name="$1"
  local extra_flags="$2"
  local prepare_cmd="$3"

  # Everything per-provider is local so nothing leaks into the caller.
  local provider slug json_file base_out cand_out base_cmd cand_cmd
  local base_status cand_status match hf_out hf_status summary
  local -a base_argv cand_argv hf_args

  {
    echo "-----------------------------------------------------------"
    echo " ${mode_name}"
    echo "-----------------------------------------------------------"
    echo
  } | tee -a "$OUT_FILE"

  for provider in $PROVIDERS; do
    # extra_flags is a flag *list* and is split on whitespace on purpose;
    # every other word is quoted so paths containing spaces stay intact.
    # shellcheck disable=SC2206
    base_argv=("$BASELINE_BIN" report ${extra_flags} --provider "$provider" --period "$PERIOD")
    # shellcheck disable=SC2206
    cand_argv=("$CANDIDATE_BIN" report ${extra_flags} --provider "$provider" --period "$PERIOD")

    # hyperfine takes each command as one string and splits it itself, so
    # hand it a shell-quoted rendering of the same argv.
    printf -v base_cmd '%q ' "${base_argv[@]}"
    printf -v cand_cmd '%q ' "${cand_argv[@]}"
    base_cmd="${base_cmd% }"
    cand_cmd="${cand_cmd% }"

    slug="${mode_name//[ \/()]/_}-${provider}"
    json_file="${JSON_DIR}/${slug}.json"

    echo " ${provider}:" | tee -a "$OUT_FILE"

    # -- correctness check --
    if [ -n "$prepare_cmd" ]; then
      # Best effort: a failing prepare step must not abort the run.
      eval "$prepare_cmd" 2>/dev/null || true
    fi

    base_out="$DIFF_DIR/${slug}-baseline.txt"
    cand_out="$DIFF_DIR/${slug}-candidate.txt"

    # Keep going when a binary fails, but remember its exit status so that
    # two crashing binaries with empty output are not reported as a match
    # without comment.
    base_status=0
    cand_status=0
    CODEBURN_STATIC_OUTPUT=1 "${base_argv[@]}" > "$base_out" 2>/dev/null || base_status=$?
    CODEBURN_STATIC_OUTPUT=1 "${cand_argv[@]}" > "$cand_out" 2>/dev/null || cand_status=$?

    match="ok"
    if ! diff -q "$base_out" "$cand_out" >/dev/null 2>&1; then
      match="MISMATCH"
      diff -u "$base_out" "$cand_out" > "$DIFF_DIR/${slug}.diff" 2>/dev/null || true
      echo " !! output mismatch -- diff saved to $DIFF_DIR/${slug}.diff" | tee -a "$OUT_FILE"
    elif [ "$base_status" -ne "$cand_status" ]; then
      match="MISMATCH"
      echo " !! exit status differs -- baseline ${base_status}, candidate ${cand_status}" | tee -a "$OUT_FILE"
    else
      echo " output: identical" | tee -a "$OUT_FILE"
    fi
    if [ "$match" = "MISMATCH" ]; then
      OUTPUT_MISMATCHES=$((OUTPUT_MISMATCHES + 1))
    fi
    if [ "$base_status" -ne 0 ] && [ "$base_status" -eq "$cand_status" ]; then
      echo " !! both binaries exited with status ${base_status}" | tee -a "$OUT_FILE"
    fi

    # -- benchmark both --
    hf_args=(
      --shell=none
      --input null
      --warmup "$WARMUP"
      --runs "$RUNS"
      --export-json "$json_file"
    )
    if [ -n "$prepare_cmd" ]; then
      hf_args+=(--prepare "$prepare_cmd")
    fi

    # Capture the output first: piping hyperfine straight into the grep
    # filter below would swallow its error message, and the script would
    # then die silently under pipefail.
    hf_status=0
    hf_out="$(hyperfine "${hf_args[@]}" \
      -n "baseline" "$base_cmd" \
      -n "candidate" "$cand_cmd" \
      2>&1)" || hf_status=$?
    if [ "$hf_status" -ne 0 ]; then
      echo "error: hyperfine failed (exit ${hf_status}) for ${mode_name} / ${provider}" >&2
      printf '%s\n' "$hf_out" >&2
      return "$hf_status"
    fi
    # `|| true`: a filter that matches nothing must not abort the run.
    printf '%s\n' "$hf_out" \
      | grep -E '(Time|Range|faster|slower)' \
      | sed 's/^/ /' \
      | tee -a "$OUT_FILE" || true

    # -- extract numbers from the json --
    if [ -f "$json_file" ]; then
      # The path and the match verdict are passed as arguments rather than
      # spliced into the Python source, so quotes in a path cannot break it.
      # The summary is auxiliary: on failure warn and carry on.
      if summary="$(python3 -c '
import json, sys
path, match = sys.argv[1], sys.argv[2]
with open(path) as fh:
    rs = json.load(fh)["results"]
def fmt(r):
    s = r["stddev"] or 0
    return "{:.1f} ms +/- {:.1f}".format(r["mean"] * 1000, s * 1000)
b, c = rs[0]["mean"], rs[1]["mean"]
d = (c - b) / b * 100 if b else 0.0
sign = "+" if d > 0 else ""
print(" baseline {} candidate {} delta {}{:.1f}% output {}".format(fmt(rs[0]), fmt(rs[1]), sign, d, match))
' "$json_file" "$match")"; then
        echo "$summary" | tee -a "$OUT_FILE"
      else
        echo "warning: could not summarize ${json_file} (python3 failed)" >&2
      fi
    fi

    echo | tee -a "$OUT_FILE"
  done
}
| 218 | + |
# ── run the three modes ───────────────────────────────────────────────
# Each call: <heading> <extra report flags> <prepare command>.

run_ab "normal (all caches)" "" ""
run_ab "no output cache (--no-output-cache)" "--no-output-cache" ""
run_ab "no cache / cold (--no-cache)" "--no-cache" "$CURSOR_CACHE_WIPE"

# ── summary ──────────────────────────────────────────────────────────
{
  echo "==========================================="
  if [ "$OUTPUT_MISMATCHES" -gt 0 ]; then
    echo " ${OUTPUT_MISMATCHES} output mismatch(es)"
    echo " diffs: target/bench/diffs/"
  else
    echo " all outputs match"
  fi
  echo " results: ${OUT_FILE}"
  echo " json: ${JSON_DIR}/"
  echo "==========================================="
} | tee -a "$OUT_FILE"

# The exit status is the number of mismatches (0 = success). Exit statuses
# are truncated to 8 bits, so clamp at 255: otherwise exactly 256 mismatches
# would be reported as success.
if [ "$OUTPUT_MISMATCHES" -gt 255 ]; then
  exit 255
fi
exit "$OUTPUT_MISMATCHES"
0 commit comments