Skip to content
This repository was archived by the owner on Jun 11, 2026. It is now read-only.

Commit ec8fa13

Browse files
committed
add perf bench
1 parent 46fd52f commit ec8fa13

1 file changed

Lines changed: 239 additions & 0 deletions

File tree

perf-bench.sh

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
#!/usr/bin/env bash
2+
#
3+
# perf-bench.sh — A/B performance benchmark for cburn.
4+
#
5+
# Builds two binaries:
6+
# baseline — from the current git HEAD (or a ref you specify)
7+
# candidate — from your working tree (uncommitted changes and all)
8+
#
9+
# For each cache mode (normal, --no-output-cache, --no-cache), it first
10+
# checks that both binaries produce identical output, then benchmarks
11+
# them head-to-head with hyperfine.
12+
#
13+
# Usage:
14+
# ./perf-bench.sh # 50 warmup, 100 runs
15+
# ./perf-bench.sh --warmup 5 --runs 20 # quick iteration
16+
# ./perf-bench.sh --providers "claude codex" # just those two
17+
# ./perf-bench.sh --label "simd-json" # tag the results
18+
# ./perf-bench.sh --baseline-ref v1.2.0 # compare against a tag
19+
20+
# Abort on any unhandled failure, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# Absolute directory containing this script; all other paths derive from it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ── defaults ──────────────────────────────────────────────────────────
WARMUP=50           # hyperfine --warmup
RUNS=100            # hyperfine --runs
PROVIDERS="all claude codex opencode cursor copilot pi"  # space-separated
PERIOD="30days"     # passed to `report --period`
LABEL=""            # optional tag appended to result file names
BASELINE_REF="HEAD" # git ref the baseline binary is built from
SKIP_BUILD=0        # 1 = reuse the binaries left by a previous run

# ── parse args ────────────────────────────────────────────────────────
# parse_args [flag [value]]...
#
# Applies command-line flags on top of the defaults above by assigning the
# corresponding globals. Exits 1 on an unknown flag or on a value-taking flag
# with no value; prints the header comment and exits 0 on -h/--help.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --warmup | --runs | --providers | --period | --label | --baseline-ref)
        # Without this check a trailing flag trips `set -u` and the user only
        # sees "$2: unbound variable".
        if [ $# -lt 2 ]; then
          echo "error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --warmup) WARMUP="$2" ;;
          --runs) RUNS="$2" ;;
          --providers) PROVIDERS="$2" ;;
          --period) PERIOD="$2" ;;
          --label) LABEL="$2" ;;
          --baseline-ref) BASELINE_REF="$2" ;;
        esac
        shift 2
        ;;
      --skip-build)
        SKIP_BUILD=1
        shift
        ;;
      -h | --help)
        # Print the header comment (script line 3 up to the first blank line)
        # with the leading "# " removed. The ';' before '}' is required by
        # POSIX sed; BSD sed rejects "p }".
        sed -n '3,/^$/{ s/^# //; s/^#//; p; }' "$0"
        exit 0
        ;;
      *)
        echo "unknown flag: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
49+
50+
# ── preflight ─────────────────────────────────────────────────────────
# hyperfine runs the timings; python3 turns hyperfine's JSON export into the
# per-provider summary line. Check both now so a missing tool is reported
# before any build or benchmark time is spent.
if ! command -v hyperfine >/dev/null; then
  echo "error: hyperfine is required (brew install hyperfine)" >&2
  exit 1
fi
if ! command -v python3 >/dev/null; then
  echo "error: python3 is required" >&2
  exit 1
fi

# ── paths ─────────────────────────────────────────────────────────────
# Both benchmarked binaries are copied here so later builds cannot replace
# them mid-run.
BENCH_DIR="$SCRIPT_DIR/target/bench"
mkdir -p "$BENCH_DIR"

BASELINE_BIN="$BENCH_DIR/cburn-baseline"
CANDIDATE_BIN="$BENCH_DIR/cburn-candidate"

# Throwaway git worktree in which BASELINE_REF is checked out and built.
WORKTREE_DIR="$BENCH_DIR/_worktree_baseline"

# ── git info ──────────────────────────────────────────────────────────
HEAD_SHA="$(git -C "$SCRIPT_DIR" rev-parse --short HEAD)"
BASELINE_SHA="$(git -C "$SCRIPT_DIR" rev-parse --short "$BASELINE_REF")"

# Mark the candidate as dirty when the working tree OR the index differs from
# HEAD. A bare `git diff` compares the working tree with the index only, so
# staged-but-uncommitted changes (which the candidate build does include)
# would otherwise go unreported.
DIRTY=""
if ! git -C "$SCRIPT_DIR" diff --quiet HEAD -- 2>/dev/null; then
  DIRTY=" (dirty)"
fi
72+
73+
# ── build both binaries ──────────────────────────────────────────────
# Unless --skip-build was given, compile the candidate from the working tree
# and the baseline from BASELINE_REF (checked out into a throwaway git
# worktree), copying each release binary into BENCH_DIR. With --skip-build,
# only verify that both binaries from an earlier run are still present.
if [ "$SKIP_BUILD" -eq 0 ]; then
  echo "Building two binaries for A/B comparison"
  echo "========================================="
  echo

  # --- candidate: build from working tree ---
  echo "[candidate] building from working tree (${HEAD_SHA}${DIRTY})..."
  # Only the tail of cargo's output is shown; pipefail still makes a failed
  # build abort the script.
  (cd "$SCRIPT_DIR" && cargo build --release 2>&1 | tail -3)
  cp "$SCRIPT_DIR/target/release/cburn" "$CANDIDATE_BIN"
  echo " -> $CANDIDATE_BIN ($(du -h "$CANDIDATE_BIN" | cut -f1 | xargs))"
  echo

  # --- baseline: build from BASELINE_REF via git worktree ---
  echo "[baseline] building from ${BASELINE_REF} (${BASELINE_SHA})..."

  # clean up any leftover worktree (e.g. from an interrupted run)
  if [ -d "$WORKTREE_DIR" ]; then
    git -C "$SCRIPT_DIR" worktree remove --force "$WORKTREE_DIR" 2>/dev/null \
      || rm -rf -- "${WORKTREE_DIR:?}"
  fi
  # Also forget worktrees whose directory is already gone (removed by the
  # rm -rf above, or by something like `cargo clean` wiping target/). git
  # refuses to re-add a path that is still registered.
  git -C "$SCRIPT_DIR" worktree prune || true

  # Keep git's chatter out of the normal output, but show it when the checkout
  # fails instead of letting `set -e` end the script without explanation.
  if ! worktree_log="$(git -C "$SCRIPT_DIR" worktree add --detach "$WORKTREE_DIR" "$BASELINE_REF" 2>&1)"; then
    echo "error: could not check out ${BASELINE_REF} into $WORKTREE_DIR" >&2
    printf '%s\n' "$worktree_log" >&2
    exit 1
  fi

  (cd "$WORKTREE_DIR" && cargo build --release 2>&1 | tail -3)
  cp "$WORKTREE_DIR/target/release/cburn" "$BASELINE_BIN"
  echo " -> $BASELINE_BIN ($(du -h "$BASELINE_BIN" | cut -f1 | xargs))"

  # clean up worktree (best effort; the next run retries the cleanup)
  git -C "$SCRIPT_DIR" worktree remove --force "$WORKTREE_DIR" 2>/dev/null || true
  echo
else
  echo "Skipping build, reusing existing binaries..."
  if [ ! -x "$BASELINE_BIN" ]; then
    echo "error: $BASELINE_BIN not found" >&2
    exit 1
  fi
  if [ ! -x "$CANDIDATE_BIN" ]; then
    echo "error: $CANDIDATE_BIN not found" >&2
    exit 1
  fi
  echo
fi
108+
109+
# ── output setup ──────────────────────────────────────────────────────
# Every run writes a timestamped text report plus a sibling directory for
# hyperfine's JSON exports; a --label, when given, is appended to both names.
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
RESULTS_DIR="$SCRIPT_DIR/.notes/bench"
SUFFIX="${LABEL:+-${LABEL}}"
OUT_FILE="$RESULTS_DIR/perf-${TIMESTAMP}${SUFFIX}.txt"
JSON_DIR="$RESULTS_DIR/json-${TIMESTAMP}${SUFFIX}"
mkdir -p "$RESULTS_DIR" "$JSON_DIR"

# Command that deletes the cursor cache files; used as the prepare step of the
# cold (--no-cache) mode.
CURSOR_CACHE_WIPE="rm -f ${HOME}/.cache/codeburn/cursor-results.json ${HOME}/.cache/codeburn/cursor-full-cache.json"

# ── correctness tracking ─────────────────────────────────────────────
# Count of provider/mode pairs whose outputs differed; the diff directory is
# recreated empty so it only ever holds this run's files.
OUTPUT_MISMATCHES=0
DIFF_DIR="$BENCH_DIR/diffs"
rm -rf "$DIFF_DIR"
mkdir -p "$DIFF_DIR"

# ── header ────────────────────────────────────────────────────────────
# Shown on the terminal and used to start (truncate) the report file.
{
  printf '%s\n' "A/B Performance Benchmark $(date '+%Y-%m-%d %H:%M')"
  printf '\n'
  printf '%s\n' " baseline: ${BASELINE_REF} (${BASELINE_SHA})"
  printf '%s\n' " candidate: working tree (${HEAD_SHA}${DIRTY})"
  if [ -n "$LABEL" ]; then
    printf '%s\n' " label: ${LABEL}"
  fi
  printf '%s\n' " warmup: ${WARMUP}"
  printf '%s\n' " runs: ${RUNS}"
  printf '%s\n' " period: ${PERIOD}"
  printf '%s\n' " providers: ${PROVIDERS}"
  # Core count: sysctl first, then nproc, then a literal '?'.
  printf '%s\n' " machine: $(uname -ms), $(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo '?') cores"
  printf '\n'
} | tee "$OUT_FILE"
142+
143+
# ── verify + bench runner ────────────────────────────────────────────
# run_ab <mode_name> <extra_flags> <prepare_cmd>
#
# Verifies and benchmarks baseline vs. candidate for one cache mode. For every
# provider in $PROVIDERS it:
#   1. runs both binaries once with CODEBURN_STATIC_OUTPUT=1 and compares their
#      stdout; a difference bumps OUTPUT_MISMATCHES and saves a unified diff;
#   2. benchmarks both binaries head-to-head with hyperfine;
#   3. appends a one-line summary computed from hyperfine's JSON export.
#
# Arguments:
#   $1 mode_name    heading text; also used to derive output file names
#   $2 extra_flags  extra flags for `report`, whitespace-separated (may be "")
#   $3 prepare_cmd  command run before the check and given to hyperfine as
#                   --prepare (may be "")
# Globals read:     PROVIDERS PERIOD WARMUP RUNS BASELINE_BIN CANDIDATE_BIN
#                   OUT_FILE JSON_DIR DIFF_DIR
# Globals written:  OUTPUT_MISMATCHES
# Returns:          hyperfine's (or python3's) exit status if either fails,
#                   after printing the failure; 0 otherwise.
run_ab() {
  local mode_name="$1"
  local extra_flags="$2"
  local prepare_cmd="$3"

  {
    echo "-----------------------------------------------------------"
    echo " ${mode_name}"
    echo "-----------------------------------------------------------"
    echo
  } | tee -a "$OUT_FILE"

  local provider
  for provider in $PROVIDERS; do
    # hyperfine takes each benchmarked command as a single string argument.
    local base_cmd="$BASELINE_BIN report ${extra_flags} --provider ${provider} --period ${PERIOD}"
    local cand_cmd="$CANDIDATE_BIN report ${extra_flags} --provider ${provider} --period ${PERIOD}"
    # File-name-safe slug: spaces, slashes and parentheses become underscores.
    local slug="${mode_name//[ \/()]/_}-${provider}"
    local json_file="${JSON_DIR}/${slug}.json"

    echo " ${provider}:" | tee -a "$OUT_FILE"

    # -- correctness check --
    # Best effort: a failing prepare step must not stop the comparison.
    if [ -n "$prepare_cmd" ]; then
      eval "$prepare_cmd" 2>/dev/null || true
    fi

    local base_out="$DIFF_DIR/${slug}-baseline.txt"
    local cand_out="$DIFF_DIR/${slug}-candidate.txt"

    # extra_flags is left unquoted on purpose so an empty value contributes no
    # argument. Exit statuses are ignored here; only stdout is compared.
    # shellcheck disable=SC2086
    CODEBURN_STATIC_OUTPUT=1 "$BASELINE_BIN" report ${extra_flags} --provider "${provider}" --period "${PERIOD}" > "$base_out" 2>/dev/null || true
    # shellcheck disable=SC2086
    CODEBURN_STATIC_OUTPUT=1 "$CANDIDATE_BIN" report ${extra_flags} --provider "${provider}" --period "${PERIOD}" > "$cand_out" 2>/dev/null || true

    local match="ok"
    if ! diff -q "$base_out" "$cand_out" >/dev/null 2>&1; then
      match="MISMATCH"
      OUTPUT_MISMATCHES=$((OUTPUT_MISMATCHES + 1))
      diff -u "$base_out" "$cand_out" > "$DIFF_DIR/${slug}.diff" 2>/dev/null || true
      echo " !! output mismatch -- diff saved to $DIFF_DIR/${slug}.diff" | tee -a "$OUT_FILE"
    else
      echo " output: identical" | tee -a "$OUT_FILE"
    fi

    # -- benchmark both --
    local hf_args=(
      --shell=none
      --input null
      --warmup "$WARMUP"
      --runs "$RUNS"
      --export-json "$json_file"
    )
    if [ -n "$prepare_cmd" ]; then
      hf_args+=(--prepare "$prepare_cmd")
    fi

    # Capture the output instead of piping it straight into grep: the filter
    # below would drop hyperfine's error message, and under pipefail + set -e
    # the script would then stop with no explanation.
    local hf_out
    local hf_status=0
    hf_out="$(hyperfine "${hf_args[@]}" \
      -n "baseline" "$base_cmd" \
      -n "candidate" "$cand_cmd" 2>&1)" || hf_status=$?

    if [ "$hf_status" -ne 0 ]; then
      {
        echo " !! hyperfine failed (exit ${hf_status}):"
        printf '%s\n' "$hf_out" | sed 's/^/ /'
      } | tee -a "$OUT_FILE" >&2
      return "$hf_status"
    fi

    # grep exits 1 when no line matches; that is not an error here.
    { printf '%s\n' "$hf_out" | grep -E '(Time|Range|faster|slower)' || true; } \
      | sed 's/^/ /' | tee -a "$OUT_FILE"

    # -- extract numbers from the json --
    if [ -f "$json_file" ]; then
      local summary
      # The path and match status are passed as arguments rather than spliced
      # into the Python source, so quotes in a path cannot break the snippet.
      summary="$(python3 -c '
import json
import sys

with open(sys.argv[1]) as fh:
    rs = json.load(fh)["results"]

def fmt(r):
    s = r["stddev"] or 0
    return "{:.1f} ms +/- {:.1f}".format(r["mean"] * 1000, s * 1000)

b, c = rs[0]["mean"], rs[1]["mean"]
d = (c - b) / b * 100 if b else 0.0
sign = "+" if d > 0 else ""
print(" baseline {} candidate {} delta {}{:.1f}% output {}".format(
    fmt(rs[0]), fmt(rs[1]), sign, d, sys.argv[2]))
' "$json_file" "$match")" || return
      echo "$summary" | tee -a "$OUT_FILE"
    fi

    echo | tee -a "$OUT_FILE"
  done
}
218+
219+
# ── run the three modes ───────────────────────────────────────────────
# Arguments are: heading, extra `report` flags, prepare command. Only the cold
# mode has a prepare step (wiping the cursor cache files).

run_ab "normal (all caches)" "" ""
run_ab "no output cache (--no-output-cache)" "--no-output-cache" ""
run_ab "no cache / cold (--no-cache)" "--no-cache" "$CURSOR_CACHE_WIPE"

# ── summary ──────────────────────────────────────────────────────────
# Closing banner, echoed to the terminal and appended to the report.
{
  printf '%s\n' "==========================================="
  if [ "$OUTPUT_MISMATCHES" -eq 0 ]; then
    printf '%s\n' " all outputs match"
  else
    printf '%s\n' " ${OUTPUT_MISMATCHES} output mismatch(es)"
    printf '%s\n' " diffs: target/bench/diffs/"
  fi
  printf '%s\n' " results: ${OUT_FILE}"
  printf '%s\n' " json: ${JSON_DIR}/"
  printf '%s\n' "==========================================="
} | tee -a "$OUT_FILE"

# The exit status is the number of mismatching outputs (0 = all matched).
exit "$OUTPUT_MISMATCHES"

0 commit comments

Comments
 (0)