Skip to content

Commit 2208e8c

Browse files
committed
bench: add CLI startup benchmark for codeflash compare --script
Measures median wall-clock time for --version, --help, auth status, and compare --help across 30 runs with 3 warmups. Usage: codeflash compare main codeflash/optimize \ --script "python benchmarks/bench_cli_startup.py" \ --script-output benchmarks/results.json
1 parent b533f50 commit 2208e8c

2 files changed

Lines changed: 72 additions & 0 deletions

File tree

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/bench_cli_startup.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Benchmark CLI startup latency for codeflash compare --script mode.
2+
3+
Run from a worktree root. Installs deps via uv sync, then times several
4+
CLI entry points and writes a JSON file mapping command names to median
5+
wall-clock seconds.
6+
7+
Usage:
8+
codeflash compare main codeflash/optimize \
9+
--script "python benchmarks/bench_cli_startup.py" \
10+
--script-output benchmarks/results.json
11+
"""
12+
13+
from __future__ import annotations

import json
import os
import statistics
import subprocess
import sys
import time
from pathlib import Path
20+
21+
# Number of untimed invocations made before sampling begins.
WARMUP = 3
# Number of timed invocations whose median is reported.
RUNS = 30
# Path of the JSON report; the BENCH_OUTPUT environment variable overrides it.
OUTPUT = os.environ.get("BENCH_OUTPUT", "benchmarks/results.json")

# Result key -> argv of the CLI entry point timed under that key.
# Every command is launched through `uv run codeflash`, so only the trailing
# arguments differ between entries.
COMMANDS: dict[str, list[str]] = {
    label: ["uv", "run", "codeflash", *cli_args]
    for label, cli_args in (
        ("version", ["--version"]),
        ("help", ["--help"]),
        ("auth_status", ["auth", "status"]),
        ("compare_help", ["compare", "--help"]),
    )
}
31+
32+
33+
def measure(cmd: list[str], warmup: int = WARMUP, runs: int = RUNS) -> float:
    """Return median wall-clock seconds for *cmd* over *runs* iterations.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        warmup: Number of untimed invocations made before sampling starts.
        runs: Number of timed invocations; must be at least 1.

    Returns:
        The median elapsed time in seconds, as measured by
        ``time.perf_counter`` around each ``subprocess.run`` call.

    Raises:
        ValueError: If *runs* is less than 1, because no median can be
            computed from zero samples.
    """
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")

    # The child inherits the current environment, with CODEFLASH_API_KEY
    # replaced by a placeholder value.
    # NOTE(review): presumably this keeps the CLI from requiring a real key
    # during the benchmark -- confirm against the CLI's auth handling.
    env = {**os.environ, "CODEFLASH_API_KEY": "bench_dummy_key"}

    # check=False throughout: a non-zero exit status does not abort the
    # benchmark; the elapsed time is recorded regardless.
    for _ in range(warmup):
        subprocess.run(cmd, capture_output=True, check=False, env=env)

    samples: list[float] = []
    for _ in range(runs):
        started = time.perf_counter()
        subprocess.run(cmd, capture_output=True, check=False, env=env)
        samples.append(time.perf_counter() - started)

    # Middle sample for an odd count, mean of the two middle samples otherwise.
    return statistics.median(samples)
48+
49+
50+
def main() -> None:
    """Sync dependencies, time every entry in COMMANDS, and write a JSON report.

    For each command the median returned by ``measure`` is rounded to four
    decimal places and stored under the command's key. An extra ``__total__``
    key holds the sum of those rounded medians. The report is written to the
    path in ``OUTPUT``; missing parent directories are created.

    Raises:
        subprocess.CalledProcessError: If ``uv sync`` exits with a non-zero
            status. Its captured stderr is echoed to this process's stderr
            before the exception propagates.
    """
    # Ensure deps are installed in the worktree
    try:
        subprocess.run(["uv", "sync"], check=True, capture_output=True)
    except subprocess.CalledProcessError as err:
        # The output was captured, so without this the reason for the failure
        # would be hidden behind a bare "non-zero exit status" message.
        if err.stderr:
            sys.stderr.write(err.stderr.decode(errors="replace"))
        raise

    results: dict[str, float] = {}
    for name, cmd in COMMANDS.items():
        # Print the label first so progress is visible while the command runs.
        print(f" {name}: ", end="", flush=True)
        median = measure(cmd)
        results[name] = round(median, 4)
        print(f"{median * 1000:.0f} ms")

    # Total = sum of medians (useful for a single summary number)
    results["__total__"] = round(sum(results.values()), 4)

    output_path = Path(OUTPUT)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the report independent of the platform default.
    with output_path.open("w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"\nResults written to {OUTPUT}")
69+
70+
71+
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)