
Commit 54a34f9

ci(bench): add PR test and benchmark threshold checks
1 parent b962357

3 files changed

Lines changed: 264 additions & 0 deletions


.github/bench-thresholds.json

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
{
  "default": {
    "min_speedup_p50": 0.95,
    "min_speedup_p95": 0.90
  },
  "overrides": {
    "push-only@1000000": {
      "min_speedup_p50": 1.00,
      "min_speedup_p95": 0.95
    },
    "pop-only@1000000": {
      "min_speedup_p50": 1.00,
      "min_speedup_p95": 0.95
    },
    "mixed (50/50)@1000000": {
      "min_speedup_p50": 1.00,
      "min_speedup_p95": 0.95
    }
  }
}
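
How these values are consumed (a minimal sketch, not part of this commit): scripts/bench_report.py below looks each benchmark case up by a "<test>@<N>" key and overlays any matching "overrides" entry on top of "default". The 500000 size in the second call is only an illustrative case with no override.

# Minimal sketch (assumes it is run from the repository root), mirroring
# evaluate_cases() in scripts/bench_report.py below.
import json
from pathlib import Path

thresholds = json.loads(Path(".github/bench-thresholds.json").read_text(encoding="utf-8"))

def effective_thresholds(test: str, n: int) -> dict:
    # Start from the defaults, then overlay any per-case override keyed "<test>@<N>".
    cfg = dict(thresholds["default"])
    cfg.update(thresholds.get("overrides", {}).get(f"{test}@{n}", {}))
    return cfg

print(effective_thresholds("push-only", 1000000))  # override applies: {'min_speedup_p50': 1.0, 'min_speedup_p95': 0.95}
print(effective_thresholds("push-only", 500000))   # hypothetical size, no override: {'min_speedup_p50': 0.95, 'min_speedup_p95': 0.9}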
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
name: PR Test And Benchmark

on:
  pull_request:
    branches:
      - master

jobs:
  test-and-benchmark:
    name: test-and-benchmark
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Configure
        run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Release

      - name: Build
        run: cmake --build build -j

      - name: Run Unit Tests
        run: ctest --test-dir build --output-on-failure

      - name: Run Benchmark
        run: |
          mkdir -p artifacts
          ./build/bench_dheap4 --warmup 1 --iters 7 | tee artifacts/bench.txt

      - name: Parse Benchmark And Check Thresholds
        run: |
          python3 scripts/bench_report.py \
            --input artifacts/bench.txt \
            --thresholds .github/bench-thresholds.json \
            --json artifacts/bench.json \
            --markdown artifacts/bench.md \
            --strict

      - name: Publish Benchmark Summary
        if: always()
        run: cat artifacts/bench.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload Benchmark Artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: artifacts/
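
To reproduce the threshold check locally, a rough sketch assuming the benchmark has already been built and its stdout captured to artifacts/bench.txt, as in the "Run Benchmark" step above:

# Local reproduction sketch of the "Parse Benchmark And Check Thresholds" step.
import subprocess

subprocess.run(
    [
        "python3", "scripts/bench_report.py",
        "--input", "artifacts/bench.txt",
        "--thresholds", ".github/bench-thresholds.json",
        "--json", "artifacts/bench.json",
        "--markdown", "artifacts/bench.md",
        "--strict",
    ],
    check=True,  # raises CalledProcessError on exit code 1 (regression) or 2 (no rows parsed)
)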

scripts/bench_report.py

Lines changed: 194 additions & 0 deletions
@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""Parse benchmark output, emit machine-readable reports, and enforce thresholds."""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any, Dict, List


BENCH_LINE_RE = re.compile(
    r"^(push-only|pop-only|mixed \(50/50\))\s+(\d+)\s+"
    r"([0-9]+(?:\.[0-9]+)?)\s+([0-9]+(?:\.[0-9]+)?)\s+"
    r"([0-9]+(?:\.[0-9]+)?)\s+([0-9]+(?:\.[0-9]+)?)\s+"
    r"([0-9]+(?:\.[0-9]+)?)x\s+([0-9]+(?:\.[0-9]+)?)x$"
)


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--input", required=True, help="Path to benchmark stdout text file.")
    parser.add_argument("--thresholds", required=True, help="Path to thresholds JSON file.")
    parser.add_argument("--json", required=True, help="Path to output parsed JSON file.")
    parser.add_argument("--markdown", required=True, help="Path to output markdown summary file.")
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit non-zero when any threshold is violated.",
    )
    return parser.parse_args()


def load_thresholds(path: Path) -> Dict[str, Any]:
    raw = json.loads(path.read_text(encoding="utf-8"))
    if "default" not in raw:
        raise ValueError("thresholds JSON must contain a 'default' object")
    default = raw["default"]
    if "min_speedup_p50" not in default or "min_speedup_p95" not in default:
        raise ValueError(
            "thresholds.default must include 'min_speedup_p50' and 'min_speedup_p95'"
        )
    if "overrides" not in raw:
        raw["overrides"] = {}
    return raw


def parse_bench_lines(text: str) -> List[Dict[str, Any]]:
    cases: List[Dict[str, Any]] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        match = BENCH_LINE_RE.match(line)
        if not match:
            continue

        test_name = match.group(1)
        n = int(match.group(2))
        dheap_p50 = float(match.group(3))
        dheap_p95 = float(match.group(4))
        stl_p50 = float(match.group(5))
        stl_p95 = float(match.group(6))
        speedup_p50 = float(match.group(7))
        speedup_p95 = float(match.group(8))
        cases.append(
            {
                "test": test_name,
                "n": n,
                "dheap_p50_ms": dheap_p50,
                "dheap_p95_ms": dheap_p95,
                "stl_p50_ms": stl_p50,
                "stl_p95_ms": stl_p95,
                "speedup_p50": speedup_p50,
                "speedup_p95": speedup_p95,
            }
        )
    return cases


def evaluate_cases(cases: List[Dict[str, Any]], thresholds: Dict[str, Any]) -> List[Dict[str, Any]]:
    regressions: List[Dict[str, Any]] = []
    default_cfg = thresholds["default"]
    overrides = thresholds.get("overrides", {})

    for case in cases:
        key = f"{case['test']}@{case['n']}"
        cfg = dict(default_cfg)
        cfg.update(overrides.get(key, {}))
        min_p50 = float(cfg["min_speedup_p50"])
        min_p95 = float(cfg["min_speedup_p95"])

        p50_ok = case["speedup_p50"] >= min_p50
        p95_ok = case["speedup_p95"] >= min_p95
        case["threshold"] = {"min_speedup_p50": min_p50, "min_speedup_p95": min_p95}
        case["status"] = "PASS" if p50_ok and p95_ok else "FAIL"

        if p50_ok and p95_ok:
            continue

        regression = {
            "key": key,
            "test": case["test"],
            "n": case["n"],
            "speedup_p50": case["speedup_p50"],
            "speedup_p95": case["speedup_p95"],
            "min_speedup_p50": min_p50,
            "min_speedup_p95": min_p95,
        }
        regressions.append(regression)

    return regressions


def build_markdown(
    cases: List[Dict[str, Any]], regressions: List[Dict[str, Any]], thresholds: Dict[str, Any]
) -> str:
    lines: List[str] = []
    lines.append("## Benchmark Summary")
    lines.append("")
    lines.append(
        "Threshold defaults: "
        f"`Spd(p50) >= {thresholds['default']['min_speedup_p50']}`, "
        f"`Spd(p95) >= {thresholds['default']['min_speedup_p95']}`"
    )
    lines.append("")
    lines.append("| Test | N | DHeap p50 (ms) | DHeap p95 (ms) | STL p50 (ms) | STL p95 (ms) | Spd(p50) | Spd(p95) | Status |")
    lines.append("|---|---:|---:|---:|---:|---:|---:|---:|---|")
    for case in cases:
        lines.append(
            "| {test} | {n} | {dheap_p50_ms:.3f} | {dheap_p95_ms:.3f} | "
            "{stl_p50_ms:.3f} | {stl_p95_ms:.3f} | {speedup_p50:.3f}x | {speedup_p95:.3f}x | {status} |".format(
                **case
            )
        )

    lines.append("")
    if regressions:
        lines.append("### Regression Alerts")
        for r in regressions:
            lines.append(
                "- `{key}` threshold violation: "
                "Spd(p50)={speedup_p50:.3f}x < {min_speedup_p50:.3f}x "
                "or Spd(p95)={speedup_p95:.3f}x < {min_speedup_p95:.3f}x".format(**r)
            )
    else:
        lines.append("### Regression Alerts")
        lines.append("- None")

    return "\n".join(lines) + "\n"


def main() -> int:
    args = parse_args()
    input_path = Path(args.input)
    threshold_path = Path(args.thresholds)
    json_path = Path(args.json)
    markdown_path = Path(args.markdown)

    bench_text = input_path.read_text(encoding="utf-8")
    thresholds = load_thresholds(threshold_path)
    cases = parse_bench_lines(bench_text)

    if not cases:
        print("No benchmark result rows parsed from input.", file=sys.stderr)
        return 2

    regressions = evaluate_cases(cases, thresholds)

    result = {
        "passed": len(regressions) == 0,
        "thresholds": thresholds,
        "cases": cases,
        "regressions": regressions,
    }
    json_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
    markdown_path.write_text(build_markdown(cases, regressions, thresholds), encoding="utf-8")

    for r in regressions:
        print(
            "::warning::Benchmark regression in {key}: "
            "Spd(p50)={speedup_p50:.3f}x (min {min_speedup_p50:.3f}x), "
            "Spd(p95)={speedup_p95:.3f}x (min {min_speedup_p95:.3f}x)".format(**r)
        )

    if regressions and args.strict:
        print(f"Threshold check failed for {len(regressions)} case(s).", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
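
For reference, a hedged end-to-end sketch of the parser and threshold check above. The sample row and its numbers are illustrative, not real bench_dheap4 output; only the column order and the trailing "x" on the two speedup columns matter to BENCH_LINE_RE, and the import assumes scripts/ is on sys.path.

# Illustrative usage of parse_bench_lines() / evaluate_cases(); the row below
# is made up but shaped the way BENCH_LINE_RE expects: test, N, DHeap p50/p95
# (ms), STL p50/p95 (ms), then the two speedup factors suffixed with "x".
from bench_report import evaluate_cases, parse_bench_lines  # assumes scripts/ is on sys.path

sample = "push-only  1000000  12.345  13.210  11.900  12.800  0.964x  0.969x\n"
cases = parse_bench_lines(sample)
# cases[0] -> {"test": "push-only", "n": 1000000, ..., "speedup_p50": 0.964, "speedup_p95": 0.969}

thresholds = {
    "default": {"min_speedup_p50": 0.95, "min_speedup_p95": 0.90},
    "overrides": {"push-only@1000000": {"min_speedup_p50": 1.00, "min_speedup_p95": 0.95}},
}
regressions = evaluate_cases(cases, thresholds)
# The override demands >= 1.00x at p50, so this case is marked FAIL and listed
# as a regression; under --strict the job would exit with status 1.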
