Skip to content

Commit dabcc97

Browse files
Mossaka and claude committed
feat: add historical benchmark storage and relative regression detection
Add a benchmark history system that stores the last 20 benchmark runs and compares current results against the rolling mean p95 to detect gradual performance regressions (>25% slower than historical average).

- New src/benchmark/history.ts with appendToHistory(), compareAgainstBaseline()
- New scripts/ci/update-benchmark-history.ts CLI for CI usage
- Modified benchmark-performance.ts to accept --baseline flag
- Updated performance-monitor.yml with cache restore/save steps
- Added 15 unit tests covering all history logic

Closes #1760

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b31ec28 commit dabcc97

5 files changed

Lines changed: 492 additions & 9 deletions

File tree

.github/workflows/performance-monitor.yml

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,36 @@ jobs:
4444
WRAPPER
4545
sudo chmod +x /usr/local/bin/awf
4646
47+
- name: Restore benchmark history
48+
uses: actions/cache/restore@v4
49+
with:
50+
path: benchmark-history.json
51+
key: benchmark-history-${{ github.ref_name }}
52+
restore-keys: |
53+
benchmark-history-
54+
4755
- name: Run benchmarks
4856
id: benchmark
4957
run: |
50-
npx tsx scripts/ci/benchmark-performance.ts > benchmark-results.json 2>benchmark-progress.log || true
58+
BASELINE_ARG=""
59+
if [ -f benchmark-history.json ]; then
60+
BASELINE_ARG="--baseline benchmark-history.json"
61+
fi
62+
npx tsx scripts/ci/benchmark-performance.ts $BASELINE_ARG > benchmark-results.json 2>benchmark-progress.log || true
5163
cat benchmark-progress.log
5264
echo "--- JSON output ---"
5365
cat benchmark-results.json
5466
67+
- name: Update benchmark history
68+
run: |
69+
npx tsx scripts/ci/update-benchmark-history.ts benchmark-results.json benchmark-history.json
70+
71+
- name: Save benchmark history
72+
uses: actions/cache/save@v4
73+
with:
74+
path: benchmark-history.json
75+
key: benchmark-history-${{ github.ref_name }}-${{ github.run_id }}
76+
5577
- name: Upload results
5678
if: always()
5779
uses: actions/upload-artifact@v4
@@ -60,6 +82,7 @@ jobs:
6082
path: |
6183
benchmark-results.json
6284
benchmark-progress.log
85+
benchmark-history.json
6386
retention-days: 90
6487

6588
- name: Validate benchmark JSON
@@ -82,25 +105,35 @@ jobs:
82105
echo "regression_count=$REGRESSIONS" >> "$GITHUB_OUTPUT"
83106
84107
if [ "$REGRESSIONS" -gt 0 ]; then
85-
echo "## ⚠️ Performance Regressions Detected" >> "$GITHUB_STEP_SUMMARY"
108+
echo "## Performance Regressions Detected" >> "$GITHUB_STEP_SUMMARY"
86109
echo "" >> "$GITHUB_STEP_SUMMARY"
87110
jq -r '.regressions[]' benchmark-results.json | while read -r line; do
88111
echo "- $line" >> "$GITHUB_STEP_SUMMARY"
89112
done
90113
else
91-
echo "## All Metrics Within Thresholds" >> "$GITHUB_STEP_SUMMARY"
114+
echo "## All Metrics Within Thresholds" >> "$GITHUB_STEP_SUMMARY"
92115
fi
93116
94117
echo "" >> "$GITHUB_STEP_SUMMARY"
95118
echo "### Results" >> "$GITHUB_STEP_SUMMARY"
96119
echo "" >> "$GITHUB_STEP_SUMMARY"
97-
echo "| Metric | Mean | Median | P95 | P99 | Target | Critical |" >> "$GITHUB_STEP_SUMMARY"
98-
echo "|--------|------|--------|-----|-----|--------|----------|" >> "$GITHUB_STEP_SUMMARY"
120+
echo "| Metric | Mean | Median | P95 | P99 | Target | Critical | Trend |" >> "$GITHUB_STEP_SUMMARY"
121+
echo "|--------|------|--------|-----|-----|--------|----------|-------|" >> "$GITHUB_STEP_SUMMARY"
122+
123+
# Build trend data from history if available
124+
if [ -f benchmark-history.json ]; then
125+
HISTORY_COUNT=$(jq '.entries | length' benchmark-history.json)
126+
else
127+
HISTORY_COUNT=0
128+
fi
99129
100-
jq -r '.results[] as $r | .thresholds[$r.metric] as $t |
101-
"| \($r.metric) | \($r.mean)\($r.unit) | \($r.median)\($r.unit) | \($r.p95)\($r.unit) | \($r.p99)\($r.unit) | \($t.target // "N/A")\(if $t then $r.unit else "" end) | \($t.critical // "N/A")\(if $t then $r.unit else "" end) |"' \
130+
jq -r --argjson hcount "$HISTORY_COUNT" '.results[] as $r | .thresholds[$r.metric] as $t |
131+
"| \($r.metric) | \($r.mean)\($r.unit) | \($r.median)\($r.unit) | \($r.p95)\($r.unit) | \($r.p99)\($r.unit) | \($t.target // "N/A")\(if $t then $r.unit else "" end) | \($t.critical // "N/A")\(if $t then $r.unit else "" end) | \(if $hcount > 1 then "see log" else "N/A (first run)" end) |"' \
102132
benchmark-results.json >> "$GITHUB_STEP_SUMMARY"
103133
134+
echo "" >> "$GITHUB_STEP_SUMMARY"
135+
echo "*History: ${HISTORY_COUNT} data points*" >> "$GITHUB_STEP_SUMMARY"
136+
104137
- name: Ensure labels exist
105138
if: steps.check.outputs.regression_count != '0'
106139
uses: actions/github-script@v7

scripts/ci/benchmark-performance.ts

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
*/
1717

1818
import { execSync, ExecSyncOptions, spawn, ChildProcess } from "child_process";
19+
import * as fs from "fs";
20+
import { BenchmarkHistory, compareAgainstBaseline, trendArrow } from "../../src/benchmark/history";
1921

2022
// ── Configuration ──────────────────────────────────────────────────
2123

@@ -324,11 +326,24 @@ function benchmarkNetworkCreation(): BenchmarkResult {
324326

325327
// ── Main ───────────────────────────────────────────────────────────
326328

329+
/**
 * Extract the baseline history file path from the command line.
 *
 * Both `--baseline <path>` and `--baseline=<path>` are accepted. The first
 * occurrence of the flag in `process.argv` wins.
 *
 * @returns The path supplied for `--baseline`, or `null` when the flag is
 *   absent or was given without a value.
 */
function parseBaselineArg(): string | null {
  const flag = "--baseline";
  const argv = process.argv;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    // `--baseline=<path>` form.
    if (arg.startsWith(`${flag}=`)) {
      const value = arg.slice(flag.length + 1);
      return value.length > 0 ? value : null;
    }

    // `--baseline <path>` form.
    if (arg === flag) {
      const value = argv[i + 1];
      // A following token that is itself a flag means the path was omitted;
      // do not mistake e.g. `--verbose` for a file name.
      if (value === undefined || value.startsWith("--")) {
        return null;
      }
      return value;
    }
  }

  return null;
}
336+
327337
async function main(): Promise<void> {
328338
const commitSha = exec("git rev-parse HEAD");
339+
const baselinePath = parseBaselineArg();
340+
329341
console.error(`AWF Performance Benchmark`);
330342
console.error(` Commit: ${commitSha}`);
331343
console.error(` Iterations: ${ITERATIONS}`);
344+
if (baselinePath) {
345+
console.error(` Baseline: ${baselinePath}`);
346+
}
332347
console.error("");
333348

334349
const results: BenchmarkResult[] = [];
@@ -353,6 +368,16 @@ async function main(): Promise<void> {
353368
}
354369
}
355370

371+
// Check for relative regressions against historical baseline
372+
let history: BenchmarkHistory | null = null;
373+
if (baselinePath && fs.existsSync(baselinePath)) {
374+
try {
375+
history = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
376+
} catch (err) {
377+
console.error(`Warning: could not parse baseline file: ${err}`);
378+
}
379+
}
380+
356381
const report: BenchmarkReport = {
357382
timestamp: new Date().toISOString(),
358383
commitSha,
@@ -362,19 +387,39 @@ async function main(): Promise<void> {
362387
regressions,
363388
};
364389

390+
if (history && history.entries.length > 0) {
391+
const comparisons = compareAgainstBaseline(report, history);
392+
console.error("Historical comparison (current p95 vs rolling mean p95):");
393+
for (const c of comparisons) {
394+
const arrow = trendArrow(c.ratio);
395+
console.error(
396+
` ${arrow} ${c.metric}: ${c.currentP95}${c.unit} vs ${c.rollingMeanP95}${c.unit} avg (${c.ratio}x)`
397+
);
398+
if (c.regressed) {
399+
regressions.push(
400+
`${c.metric}: p95=${c.currentP95}${c.unit} is ${c.ratio}x the historical average of ${c.rollingMeanP95}${c.unit} (>1.25x threshold)`
401+
);
402+
}
403+
}
404+
// Update regressions in report after relative check
405+
report.regressions = regressions;
406+
} else if (baselinePath) {
407+
console.error("No historical baseline data available, skipping relative comparison.");
408+
}
409+
365410
// Output JSON to stdout
366411
console.log(JSON.stringify(report, null, 2));
367412

368413
if (regressions.length > 0) {
369414
console.error("");
370-
console.error("⚠️ Performance regressions detected:");
415+
console.error("Performance regressions detected:");
371416
for (const r of regressions) {
372417
console.error(` - ${r}`);
373418
}
374419
process.exit(1);
375420
} else {
376421
console.error("");
377-
console.error("All metrics within acceptable thresholds.");
422+
console.error("All metrics within acceptable thresholds.");
378423
}
379424
}
380425

scripts/ci/update-benchmark-history.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env npx tsx
2+
/**
3+
* Update benchmark history file with current results.
4+
*
5+
* Usage:
6+
* npx tsx scripts/ci/update-benchmark-history.ts <benchmark-results.json> <benchmark-history.json>
7+
*
8+
* - Reads the current benchmark report from <benchmark-results.json>
9+
* - Reads (or creates) history from <benchmark-history.json>
10+
* - Appends current results to history, trims to last 20 entries
11+
* - Writes the updated history back to <benchmark-history.json>
12+
*/
13+
14+
import * as fs from "fs";
15+
import { appendToHistory, BenchmarkHistory, BenchmarkReport } from "../../src/benchmark/history";
16+
17+
/**
 * CLI entry point: fold the current benchmark report into the history file.
 *
 * Reads the report at `argv[2]`, loads the existing history at `argv[3]`
 * (if any), passes both to `appendToHistory`, and writes the result back to
 * the history path as pretty-printed JSON. Progress and diagnostics go to
 * stderr.
 *
 * Exits with status 1 when arguments are missing, or when the results file
 * is absent, unparseable, or not a JSON object. An unreadable or malformed
 * history file is not fatal: a warning is logged and history starts fresh.
 */
function main(): void {
  const args = process.argv.slice(2);
  if (args.length < 2) {
    console.error("Usage: update-benchmark-history.ts <benchmark-results.json> <benchmark-history.json>");
    process.exit(1);
  }

  const [resultsPath, historyPath] = args;

  // Read current benchmark results
  if (!fs.existsSync(resultsPath)) {
    console.error(`Error: benchmark results file not found: ${resultsPath}`);
    process.exit(1);
  }

  // The results file is produced by shell redirection, so a crashed benchmark
  // run can leave it empty or truncated; report that instead of throwing.
  let parsedReport: unknown;
  try {
    parsedReport = JSON.parse(fs.readFileSync(resultsPath, "utf-8"));
  } catch (err) {
    console.error(`Error: could not parse benchmark results file ${resultsPath}: ${err}`);
    process.exit(1);
  }
  if (typeof parsedReport !== "object" || parsedReport === null || Array.isArray(parsedReport)) {
    console.error(`Error: benchmark results file does not contain a JSON object: ${resultsPath}`);
    process.exit(1);
  }
  const report = parsedReport as BenchmarkReport;

  // Read existing history (or start fresh)
  let history: BenchmarkHistory | null = null;
  if (fs.existsSync(historyPath)) {
    try {
      const parsedHistory: unknown = JSON.parse(fs.readFileSync(historyPath, "utf-8"));
      const entries =
        typeof parsedHistory === "object" && parsedHistory !== null
          ? (parsedHistory as { entries?: unknown }).entries
          : undefined;
      if (Array.isArray(entries)) {
        history = parsedHistory as BenchmarkHistory;
        console.error(`Loaded history with ${entries.length} entries`);
      } else {
        // Valid JSON but not a history document (no `entries` array).
        console.error("Warning: history file has no entries array, starting fresh");
      }
    } catch (err) {
      console.error(`Warning: could not parse history file, starting fresh: ${err}`);
      history = null;
    }
  } else {
    console.error("No existing history file, creating new one");
  }

  // Append and trim
  const updated = appendToHistory(history, report);
  console.error(`Updated history: ${updated.entries.length} entries`);

  // Write updated history
  fs.writeFileSync(historyPath, JSON.stringify(updated, null, 2) + "\n");
  console.error(`Wrote history to ${historyPath}`);
}
55+
56+
main();

0 commit comments

Comments
 (0)