Skip to content

Benchmark PR

Benchmark PR #688

Workflow file for this run

# Manually dispatched workflow: benchmarks one framework from a pull request
# and reports the outcome as a comment on that pull request.
name: Benchmark PR

on:
  workflow_dispatch:
    inputs:
      # Number of the pull request whose head is checked out and commented on.
      pr:
        required: true
        description: "PR number"
      # Name of a directory under frameworks/ in the checked-out tree.
      framework:
        required: true
        description: "Framework to benchmark"
      # Empty string means "all profiles".
      profile:
        required: false
        default: ""
        description: "Profile (e.g. baseline, baseline-h2, leave empty for all)"
      # Compared against the literal string 'true' by the job steps.
      save:
        required: false
        default: ""
        description: "Save results (true/false)"

# The jobs comment on the pull request and may push a results commit.
permissions:
  pull-requests: write
  contents: write

# Every run shares one concurrency group; a run that is already in progress
# is never cancelled by a newer one.
concurrency:
  cancel-in-progress: false
  group: benchmark
jobs:
  # Runs instead of the benchmark job when the RUNNER_LOCAL configuration
  # variable is 'true': it only tells the PR that the runner is unavailable.
  runner-busy:
    if: vars.RUNNER_LOCAL == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Post runner-busy notice
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Dispatch inputs are handed to the script through the environment
          # instead of being expanded into the script text, so a crafted
          # value cannot inject shell commands.
          PR_NUMBER: ${{ inputs.pr }}
        run: |
          gh pr comment "$PR_NUMBER" \
            --repo "$GITHUB_REPOSITORY" \
            --body "⏸️ Runner is currently performing local benchmark runs and is disabled for GitHub Actions, please try later or check our Discord announcements on runner state for more info."
# Main job: runs on the self-hosted runner, in the "runner" environment,
# whenever the RUNNER_LOCAL configuration variable is not 'true'.
benchmark:
  runs-on: self-hosted
  environment: runner
  if: ${{ vars.RUNNER_LOCAL != 'true' }}
  steps:
    # Check out the head of the requested pull request with full history
    # (fetch-depth 0); a later step merges origin/main into it.
    - uses: actions/checkout@v5
      with:
        fetch-depth: 0
        ref: refs/pull/${{ inputs.pr }}/head
# Fail fast when the requested framework is missing or is not a plain
# directory name under frameworks/.
- name: Validate inputs
  env:
    # Passed via the environment so the value is never expanded into the
    # script text (prevents shell injection through the dispatch input).
    FRAMEWORK: ${{ inputs.framework }}
  run: |
    if [ -z "$FRAMEWORK" ]; then
      echo "Error: framework is required"
      exit 1
    fi
    # The name is later used as a single path component
    # (frameworks/<name>, <name>.json, <name>.log): reject path separators
    # and the dot entries so it cannot point outside those directories.
    case "$FRAMEWORK" in
      */* | . | ..)
        echo "Error: framework '$FRAMEWORK' is not a valid framework name"
        exit 1
        ;;
    esac
    if [ ! -d "frameworks/$FRAMEWORK" ]; then
      echo "Error: framework '$FRAMEWORK' not found"
      exit 1
    fi
# When --save is requested, merge origin/main into the PR branch first.
# The bot commits its results back to the PR head; on an old branch the
# site/data/*.json files have drifted vs. main and the bot commit
# immediately makes the PR CONFLICTING. Merging main here lands the
# bench commit on a base that's already in sync, so the PR stays
# MERGEABLE. Skipped for non-save runs since they don't push back.
- name: Merge main into PR branch
  if: inputs.save == 'true'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Passed via the environment rather than expanded into the script text.
    PR_NUMBER: ${{ inputs.pr }}
  run: |
    git config user.name "github-actions[bot]"
    git config user.email "github-actions[bot]@users.noreply.github.com"
    # Deliberately a full fetch. The checkout step already has complete
    # history (fetch-depth: 0); fetching with --depth=1 would record
    # origin/main as a shallow boundary, hiding its parents, so git could
    # no longer find a merge base between main and an out-of-date PR branch.
    git fetch origin main
    if ! git merge --no-edit origin/main; then
      # Leave the work tree clean before reporting; ignore "no merge to abort".
      git merge --abort 2>/dev/null || true
      gh pr comment "$PR_NUMBER" \
        --repo "$GITHUB_REPOSITORY" \
        --body "⚠️ \`/benchmark --save\` aborted: \`main\` has diverged and cannot be auto-merged into this branch. Please merge or rebase \`main\` manually, push, and re-run \`/benchmark --save\`."
      exit 1
    fi
# Reset state left behind by earlier runs on this runner.
- name: Clean previous containers, results, and temp data
  run: |
    # Force-remove every container (running or stopped) whose name matches
    # "httparena-"; nothing to remove is not an error.
    docker ps --all --quiet --filter "name=httparena-" | xargs -r docker rm --force 2>/dev/null || true
    # Drop old benchmark results and the scratch files/directories this
    # workflow keeps under /tmp.
    rm -rf \
      results/ \
      /tmp/pr_site_data \
      /tmp/main_site_data \
      /tmp/bench_site_data \
      /tmp/bench_comparison.md \
      /tmp/bench_body.txt \
      /tmp/bench_full.txt \
      /tmp/bench_table.txt
# Capture main's site/data under /tmp/main_site_data as the comparison
# baseline, then put the PR's own site/data files back in the work tree.
- name: Fetch main branch data for comparison
  run: |
    # Save PR's site/data aside, get main's for comparison
    cp -r site/data /tmp/pr_site_data
    # Plain fetch: the repository was checked out with full history, and a
    # --depth=1 fetch would turn it into a shallow repository.
    git fetch origin main
    git checkout origin/main -- site/data/ 2>/dev/null || true
    # Keep main's site/data for comparison, restore PR code
    cp -r site/data /tmp/main_site_data
    cp -r /tmp/pr_site_data/* site/data/
    # "git checkout <tree> -- <path>" updates the index as well as the work
    # tree, so main's versions of site/data are now staged. Unstage them;
    # otherwise the later "Commit saved results" commit would silently
    # include them and revert the PR's own site/data changes.
    # Best effort: a failure here leaves the previous behaviour.
    git reset -q -- site/data || true
# Run the benchmark script and keep its combined output in a temp log whose
# path is exposed as the step output "log_file".
- name: Run benchmarks
  id: bench
  env:
    # Passed via the environment so the values are never expanded into the
    # script text (prevents shell injection through dispatch inputs).
    FRAMEWORK: ${{ inputs.framework }}
    PROFILE: ${{ inputs.profile }}
  run: |
    log=$(mktemp)
    # Publish the log path before running anything that can fail, so the
    # reporting step can still locate the log.
    echo "log_file=$log" >> "$GITHUB_OUTPUT"
    # Always save results to disk for comparison; only commit if --save requested
    # NOTE(review): the pipeline's exit status is tee's, so a failing
    # benchmark.sh does not fail this step unless pipefail is enabled —
    # confirm whether that is intended.
    ./scripts/benchmark.sh "$FRAMEWORK" "$PROFILE" --save 2>&1 | tee "$log"
# Produce /tmp/bench_comparison.md by running compare.sh with main's
# site/data temporarily in place, then restore the fresh benchmark data.
- name: Compare with main
  id: compare
  env:
    # Passed via the environment so the values are never expanded into the
    # script text (prevents shell injection through dispatch inputs).
    FRAMEWORK: ${{ inputs.framework }}
    PROFILE: ${{ inputs.profile }}
  run: |
    # Preserve benchmark results before overwriting site/data for comparison
    cp -r site/data /tmp/bench_site_data 2>/dev/null || true
    # Use main's site/data for comparison
    if [ -d /tmp/main_site_data ]; then
      cp -r /tmp/main_site_data/* site/data/ 2>/dev/null || true
    fi
    # A failing compare.sh yields an empty comparison instead of failing the step.
    comparison=$(./scripts/compare.sh "$FRAMEWORK" "$PROFILE" 2>/dev/null || echo "")
    echo "$comparison" > /tmp/bench_comparison.md
    # Restore benchmark site/data (not original PR data)
    if [ -d /tmp/bench_site_data ]; then
      cp -r /tmp/bench_site_data/* site/data/ 2>/dev/null || true
    fi
# Commit the benchmark artefacts for this framework and push them to the
# pull request's head branch (which may live in a fork).
- name: Commit saved results
  if: inputs.save == 'true'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Inputs are passed via the environment so they are never expanded into
    # the script text (prevents shell injection through dispatch inputs).
    PR_NUMBER: ${{ inputs.pr }}
    FRAMEWORK: ${{ inputs.framework }}
    PROFILE: ${{ inputs.profile }}
  run: |
    # Resolve the PR's head branch name and the repository that owns it.
    PR_DATA=$(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" --jq '.head.ref + " " + .head.repo.full_name')
    PR_BRANCH=$(echo "$PR_DATA" | awk '{print $1}')
    PR_REPO=$(echo "$PR_DATA" | awk '{print $2}')
    git config user.name "github-actions[bot]"
    git config user.email "github-actions[bot]@users.noreply.github.com"
    # find exits non-zero on a missing start directory, which would abort
    # the step before anything is committed; skip the logs in that case.
    if [ -d site/static/logs ]; then
      find site/static/logs -name "${FRAMEWORK}.log" -exec git add -f {} +
    fi
    git add -f site/data/frameworks.json site/data/current.json 2>/dev/null || true
    # Only add leaderboard files for the profiles that were actually benchmarked
    # (result files are laid out as results/<profile>/<conns>/<framework>.json)
    for f in results/*/*/"${FRAMEWORK}".json; do
      [ -f "$f" ] || continue
      dir=$(dirname "$f")
      conns=$(basename "$dir")
      prof=$(basename "$(dirname "$dir")")
      git add -f "site/data/${prof}-${conns}.json" 2>/dev/null || true
    done
    if git diff --cached --quiet; then
      echo "No results to commit"
    else
      git commit -m "Benchmark results: ${FRAMEWORK} ${PROFILE}"
      # Push straight to the target URL instead of rewriting "origin": the
      # workspace of a self-hosted runner persists between runs, and
      # "git remote set-url" would leave the token (and the fork's URL)
      # stored in .git/config after the job ends.
      git push "https://x-access-token:${GH_TOKEN}@github.com/${PR_REPO}.git" HEAD:"${PR_BRANCH}" || echo "Warning: could not push to fork (permissions)"
    fi
# Always runs: turns the benchmark log and the compare.sh output into a
# markdown comment on the pull request, then removes the scratch files.
- name: Post results to PR
  if: always()
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Inputs and step outputs are passed via the environment so they are
    # never expanded into the script text (prevents shell injection).
    PR_NUMBER: ${{ inputs.pr }}
    FRAMEWORK: ${{ inputs.framework }}
    PROFILE: ${{ inputs.profile }}
    # Empty when the benchmark step never ran (e.g. validation failed).
    LOG_FILE: ${{ steps.bench.outputs.log_file }}
  run: |
    log="$LOG_FILE"
    profile_label="${PROFILE:-all tests}"
    fw="$FRAMEWORK"
    # Never report leftovers from an earlier run: the cleanup step may not
    # have executed if this run failed early.
    rm -f /tmp/bench_body.txt /tmp/bench_full.txt
    # Build markdown table with results and deltas. A failure here must not
    # prevent the comment from being posted, hence the fallback.
    python3 - "$log" "$fw" /tmp/bench_comparison.md <<'PY' || echo "Warning: could not build results table"
    import os
    import re
    import sys

    log_file, fw, comparison_file = sys.argv[1:4]


    def split_row(line):
        """Split a markdown table row into stripped cells.

        Empty cells are kept so that column positions stay stable.
        """
        line = line.strip()
        if line.startswith("|"):
            line = line[1:]
        if line.endswith("|"):
            line = line[:-1]
        return [cell.strip() for cell in line.split("|")]


    # Row prefix in the compare.sh output -> key used for its delta value.
    DELTA_ROWS = {"| **RPS**": "rps", "| **Memory**": "mem"}

    # Parse deltas from compare.sh output, keyed by "<profile>/<conns>".
    deltas = {}
    if os.path.exists(comparison_file):
        with open(comparison_file, errors="replace") as f:
            comp = f.read()
        cur_profile = None
        conn_cols = []
        for line in comp.splitlines():
            m = re.match(r"^### (.+)", line)
            if m:
                # A "### <profile>" heading starts a new comparison table.
                cur_profile = m.group(1).strip()
                conn_cols = []
                continue
            if not cur_profile:
                continue
            if line.startswith("| Metric"):
                # Header row: connection counts appear as "<N>c".
                conn_cols = re.findall(r"(\d+)c", line)
                continue
            if not conn_cols:
                continue
            for prefix, key in DELTA_ROWS.items():
                if not line.startswith(prefix):
                    continue
                cells = split_row(line)
                for i, conns in enumerate(conn_cols):
                    # Cell 0 is the row label; each connection count then
                    # occupies two cells, the second being the delta.
                    idx = i * 2 + 2
                    if idx < len(cells):
                        deltas.setdefault(cur_profile + "/" + conns, {})[key] = cells[idx]

    # Parse log for results. A missing log (benchmark never ran) simply
    # yields no rows instead of an exception.
    rows = []
    cur_profile = None
    cur_conns = None
    header_re = re.compile(r"^=== " + re.escape(fw) + r" / (.+) / (\d+)c .* ===$")
    best_re = re.compile(r"^=== Best: (\d+) req/s \(CPU: ([\d.]+)%, Mem: ([^\)]+)\)")
    if log_file and os.path.isfile(log_file):
        with open(log_file, errors="replace") as f:
            for line in f:
                line = line.rstrip()
                m = header_re.match(line)
                if m:
                    cur_profile = m.group(1)
                    cur_conns = m.group(2)
                    continue
                if cur_profile and line.startswith("=== Best:"):
                    m2 = best_re.match(line)
                    if m2:
                        rps = int(m2.group(1))
                        cpu = m2.group(2)
                        mem = m2.group(3)
                        d = deltas.get(cur_profile + "/" + cur_conns, {})
                        rows.append((cur_profile, cur_conns, "{:,}".format(rps),
                                     cpu + "%", mem, d.get("rps", ""), d.get("mem", "")))
                    # One "Best" line closes the current section.
                    cur_profile = None

    # Build table. With no rows the file is not written, so the comment
    # shows "No results captured" rather than an empty table.
    if rows:
        with open("/tmp/bench_body.txt", "w") as out:
            out.write("| Test | Conn | RPS | CPU | Mem | \u0394 RPS | \u0394 Mem |\n")
            out.write("|------|------|-----|-----|-----|-------|-------|\n")
            for row in rows:
                out.write("| {} | {} | {} | {} | {} | {} | {} |\n".format(*row))
    PY
    body="## Benchmark Results"$'\n\n'
    body+="**Framework:** \`${fw}\` | **Test:** \`${profile_label}\`"$'\n\n'
    body+=$(cat /tmp/bench_body.txt 2>/dev/null || echo "No results captured")
    body+=$'\n\n'
    body+="<details><summary>Full log</summary>"$'\n\n```\n'
    # Only the last 200 lines are included; overwrite rather than append.
    if [ -n "$log" ] && [ -f "$log" ]; then
      tail -200 "$log" > /tmp/bench_full.txt 2>/dev/null || true
    fi
    body+=$(cat /tmp/bench_full.txt 2>/dev/null || echo "No log")
    body+=$'\n```\n</details>'
    gh pr comment "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --body "$body"
    # Remove every scratch path this workflow creates. -r is required
    # because the *_site_data entries are directories.
    if [ -n "$log" ]; then
      rm -f -- "$log"
    fi
    rm -rf /tmp/bench_body.txt /tmp/bench_table.txt /tmp/bench_full.txt /tmp/bench_comparison.md /tmp/pr_site_data /tmp/main_site_data /tmp/bench_site_data