Skip to content

Commit 26ac030

Browse files
alexeyr and claude committed
Fix Bencher reporting permanently broken on pushes to main
The benchmark workflow passed --start-point main --start-point-hash <github.event.before> for push-to-main events. Since main IS the base branch, Bencher tried to look up a version of main at the "before" hash — which often didn't exist (e.g., docs-only commits skipped by paths-ignore). This caused a 404, the report was never stored, and subsequent pushes also failed because their "before" hash was also missing. This cascading failure meant no main data was stored after the first version (Jan 18).

Fix: don't pass --start-point args for pushes to main (thresholds are defined inline via --threshold-* args). For PRs/dispatch where the start-point hash may be missing, retry without --start-point-hash so the report still gets stored using the latest available baseline.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent efe3503 commit 26ac030

1 file changed

Lines changed: 87 additions & 73 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 87 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -558,102 +558,116 @@ jobs:
558558
BOUNDARY=0.95
559559
MAX_SAMPLE=64
560560
561-
# Set branch and start-point based on event type
561+
# Set branch and start-point based on event type.
562+
# Main pushes don't need --start-point because main IS the base
563+
# branch — new reports compare against main's own history.
564+
# Feature branches (PRs, dispatch) use --start-point to inherit
565+
# historical data and thresholds from main.
562566
if [ "${{ github.event_name }}" = "push" ]; then
563567
BRANCH="main"
564-
START_POINT="main"
565-
START_POINT_HASH="${{ github.event.before }}"
566-
EXTRA_ARGS=""
568+
START_POINT_ARGS=""
567569
elif [ "${{ github.event_name }}" = "pull_request" ]; then
568570
BRANCH="$GITHUB_HEAD_REF"
569-
START_POINT="$GITHUB_BASE_REF"
570-
START_POINT_HASH="${{ github.event.pull_request.base.sha }}"
571-
EXTRA_ARGS="--start-point-reset"
571+
START_POINT_ARGS="--start-point $GITHUB_BASE_REF --start-point-hash ${{ github.event.pull_request.base.sha }} --start-point-clone-thresholds --start-point-reset"
572572
elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
573-
# Get merge-base from GitHub API (avoids needing deep fetch)
574-
# See: https://stackoverflow.com/a/74710919
575573
BRANCH="${{ github.ref_name }}"
576-
START_POINT="main"
577-
START_POINT_HASH=$(gh api "repos/${{ github.repository }}/compare/main...$BRANCH" --jq '.merge_base_commit.sha' || true)
578-
579-
if [ -n "$START_POINT_HASH" ]; then
580-
echo "Found merge-base via API: $START_POINT_HASH"
574+
if [ "$BRANCH" = "main" ]; then
575+
START_POINT_ARGS=""
581576
else
582-
echo "⚠️ Could not find merge-base with main via GitHub API, continuing without it"
577+
# Get merge-base from GitHub API (avoids needing deep fetch)
578+
# See: https://stackoverflow.com/a/74710919
579+
START_POINT_HASH=$(gh api "repos/${{ github.repository }}/compare/main...$BRANCH" --jq '.merge_base_commit.sha' || true)
580+
if [ -n "$START_POINT_HASH" ]; then
581+
echo "Found merge-base via API: $START_POINT_HASH"
582+
START_POINT_ARGS="--start-point main --start-point-hash $START_POINT_HASH --start-point-clone-thresholds --start-point-reset"
583+
else
584+
echo "⚠️ Could not find merge-base with main via GitHub API, continuing without hash"
585+
START_POINT_ARGS="--start-point main --start-point-clone-thresholds --start-point-reset"
586+
fi
583587
fi
584-
EXTRA_ARGS=""
585588
else
586589
echo "❌ ERROR: Unexpected event type: ${{ github.event_name }}"
587590
exit 1
588591
fi
589592
590-
# Run bencher and capture HTML output (stdout) while letting stderr go to a file
591-
# so we can distinguish missing baselines (404) from actual regression alerts.
592-
# Use set +e to capture exit code without failing immediately.
593+
# Wrap bencher run in a function so we can retry with different
594+
# start-point args if the pinned hash isn't found in Bencher.
595+
# run_bencher SP_ARGS — upload bench_results/benchmark.json via `bencher run`.
#   $1 (SP_ARGS): space-separated start-point flags, or "" to pass none.
#   Reads BRANCH, MAX_SAMPLE and BOUNDARY from the enclosing script.
#   Exit status is that of `bencher run`; the caller redirects stdout/stderr.
run_bencher() {
596+
local sp_args="$1"
597+
# Intentional word-splitting: sp_args holds several space-separated flags
598+
# (e.g. "--start-point main --start-point-hash abc123") that must reach
599+
# bencher as separate argv entries, so $sp_args is left unquoted below.
600+
# shellcheck disable=SC2086
601+
bencher run \
602+
--project react-on-rails-t8a9ncxo \
603+
--token '${{ secrets.BENCHER_API_TOKEN }}' \
604+
--branch "$BRANCH" \
605+
$sp_args \
606+
--testbed github-actions \
607+
--adapter json \
608+
--file bench_results/benchmark.json \
609+
--err \
610+
--quiet \
611+
--format html \
612+
--threshold-measure rps \
613+
--threshold-test t_test \
614+
--threshold-max-sample-size $MAX_SAMPLE \
615+
--threshold-lower-boundary $BOUNDARY \
616+
--threshold-upper-boundary _ \
617+
--threshold-measure p50_latency \
618+
--threshold-test t_test \
619+
--threshold-max-sample-size $MAX_SAMPLE \
620+
--threshold-lower-boundary _ \
621+
--threshold-upper-boundary $BOUNDARY \
622+
--threshold-measure p90_latency \
623+
--threshold-test t_test \
624+
--threshold-max-sample-size $MAX_SAMPLE \
625+
--threshold-lower-boundary _ \
626+
--threshold-upper-boundary $BOUNDARY \
627+
--threshold-measure p99_latency \
628+
--threshold-test t_test \
629+
--threshold-max-sample-size $MAX_SAMPLE \
630+
--threshold-lower-boundary _ \
631+
--threshold-upper-boundary $BOUNDARY \
632+
--threshold-measure failed_pct \
633+
--threshold-test t_test \
634+
--threshold-max-sample-size $MAX_SAMPLE \
635+
--threshold-lower-boundary _ \
636+
--threshold-upper-boundary $BOUNDARY
637+
}
638+
639+
# Run bencher and capture HTML output (stdout) while letting stderr
640+
# go to a file so we can inspect failure reasons.
593641
BENCHER_STDERR=$(mktemp)
594642
trap 'rm -f "$BENCHER_STDERR"' EXIT
595643
set +e
596-
bencher run \
597-
--project react-on-rails-t8a9ncxo \
598-
--token '${{ secrets.BENCHER_API_TOKEN }}' \
599-
--branch "$BRANCH" \
600-
--start-point "$START_POINT" \
601-
--start-point-hash "$START_POINT_HASH" \
602-
--start-point-clone-thresholds \
603-
--testbed github-actions \
604-
--adapter json \
605-
--file bench_results/benchmark.json \
606-
--err \
607-
--quiet \
608-
--format html \
609-
--threshold-measure rps \
610-
--threshold-test t_test \
611-
--threshold-max-sample-size $MAX_SAMPLE \
612-
--threshold-lower-boundary $BOUNDARY \
613-
--threshold-upper-boundary _ \
614-
--threshold-measure p50_latency \
615-
--threshold-test t_test \
616-
--threshold-max-sample-size $MAX_SAMPLE \
617-
--threshold-lower-boundary _ \
618-
--threshold-upper-boundary $BOUNDARY \
619-
--threshold-measure p90_latency \
620-
--threshold-test t_test \
621-
--threshold-max-sample-size $MAX_SAMPLE \
622-
--threshold-lower-boundary _ \
623-
--threshold-upper-boundary $BOUNDARY \
624-
--threshold-measure p99_latency \
625-
--threshold-test t_test \
626-
--threshold-max-sample-size $MAX_SAMPLE \
627-
--threshold-lower-boundary _ \
628-
--threshold-upper-boundary $BOUNDARY \
629-
--threshold-measure failed_pct \
630-
--threshold-test t_test \
631-
--threshold-max-sample-size $MAX_SAMPLE \
632-
--threshold-lower-boundary _ \
633-
--threshold-upper-boundary $BOUNDARY \
634-
$EXTRA_ARGS > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
644+
run_bencher "$START_POINT_ARGS" > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
635645
BENCHER_EXIT_CODE=$?
636646
set -e
637647
638648
# Print stderr for visibility in logs
639649
cat "$BENCHER_STDERR" >&2
640650
641-
# If bencher failed due to missing baseline data (404 Not Found) and there
642-
# are no regression alerts, treat as a warning instead of failing the workflow.
643-
# This commonly happens when the PR base commit was a docs-only change
644-
# skipped by paths-ignore, so no benchmark data exists in Bencher.
651+
# If bencher failed because the start-point hash doesn't exist in
652+
# Bencher (e.g., the base commit was a docs-only change skipped by
653+
# paths-ignore), retry without --start-point-hash so the report
654+
# still gets stored using the latest available baseline.
645655
#
646-
# Safety checks before overriding exit code:
647-
# 1. stderr must contain "404 Not Found" (HTTP status from Bencher API)
648-
# 2. stderr must NOT contain regression indicators ("alert", "threshold",
649-
# or "boundary") to avoid suppressing actual performance regressions
650-
if [ $BENCHER_EXIT_CODE -ne 0 ] && grep -q "404 Not Found" "$BENCHER_STDERR" && ! grep -qiE "alert|threshold violation|boundary violation" "$BENCHER_STDERR"; then
651-
echo "⚠️ Bencher baseline not found for start-point hash '$START_POINT_HASH' — this is expected when the base commit was not benchmarked (e.g., docs-only changes skipped by paths-ignore)"
652-
echo "⚠️ Benchmark data was collected but regression comparison is unavailable for this run"
653-
echo "📋 Bencher stderr output:"
654-
cat "$BENCHER_STDERR"
655-
echo "::warning::Bencher baseline not found for start-point hash '$START_POINT_HASH' — regression comparison unavailable for this run"
656-
BENCHER_EXIT_CODE=0
656+
# Bencher emits: 'Head Version (..., Some(GitHash("<sha>"))) not found'
657+
# when the pinned hash isn't in its DB. A single combined pattern
658+
# avoids false-positive matches across unrelated lines.
659+
if [ $BENCHER_EXIT_CODE -ne 0 ] && grep -q "Head Version.*not found" "$BENCHER_STDERR" && ! grep -qiE "alert|threshold violation|boundary violation" "$BENCHER_STDERR"; then
660+
RETRY_ARGS=$(echo "$START_POINT_ARGS" | sed 's/--start-point-hash [^ ]*//')
661+
if [ "$RETRY_ARGS" != "$START_POINT_ARGS" ]; then
662+
echo ""
663+
echo "⚠️ Start-point hash not found in Bencher — retrying without --start-point-hash (will use latest baseline)"
664+
echo "::warning::Start-point hash not found in Bencher — falling back to latest baseline for comparison"
665+
set +e
666+
run_bencher "$RETRY_ARGS" > bench_results/bencher_report.html 2>"$BENCHER_STDERR"
667+
BENCHER_EXIT_CODE=$?
668+
set -e
669+
cat "$BENCHER_STDERR" >&2
670+
fi
657671
fi
658672
659673
# Distinguish regression alerts from operational failures (auth/API/network/CLI)

0 commit comments

Comments
 (0)