feat(guard): pre-push benchmark + coverage regression guard

claude · claude · commit f8fe392d25e4 · 2026-05-28T04:54:07.000+08:00
Adds a pre-push hook that runs before every git push:

Benchmarks (go test -bench=. -benchmem -count=3):
- Runs via cpulimit (or nice -n 15 when cpulimit is absent) at 85% CPU, with -p set to 85% of nproc
- Compares ns/op against coverage/bench-baseline.txt using benchstat
  (if installed) or a built-in awk parser
- Blocks push if any benchmark regresses > 20% (BENCH_REGRESS_PCT env)

Coverage (go test -coverprofile -covermode=atomic):
- Uses nice -n 15 (not cpulimit): cpulimit only throttles the parent
  go test process, not per-package children, causing partial profiles
- Blocks push if total coverage drops > 1% (COVER_DROP_PCT env)
- Baseline: 21.1%

Both baselines are committed (bench-baseline.txt, coverage-baseline.txt).
Use 'make bench-baseline' to reset after intentional perf/coverage changes.
Fix: $NF → $$NF in Makefile awk to prevent make variable expansion.
Fix: cpulimit not used for coverage (child process throttle issue).

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.githooks/pre-push b/.githooks/pre-push
@@ -0,0 +1,4 @@
+#!/bin/sh
+# pre-push dispatcher: runs every script in push-checks/ (in glob order) and
+# aborts the push as soon as one of them exits non-zero.
+for check in "$(dirname "$0")"/push-checks/*.sh; do
+    # An unmatched glob is left as the literal pattern; skip it instead of
+    # handing a non-existent path to sh (which would block every push).
+    [ -f "$check" ] || continue
+    sh "$check" || exit 1
+done
diff --git a/.githooks/push-checks/01-bench-cover.sh b/.githooks/push-checks/01-bench-cover.sh
@@ -0,0 +1,167 @@
+#!/bin/sh
+# pre-push: benchmark regression + coverage drop guard.
+#
+# Baselines (committed, so shared across the team):
+#   coverage/bench-baseline.txt   — raw Go benchmark output (-count=3)
+#   coverage/coverage-baseline.txt — total coverage percentage
+#
+# Thresholds (override via env):
+#   BENCH_REGRESS_PCT  — max allowed slowdown in ns/op  (default: 20)
+#   COVER_DROP_PCT     — max allowed coverage drop in %  (default: 1)
+#
+# Tools:
+#   benchstat (golang.org/x/perf/cmd/benchstat) — used when installed.
+#   Otherwise: built-in awk parser for ns/op comparison.
+#   go tool cover — always available.
+#
+# To reset baselines after an intentional change:
+#   make bench-baseline   (the guard's failure messages also print the reset command)
+
+# Resolve the repository root so all paths work regardless of the caller's cwd.
+REPO="$(git rev-parse --show-toplevel)"
+
+# Logical CPU count. nproc is not available everywhere (e.g. macOS/BSD), so
+# fall back to getconf and then sysctl; default to 1 if nothing usable is found.
+NCPU=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null)
+case "$NCPU" in
+    ''|*[!0-9]*) NCPU=1 ;;
+esac
+[ "$NCPU" -lt 1 ] && NCPU=1
+
+# 85% of the CPUs (at least 1); passed to go test as -p / -parallel.
+P85=$(( NCPU * 85 / 100 ))
+[ "$P85" -lt 1 ] && P85=1
+
+# CPU limiter
+# Prefer cpulimit (limit = 85 per CPU, summed over all CPUs); otherwise
+# just lower the scheduling priority with nice.
+if command -v cpulimit >/dev/null 2>&1; then
+    LIMIT_PCT=$(( NCPU * 85 ))
+    RUNNER="cpulimit -l ${LIMIT_PCT} --"
+else
+    RUNNER="nice -n 15"
+fi
+
+# Applies to every go invocation below (benchmarks and coverage).
+export CGO_ENABLED=0
+# Thresholds: environment overrides win, otherwise the documented defaults.
+BENCH_REGRESS_PCT="${BENCH_REGRESS_PCT:-20}"
+COVER_DROP_PCT="${COVER_DROP_PCT:-1}"
+
+# Baselines are committed; bench-current.txt and coverage.out are scratch files.
+BENCH_BASELINE="${REPO}/coverage/bench-baseline.txt"
+BENCH_CURRENT="${REPO}/coverage/bench-current.txt"
+COV_BASELINE="${REPO}/coverage/coverage-baseline.txt"
+COV_PROFILE="${REPO}/coverage/coverage.out"
+mkdir -p "${REPO}/coverage"
+
+# Set to 1 by any guard that trips; used as the script's exit status.
+FAILED=0
+
+# ── Benchmarks ──────────────────────────────────────────────────────────────
+
+echo "pre-push: running benchmarks (count=3, cpu≤85%)..."
+# stderr is discarded and lines starting with "---" are filtered out before
+# the output is stored for comparison.
+$RUNNER go test -bench=. -benchmem -count=3 \
+    -p "${P85}" ./... 2>/dev/null | \
+    grep -v "^---" > "$BENCH_CURRENT"
+
+if [ ! -s "$BENCH_CURRENT" ]; then
+    echo "WARNING: no benchmark output produced — skipping bench guard."
+else
+    if [ -f "$BENCH_BASELINE" ]; then
+        echo "pre-push: comparing benchmarks against baseline..."
+
+        if command -v benchstat >/dev/null 2>&1; then
+            # benchstat is the gold standard.
+            BENCH_DIFF=$(benchstat "$BENCH_BASELINE" "$BENCH_CURRENT" 2>&1)
+            echo "$BENCH_DIFF"
+            # Detect regressions: lines with +XX.XX% where XX > threshold.
+            # Uses only POSIX awk (match() with RSTART/RLENGTH); the
+            # three-argument match() is a gawk extension and is a syntax
+            # error under mawk and BSD awk.
+            REGRESSIONS=$(echo "$BENCH_DIFF" | awk -v thr="$BENCH_REGRESS_PCT" '
+                match($0, /\+[0-9]+\.[0-9]+%/) {
+                    # Strip the leading "+" and trailing "%" from the matched delta.
+                    pct = substr($0, RSTART + 1, RLENGTH - 2)
+                    if (pct + 0 > thr + 0) print $0
+                }')
+            if [ -n "$REGRESSIONS" ]; then
+                echo "" >&2
+                echo "PUSH BLOCKED: benchmark regression > ${BENCH_REGRESS_PCT}%:" >&2
+                echo "$REGRESSIONS" >&2
+                echo "" >&2
+                echo "To reset: make bench-baseline" >&2
+                FAILED=1
+            else
+                echo "pre-push: benchmarks ok (no regression > ${BENCH_REGRESS_PCT}%)."
+            fi
+        else
+            # Fallback: parse ns/op with awk and compare.
+            # With -count=3 each benchmark appears several times: the last
+            # baseline sample wins, and every current sample is checked
+            # against it individually.
+            REGRESSIONS=$(awk -v thr="$BENCH_REGRESS_PCT" '
+                # Pass 1 (baseline): build name→ns map.
+                NR==FNR && /^Benchmark/ {
+                    name = $1
+                    for (i=2; i<=NF; i++) {
+                        if ($(i) == "ns/op") { base[name] = $(i-1)+0; break }
+                    }
+                    next
+                }
+                # Pass 2 (current): compare.
+                /^Benchmark/ {
+                    name = $1
+                    for (i=2; i<=NF; i++) {
+                        if ($(i) == "ns/op") {
+                            curr = $(i-1)+0
+                            if (name in base && base[name] > 0) {
+                                pct = (curr - base[name]) / base[name] * 100
+                                if (pct > thr+0) {
+                                    printf "  %s: +%.1f%%  (%.1f → %.1f ns/op)\n",
+                                        name, pct, base[name], curr
+                                }
+                            }
+                            break
+                        }
+                    }
+                }
+            ' "$BENCH_BASELINE" "$BENCH_CURRENT")
+            if [ -n "$REGRESSIONS" ]; then
+                echo "" >&2
+                echo "PUSH BLOCKED: benchmark regression > ${BENCH_REGRESS_PCT}% ns/op:" >&2
+                echo "$REGRESSIONS" >&2
+                echo "" >&2
+                echo "To reset: make bench-baseline" >&2
+                FAILED=1
+            else
+                echo "pre-push: benchmarks ok (no regression > ${BENCH_REGRESS_PCT}%)."
+            fi
+        fi
+    else
+        echo "pre-push: no benchmark baseline — recording now."
+    fi
+
+    # Update baseline only when no regression was found.
+    # NOTE(review): this rewrites a committed file on every passing push, so
+    # the baseline follows the latest run — confirm that drift is intended.
+    if [ "$FAILED" -eq 0 ]; then
+        cp "$BENCH_CURRENT" "$BENCH_BASELINE"
+    fi
+fi
+
+# ── Coverage ─────────────────────────────────────────────────────────────────
+
+echo "pre-push: running coverage..."
+# Drop any profile left over from an earlier run: coverage/ is git-ignored, so
+# a stale coverage.out would otherwise be measured if this run fails to write
+# a new one.
+rm -f "$COV_PROFILE"
+# Use nice -n 15 (not cpulimit) for coverage: cpulimit only limits the main
+# go test process but not the per-package child binaries, which causes
+# go test to write only partial coverage data to the profile.
+nice -n 15 go test -timeout 300s -p "${P85}" -parallel "${P85}" \
+    -coverprofile="$COV_PROFILE" -covermode=atomic \
+    ./... >/dev/null 2>&1
+
+# Total coverage as a bare number with one decimal (the "%" is stripped);
+# empty when the profile is missing or has no "total:" line.
+CURRENT_COV=$(go tool cover -func="$COV_PROFILE" 2>/dev/null | \
+    awk '/^total:/ { gsub(/%/, "", $NF); printf "%.1f", $NF }')
+
+if [ -z "$CURRENT_COV" ]; then
+    echo "WARNING: could not determine coverage — skipping coverage guard."
+else
+    echo "pre-push: coverage ${CURRENT_COV}%"
+    if [ -f "$COV_BASELINE" ]; then
+        BASELINE_COV=$(cat "$COV_BASELINE")
+        # awk does the floating-point comparison: "yes" when the drop in
+        # percentage points exceeds the threshold.
+        DROPPED=$(awk -v curr="$CURRENT_COV" -v base="$BASELINE_COV" \
+            -v thr="$COVER_DROP_PCT" \
+            'BEGIN { print (base - curr > thr+0) ? "yes" : "no" }')
+        if [ "$DROPPED" = "yes" ]; then
+            echo "" >&2
+            echo "PUSH BLOCKED: coverage dropped ${BASELINE_COV}% → ${CURRENT_COV}% (limit: -${COVER_DROP_PCT}%)." >&2
+            echo "" >&2
+            echo "To reset: echo ${CURRENT_COV} > coverage/coverage-baseline.txt" >&2
+            FAILED=1
+        else
+            echo "pre-push: coverage ok (baseline: ${BASELINE_COV}%)."
+        fi
+    else
+        echo "pre-push: no coverage baseline — recording now."
+    fi
+
+    # Record the new baseline only when no guard (benchmark or coverage) tripped.
+    if [ "$FAILED" -eq 0 ]; then
+        echo "$CURRENT_COV" > "$COV_BASELINE"
+    fi
+fi
+
+# Report success when nothing tripped, then use FAILED as the exit status
+# (non-zero when a guard tripped).
+[ "$FAILED" -eq 0 ] && echo "pre-push: all guards passed."
+exit "$FAILED"
diff --git a/.gitignore b/.gitignore
@@ -36,6 +36,8 @@ coverage.html
 coverage/*
 # Exception: benchmark baseline IS committed
 !coverage/bench-baseline.json
+!coverage/bench-baseline.txt
+!coverage/coverage-baseline.txt
 reference/mendix-repl/
 reference/mxbuild/
 reference/mendixmodellib/
diff --git a/Makefile b/Makefile
@@ -51,7 +51,7 @@ TEST_PARALLEL ?= $(_85PCT)
 # Hard ceiling on how long the full test suite may run.
 TEST_TIMEOUT ?= 180s
 
-.PHONY: build build-debug release clean test test-mdl report report-bench report-reset-baseline grammar sync-skills sync-commands sync-lint-rules sync-changelog sync-examples sync-all docs documentation docs-site docs-serve source-tree sbom sbom-report lint lint-go fmt vet update-helpdesk-golden test-helpdesk-regression setup
+.PHONY: build build-debug release clean test _test-inner test-mdl report report-bench report-reset-baseline bench-baseline grammar sync-skills sync-commands sync-lint-rules sync-changelog sync-examples sync-all docs documentation docs-site docs-serve source-tree sbom sbom-report lint lint-go fmt vet update-helpdesk-golden test-helpdesk-regression setup
 
 setup:
 	git config core.hooksPath .githooks
@@ -205,19 +205,42 @@ report:
 		--bench-diff coverage/bench-diff.txt \
 		--out-html coverage/report.html
 
-# Run only benchmarks and update the baseline
+# Record benchmark + coverage baselines used by the pre-push guard.
+# Run this after intentional perf changes to silence the guard.
+#
+# Benchmarks: cpulimit/nice wraps go test directly (single process per pkg).
+# Coverage:   always uses nice -n 15; cpulimit only limits the parent
+#             go test process, not the per-package child binaries, which
+#             causes go test to write only partial coverage data.
+# CGO_ENABLED=0 matches the pre-push guard (which exports it), so baselines
+# and guard runs are measured under the same build configuration.
+bench-baseline:
+	@mkdir -p coverage
+	@echo "Recording benchmark baseline (count=3, cpu≤85%)..."
+	CGO_ENABLED=0 $(_CPU_RUNNER) go test -bench=. -benchmem -count=3 \
+		-p $(_85PCT) ./... 2>/dev/null | grep -v "^---" > coverage/bench-baseline.txt
+	@echo "Recording coverage baseline..."
+	CGO_ENABLED=0 nice -n 15 go test -timeout 300s \
+		-p $(_85PCT) -parallel $(_85PCT) \
+		-coverprofile=coverage/coverage.out -covermode=atomic \
+		./... >/dev/null 2>&1
+	@go tool cover -func=coverage/coverage.out | \
+		awk '/^total:/ { gsub(/%/,"",$$NF); printf "%.1f\n",$$NF }' > coverage/coverage-baseline.txt
+	@echo "Benchmarks → coverage/bench-baseline.txt"
+	@echo "Coverage   → $$(cat coverage/coverage-baseline.txt)%  (coverage/coverage-baseline.txt)"
+
+# Run only benchmarks and, if benchstat is installed, diff them against the baseline (legacy target)
 report-bench:
 	@mkdir -p coverage
 	CGO_ENABLED=0 go test -bench=. -benchmem -count=3 ./... > coverage/bench-results.txt
 	@if command -v benchstat >/dev/null 2>&1; then \
-		benchstat coverage/bench-baseline.json coverage/bench-results.txt > coverage/bench-diff.txt || true; \
+		benchstat coverage/bench-baseline.txt coverage/bench-results.txt > coverage/bench-diff.txt || true; \
 		cat coverage/bench-diff.txt; \
 	fi
 
 # Reset benchmark baseline (use after major refactors)
 report-reset-baseline:
-	echo '[]' > coverage/bench-baseline.json
-	@echo "Baseline reset."
+	echo '' > coverage/bench-baseline.txt
+	echo '' > coverage/coverage-baseline.txt
+	@echo "Baselines reset."
 
 # Check MDL syntax for all doctype example scripts
 check-mdl: build
diff --git a/coverage/bench-baseline.txt b/coverage/bench-baseline.txt
diff --git a/coverage/coverage-baseline.txt b/coverage/coverage-baseline.txt