perf(ci): add baseline regression gate for pull requests

vitormattos · vitormattos · commit 3d064dca6449 · 2026-06-01T01:12:45.000-03:00
Signed-off-by: Vitor Mattos <1079143+vitormattos@users.noreply.github.com>
diff --git a/.github/.performance/baseline.json b/.github/.performance/baseline.json
@@ -1,13 +1,14 @@
 {
   "version": "1.0.0",
   "created_at": "2026-06-01",
+  "allowed_regression_pct": 25.0,
   "benchmarks": {
     "LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchSimpleHtml": {
-      "mean": 2.5,
+      "mean": 0.356085,
       "memory_real": 768
     },
     "LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchComplexHtml": {
-      "mean": 3.2,
+      "mean": 1.366,
       "memory_real": 1024
     }
   }
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -29,6 +29,10 @@ PHPBench automatically:
 - Executes multiple revisions for statistical confidence
 - Reports mean, min, max, stdev, variance per benchmark (CI dump file: `build/benchmark-results.xml`)
 
+Baseline persistence policy:
+- Regression gate compares PR results against `.github/.performance/baseline.json`.
+- Baseline updates are made via pull request (no direct commit to protected `main`).
+
 ## Compliance and contribution
 
 - DCO sign-off is mandatory for every commit.
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -25,17 +25,83 @@ jobs:
       - uses: shivammathur/setup-php@v2
         with:
           php-version: ${{ steps.php_min.outputs.version }}
+          coverage: none
       - run: composer install --no-interaction --prefer-dist
       - run: composer bin all install --no-interaction --prefer-dist
       
       - name: Run benchmarks (strict CI mode with verbose output)
         run: |
-          composer benchmark:run:ci
+          # A pipeline's exit status is that of its last command (tee) unless
+          # pipefail is set; enable it so a failing benchmark run fails the step.
+          set -o pipefail
+          # tee opens its output file when the pipeline starts, so the target
+          # directory has to exist before the benchmarks run.
+          mkdir -p build
+          composer benchmark:run:ci | tee build/benchmark-output.txt
           
           echo "✓ Benchmarks completed"
           echo ""
           echo "Results saved to build/benchmark-results.xml"
           echo "Review: Ensure no individual benchmark takes >10ms average"
+
+      - name: Validate regression against baseline
+        if: github.event_name == 'pull_request'
+        run: |
+          python3 - <<'PY'
+          import json
+          import pathlib
+          import re
+          import sys
+
+          baseline_path = pathlib.Path('.github/.performance/baseline.json')
+          output_path = pathlib.Path('build/benchmark-output.txt')
+
+          if not baseline_path.exists():
+            print('Baseline file not found, skipping regression validation.')
+            sys.exit(0)
+
+          if not output_path.exists():
+            print('Benchmark output not found, cannot validate regression.')
+            sys.exit(1)
+
+          baseline = json.loads(baseline_path.read_text())
+          output = output_path.read_text()
+
+          patterns = {
+            'LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchSimpleHtml': r'benchSimpleHtml.*?Mo([0-9]+(?:\\.[0-9]+)?)(μs|us|ms)',
+            'LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchComplexHtml': r'benchComplexHtml.*?Mo([0-9]+(?:\\.[0-9]+)?)(μs|us|ms)',
+          }
+
+          tolerance = float(baseline.get('allowed_regression_pct', 25.0)) / 100.0
+          benchmark_baseline = baseline.get('benchmarks', {})
+
+          failures = []
+          for name, pattern in patterns.items():
+            match = re.search(pattern, output)
+            if not match:
+              failures.append(f'{name}: metric not found in benchmark output')
+              continue
+
+            value = float(match.group(1))
+            unit = match.group(2)
+            current_ms = value / 1000.0 if unit in ('μs', 'us') else value
+
+            if name not in benchmark_baseline:
+              failures.append(f'{name}: missing baseline entry')
+              continue
+
+            baseline_ms = float(benchmark_baseline[name]['mean'])
+            limit_ms = baseline_ms * (1.0 + tolerance)
+
+            print(f'{name}: current={current_ms:.6f}ms baseline={baseline_ms:.6f}ms limit={limit_ms:.6f}ms')
+
+            if current_ms > limit_ms:
+              failures.append(
+                f'{name}: regression detected ({current_ms:.6f}ms > {limit_ms:.6f}ms)'
+              )
+
+          if failures:
+            print('\nPerformance regression gate failed:')
+            for failure in failures:
+              print(f' - {failure}')
+            sys.exit(1)
+
+          print('\nPerformance regression gate passed.')
+          PY
       
       - name: Save benchmark results as artifact
         if: always()

Original file line number	Diff line number	Diff line change
`@@ -1,13 +1,14 @@`
`1`	`1`	`{`
`2`	`2`	`"version": "1.0.0",`
`3`	`3`	`"created_at": "2026-06-01",`
	`4`	`+ "allowed_regression_pct": 25.0,`
`4`	`5`	`"benchmarks": {`
`5`	`6`	`"LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchSimpleHtml": {`
`6`		`- "mean": 2.5,`
	`7`	`+ "mean": 0.356085,`
`7`	`8`	`"memory_real": 768`
`8`	`9`	`},`
`9`	`10`	`"LibreSign\\XObjectTemplate\\Benchmarks\\CompilerBench::benchComplexHtml": {`
`10`		`- "mean": 3.2,`
	`11`	`+ "mean": 1.366,`
`11`	`12`	`"memory_real": 1024`
`12`	`13`	`}`
`13`	`14`	`}`