@@ -16,6 +16,16 @@ import { describe, expect, test } from 'vitest';
1616
1717// ── Configuration ────────────────────────────────────────────────────────
1818
19+ /**
20+ * When BENCH_CANARY=1, only incremental-benchmark checks run and the timing
21+ * thresholds are doubled (25%→50%, noisy 50%→100%, WASM 75%→150%). This mode
22+ * is used by the per-PR perf-canary workflow (.github/workflows/perf-canary.yml),
23+ * which runs only on PRs touching src/extractors/, src/domain/graph/, or crates/.
24+ * The looser thresholds absorb CI runner variance while still catching the class
25+ * of catastrophic regressions that hit v3.12.0 (+98%/+1827%).
26+ */
27+ const BENCH_CANARY = process . env . BENCH_CANARY === '1' ;
28+
1929/**
2030 * Maximum allowed regression (as a fraction, e.g. 0.25 = 25%).
2131 *
@@ -26,8 +36,10 @@ import { describe, expect, test } from 'vitest';
2636 *
2737 * Genuinely high-variance sub-30ms metrics get a wider tolerance via
2838 * `NOISY_METRICS` below — see that set's docstring for rationale.
39+ *
40+ * In BENCH_CANARY mode this is overridden to 0.5 (50%) — see above.
2941 */
30- const REGRESSION_THRESHOLD = 0.25 ;
42+ const REGRESSION_THRESHOLD = BENCH_CANARY ? 0.5 : 0.25 ;
3143
3244/**
3345 * Wider regression threshold applied to metrics in NOISY_METRICS.
@@ -41,8 +53,11 @@ const REGRESSION_THRESHOLD = 0.25;
4153 * Keeping the global threshold at 25% means a regression in the 30–100ms
4254 * range is still caught (e.g. 50ms→63ms = +26%, flagged), while sub-30ms
4355 * metrics in this set get the wider 50% allowance.
56+ *
57+ * In BENCH_CANARY mode this is overridden to 1.0 (100%) — the canary's
58+ * purpose is to catch gross regressions (50% or more), not sub-30ms jitter.
4459 */
45- const NOISY_METRIC_THRESHOLD = 0.5 ;
60+ const NOISY_METRIC_THRESHOLD = BENCH_CANARY ? 1.0 : 0.5 ;
4661
4762/**
4863 * Metric labels treated as high-variance and given the NOISY_METRIC_THRESHOLD
@@ -86,8 +101,12 @@ const NOISY_METRICS = new Set<string>(['No-op rebuild', '1-file rebuild', 'fnDep
86101 * v3.0.1–3.4.0), which 75% still flags, while absorbing the ≤71% shared-runner
87102 * jitter. Size metrics (DB bytes/file) are engine-independent and excluded from
88103 * this widening via SIZE_METRICS below — they keep the strict threshold.
104+ *
105+ * In BENCH_CANARY mode this is overridden to 1.5 (150%) — the canary targets
106+ * gross regressions only, and WASM incremental metrics have extreme variance
107+ * on shared runners.
89108 */
90- const WASM_TIMING_THRESHOLD = 0.75 ;
109+ const WASM_TIMING_THRESHOLD = BENCH_CANARY ? 1.5 : 0.75 ;
91110
92111/**
93112 * Metric labels that measure size/count rather than wall-clock time. These are
@@ -608,6 +627,10 @@ interface IncrementalEntry {
608627// in the default `npm test` run so docs commits that merge already-recorded
609628// regressed history into main don't trigger false failures — by then the
610629// release has already passed the gate.
630+ //
631+ // When BENCH_CANARY=1 (set by .github/workflows/perf-canary.yml), only the
632+ // incremental-benchmark suite runs and thresholds are doubled (50%/100%/150%)
633+ // — see the BENCH_CANARY constant above. RUN_REGRESSION_GUARD=1 is still required.
611634const RUN_REGRESSION_GUARD = process . env . RUN_REGRESSION_GUARD === '1' ;
612635
613636describe . runIf ( RUN_REGRESSION_GUARD ) ( 'Benchmark regression guard' , ( ) => {
@@ -627,7 +650,9 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
627650 // Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version
628651 // behind the current package version). This makes the stale-exemption
629652 // problem self-detecting rather than requiring manual bookkeeping.
630- test ( 'KNOWN_REGRESSIONS entries are not stale' , ( ) => {
653+ // Skipped in canary mode — this check is maintenance-only and irrelevant
654+ // for a lightweight per-PR regression gate.
655+ test . skipIf ( BENCH_CANARY ) ( 'KNOWN_REGRESSIONS entries are not stale' , ( ) => {
631656 // eslint-disable-next-line @typescript-eslint/no-require-imports
632657 const pkgVersion : string = JSON . parse (
633658 fs . readFileSync ( path . join ( ROOT , 'package.json' ) , 'utf8' ) ,
@@ -656,18 +681,22 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
656681 ) . toBe ( 0 ) ;
657682 } ) ;
658683
659- // Validate newest-first ordering assumption for all history arrays
660- test ( 'build history is sorted newest-first' , ( ) => {
684+ // Validate newest-first ordering assumption for all history arrays.
685+ // Build/query ordering checks are skipped in canary mode (only incremental
686+ // history is updated by the canary workflow).
687+ test . skipIf ( BENCH_CANARY ) ( 'build history is sorted newest-first' , ( ) => {
661688 assertNewestFirst ( buildHistory , 'Build benchmark' ) ;
662689 } ) ;
663- test ( 'query history is sorted newest-first' , ( ) => {
690+ test . skipIf ( BENCH_CANARY ) ( 'query history is sorted newest-first' , ( ) => {
664691 assertNewestFirst ( queryHistory , 'Query benchmark' ) ;
665692 } ) ;
666693 test ( 'incremental history is sorted newest-first' , ( ) => {
667694 assertNewestFirst ( incrementalHistory , 'Incremental benchmark' ) ;
668695 } ) ;
669696
670- describe ( 'build benchmarks' , ( ) => {
697+ // In canary mode only the incremental suite runs — build/query/resolution
698+ // benchmarks are not measured by the perf-canary workflow.
699+ describe . skipIf ( BENCH_CANARY ) ( 'build benchmarks' , ( ) => {
671700 for ( const engineKey of [ 'native' , 'wasm' ] as const ) {
672701 const pair = findLatestPair ( buildHistory , ( e ) => e [ engineKey ] != null ) ;
673702 if ( ! pair ) continue ;
@@ -700,7 +729,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
700729 } ) ;
701730 } ) ;
702731
703- describe ( 'query benchmarks' , ( ) => {
732+ describe . skipIf ( BENCH_CANARY ) ( 'query benchmarks' , ( ) => {
704733 for ( const engineKey of [ 'native' , 'wasm' ] as const ) {
705734 const pair = findLatestPair ( queryHistory , ( e ) => e [ engineKey ] != null ) ;
706735 if ( ! pair ) continue ;
@@ -803,7 +832,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
803832 } ) ;
804833 } ) ;
805834
806- describe ( 'resolution benchmarks' , ( ) => {
835+ describe . skipIf ( BENCH_CANARY ) ( 'resolution benchmarks' , ( ) => {
807836 /**
808837 * Resolution precision/recall regression thresholds.
809838 * These are percentage-point drops (not relative %) because resolution
0 commit comments