Skip to content

Commit 65bd28a

Browse files
authored
ci(bench): add per-PR perf canary for extractor/graph/native changes (#1488)
* chore: gitignore napi-generated artifacts in crates/codegraph-core * chore(tests): remove unused biome suppression in visitor.test.ts * fix(titan-run): sync --start-from enum and phase-timestamp list with actual phases * fix(hooks): track Bash file modifications via before/after git status diff Adds snapshot-pre-bash.sh (PreToolUse Bash) + track-bash-writes.sh (PostToolUse Bash): the pre-hook captures git status --porcelain to a per-worktree temp file before each Bash call; the post-hook diffs the before/after state and appends newly modified or created files to .claude/session-edits.log. This closes the gap where files written by sed -i, printf redirects, tee, heredocs, or build tools (Cargo.lock, lockfiles) were never recorded, causing guard-git.sh to emit false-positive BLOCKED errors. Closes #1457 * chore(native): remove dead code (unused var, method, variant, fields) - clojure.rs: annotate lifetime-anchor assignment to silence false-positive - cfg.rs: remove never-called start_line_of method - complexity.rs: remove never-constructed NotHandled variant; convert irrefutable if-let patterns to plain let destructures - dataflow.rs: remove never-read callee fields from CallReturn/Destructured - incremental.rs: remove never-read lang field from CacheEntry cargo check and cargo clippy both clean after these changes. * refactor(native): extract emit_pts_alias_edges params into PtsAliasCtx struct * fix(wasm): sort call targets by confidence before emit to match native engine * fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for incremental tiers * ci(bench): add per-PR perf canary for extractor/graph/native changes Adds .github/workflows/perf-canary.yml — a path-filtered workflow that fires on PRs touching src/extractors/, src/domain/graph/, or crates/** and runs only the incremental-benchmark suite (full build + no-op + 1-file rebuild, both engines). 
Catches the class of regressions that accumulated invisibly across the Phase 8.x PRs and were only detected at v3.12.0 publish time. The regression guard gains BENCH_CANARY=1 mode: raises thresholds to 50%/100%/150% (standard/noisy/WASM) and skips the build, query, and resolution suites — only incremental checks run. This absorbs shared-runner timing variance while still blocking catastrophic regressions (+98% full build, +1827% 1-file rebuild from v3.12.0). Closes #1433 * ci(bench): add permissions block, self-referential path filters, drop excess fetch-depth - Add `permissions: {}` to lock down token scope for the read-only canary - Add `scripts/update-incremental-report.ts` and `tests/benchmarks/regression-guard.test.ts` to path filter so PRs that modify the canary machinery itself also trigger the canary - Remove `fetch-depth: 0` (full history not needed; canary compares against committed benchmark data, not git refs) - Align `node-version: 22` with the integer format used in ci.yml
1 parent 08bdc55 commit 65bd28a

2 files changed

Lines changed: 152 additions & 10 deletions

File tree

.github/workflows/perf-canary.yml

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
name: Perf Canary
2+
3+
# Lightweight per-PR build-time regression gate for PRs that touch the
4+
# extractor, graph-builder, or native Rust layers — the parts of the codebase
5+
# that caused the v3.12.0 regressions (+1827% 1-file rebuild, +98% full build).
6+
#
7+
# Only the incremental-benchmark suite is run (full build + no-op + 1-file
8+
# rebuild for both engines). The regression guard uses BENCH_CANARY=1 mode,
9+
# which raises the base threshold to 50% (100% for noisy metrics, 150% for WASM timings) instead of the full suite's 25% — enough
10+
# to catch catastrophic regressions while tolerating CI runner variance.
11+
#
12+
# This is intentionally separate from the full pre-publish-benchmark job in
13+
# ci.yml, which runs unconditionally on every PR and measures the complete
14+
# suite. The canary completes in roughly 5–10 minutes; the full suite takes
15+
# 20–60 minutes.
16+
17+
on:
18+
pull_request:
19+
paths:
20+
- "src/extractors/**"
21+
- "src/domain/graph/**"
22+
- "crates/**"
23+
- "scripts/benchmark.ts"
24+
- "scripts/incremental-benchmark.ts"
25+
- "scripts/lib/bench-config.ts"
26+
- "scripts/lib/fork-engine.ts"
27+
- "scripts/update-incremental-report.ts"
28+
- "tests/benchmarks/regression-guard.test.ts"
29+
30+
permissions: {}
31+
32+
concurrency:
33+
group: perf-canary-${{ github.ref }}
34+
cancel-in-progress: true
35+
36+
jobs:
37+
perf-canary:
38+
name: Perf canary (incremental tiers)
39+
runs-on: ubuntu-latest
40+
env:
41+
CODEGRAPH_FAST_SKIP_DIAG: "1"
42+
43+
steps:
44+
- uses: actions/checkout@v6
45+
46+
- uses: actions/setup-node@v6
47+
with:
48+
node-version: 22
49+
cache: "npm"
50+
51+
- name: Setup Rust
52+
uses: dtolnay/rust-toolchain@stable
53+
54+
- name: Rust cache
55+
uses: Swatinem/rust-cache@v2
56+
with:
57+
workspaces: crates/codegraph-core
58+
59+
- name: Install napi-rs CLI
60+
timeout-minutes: 5
61+
run: npm install -g @napi-rs/cli@3
62+
63+
- name: Build native addon
64+
working-directory: crates/codegraph-core
65+
run: napi build --release
66+
67+
- name: Install dependencies
68+
timeout-minutes: 20
69+
shell: bash
70+
run: |
71+
for attempt in 1 2 3; do
72+
npm install && break
73+
if [ "$attempt" -lt 3 ]; then
74+
echo "::warning::npm install attempt $attempt failed, retrying in 15s..."
75+
sleep 15
76+
else
77+
echo "::error::npm install failed after 3 attempts"
78+
exit 1
79+
fi
80+
done
81+
82+
- name: Install native addon over published binary
83+
run: node scripts/ci-install-native.mjs
84+
85+
# Build dist/ so benchmarks load the same compiled JS that ships to npm,
86+
# matching the methodology used by the full pre-publish-benchmark gate.
87+
- name: Build TypeScript
88+
run: npm run build
89+
90+
- name: Run incremental benchmark
91+
timeout-minutes: 15
92+
run: |
93+
STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
94+
node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts --version dev --dist > incremental-canary-result.json
95+
96+
- name: Update incremental report
97+
run: |
98+
STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
99+
node $STRIP_FLAG scripts/update-incremental-report.ts incremental-canary-result.json
100+
101+
- name: Regression guard (50% threshold)
102+
env:
103+
RUN_REGRESSION_GUARD: "1"
104+
BENCH_CANARY: "1"
105+
run: npm run test:regression-guard
106+
107+
- name: Upload canary result
108+
if: always()
109+
uses: actions/upload-artifact@v7
110+
with:
111+
name: incremental-canary-result
112+
path: incremental-canary-result.json
113+
if-no-files-found: warn

tests/benchmarks/regression-guard.test.ts

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@ import { describe, expect, test } from 'vitest';
1616

1717
// ── Configuration ────────────────────────────────────────────────────────
1818

19+
/**
20+
* When BENCH_CANARY=1, only incremental-benchmark checks run and all timing
21+
* thresholds are raised (50% base, 100% noisy metrics, 150% WASM). This mode is used by the per-PR perf-canary
22+
* workflow (.github/workflows/perf-canary.yml) which runs only on PRs
23+
* touching src/extractors/, src/domain/graph/, or crates/. The looser
24+
* threshold absorbs CI runner variance while still catching the class of
25+
* catastrophic regressions that hit v3.12.0 (+98%/+1827%).
26+
*/
27+
const BENCH_CANARY = process.env.BENCH_CANARY === '1';
28+
1929
/**
2030
* Maximum allowed regression (as a fraction, e.g. 0.25 = 25%).
2131
*
@@ -26,8 +36,10 @@ import { describe, expect, test } from 'vitest';
2636
*
2737
* Genuinely high-variance sub-30ms metrics get a wider tolerance via
2838
* `NOISY_METRICS` below — see that set's docstring for rationale.
39+
*
40+
* In BENCH_CANARY mode this is overridden to 0.5 (50%) — see above.
2941
*/
30-
const REGRESSION_THRESHOLD = 0.25;
42+
const REGRESSION_THRESHOLD = BENCH_CANARY ? 0.5 : 0.25;
3143

3244
/**
3345
* Wider regression threshold applied to metrics in NOISY_METRICS.
@@ -41,8 +53,11 @@ const REGRESSION_THRESHOLD = 0.25;
4153
* Keeping the global threshold at 25% means a regression in the 30–100ms
4254
* range is still caught (e.g. 50ms→63ms = +26%, flagged), while sub-30ms
4355
* metrics in this set get the wider 50% allowance.
56+
*
57+
* In BENCH_CANARY mode this is overridden to 1.0 (100%) — the canary's
58+
* purpose is to catch gross regressions (+50% or more), not sub-30ms jitter.
4459
*/
45-
const NOISY_METRIC_THRESHOLD = 0.5;
60+
const NOISY_METRIC_THRESHOLD = BENCH_CANARY ? 1.0 : 0.5;
4661

4762
/**
4863
* Metric labels treated as high-variance and given the NOISY_METRIC_THRESHOLD
@@ -86,8 +101,12 @@ const NOISY_METRICS = new Set<string>(['No-op rebuild', '1-file rebuild', 'fnDep
86101
* v3.0.1–3.4.0), which 75% still flags, while absorbing the ≤71% shared-runner
87102
* jitter. Size metrics (DB bytes/file) are engine-independent and excluded from
88103
* this widening via SIZE_METRICS below — they keep the strict threshold.
104+
*
105+
* In BENCH_CANARY mode this is overridden to 1.5 (150%) — the canary targets
106+
* gross regressions only, and WASM incremental metrics have extreme variance
107+
* on shared runners.
89108
*/
90-
const WASM_TIMING_THRESHOLD = 0.75;
109+
const WASM_TIMING_THRESHOLD = BENCH_CANARY ? 1.5 : 0.75;
91110

92111
/**
93112
* Metric labels that measure size/count rather than wall-clock time. These are
@@ -622,6 +641,10 @@ interface IncrementalEntry {
622641
// in the default `npm test` run so docs commits that merge already-recorded
623642
// regressed history into main don't trigger false failures — by then the
624643
// release has already passed the gate.
644+
//
645+
// When BENCH_CANARY=1 (set by .github/workflows/perf-canary.yml), only the
646+
// incremental-benchmark suite runs and thresholds are raised (50%/100%/150%) — see
647+
// the BENCH_CANARY constant above.
625648
const RUN_REGRESSION_GUARD = process.env.RUN_REGRESSION_GUARD === '1';
626649

627650
describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
@@ -641,7 +664,9 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
641664
// Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version
642665
// behind the current package version). This makes the stale-exemption
643666
// problem self-detecting rather than requiring manual bookkeeping.
644-
test('KNOWN_REGRESSIONS entries are not stale', () => {
667+
// Skipped in canary mode — this check is maintenance-only and irrelevant
668+
// for a lightweight build-time regression gate.
669+
test.skipIf(BENCH_CANARY)('KNOWN_REGRESSIONS entries are not stale', () => {
645670
// eslint-disable-next-line @typescript-eslint/no-require-imports
646671
const pkgVersion: string = JSON.parse(
647672
fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8'),
@@ -670,18 +695,22 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
670695
).toBe(0);
671696
});
672697

673-
// Validate newest-first ordering assumption for all history arrays
674-
test('build history is sorted newest-first', () => {
698+
// Validate newest-first ordering assumption for all history arrays.
699+
// Build/query ordering checks are skipped in canary mode (only incremental
700+
// history is updated by the canary workflow).
701+
test.skipIf(BENCH_CANARY)('build history is sorted newest-first', () => {
675702
assertNewestFirst(buildHistory, 'Build benchmark');
676703
});
677-
test('query history is sorted newest-first', () => {
704+
test.skipIf(BENCH_CANARY)('query history is sorted newest-first', () => {
678705
assertNewestFirst(queryHistory, 'Query benchmark');
679706
});
680707
test('incremental history is sorted newest-first', () => {
681708
assertNewestFirst(incrementalHistory, 'Incremental benchmark');
682709
});
683710

684-
describe('build benchmarks', () => {
711+
// In canary mode only the incremental suite runs — build/query/resolution
712+
// benchmarks are not measured by the perf-canary workflow.
713+
describe.skipIf(BENCH_CANARY)('build benchmarks', () => {
685714
for (const engineKey of ['native', 'wasm'] as const) {
686715
const pair = findLatestPair(buildHistory, (e) => e[engineKey] != null);
687716
if (!pair) continue;
@@ -714,7 +743,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
714743
});
715744
});
716745

717-
describe('query benchmarks', () => {
746+
describe.skipIf(BENCH_CANARY)('query benchmarks', () => {
718747
for (const engineKey of ['native', 'wasm'] as const) {
719748
const pair = findLatestPair(queryHistory, (e) => e[engineKey] != null);
720749
if (!pair) continue;
@@ -817,7 +846,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
817846
});
818847
});
819848

820-
describe('resolution benchmarks', () => {
849+
describe.skipIf(BENCH_CANARY)('resolution benchmarks', () => {
821850
/**
822851
* Resolution precision/recall regression thresholds.
823852
* These are percentage-point drops (not relative %) because resolution

0 commit comments

Comments
 (0)