Skip to content

Commit a7c2c00

Browse files
authored
fix(scripts): trend annotations fall back to nearest non-null prior release (#1120)
* fix(scripts): fall back to nearest non-null prior release for trend annotations scripts/update-incremental-report.ts compared each release's metrics against the immediately prior release only. When that prior release had null build/rebuild metrics (e.g. 3.9.5 had null after both engine workers were SIGKILL'd on timeout), the trend cell was silently emitted blank, hiding real regressions: 3.9.6 wasm full-build was 14.0s vs 3.9.4's 7.6s (~86% slower) and no-op was 131ms vs 19ms (~589% slower), but the table showed no arrow at all. Replace the per-release prev lookup with findPrevMetric — a per-metric walk back through history that skips releases without a value for that specific metric. Apply consistently in engineRow (the table) and the regression-warning block. Regenerated the report so existing trend annotations now reflect the fallback (3.9.6, 3.8.0 native, 3.6.0 wasm). Closes #1043 * fix(scripts): guard trend against zero baseline; hoist resolve trend lookup (#1120) - trend(): treat previous === 0 the same as null to avoid emitting '↑Infinity%' if a historical entry stores 0 for a metric. Matches the existing previous === 0 guard in checkRegression. - engineRow(): accept pre-computed prevResolveNative/prevResolveJs from the caller instead of recomputing them inside the function. Resolve metrics are release-level (not engine-specific), so the previous code walked history twice per release (once for native, once for wasm) returning identical results both times.
1 parent ec86a08 commit a7c2c00

3 files changed

Lines changed: 211 additions & 41 deletions

File tree

generated/benchmarks/INCREMENTAL-BENCHMARKS.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ Import resolution: native batch vs JS fallback throughput.
88
|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|
99
| 3.10.0 | native | 745 | 2.0s ↓34% | 15ms ↑50% | 66ms ↑22% | 5ms ↓18% | 7ms ↓38% |
1010
| 3.10.0 | wasm | 745 | 8.4s ↓40% | 13ms ↓90% | 51ms ↓18% | 5ms ↓18% | 7ms ↓38% |
11-
| 3.9.6 | native | 744 | 3.0s | 10ms | 54ms | 7ms ~ | 11ms ↑7% |
12-
| 3.9.6 | wasm | 744 | 14.0s | 131ms | 62ms | 7ms ~ | 11ms ↑7% |
11+
| 3.9.6 | native | 744 | 3.0s ↑39% | 10ms ↑11% | 54ms ↓87% | 7ms ~ | 11ms ↑7% |
12+
| 3.9.6 | wasm | 744 | 14.0s ↑86% | 131ms ↑589% | 62ms ~ | 7ms ~ | 11ms ↑7% |
1313
| 3.9.4 | native | 668 | 2.1s ↓4% | 9ms ~ | 406ms ↑7% | 6ms ↑7% | 9ms ↓15% |
1414
| 3.9.4 | wasm | 668 | 7.6s ~ | 19ms ↑6% | 61ms ↓90% | 6ms ↑7% | 9ms ↓15% |
1515
| 3.9.3 | native | 667 | 2.2s ↓76% | 9ms ↑13% | 380ms ↓32% | 6ms ↓5% | 11ms ↑6% |
@@ -22,10 +22,10 @@ Import resolution: native batch vs JS fallback throughput.
2222
| 3.9.0 | wasm | 567 | 6.8s ↓3% | 12ms ↓20% | 541ms ↓10% | 5ms ↓18% | 11ms ~ |
2323
| 3.8.1 | native | 565 | 6.6s ↑468% | 8ms ↑14% | 41ms ↑24% | 6ms ↑51% | 11ms ↓14% |
2424
| 3.8.1 | wasm | 565 | 7.0s ↑493% | 15ms ↑88% | 603ms ↑1727% | 6ms ↑51% | 11ms ↓14% |
25-
| 3.8.0 | native | 564 | 1.2s | 7ms | 33ms | 4ms ↑2% | 12ms ↓19% |
25+
| 3.8.0 | native | 564 | 1.2s ↓50% | 7ms ↓46% | 33ms ↓90% | 4ms ↑2% | 12ms ↓19% |
2626
| 3.8.0 | wasm | 564 | 1.2s ↓82% | 8ms ↓58% | 33ms ↓94% | 4ms ↑2% | 12ms ↓19% |
2727
| 3.7.0 | wasm | 532 | 6.4s ↑5% | 19ms ↑46% | 558ms ↑2% | 4ms ↑3% | 15ms ↑31% |
28-
| 3.6.0 | wasm | 514 | 6.1s | 13ms | 545ms | 4ms ↓3% | 12ms ↑9% |
28+
| 3.6.0 | wasm | 514 | 6.1s ↑20% | 13ms ~ | 545ms ↑7% | 4ms ↓3% | 12ms ↑9% |
2929
| 3.4.1 | native | 473 | 2.4s ↑3% | 13ms ↑8% | 331ms ↓26% | 4ms ~ | 13ms ↑8% |
3030
| 3.4.1 | wasm | 473 | 5.1s ~ | 13ms ↑8% | 511ms ↓17% | 4ms ~ | 13ms ↑8% |
3131
| 3.4.0 | native | 473 | 2.3s ↓4% | 12ms ↑9% | 448ms ↑29% | 4ms ↓58% | 12ms ↓54% |

scripts/update-incremental-report.ts

Lines changed: 64 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,24 @@ if (!isDev) {
6363
}
6464
history.unshift(entry);
6565

66-
function findPrevRelease(hist, fromIdx) {
66+
// Walk back through release history to find the most recent non-null value
67+
// for a specific metric. Lets trend annotations skip past releases whose
68+
// workers crashed and stored null, instead of hiding regressions behind an
69+
// empty cell.
70+
function findPrevMetric(hist, fromIdx, getter) {
6771
for (let i = fromIdx + 1; i < hist.length; i++) {
68-
if (hist[i].version !== 'dev') return hist[i];
72+
if (hist[i].version === 'dev') continue;
73+
const v = getter(hist[i]);
74+
if (v != null) return v;
6975
}
7076
return null;
7177
}
7278

7379
// ── Helpers ──────────────────────────────────────────────────────────────
7480
function trend(current, previous, lowerIsBetter = true) {
75-
if (current == null || previous == null) return '';
81+
// Guard against null/undefined and a zero baseline — dividing by zero would
82+
// emit `↑Infinity%`. Matches the `previous === 0` guard in checkRegression.
83+
if (current == null || previous == null || previous === 0) return '';
7684
const pct = ((current - previous) / previous) * 100;
7785
if (Math.abs(pct) < 2) return ' ~';
7886
if (lowerIsBetter) {
@@ -86,19 +94,21 @@ function formatMs(ms) {
8694
return `${Math.round(ms)}ms`;
8795
}
8896

89-
function engineRow(h, prev, engineKey) {
97+
function engineRow(hist, i, engineKey, prevResolveNative, prevResolveJs) {
98+
const h = hist[i];
9099
const e = h[engineKey];
91-
const p = prev?.[engineKey] || null;
92100
if (!e) return null;
93101

94-
const fullT = trend(e.fullBuildMs, p?.fullBuildMs);
95-
const noopT = trend(e.noopRebuildMs, p?.noopRebuildMs);
96-
const oneT = trend(e.oneFileRebuildMs, p?.oneFileRebuildMs);
102+
const fullT = trend(e.fullBuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.fullBuildMs));
103+
const noopT = trend(e.noopRebuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.noopRebuildMs));
104+
const oneT = trend(
105+
e.oneFileRebuildMs,
106+
findPrevMetric(hist, i, (r) => r[engineKey]?.oneFileRebuildMs),
107+
);
97108

98109
const r = h.resolve;
99-
const pr = prev?.resolve || null;
100-
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, pr?.nativeBatchMs) : '';
101-
const jsT = trend(r.jsFallbackMs, pr?.jsFallbackMs);
110+
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, prevResolveNative) : '';
111+
const jsT = trend(r.jsFallbackMs, prevResolveJs);
102112

103113
const noopCell = e.noopRebuildMs != null ? `${formatMs(e.noopRebuildMs)}${noopT}` : 'n/a';
104114
const oneFileCell = e.oneFileRebuildMs != null ? `${formatMs(e.oneFileRebuildMs)}${oneT}` : 'n/a';
@@ -125,11 +135,13 @@ md +=
125135
'|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|\n';
126136

127137
for (let i = 0; i < history.length; i++) {
128-
const h = history[i];
129-
const prev = findPrevRelease(history, i);
130-
131-
const nativeRow = engineRow(h, prev, 'native');
132-
const wasmRow = engineRow(h, prev, 'wasm');
138+
// Resolve metrics are release-level (not engine-specific), so pre-compute
139+
// their fallback baselines once per release instead of having engineRow walk
140+
// history twice (once per engine type).
141+
const prevResolveNative = findPrevMetric(history, i, (x) => x.resolve?.nativeBatchMs);
142+
const prevResolveJs = findPrevMetric(history, i, (x) => x.resolve?.jsFallbackMs);
143+
const nativeRow = engineRow(history, i, 'native', prevResolveNative, prevResolveJs);
144+
const wasmRow = engineRow(history, i, 'wasm', prevResolveNative, prevResolveJs);
133145
if (nativeRow) md += nativeRow + '\n';
134146
if (wasmRow) md += wasmRow + '\n';
135147
}
@@ -175,7 +187,6 @@ console.error(`Updated ${path.relative(root, reportPath)}`);
175187

176188
// ── Regression detection ─────────────────────────────────────────────────
177189
const REGRESSION_THRESHOLD = 0.15; // 15% regression triggers a warning
178-
const prev = findPrevRelease(history, 0);
179190

180191
function checkRegression(label, current, previous) {
181192
if (previous == null || previous === 0) return;
@@ -190,26 +201,42 @@ function checkRegression(label, current, previous) {
190201
}
191202
}
192203

193-
if (prev) {
194-
for (const engineKey of ['native', 'wasm']) {
195-
const e = latest[engineKey];
196-
const p = prev[engineKey];
197-
if (!e || !p) continue;
198-
const tag = `[${engineKey}]`;
199-
checkRegression(`${tag} Full build`, e.fullBuildMs, p.fullBuildMs);
200-
if (e.noopRebuildMs != null && p.noopRebuildMs != null) {
201-
checkRegression(`${tag} No-op rebuild`, e.noopRebuildMs, p.noopRebuildMs);
202-
}
203-
if (e.oneFileRebuildMs != null && p.oneFileRebuildMs != null) {
204-
checkRegression(`${tag} 1-file rebuild`, e.oneFileRebuildMs, p.oneFileRebuildMs);
205-
}
204+
for (const engineKey of ['native', 'wasm']) {
205+
const e = latest[engineKey];
206+
if (!e) continue;
207+
const tag = `[${engineKey}]`;
208+
checkRegression(
209+
`${tag} Full build`,
210+
e.fullBuildMs,
211+
findPrevMetric(history, 0, (r) => r[engineKey]?.fullBuildMs),
212+
);
213+
if (e.noopRebuildMs != null) {
214+
checkRegression(
215+
`${tag} No-op rebuild`,
216+
e.noopRebuildMs,
217+
findPrevMetric(history, 0, (r) => r[engineKey]?.noopRebuildMs),
218+
);
206219
}
207-
const re = latest.resolve;
208-
const rp = prev.resolve;
209-
if (re && rp) {
210-
checkRegression(`[resolve] JS fallback`, re.jsFallbackMs, rp.jsFallbackMs);
211-
if (re.nativeBatchMs != null && rp.nativeBatchMs != null) {
212-
checkRegression(`[resolve] Native batch`, re.nativeBatchMs, rp.nativeBatchMs);
213-
}
220+
if (e.oneFileRebuildMs != null) {
221+
checkRegression(
222+
`${tag} 1-file rebuild`,
223+
e.oneFileRebuildMs,
224+
findPrevMetric(history, 0, (r) => r[engineKey]?.oneFileRebuildMs),
225+
);
226+
}
227+
}
228+
const re = latest.resolve;
229+
if (re) {
230+
checkRegression(
231+
`[resolve] JS fallback`,
232+
re.jsFallbackMs,
233+
findPrevMetric(history, 0, (r) => r.resolve?.jsFallbackMs),
234+
);
235+
if (re.nativeBatchMs != null) {
236+
checkRegression(
237+
`[resolve] Native batch`,
238+
re.nativeBatchMs,
239+
findPrevMetric(history, 0, (r) => r.resolve?.nativeBatchMs),
240+
);
214241
}
215242
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import { execFileSync } from 'node:child_process';
2+
import fs from 'node:fs';
3+
import os from 'node:os';
4+
import path from 'node:path';
5+
import { fileURLToPath } from 'node:url';
6+
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
7+
8+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
9+
const repoRoot = path.resolve(__dirname, '..', '..');
10+
const scriptPath = path.join(repoRoot, 'scripts', 'update-incremental-report.ts');
11+
12+
// --experimental-strip-types works in Node 22.6+ through current 24.x; the
13+
// renamed --strip-types was added then removed again across 24.x minor
14+
// versions, so prefer the experimental name for compatibility.
15+
const stripFlag = '--experimental-strip-types';
16+
17+
let tmpDir: string;
18+
let reportPath: string;
19+
let entryPath: string;
20+
21+
beforeEach(() => {
22+
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-trend-fallback-'));
23+
reportPath = path.join(tmpDir, 'INCREMENTAL-BENCHMARKS.md');
24+
entryPath = path.join(tmpDir, 'entry.json');
25+
});
26+
27+
afterEach(() => {
28+
if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
29+
});
30+
31+
function runScript() {
32+
execFileSync('node', [stripFlag, scriptPath, entryPath], {
33+
env: { ...process.env, CODEGRAPH_INCREMENTAL_REPORT_PATH: reportPath },
34+
stdio: 'pipe',
35+
});
36+
}
37+
38+
function row(content: string, version: string, engine: string): string {
39+
const re = new RegExp(`^\\| ${version.replace(/\./g, '\\.')} \\| ${engine} \\|.*$`, 'm');
40+
const m = content.match(re);
41+
if (!m) throw new Error(`row not found for ${version}/${engine}`);
42+
return m[0];
43+
}
44+
45+
describe('update-incremental-report trend fallback', () => {
46+
it('falls back to nearest non-null prior release when previous metrics are null', () => {
47+
// Seed: 9.9.4 has full data. 9.9.5 is the null release (workers crashed).
48+
const initial = `# Codegraph Incremental Build Benchmarks
49+
50+
<!-- INCREMENTAL_BENCHMARK_DATA
51+
[
52+
{
53+
"version": "9.9.5",
54+
"date": "2026-05-01",
55+
"files": 100,
56+
"wasm": null,
57+
"native": null,
58+
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
59+
},
60+
{
61+
"version": "9.9.4",
62+
"date": "2026-04-18",
63+
"files": 100,
64+
"wasm": { "fullBuildMs": 1000, "noopRebuildMs": 10, "oneFileRebuildMs": 50 },
65+
"native": { "fullBuildMs": 500, "noopRebuildMs": 5, "oneFileRebuildMs": 25 },
66+
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
67+
}
68+
]
69+
-->
70+
`;
71+
fs.writeFileSync(reportPath, initial);
72+
// New release 9.9.6 doubles wasm full-build and 13x's no-op rebuild — should
73+
// compare against 9.9.4 (skipping null 9.9.5) and show large regressions.
74+
fs.writeFileSync(
75+
entryPath,
76+
JSON.stringify({
77+
version: '9.9.6',
78+
date: '2026-04-30',
79+
files: 100,
80+
wasm: { fullBuildMs: 2000, noopRebuildMs: 130, oneFileRebuildMs: 60 },
81+
native: { fullBuildMs: 600, noopRebuildMs: 8, oneFileRebuildMs: 30 },
82+
resolve: {
83+
imports: 100,
84+
nativeBatchMs: 5,
85+
jsFallbackMs: 10,
86+
perImportNativeMs: 0,
87+
perImportJsMs: 0,
88+
},
89+
}),
90+
);
91+
92+
runScript();
93+
const out = fs.readFileSync(reportPath, 'utf8');
94+
95+
const wasmRow = row(out, '9.9.6', 'wasm');
96+
// Full build: 1000 → 2000 = +100%
97+
expect(wasmRow).toContain('↑100%');
98+
// No-op: 10 → 130 = +1200%
99+
expect(wasmRow).toContain('↑1200%');
100+
// 1-file: 50 → 60 = +20%
101+
expect(wasmRow).toContain('↑20%');
102+
103+
// And the null 9.9.5 row still renders without a trend (no prior data
104+
// for it to compare against — it itself has no metrics).
105+
expect(out).not.toMatch(/^\| 9\.9\.5 \| wasm \|/m);
106+
expect(out).not.toMatch(/^\| 9\.9\.5 \| native \|/m);
107+
});
108+
109+
it('leaves trend empty when no prior release has the metric at all', () => {
110+
const initial = `# Codegraph Incremental Build Benchmarks
111+
112+
<!-- INCREMENTAL_BENCHMARK_DATA
113+
[]
114+
-->
115+
`;
116+
fs.writeFileSync(reportPath, initial);
117+
fs.writeFileSync(
118+
entryPath,
119+
JSON.stringify({
120+
version: '1.0.0',
121+
date: '2026-01-01',
122+
files: 50,
123+
wasm: { fullBuildMs: 1000, noopRebuildMs: 10, oneFileRebuildMs: 50 },
124+
native: { fullBuildMs: 500, noopRebuildMs: 5, oneFileRebuildMs: 25 },
125+
resolve: {
126+
imports: 50,
127+
nativeBatchMs: 1,
128+
jsFallbackMs: 2,
129+
perImportNativeMs: 0,
130+
perImportJsMs: 0,
131+
},
132+
}),
133+
);
134+
135+
runScript();
136+
const out = fs.readFileSync(reportPath, 'utf8');
137+
const wasmRow = row(out, '1.0.0', 'wasm');
138+
// No prior history => no arrow annotations on any cell
139+
expect(wasmRow).not.toContain('↑');
140+
expect(wasmRow).not.toContain('↓');
141+
expect(wasmRow).not.toContain('~');
142+
});
143+
});

0 commit comments

Comments
 (0)