Skip to content

Commit 41084f1

Browse files
authored
Merge pull request #81 from optave/fix/review-comments-benchmarks
feat: add query + incremental regression benchmarks
2 parents fd33140 + 922c7a7 commit 41084f1

4 files changed

Lines changed: 131 additions & 0 deletions

File tree

scripts/update-benchmark-report.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,35 @@ fs.mkdirSync(path.dirname(benchmarkPath), { recursive: true });
154154
fs.writeFileSync(benchmarkPath, md);
155155
console.error(`Updated ${path.relative(root, benchmarkPath)}`);
156156

157+
// ── Regression detection ─────────────────────────────────────────────────
158+
// Relative change, expressed as a fraction, above which a metric is reported
// as a regression (0.15 === 15%).
const REGRESSION_THRESHOLD = 0.15;

// Baseline entry to compare against; null when `history` has no usable
// second element.
// NOTE(review): presumably history[0] is the newest run — confirm.
const prev = history[1] ? history[1] : null;
160+
161+
/**
 * Warn when a lower-is-better metric grew by more than REGRESSION_THRESHOLD
 * relative to its previous value.
 *
 * The warning goes to stderr: as a `::warning` workflow command when the
 * GITHUB_ACTIONS environment variable is set, otherwise as a plain
 * "⚠ REGRESSION" line. Nothing is printed when the change is within the
 * threshold.
 *
 * @param {string} label - Human-readable metric name used in the message.
 * @param {number} current - Value measured in the latest run.
 * @param {number|null|undefined} previous - Baseline value; the check is
 *   skipped when it is null, undefined or 0 (no meaningful ratio).
 * @returns {void}
 */
function checkRegression(label, current, previous) {
  // Without a non-zero baseline a relative change cannot be computed.
  if (previous == null || previous === 0) return;

  const pct = (current - previous) / previous;
  if (pct > REGRESSION_THRESHOLD) {
    const increase = Math.round(pct * 100);
    const limit = Math.round(REGRESSION_THRESHOLD * 100);
    // Keep an explicit separator between the two values so they do not
    // run together into one unreadable number.
    const msg = `${label}: ${previous} → ${current} (+${increase}%, threshold ${limit}%)`;
    if (process.env.GITHUB_ACTIONS) {
      console.error(`::warning title=Benchmark Regression::${msg}`);
    } else {
      console.error(`⚠ REGRESSION: ${msg}`);
    }
  }
}
173+
174+
// Compare the latest entry with the baseline, one engine at a time.
if (prev) {
  ['native', 'wasm'].forEach((engineKey) => {
    const currentRun = latest[engineKey];
    const baselineRun = prev[engineKey];
    // An engine missing from either entry cannot be compared.
    if (!currentRun || !baselineRun) return;

    const prefix = `[${engineKey}]`;
    checkRegression(
      `${prefix} Build ms/file`,
      currentRun.perFile.buildTimeMs,
      baselineRun.perFile.buildTimeMs,
    );
    checkRegression(`${prefix} Query time`, currentRun.queryTimeMs, baselineRun.queryTimeMs);
    checkRegression(
      `${prefix} DB bytes/file`,
      currentRun.perFile.dbSizeBytes,
      baselineRun.perFile.dbSizeBytes,
    );
  });
}
185+
157186
// ── Patch README.md ──────────────────────────────────────────────────────
158187
if (fs.existsSync(readmePath)) {
159188
let readme = fs.readFileSync(readmePath, 'utf8');

scripts/update-embedding-report.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,35 @@ md += `\n<!-- EMBEDDING_BENCHMARK_DATA\n${JSON.stringify(history, null, 2)}\n-->
132132
fs.mkdirSync(path.dirname(reportPath), { recursive: true });
133133
fs.writeFileSync(reportPath, md);
134134
console.error(`Updated ${path.relative(root, reportPath)}`);
135+
136+
// ── Regression detection ─────────────────────────────────────────────────
137+
// Relative change, expressed as a fraction, beyond which a metric is
// reported as a regression (0.15 === 15%).
const REGRESSION_THRESHOLD = 0.15;

// Baseline entry to compare against; null when `history` has no usable
// second element.
// NOTE(review): presumably history[0] is the newest run — confirm.
const prev = history[1] ? history[1] : null;
139+
140+
/**
 * Warn when a metric moved in the "worse" direction by more than
 * REGRESSION_THRESHOLD relative to its previous value.
 *
 * The warning goes to stderr: as a `::warning` workflow command when the
 * GITHUB_ACTIONS environment variable is set, otherwise as a plain
 * "⚠ REGRESSION" line. Nothing is printed when the change is within the
 * threshold or in the "better" direction.
 *
 * @param {string} label - Human-readable metric name used in the message.
 * @param {number} current - Value measured in the latest run.
 * @param {number|null|undefined} previous - Baseline value; the check is
 *   skipped when it is null, undefined or 0 (no meaningful ratio).
 * @param {boolean} [lowerIsBetter=true] - When true an increase counts as a
 *   regression; when false a decrease does.
 * @returns {void}
 */
function checkRegression(label, current, previous, lowerIsBetter = true) {
  // Without a non-zero baseline a relative change cannot be computed.
  if (previous == null || previous === 0) return;

  const pct = (current - previous) / previous;
  const regressed = lowerIsBetter ? pct > REGRESSION_THRESHOLD : pct < -REGRESSION_THRESHOLD;
  if (regressed) {
    const rounded = Math.round(pct * 100);
    // Increases get an explicit "+"; drops already carry their "-" sign.
    const delta = lowerIsBetter ? `+${rounded}%` : `${rounded}%`;
    const limit = Math.round(REGRESSION_THRESHOLD * 100);
    // Keep an explicit separator between the two values so they do not
    // run together into one unreadable number.
    const msg = `${label}: ${previous} → ${current} (${delta}, threshold ${limit}%)`;
    if (process.env.GITHUB_ACTIONS) {
      console.error(`::warning title=Benchmark Regression::${msg}`);
    } else {
      console.error(`⚠ REGRESSION: ${msg}`);
    }
  }
}
154+
155+
// Compare every model in the latest entry with the same model in the baseline.
if (prev) {
  Object.entries(latest.models).forEach(([modelKey, currentModel]) => {
    const baselineModel = prev.models?.[modelKey];
    // Models absent from the baseline have nothing to be compared with.
    if (!baselineModel) return;

    const prefix = `[${modelKey}]`;

    // Hit rates: higher is better, so a drop is the regression.
    const currentHit1 = currentModel.hits1 / currentModel.total;
    const baselineHit1 = baselineModel.hits1 / baselineModel.total;
    checkRegression(`${prefix} Hit@1`, currentHit1, baselineHit1, false);

    const currentHit5 = currentModel.hits5 / currentModel.total;
    const baselineHit5 = baselineModel.hits5 / baselineModel.total;
    checkRegression(`${prefix} Hit@5`, currentHit5, baselineHit5, false);

    // Embed time: lower is better, so an increase is the regression.
    checkRegression(`${prefix} Embed time`, currentModel.embedTimeMs, baselineModel.embedTimeMs);
  });
}

scripts/update-incremental-report.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,40 @@ md += `<!-- INCREMENTAL_BENCHMARK_DATA\n${JSON.stringify(history, null, 2)}\n-->
148148
fs.mkdirSync(path.dirname(reportPath), { recursive: true });
149149
fs.writeFileSync(reportPath, md);
150150
console.error(`Updated ${path.relative(root, reportPath)}`);
151+
152+
// ── Regression detection ─────────────────────────────────────────────────
153+
// Relative change, expressed as a fraction, above which a metric is reported
// as a regression (0.15 === 15%).
const REGRESSION_THRESHOLD = 0.15;

// Baseline entry to compare against; null when `history` has no usable
// second element.
// NOTE(review): presumably history[0] is the newest run — confirm.
const prev = history[1] ? history[1] : null;
155+
156+
/**
 * Warn when a lower-is-better metric grew by more than REGRESSION_THRESHOLD
 * relative to its previous value.
 *
 * The warning goes to stderr: as a `::warning` workflow command when the
 * GITHUB_ACTIONS environment variable is set, otherwise as a plain
 * "⚠ REGRESSION" line. Nothing is printed when the change is within the
 * threshold.
 *
 * @param {string} label - Human-readable metric name used in the message.
 * @param {number} current - Value measured in the latest run.
 * @param {number|null|undefined} previous - Baseline value; the check is
 *   skipped when it is null, undefined or 0 (no meaningful ratio).
 * @returns {void}
 */
function checkRegression(label, current, previous) {
  // Without a non-zero baseline a relative change cannot be computed.
  if (previous == null || previous === 0) return;

  const pct = (current - previous) / previous;
  if (pct > REGRESSION_THRESHOLD) {
    const increase = Math.round(pct * 100);
    const limit = Math.round(REGRESSION_THRESHOLD * 100);
    // Keep an explicit separator between the two values so they do not
    // run together into one unreadable number.
    const msg = `${label}: ${previous} → ${current} (+${increase}%, threshold ${limit}%)`;
    if (process.env.GITHUB_ACTIONS) {
      console.error(`::warning title=Benchmark Regression::${msg}`);
    } else {
      console.error(`⚠ REGRESSION: ${msg}`);
    }
  }
}
168+
169+
// Compare the latest entry with the baseline: per-engine build timings first,
// then the resolve timings.
if (prev) {
  // [label suffix, property name] pairs checked for every engine.
  const engineMetrics = [
    ['Full build', 'fullBuildMs'],
    ['No-op rebuild', 'noopRebuildMs'],
    ['1-file rebuild', 'oneFileRebuildMs'],
  ];

  for (const engineKey of ['native', 'wasm']) {
    const currentRun = latest[engineKey];
    const baselineRun = prev[engineKey];
    // Only engines present in both entries can be compared.
    if (currentRun && baselineRun) {
      for (const [title, field] of engineMetrics) {
        checkRegression(`[${engineKey}] ${title}`, currentRun[field], baselineRun[field]);
      }
    }
  }

  const currentResolve = latest.resolve;
  const baselineResolve = prev.resolve;
  if (currentResolve && baselineResolve) {
    checkRegression(
      `[resolve] JS fallback`,
      currentResolve.jsFallbackMs,
      baselineResolve.jsFallbackMs,
    );

    // The native batch timing is only compared when both entries recorded it.
    const bothHaveNativeBatch =
      currentResolve.nativeBatchMs != null && baselineResolve.nativeBatchMs != null;
    if (bothHaveNativeBatch) {
      checkRegression(
        `[resolve] Native batch`,
        currentResolve.nativeBatchMs,
        baselineResolve.nativeBatchMs,
      );
    }
  }
}

scripts/update-query-report.js

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,36 @@ md += `<!-- QUERY_BENCHMARK_DATA\n${JSON.stringify(history, null, 2)}\n-->\n`;
142142
fs.mkdirSync(path.dirname(reportPath), { recursive: true });
143143
fs.writeFileSync(reportPath, md);
144144
console.error(`Updated ${path.relative(root, reportPath)}`);
145+
146+
// ── Regression detection ─────────────────────────────────────────────────
147+
// Relative change, expressed as a fraction, above which a metric is reported
// as a regression (0.15 === 15%).
const REGRESSION_THRESHOLD = 0.15;

// Baseline entry to compare against; null when `history` has no usable
// second element.
// NOTE(review): presumably history[0] is the newest run — confirm.
const prev = history[1] ? history[1] : null;
149+
150+
/**
 * Warn when a lower-is-better metric grew by more than REGRESSION_THRESHOLD
 * relative to its previous value.
 *
 * The warning goes to stderr: as a `::warning` workflow command when the
 * GITHUB_ACTIONS environment variable is set, otherwise as a plain
 * "⚠ REGRESSION" line. Nothing is printed when the change is within the
 * threshold.
 *
 * @param {string} label - Human-readable metric name used in the message.
 * @param {number} current - Value measured in the latest run.
 * @param {number|null|undefined} previous - Baseline value; the check is
 *   skipped when it is null, undefined or 0 (no meaningful ratio).
 * @returns {void}
 */
function checkRegression(label, current, previous) {
  // Without a non-zero baseline a relative change cannot be computed.
  if (previous == null || previous === 0) return;

  const pct = (current - previous) / previous;
  if (pct > REGRESSION_THRESHOLD) {
    const increase = Math.round(pct * 100);
    const limit = Math.round(REGRESSION_THRESHOLD * 100);
    // Keep an explicit separator between the two values so they do not
    // run together into one unreadable number.
    const msg = `${label}: ${previous} → ${current} (+${increase}%, threshold ${limit}%)`;
    if (process.env.GITHUB_ACTIONS) {
      console.error(`::warning title=Benchmark Regression::${msg}`);
    } else {
      console.error(`⚠ REGRESSION: ${msg}`);
    }
  }
}
162+
163+
// Compare the latest entry with the baseline, one engine at a time.
if (prev) {
  // Metric groups that record one timing per traversal depth.
  const depthMetrics = ['fnDeps', 'fnImpact'];
  const depths = [1, 3, 5];

  ['native', 'wasm'].forEach((engineKey) => {
    const currentRun = latest[engineKey];
    const baselineRun = prev[engineKey];
    // An engine missing from either entry cannot be compared.
    if (!currentRun || !baselineRun) return;

    const prefix = `[${engineKey}]`;
    for (const metric of depthMetrics) {
      for (const depth of depths) {
        const field = `depth${depth}Ms`;
        checkRegression(
          `${prefix} ${metric} d${depth}`,
          currentRun[metric][field],
          baselineRun[metric][field],
        );
      }
    }
    checkRegression(
      `${prefix} diffImpact`,
      currentRun.diffImpact.latencyMs,
      baselineRun.diffImpact.latencyMs,
    );
  });
}

0 commit comments

Comments
 (0)