fix: stabilize benchmark targets across engines and preserve README links (#527)

carlos-alm · web-flow · commit b476409a3c24 · 2026-03-19T03:27:59.000-06:00
* fix: stabilize benchmark target selection across engines and preserve README links

- Share selectTargets() results from first engine worker to second via env,
  ensuring both engines benchmark the same hub/mid/leaf symbols
- Preserve existing README benchmark links instead of hardcoding a subset
- Add regression notes explaining the v3.1.4 → v3.3.0 build-time increase

Impact: 2 functions changed, 4 affected

* fix: use non-greedy regex to preserve markdown links in README benchmark line

The previous [^)]+ pattern stopped at the first ) inside markdown link
URLs, truncating and corrupting the captured link string on every run.
Switch to a non-greedy (.+?) followed by a literal ): so the capture ends at the outer ): delimiter.

* fix: remove dead results.native fallback in target propagation

results.native is always null at this point since native runs after
target propagation. The || results.native arm was unreachable.

Impact: 1 function changed, 0 affected
diff --git a/generated/benchmarks/BUILD-BENCHMARKS.md b/generated/benchmarks/BUILD-BENCHMARKS.md
@@ -166,6 +166,18 @@ remains at 6.6 ms/file (vs 5.0 in v2.0.0). The WASM/Native ratio widened from
 2.0x to 3.5x. Further optimization of WASM boundary crossings in the JS
 extractor is needed to recover the regression.
 
+**Build regression (v3.1.4 3.5 ms/file → v3.3.0 8 ms/file, +129% native):** The codebase grew from
+398 to 429 files (+8%), but the per-file regression is real and driven by richer extraction. Between
+v3.1.4 and v3.3.0, type inference was extended to all typed languages (#501), receiver type tracking
+with graded confidence was added (#505), re-exported barrel file symbols are now tracked (#515), and
+package.json exports + monorepo workspace resolution was introduced (#509). These produce 33% more
+nodes/file (13.4 → 17.8) and 28% more edges/file (28.8 → 36.8). The Parse phase tripled on native
+(468 → 1511 ms) because extractors now perform additional AST traversals for type annotations and
+receiver resolution. The Complexity phase grew ~11× (16 → 179 ms) because 33% more functions each
+require full AST analysis. Major refactors also decomposed monolithic extractors into per-category
+handlers (#490) and split domain/feature modules (#491, #492), adding 31 new source files — the
+benchmark measures codegraph on itself, so more source files amplify per-file overhead.
+
 **Native build regression (v3.0.0 4.4 ms/file → v3.0.3 12.3 ms/file):** The regression is entirely
 from new build phases added in v3.0.1 that are now default-on: AST node extraction (651ms),
 dataflow analysis (367ms), and CFG construction (169ms) — totalling ~1,187ms of new work. The original
diff --git a/scripts/benchmark.js b/scripts/benchmark.js
@@ -16,7 +16,7 @@ import { performance } from 'node:perf_hooks';
 import { fileURLToPath } from 'node:url';
 import Database from 'better-sqlite3';
 import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
-import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js';
+import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
 
 // ── Parent process: fork one child per engine, assemble final output ─────
 if (!isWorker()) {
@@ -179,7 +179,7 @@ try {
 
 // ── Query benchmarks ────────────────────────────────────────────────
 console.error(`  [${engine}] Benchmarking queries...`);
-const targets = selectTargets();
+const targets = workerTargets() || selectTargets();
 console.error(`    hub=${targets.hub}, leaf=${targets.leaf}`);
 
 function benchQuery(fn, ...args) {
@@ -219,6 +219,7 @@ const workerResult = {
 	oneFileRebuildMs,
 	oneFilePhases,
 	queries,
+	targets,
 	phases: buildResult?.phases || null,
 };
 
diff --git a/scripts/lib/fork-engine.js b/scripts/lib/fork-engine.js
@@ -25,6 +25,7 @@ import { fork } from 'node:child_process';
 import { fileURLToPath } from 'node:url';
 
 const WORKER_ENV_KEY = '__BENCH_ENGINE__';
+const TARGETS_ENV_KEY = '__BENCH_TARGETS__';
 
 /**
  * Returns true when running inside a forked worker process.
@@ -43,6 +44,16 @@ export function workerEngine() {
 	return engine;
 }
 
+/**
+ * Returns the targets the parent process stored as JSON in TARGETS_ENV_KEY,
+ * or null when that variable is unset/empty or its value is not valid JSON.
+ */
+export function workerTargets() {
+	const raw = process.env[TARGETS_ENV_KEY];
+	if (!raw) return null;
+	try { return JSON.parse(raw); } catch { return null; } // malformed JSON is treated the same as "no targets"
+}
+
 /**
  * Fork a single worker subprocess and collect its JSON output.
  *
@@ -158,17 +169,28 @@ export async function forkEngines(scriptUrl, argv = [], opts = {}) {
 	const results = { wasm: null, native: null };
 
 	// Run engines sequentially — they share the DB file and filesystem state.
+	// After the first engine completes, extract its targets and pass them to
+	// the second engine via TARGETS_ENV_KEY so both benchmark the same symbols.
 	if (hasWasm) {
 		results.wasm = await forkWorker(scriptPath, WORKER_ENV_KEY, 'wasm', argv, timeoutMs);
 	} else {
 		console.error('WASM grammars not built — skipping WASM benchmark');
 	}
 
+	// Propagate targets from the first engine to the second
+	const firstResult = results.wasm;
+	if (firstResult?.targets) {
+		process.env[TARGETS_ENV_KEY] = JSON.stringify(firstResult.targets);
+	}
+
 	if (hasNative) {
 		results.native = await forkWorker(scriptPath, WORKER_ENV_KEY, 'native', argv, timeoutMs);
 	} else {
 		console.error('Native engine not available — skipping native benchmark');
 	}
 
+	// Clean up env
+	delete process.env[TARGETS_ENV_KEY];
+
 	return results;
 }
diff --git a/scripts/query-benchmark.js b/scripts/query-benchmark.js
@@ -17,7 +17,7 @@ import { performance } from 'node:perf_hooks';
 import { fileURLToPath } from 'node:url';
 import Database from 'better-sqlite3';
 import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
-import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js';
+import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
 
 // ── Parent process: fork one child per engine, assemble final output ─────
 if (!isWorker()) {
@@ -186,7 +186,7 @@ function benchDiffImpact(hubName) {
 if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
 await buildGraph(root, { engine, incremental: false });
 
-const targets = selectTargets();
+const targets = workerTargets() || selectTargets();
 console.error(`Targets: hub=${targets.hub}, mid=${targets.mid}, leaf=${targets.leaf}`);
 
 const fnDeps = {};
diff --git a/scripts/update-benchmark-report.js b/scripts/update-benchmark-report.js
@@ -349,9 +349,17 @@ if (fs.existsSync(readmePath)) {
 		: formatMs(latest.wasm.perFile.buildTimeMs * ESTIMATE_FILES);
 	rows += `| ~${(ESTIMATE_FILES).toLocaleString()} files (est.) | **~${estBuild} build** |\n`;
 
+	// Preserve existing benchmark link line from README rather than hardcoding.
+	// Fall back to a default if we can't find it.
+	let benchmarkLinks = '[build benchmarks](generated/benchmarks/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/benchmarks/EMBEDDING-BENCHMARKS.md)';
+	const linksMatch = readme.match(/Self-measured on every release via CI \((.+?)\):/);
+	if (linksMatch) {
+		benchmarkLinks = linksMatch[1];
+	}
+
 	const perfSection = `## 📊 Performance
 
-Self-measured on every release via CI ([build benchmarks](generated/benchmarks/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/benchmarks/EMBEDDING-BENCHMARKS.md)):
+Self-measured on every release via CI (${benchmarkLinks}):
 
 | Metric | Latest |
 |---|---|