optave · carlos-alm · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -9,7 +9,7 @@ on:
 permissions: {}
 
 jobs:
-  benchmark:
+  build-benchmark:
     runs-on: ubuntu-latest
     if: >-
       github.event_name == 'workflow_dispatch' ||
@@ -31,16 +31,22 @@ jobs:
 
       - run: npm install
 
-      - name: Run benchmark
+      - name: Run build benchmark
         run: node scripts/benchmark.js 2>/dev/null > benchmark-result.json
 
-      - name: Update report
+      - name: Update build report
         run: node scripts/update-benchmark-report.js benchmark-result.json
 
+      - name: Upload build result
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-benchmark-result
+          path: benchmark-result.json
+
       - name: Check for changes
         id: changes
         run: |
-          if git diff --quiet HEAD -- generated/BENCHMARKS.md README.md; then
+          if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then
             echo "changed=false" >> "$GITHUB_OUTPUT"
           else
             echo "changed=true" >> "$GITHUB_OUTPUT"
@@ -54,20 +60,89 @@ jobs:
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="benchmark/update-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/build-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
-          git add generated/BENCHMARKS.md README.md
-          git commit -m "docs: update performance benchmarks"
+          git add generated/BUILD-BENCHMARKS.md README.md
+          git commit -m "docs: update build performance benchmarks"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update performance benchmarks" \
-            --body "Automated benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update build performance benchmarks" \
+            --body "Automated build benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+
+  embedding-benchmark:
+    runs-on: ubuntu-latest
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: main
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - run: npm install
+
+      - name: Cache HuggingFace models
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/huggingface
+          key: hf-models-${{ runner.os }}-${{ hashFiles('src/embedder.js') }}
+          restore-keys: hf-models-${{ runner.os }}-
+
+      - name: Build graph
+        run: node src/cli.js build .
 
-      - name: Upload result artifact
+      - name: Run embedding benchmark
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: node scripts/embedding-benchmark.js 2>/dev/null > embedding-benchmark-result.json
+
+      - name: Update embedding report
+        run: node scripts/update-embedding-report.js embedding-benchmark-result.json
+
+      - name: Upload embedding result
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-result
-          path: benchmark-result.json
+          name: embedding-benchmark-result
+          path: embedding-benchmark-result.json
+
+      - name: Check for changes
+        id: changes
+        run: |
+          # Use `git status --porcelain` rather than `git diff --quiet HEAD`:
+          # the latter ignores untracked files, so a report generated for the
+          # first time (not yet tracked on main) would be reported as unchanged
+          # and never committed.
+          if [ -z "$(git status --porcelain -- generated/EMBEDDING-BENCHMARKS.md)" ]; then
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Commit and push via PR
+        if: steps.changes.outputs.changed == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          BRANCH="benchmark/embedding-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$BRANCH"
+          git add generated/EMBEDDING-BENCHMARKS.md
+          git commit -m "docs: update embedding benchmarks"
+          git push origin "$BRANCH"
+
+          gh pr create \
+            --base main \
+            --head "$BRANCH" \
+            --title "docs: update embedding benchmarks" \
+            --body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -141,6 +141,47 @@ tests/
 - Parser tests use inline code strings parsed directly with tree-sitter
 - Always run the full suite (`npm test`) before submitting a PR
 
+## Regression Benchmarks
+
+Two regression benchmark scripts live in `scripts/`. These are **not** unit
+tests — they measure performance metrics that reviewers use to judge whether a
+change is acceptable. If your PR touches code covered by a benchmark, you
+**must** run it before and after your changes and include the results in the PR
+description.
+
+| Benchmark | What it measures | When to run |
+|-----------|-----------------|-------------|
+| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
+| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |
+
+### How to report results
+
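+Both scripts output JSON to stdout (progress goes to stderr). The embedding
+benchmark reads the graph from `.codegraph/graph.db`, so build it first with
+`node src/cli.js build .`. Run the relevant benchmark on `main` (before), then
+on your branch (after), and paste both in your PR description: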
+
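+```bash
+# Skip `git stash` / `git stash pop` if you have no uncommitted changes —
+# otherwise `git stash pop` would fail or restore an unrelated stash.
+git stash && git checkout main
+node scripts/benchmark.js > before.json
+
+git checkout - && git stash pop
+node scripts/benchmark.js > after.json
+```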
+
+In the PR, include a table like:
+
+```
+## Benchmark results
+
+| Metric       | Before | After  | Delta |
+|--------------|--------|--------|-------|
+| Build (ms)   | 1200   | 1180   | -20   |
+| Hit@1        | 75.5%  | 76.2%  | +0.7% |
+```
+
+Regressions are not automatically blocking, but unexplained drops in speed or
+recall will be questioned during review.
+
 ## Common Contribution Types
 
 ### Bug Fixes

diff --git a/README.md b/README.md
@@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma
 
 ## 📊 Performance
 
-Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
+Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
 
 | Metric | Latest |
 |---|---|

diff --git a/generated/BENCHMARKS.md → generated/BUILD-BENCHMARKS.md b/generated/BENCHMARKS.md → generated/BUILD-BENCHMARKS.md
diff --git a/scripts/embedding-benchmark.js b/scripts/embedding-benchmark.js
@@ -0,0 +1,145 @@
+#!/usr/bin/env node
+
+/**
+ * Embedding benchmark runner — measures search recall across all models.
+ *
+ * For every function/method/class in the graph, generates a query from the
+ * symbol name (splitIdentifier) and checks if search finds that symbol.
+ * Tests all available embedding models, outputs JSON to stdout.
+ *
+ * Skips jina-code when HF_TOKEN is not set (gated model).
+ *
+ * Usage: node scripts/embedding-benchmark.js > result.json
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { performance } from 'node:perf_hooks';
+import { fileURLToPath } from 'node:url';
+import Database from 'better-sqlite3';
+
+// Directory of this script and its parent directory (`root`), which holds package.json.
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const root = path.resolve(__dirname, '..');
+
+// package.json supplies the version recorded in the JSON output.
+const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
+// Graph database that symbols are loaded from and that embeddings/search run against.
+const dbPath = path.join(root, '.codegraph', 'graph.db');
+
+// Embedding API, loaded via a file URL resolved relative to this script.
+const { buildEmbeddings, MODELS, searchData } = await import(
+	new URL('../src/embedder.js', import.meta.url).href
+);
+
+// Redirect console.log to stderr so only JSON goes to stdout
+// (the original function is kept so it can be restored before the final output).
+const origLog = console.log;
+console.log = (...args) => console.error(...args);
+
+// Files matching this pattern (tests, specs, stories) are excluded from the benchmark symbols.
+const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
+
+/**
+ * Turn a symbol name into a space-separated search query.
+ *
+ * Inserts a space at lower→upper case boundaries ("fooBar" → "foo Bar"),
+ * separates an uppercase run from a following capitalized word
+ * ("HTMLParser" → "HTML Parser"), replaces each run of `_` / `-` with a
+ * single space, and trims the result. Letter case is left unchanged.
+ *
+ * @param {string} name - Symbol name to split.
+ * @returns {string} Query text derived from `name`.
+ */
+function splitIdentifier(name) {
+	return name
+		// lower→upper boundary: "fooBar" → "foo Bar"
+		.replace(/([a-z])([A-Z])/g, '$1 $2')
+		// acronym followed by a capitalized word: "HTMLParser" → "HTML Parser"
+		.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
+		// snake_case / kebab-case separators become one space
+		.replace(/[_-]+/g, ' ')
+		.trim();
+}
+
+function loadSymbols() {
+	const db = new Database(dbPath, { readonly: true });
+	let rows = db
+		.prepare(
+			`SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
+		)
+		.all();
+	db.close();
+
+	rows = rows.filter((r) => !TEST_PATTERN.test(r.file));
+
+	const seen = new Set();
+	const symbols = [];
+	for (const row of rows) {
+		if (seen.has(row.name)) continue;
+		seen.add(row.name);
+		const query = splitIdentifier(row.name);
+		if (query.length < 4) continue;
+		symbols.push({ name: row.name, kind: row.kind, file: row.file, query });
+	}
+	return symbols;
+}
+
+/**
+ * Build embeddings with one model, then measure how often search finds each symbol.
+ *
+ * For every symbol, runs a search for its query (at most 10 results,
+ * minScore 0.01) and takes the 1-based position of the first result whose
+ * name equals the symbol's name. Hit counters are cumulative: a rank-1 hit
+ * also counts toward hits3, hits5 and hits10. Symbols whose search returns
+ * no data, or that are absent from the results, end up in `misses`.
+ *
+ * @param {string} modelKey - Key into MODELS selecting the model to benchmark.
+ * @param {Array<object>} symbols - Entries carrying at least `name` and `query`.
+ * @returns {Promise<object>} `{ dim, contextWindow, hits1, hits3, hits5, hits10,
+ *   misses, total, embedTimeMs, searchTimeMs }`; times are rounded milliseconds.
+ */
+async function benchmarkModel(modelKey, symbols) {
+	// Phase 1: (re)build embeddings for this model and time it.
+	const embedStart = performance.now();
+	await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
+	const embedTimeMs = Math.round(performance.now() - embedStart);
+
+	let hits1 = 0;
+	let hits3 = 0;
+	let hits5 = 0;
+	let hits10 = 0;
+
+	// Phase 2: query once per symbol and time the whole loop.
+	// NOTE(review): searchData is not told which model to use; presumably it picks
+	// up the embeddings just written by buildEmbeddings — confirm.
+	const searchStart = performance.now();
+	for (const { name, query } of symbols) {
+		const data = await searchData(query, dbPath, { minScore: 0.01, limit: 10 });
+		if (!data) continue;
+
+		const names = data.results.map((r) => r.name);
+		// indexOf yields -1 when absent, so rank 0 means "not in the results".
+		const rank = names.indexOf(name) + 1;
+		if (rank === 1) hits1++;
+		if (rank >= 1 && rank <= 3) hits3++;
+		if (rank >= 1 && rank <= 5) hits5++;
+		if (rank >= 1 && rank <= 10) hits10++;
+	}
+	const searchTimeMs = Math.round(performance.now() - searchStart);
+
+	const total = symbols.length;
+	return {
+		dim: MODELS[modelKey].dim,
+		contextWindow: MODELS[modelKey].contextWindow,
+		hits1,
+		hits3,
+		hits5,
+		hits10,
+		// Everything not found within the top 10 (including searches that returned no data).
+		misses: total - hits10,
+		total,
+		embedTimeMs,
+		searchTimeMs,
+	};
+}
+
+// ── Run benchmarks ──────────────────────────────────────────────────────
+
+const symbols = loadSymbols();
+console.error(`Loaded ${symbols.length} symbols for benchmark`);
+
+const hasHfToken = !!process.env.HF_TOKEN;
+const modelKeys = Object.keys(MODELS);
+const results = {};
+
+for (const key of modelKeys) {
+	if (key === 'jina-code' && !hasHfToken) {
+		console.error(`Skipping ${key} (HF_TOKEN not set)`);
+		continue;
+	}
+
+	console.error(`\nBenchmarking model: ${key}...`);
+	try {
+		results[key] = await benchmarkModel(key, symbols);
+		const r = results[key];
+		console.error(
+			`  Hit@1=${r.hits1}/${r.total} Hit@3=${r.hits3}/${r.total} Hit@5=${r.hits5}/${r.total} misses=${r.misses}`,
+		);
+	} catch (err) {
+		console.error(`  FAILED: ${err.message}`);
+	}
+}
+
+// Restore console.log for JSON output
+console.log = origLog;
+
+const output = {
+	version: pkg.version,
+	date: new Date().toISOString().slice(0, 10),
+	strategy: 'structured',
+	symbols: symbols.length,
+	models: results,
+};
+
+console.log(JSON.stringify(output, null, 2));
diff --git a/scripts/update-benchmark-report.js b/scripts/update-benchmark-report.js
@@ -2,7 +2,7 @@
 
 /**
  * Update benchmark report — reads benchmark JSON and updates:
- *   1. generated/BENCHMARKS.md  (historical table + raw JSON in HTML comment)
+ *   1. generated/BUILD-BENCHMARKS.md  (historical table + raw JSON in HTML comment)
  *   2. README.md                (performance section with latest numbers)
  *
  * Usage:
@@ -28,10 +28,10 @@ if (arg) {
 const entry = JSON.parse(jsonText);
 
 // ── Paths ────────────────────────────────────────────────────────────────
-const benchmarkPath = path.join(root, 'generated', 'BENCHMARKS.md');
+const benchmarkPath = path.join(root, 'generated', 'BUILD-BENCHMARKS.md');
 const readmePath = path.join(root, 'README.md');
 
-// ── Load existing history from BENCHMARKS.md ─────────────────────────────
+// ── Load existing history from BUILD-BENCHMARKS.md ─────────────────────────────
 let history = [];
 if (fs.existsSync(benchmarkPath)) {
 	const content = fs.readFileSync(benchmarkPath, 'utf8');
@@ -96,7 +96,7 @@ function engineRow(h, prev, engineKey) {
 	);
 }
 
-// ── Build BENCHMARKS.md ──────────────────────────────────────────────────
+// ── Build BUILD-BENCHMARKS.md ──────────────────────────────────────────────────
 let md = '# Codegraph Performance Benchmarks\n\n';
 md += 'Self-measured on every release by running codegraph on its own codebase.\n';
 md += 'Metrics are normalized per file for cross-version comparability.\n\n';
@@ -177,7 +177,7 @@ if (fs.existsSync(readmePath)) {
 
 	const perfSection = `## 📊 Performance
 
-Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
+Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
 
 | Metric | Latest |
 |---|---|