diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 438f495c4..94e2f131a 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -9,7 +9,7 @@ on:
 permissions: {}
 
 jobs:
-  benchmark:
+  build-benchmark:
     runs-on: ubuntu-latest
     if: >-
       github.event_name == 'workflow_dispatch' ||
@@ -31,16 +31,22 @@ jobs:
 
       - run: npm install
 
-      - name: Run benchmark
+      - name: Run build benchmark
         run: node scripts/benchmark.js 2>/dev/null > benchmark-result.json
 
-      - name: Update report
+      - name: Update build report
         run: node scripts/update-benchmark-report.js benchmark-result.json
 
+      - name: Upload build result
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-benchmark-result
+          path: benchmark-result.json
+
       - name: Check for changes
         id: changes
         run: |
-          if git diff --quiet HEAD -- generated/BENCHMARKS.md README.md; then
+          if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then
             echo "changed=false" >> "$GITHUB_OUTPUT"
           else
             echo "changed=true" >> "$GITHUB_OUTPUT"
@@ -54,20 +60,89 @@ jobs:
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          BRANCH="benchmark/update-$(date +%Y%m%d-%H%M%S)"
+          BRANCH="benchmark/build-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
-          git add generated/BENCHMARKS.md README.md
-          git commit -m "docs: update performance benchmarks"
+          git add generated/BUILD-BENCHMARKS.md README.md
+          git commit -m "docs: update build performance benchmarks"
           git push origin "$BRANCH"
 
           gh pr create \
             --base main \
             --head "$BRANCH" \
-            --title "docs: update performance benchmarks" \
-            --body "Automated benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+            --title "docs: update build performance benchmarks" \
+            --body "Automated build benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+
+  embedding-benchmark:
+    runs-on: ubuntu-latest
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: main
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - run: npm install
+
+      - name: Cache HuggingFace models
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/huggingface  # NOTE(review): confirm src/embedder.js actually stores models here
+          key: hf-models-${{ runner.os }}-${{ hashFiles('src/embedder.js') }}
+          restore-keys: hf-models-${{ runner.os }}-
+
+      - name: Build graph
+        run: node src/cli.js build .
 
-      - name: Upload result artifact
+      - name: Run embedding benchmark
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: node scripts/embedding-benchmark.js > embedding-benchmark-result.json  # stderr (progress, per-model failures) stays in the job log
+
+      - name: Update embedding report
+        run: node scripts/update-embedding-report.js embedding-benchmark-result.json
+
+      - name: Upload embedding result
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-result
-          path: benchmark-result.json
+          name: embedding-benchmark-result
+          path: embedding-benchmark-result.json
+
+      - name: Check for changes
+        id: changes
+        run: |
+          # `git status` also lists a report that is not tracked yet, which
+          # `git diff HEAD` would miss the first time the file is generated.
+          CHANGED=false
+          if [ -n "$(git status --porcelain -- generated/EMBEDDING-BENCHMARKS.md)" ]; then CHANGED=true; fi
+          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"
+
+      - name: Commit and push via PR
+        if: steps.changes.outputs.changed == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          BRANCH="benchmark/embedding-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$BRANCH"
+          git add generated/EMBEDDING-BENCHMARKS.md
+          git commit -m "docs: update embedding benchmarks"
+          git push origin "$BRANCH"
+
+          gh pr create \
+            --base main \
+            --head "$BRANCH" \
+            --title "docs: update embedding benchmarks" \
+            --body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8239ffa83..53ec6a57a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -141,6 +141,47 @@ tests/
 - Parser tests use inline code strings parsed directly with tree-sitter
 - Always run the full suite (`npm test`) before submitting a PR
 
+## Regression Benchmarks
+
+Two regression benchmark scripts live in `scripts/`. These are **not** unit
+tests — they measure performance metrics that reviewers use to judge whether a
+change is acceptable. If your PR touches code covered by a benchmark, you
+**must** run it before and after your changes and include the results in the PR
+description.
+
+| Benchmark | What it measures | When to run |
+|-----------|-----------------|-------------|
+| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
+| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |
+
+### How to report results
+
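+Both scripts output JSON to stdout (progress goes to stderr). The embedding benchmark reads
+`.codegraph/graph.db`, so build the graph first (`node src/cli.js build .`). Run the relevant
+benchmark on `main` (before), then on your branch (after), and paste both in your PR description: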
+
+```bash
+git stash && git checkout main
+node scripts/benchmark.js > before.json
+# Only run `git stash pop` below if `git stash` above actually saved changes
+git checkout - && git stash pop
+node scripts/benchmark.js > after.json
+```
+
+In the PR, include a table like:
+
+```
+## Benchmark results
+
+| Metric       | Before | After  | Delta |
+|--------------|--------|--------|-------|
+| Build (ms)   | 1200   | 1180   | -20   |
+| Hit@1        | 75.5%  | 76.2%  | +0.7% |
+```
+
+Regressions are not automatically blocking, but unexplained drops in speed or
+recall will be questioned during review.
+
 ## Common Contribution Types
 
 ### Bug Fixes
diff --git a/README.md b/README.md
index cdc82d778..ddee7f34d 100644
--- a/README.md
+++ b/README.md
@@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma
 
 ## 📊 Performance
 
-Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
+Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
 
 | Metric | Latest |
 |---|---|
diff --git a/generated/BENCHMARKS.md b/generated/BUILD-BENCHMARKS.md
similarity index 100%
rename from generated/BENCHMARKS.md
rename to generated/BUILD-BENCHMARKS.md
diff --git a/scripts/embedding-benchmark.js b/scripts/embedding-benchmark.js
new file mode 100644
index 000000000..73fe2d8e0
--- /dev/null
+++ b/scripts/embedding-benchmark.js
@@ -0,0 +1,145 @@
+#!/usr/bin/env node
+
+/**
+ * Embedding benchmark runner — measures search recall across all models.
+ *
+ * For every function/method/class in the graph, generates a query from the
+ * symbol name (splitIdentifier) and checks if search finds that symbol.
+ * Tests all available embedding models, outputs JSON to stdout.
+ *
+ * Skips jina-code when HF_TOKEN is not set (gated model).
+ *
+ * Usage: node scripts/embedding-benchmark.js > result.json
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { performance } from 'node:perf_hooks';
+import { fileURLToPath } from 'node:url';
+import Database from 'better-sqlite3';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const root = path.resolve(__dirname, '..');
+
+const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
+const dbPath = path.join(root, '.codegraph', 'graph.db');
+
+const { buildEmbeddings, MODELS, searchData } = await import(
+	new URL('../src/embedder.js', import.meta.url).href
+);
+
+// Redirect console.log to stderr so only JSON goes to stdout
+const origLog = console.log;
+console.log = (...args) => console.error(...args);
+
+const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
+
+function splitIdentifier(name) {
+	// Split at camelCase boundaries, then at acronym boundaries
+	// (e.g. "HTMLParser" → "HTML Parser"), then at snake_case / kebab-case separators.
+	const camelSplit = name.replace(/([a-z])([A-Z])/g, '$1 $2');
+	const acronymSplit = camelSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
+	return acronymSplit.replace(/[_-]+/g, ' ').trim();
+}
+
+/** Reads unique, non-test function/method/class symbols from the graph DB and derives a query for each. */
+function loadSymbols() {
+	const db = new Database(dbPath, { readonly: true });
+	const sql = `SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`;
+	const allRows = db.prepare(sql).all();
+	db.close();
+
+	const seenNames = new Set();
+	const symbols = [];
+	for (const { name, kind, file } of allRows) {
+		// Drop test/story files first so they never occupy a name in the dedupe set.
+		if (TEST_PATTERN.test(file)) continue;
+		// Keep only the first occurrence of each name (rows are ordered by file, line).
+		if (seenNames.has(name)) continue;
+		seenNames.add(name);
+		const query = splitIdentifier(name);
+		// Queries shorter than 4 characters are skipped.
+		if (query.length < 4) continue;
+		symbols.push({ name, kind, file, query });
+	}
+	return symbols;
+}
+
+/**
+ * Calls buildEmbeddings for one model (structured strategy), then replays every symbol
+ * query through searchData and counts how often the symbol is in the top 1/3/5/10 results.
+ */
+async function benchmarkModel(modelKey, symbols) {
+	const embedStart = performance.now();
+	await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
+	const embedTimeMs = Math.round(performance.now() - embedStart);
+
+	const hitsAt = { 1: 0, 3: 0, 5: 0, 10: 0 };
+
+	const searchStart = performance.now();
+	for (const { name, query } of symbols) {
+		const data = await searchData(query, dbPath, { minScore: 0.01, limit: 10 });
+		if (!data) continue; // no search data: the symbol simply counts as a miss
+		// 1-based rank of the expected symbol; 0 means it was not returned
+		const rank = data.results.findIndex((r) => r.name === name) + 1;
+		if (rank === 0) continue;
+		for (const k of [1, 3, 5, 10]) {
+			if (rank <= k) hitsAt[k]++;
+		}
+	}
+	const searchTimeMs = Math.round(performance.now() - searchStart);
+
+	const { dim, contextWindow } = MODELS[modelKey];
+	return {
+		dim,
+		contextWindow,
+		hits1: hitsAt[1],
+		hits3: hitsAt[3],
+		hits5: hitsAt[5],
+		hits10: hitsAt[10],
+		misses: symbols.length - hitsAt[10],
+		total: symbols.length,
+		embedTimeMs,
+		searchTimeMs,
+	};
+}
+
+// ── Run benchmarks ──────────────────────────────────────────────────────
+
+const symbols = loadSymbols();
+console.error(`Loaded ${symbols.length} symbols for benchmark`);
+
+const hasHfToken = !!process.env.HF_TOKEN;
+const results = {};
+
+for (const key of Object.keys(MODELS)) {
+	if (key === 'jina-code' && !hasHfToken) {
+		console.error(`Skipping ${key} (HF_TOKEN not set)`);
+		continue;
+	}
+
+	console.error(`\nBenchmarking model: ${key}...`);
+	try {
+		const r = await benchmarkModel(key, symbols);
+		results[key] = r;
+		console.error(
+			`  Hit@1=${r.hits1}/${r.total} Hit@3=${r.hits3}/${r.total} Hit@5=${r.hits5}/${r.total} misses=${r.misses}`,
+		);
+	} catch (err) {
+		console.error(`  FAILED: ${err.message}`);
+	}
+}
+
+// Restore console.log for JSON output
+console.log = origLog;
+
+const output = {
+	version: pkg.version,
+	date: new Date().toISOString().slice(0, 10),
+	strategy: 'structured',
+	symbols: symbols.length,
+	models: results,
+};
+console.log(JSON.stringify(output, null, 2));
+// Exit non-zero when no model produced results, so CI does not publish an empty report
+if (Object.keys(results).length === 0) process.exitCode = 1;
diff --git a/scripts/update-benchmark-report.js b/scripts/update-benchmark-report.js
index 3a18393ae..0f2bb1b59 100644
--- a/scripts/update-benchmark-report.js
+++ b/scripts/update-benchmark-report.js
@@ -2,7 +2,7 @@
 
 /**
  * Update benchmark report — reads benchmark JSON and updates:
- *   1. generated/BENCHMARKS.md  (historical table + raw JSON in HTML comment)
+ *   1. generated/BUILD-BENCHMARKS.md  (historical table + raw JSON in HTML comment)
  *   2. README.md                (performance section with latest numbers)
  *
  * Usage:
@@ -28,10 +28,10 @@ if (arg) {
 const entry = JSON.parse(jsonText);
 
 // ── Paths ────────────────────────────────────────────────────────────────
-const benchmarkPath = path.join(root, 'generated', 'BENCHMARKS.md');
+const benchmarkPath = path.join(root, 'generated', 'BUILD-BENCHMARKS.md');
 const readmePath = path.join(root, 'README.md');
 
-// ── Load existing history from BENCHMARKS.md ─────────────────────────────
+// ── Load existing history from BUILD-BENCHMARKS.md ─────────────────────────────
 let history = [];
 if (fs.existsSync(benchmarkPath)) {
 	const content = fs.readFileSync(benchmarkPath, 'utf8');
@@ -96,7 +96,7 @@ function engineRow(h, prev, engineKey) {
 	);
 }
 
-// ── Build BENCHMARKS.md ──────────────────────────────────────────────────
+// ── Build BUILD-BENCHMARKS.md ──────────────────────────────────────────────────
 let md = '# Codegraph Performance Benchmarks\n\n';
 md += 'Self-measured on every release by running codegraph on its own codebase.\n';
 md += 'Metrics are normalized per file for cross-version comparability.\n\n';
@@ -177,7 +177,7 @@ if (fs.existsSync(readmePath)) {
 
 	const perfSection = `## 📊 Performance
 
-Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
+Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
 
 | Metric | Latest |
 |---|---|
diff --git a/scripts/update-embedding-report.js b/scripts/update-embedding-report.js
new file mode 100644
index 000000000..d866eb8e9
--- /dev/null
+++ b/scripts/update-embedding-report.js
@@ -0,0 +1,134 @@
+#!/usr/bin/env node
+
+/**
+ * Update embedding benchmark report — reads benchmark JSON and updates:
+ *   generated/EMBEDDING-BENCHMARKS.md (historical table + raw JSON in HTML comment)
+ *
+ * Usage:
+ *   node scripts/update-embedding-report.js embedding-benchmark-result.json
+ *   node scripts/embedding-benchmark.js | node scripts/update-embedding-report.js
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const root = path.resolve(__dirname, '..');
+
+// ── Read benchmark JSON from file arg or stdin ───────────────────────────
+const arg = process.argv[2];
+// Read stdin through file descriptor 0 rather than the '/dev/stdin' path:
+// the descriptor is available on every platform, while the path does not
+// exist on Windows.
+const jsonText = arg
+	? fs.readFileSync(path.resolve(arg), 'utf8')
+	: fs.readFileSync(0, 'utf8');
+const entry = JSON.parse(jsonText);
+
+// ── Paths ────────────────────────────────────────────────────────────────
+const reportPath = path.join(root, 'generated', 'EMBEDDING-BENCHMARKS.md');
+
+// ── Load existing history ────────────────────────────────────────────────
+let history = [];
+if (fs.existsSync(reportPath)) {
+	const content = fs.readFileSync(reportPath, 'utf8');
+	const match = content.match(/<!--\s*EMBEDDING_BENCHMARK_DATA\s*([\s\S]*?)\s*-->/);
+	try {
+		const parsed = match ? JSON.parse(match[1]) : [];
+		// Valid JSON that is not an array is treated like corrupt data: start fresh
+		if (Array.isArray(parsed)) history = parsed;
+	} catch {
+		/* start fresh if corrupt */
+	}
+}
+
+// Add new entry (deduplicate by version)
+const idx = history.findIndex((h) => h.version === entry.version);
+if (idx >= 0) {
+	history[idx] = entry;
+} else {
+	history.unshift(entry);
+}
+
+// ── Helpers ──────────────────────────────────────────────────────────────
+/** Hit rate as a number in percent; 0 when total is 0, so no NaN reaches the report. */
+function pctVal(n, total) {
+	return total > 0 ? (n / total) * 100 : 0;
+}
+/** Hit rate formatted with one decimal place, e.g. "75.5%". */
+function pct(n, total) {
+	return `${pctVal(n, total).toFixed(1)}%`;
+}
+/** Percentage-point change against the previous value; '' when there is no previous value. */
+function trend(current, previous) {
+	if (previous == null) return '';
+	const diff = current - previous;
+	if (Math.abs(diff) < 0.5) return ' ~';
+	return diff > 0 ? ` ↑${diff.toFixed(1)}pp` : ` ↓${Math.abs(diff).toFixed(1)}pp`;
+}
+/** Duration in seconds with one decimal from 1000 ms upward, otherwise in whole milliseconds. */
+function formatMs(ms) {
+	return ms >= 1000 ? `${(ms / 1000).toFixed(1)}s` : `${Math.round(ms)}ms`;
+}
+
+// ── Build EMBEDDING-BENCHMARKS.md ────────────────────────────────────────
+let md = '# Codegraph Embedding Benchmarks\n\n';
+md += 'Self-measured on every release using auto-generated queries from symbol names.\n';
+md += 'Each symbol\'s name is split into words (e.g. `buildGraph` → `"build graph"`) and used as the search query.\n';
+md += 'Hit@N = expected symbol found in top N results.\n\n';
+
+md +=
+	'| Version | Model | Symbols | Hit@1 | Hit@3 | Hit@5 | Misses | Embed Time |\n';
+md +=
+	'|---------|-------|--------:|------:|------:|------:|-------:|-----------:|\n';
+
+for (let i = 0; i < history.length; i++) {
+	const h = history[i];
+	const prev = history[i + 1] || null; // following history element = comparison baseline
+	// One table row per model recorded in this history entry
+	for (const [modelKey, m] of Object.entries(h.models)) {
+		const pm = prev?.models?.[modelKey] || null; // null when the baseline lacks this model
+		// Hit rates in percent: h* for this entry, ph* for the baseline (null without one)
+		const h1 = pctVal(m.hits1, m.total);
+		const h3 = pctVal(m.hits3, m.total);
+		const h5 = pctVal(m.hits5, m.total);
+		const ph1 = pm ? pctVal(pm.hits1, pm.total) : null;
+		const ph3 = pm ? pctVal(pm.hits3, pm.total) : null;
+		const ph5 = pm ? pctVal(pm.hits5, pm.total) : null;
+		// Each Hit@N cell is the formatted rate followed by its trend marker
+		md += `| ${h.version} | ${modelKey} | ${m.total} `;
+		md += `| ${pct(m.hits1, m.total)}${trend(h1, ph1)} `;
+		md += `| ${pct(m.hits3, m.total)}${trend(h3, ph3)} `;
+		md += `| ${pct(m.hits5, m.total)}${trend(h5, ph5)} `;
+		md += `| ${m.misses} `;
+		md += `| ${formatMs(m.embedTimeMs)} |\n`;
+	}
+}
+
+// ── Latest summary ───────────────────────────────────────────────────────
+const latest = history[0]; // first history element; new versions are unshifted to the front
+md += '\n### Latest results\n\n';
+md += `**Version:** ${latest.version} | **Strategy:** ${latest.strategy} | **Symbols:** ${latest.symbols} | **Date:** ${latest.date}\n\n`;
+// Detail table for that entry: one row per model
+md += '| Model | Dim | Context | Hit@1 | Hit@3 | Hit@5 | Hit@10 | Misses | Embed | Search |\n';
+md += '|-------|----:|--------:|------:|------:|------:|-------:|-------:|------:|-------:|\n';
+// Unlike the history table, this one also reports dim, context window, Hit@10 and search time
+for (const [modelKey, m] of Object.entries(latest.models)) {
+	md += `| ${modelKey} `;
+	md += `| ${m.dim} `;
+	md += `| ${m.contextWindow} `;
+	md += `| ${pct(m.hits1, m.total)} `;
+	md += `| ${pct(m.hits3, m.total)} `;
+	md += `| ${pct(m.hits5, m.total)} `;
+	md += `| ${pct(m.hits10, m.total)} `;
+	md += `| ${m.misses} `;
+	md += `| ${formatMs(m.embedTimeMs)} `;
+	md += `| ${formatMs(m.searchTimeMs)} |\n`;
+}
+
+md += `\n<!-- EMBEDDING_BENCHMARK_DATA\n${JSON.stringify(history, null, 2)}\n-->\n`;
+
+fs.mkdirSync(path.dirname(reportPath), { recursive: true });
+fs.writeFileSync(reportPath, md);
+console.error(`Updated ${path.relative(root, reportPath)}`);
diff --git a/tests/search/embedding-benchmark.js b/tests/search/embedding-benchmark.js
deleted file mode 100644
index 11dc9aad0..000000000
--- a/tests/search/embedding-benchmark.js
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Embedding strategy benchmark — compares structured vs source strategies
- * against real search queries on the current project's graph.
- *
- * Prerequisites:
- *   - @huggingface/transformers installed
- *   - codegraph build already run (graph.db exists)
- *
- * Usage:
- *   node tests/search/embedding-benchmark.js
- *   node tests/search/embedding-benchmark.js --model minilm
- */
-
-import path from 'node:path';
-import { buildEmbeddings, DEFAULT_MODEL, MODELS, searchData } from '../../src/embedder.js';
-
-const model = process.argv.includes('--model')
-  ? process.argv[process.argv.indexOf('--model') + 1]
-  : DEFAULT_MODEL;
-
-const rootDir = '.';
-const dbPath = path.resolve('.codegraph/graph.db');
-
-// Queries with expected best-match symbol name
-const QUERIES = [
-  { q: 'parse source code with tree-sitter', expect: 'parseFilesAuto' },
-  { q: 'find circular dependencies', expect: 'findCycles' },
-  { q: 'build dependency graph from source files', expect: 'buildGraph' },
-  { q: 'resolve import path to actual file', expect: 'resolveImportPath' },
-  { q: 'cosine similarity between vectors', expect: 'cosineSim' },
-  { q: 'export graph as DOT format', expect: 'exportDOT' },
-  { q: 'semantic search with embeddings', expect: 'search' },
-  { q: 'incremental file hashing', expect: 'hashFile' },
-  { q: 'load configuration from file', expect: 'loadConfig' },
-  { q: 'extract functions and classes from code', expect: 'extractJavaScript' },
-  { q: 'impact analysis of code changes', expect: 'diffImpactData' },
-  { q: 'start MCP server for AI agents', expect: 'startMCPServer' },
-  { q: 'watch files for changes', expect: 'watchProject' },
-  { q: 'reciprocal rank fusion for multi-query search', expect: 'multiSearchData' },
-];
-
-async function benchmark(strategy) {
-  await buildEmbeddings(rootDir, model, dbPath, { strategy });
-
-  let hits1 = 0;
-  let hits3 = 0;
-  let hits5 = 0;
-  const details = [];
-
-  for (const { q, expect: expected } of QUERIES) {
-    const data = await searchData(q, dbPath, { minScore: 0.01, limit: 10 });
-    if (!data) continue;
-
-    const names = data.results.map((r) => r.name);
-    const rank = names.indexOf(expected) + 1; // 0 = not found
-    if (rank === 1) hits1++;
-    if (rank >= 1 && rank <= 3) hits3++;
-    if (rank >= 1 && rank <= 5) hits5++;
-
-    const matchScore = rank > 0 ? data.results[rank - 1].similarity.toFixed(3) : 'miss';
-    details.push({
-      q: q.slice(0, 50),
-      expected,
-      rank: rank || '>10',
-      actual: names[0],
-      matchScore,
-    });
-  }
-
-  return { strategy, hits1, hits3, hits5, total: QUERIES.length, details };
-}
-
-const modelConfig = MODELS[model];
-console.log('=== Embedding Strategy Benchmark ===');
-console.log(`Model: ${model} (${modelConfig.dim}d, ${modelConfig.contextWindow} token context)`);
-console.log(`Queries: ${QUERIES.length}`);
-console.log('');
-
-const structured = await benchmark('structured');
-const source = await benchmark('source');
-
-// Summary table
-console.log('');
-console.log('=== RESULTS ===');
-console.log('');
-console.log(`${'Metric'.padEnd(12)}${'structured'.padEnd(16)}${'source'.padEnd(16)}delta`);
-for (const [label, key] of [
-  ['Hit@1', 'hits1'],
-  ['Hit@3', 'hits3'],
-  ['Hit@5', 'hits5'],
-]) {
-  const s = structured[key];
-  const o = source[key];
-  const sp = `${s}/${structured.total} (${((s / structured.total) * 100).toFixed(0)}%)`;
-  const op = `${o}/${source.total} (${((o / source.total) * 100).toFixed(0)}%)`;
-  const delta = s - o;
-  const sign = delta > 0 ? '+' : '';
-  console.log(`${label.padEnd(12)}${sp.padEnd(16)}${op.padEnd(16)}${sign}${delta}`);
-}
-
-// Per-query comparison
-console.log('');
-console.log(`${'Query'.padEnd(52)}${'Expected'.padEnd(22)}Struct  Source`);
-for (let i = 0; i < QUERIES.length; i++) {
-  const s = structured.details[i];
-  const o = source.details[i];
-  const sw =
-    typeof s.rank === 'number' && (typeof o.rank !== 'number' || s.rank < o.rank) ? '*' : ' ';
-  const ow =
-    typeof o.rank === 'number' && (typeof s.rank !== 'number' || o.rank < s.rank) ? '*' : ' ';
-  console.log(
-    s.q.padEnd(52) +
-      s.expected.padEnd(22) +
-      String(s.rank).padEnd(4) +
-      sw +
-      '   ' +
-      String(o.rank).padEnd(4) +
-      ow,
-  );
-}
-console.log('');
-console.log('* = better rank for that query');