|
| 1 | +#!/usr/bin/env node |
| 2 | + |
| 3 | +/** |
| 4 | + * Embedding benchmark runner — measures search recall across all models. |
| 5 | + * |
| 6 | + * For every function/method/class in the graph, generates a query from the |
| 7 | + * symbol name (splitIdentifier) and checks if search finds that symbol. |
| 8 | + * Tests all available embedding models, outputs JSON to stdout. |
| 9 | + * |
| 10 | + * Skips jina-code when HF_TOKEN is not set (gated model). |
| 11 | + * |
| 12 | + * Usage: node scripts/embedding-benchmark.js > result.json |
| 13 | + */ |
| 14 | + |
| 15 | +import fs from 'node:fs'; |
| 16 | +import path from 'node:path'; |
| 17 | +import { performance } from 'node:perf_hooks'; |
| 18 | +import { fileURLToPath } from 'node:url'; |
| 19 | +import Database from 'better-sqlite3'; |
| 20 | + |
// Resolve the repository root relative to this script (scripts/ -> repo root).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');

// package.json supplies the version reported in the benchmark output.
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
const dbPath = path.join(root, '.codegraph', 'graph.db');

// Redirect console.log to stderr so only JSON goes to stdout.
// This is installed BEFORE the embedder is imported so that anything logged
// while that module (and its dependencies) is being evaluated is diverted too.
const origLog = console.log;
console.log = (...args) => console.error(...args);

const { buildEmbeddings, MODELS, searchData } = await import(
  new URL('../src/embedder.js', import.meta.url).href
);

// Files matching this pattern (tests, specs, stories) are excluded from the
// benchmark symbol set.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
| 36 | + |
/**
 * Turn an identifier into a space-separated phrase usable as a search query.
 *
 * Applies, in order: a split at lower→upper case boundaries, a split between
 * an uppercase run and a following capitalised word, and replacement of
 * underscore/hyphen runs with a single space. The result is trimmed.
 *
 * @param {string} name - Identifier such as `parseHTTPResponse` or `snake_case`.
 * @returns {string} The identifier broken into words, e.g. `parse HTTP Response`.
 */
function splitIdentifier(name) {
  // Order matters: each rule runs on the output of the previous one.
  const boundaryRules = [
    [/([a-z])([A-Z])/g, '$1 $2'],
    [/([A-Z]+)([A-Z][a-z])/g, '$1 $2'],
    [/[_-]+/g, ' '],
  ];

  let words = name;
  for (const [pattern, replacement] of boundaryRules) {
    words = words.replace(pattern, replacement);
  }
  return words.trim();
}
| 44 | + |
/**
 * Load the benchmark symbol set from the graph database.
 *
 * Reads every function, method and class node, drops those whose file matches
 * TEST_PATTERN, keeps only the first occurrence of each symbol name (in
 * file/line order), and discards symbols whose generated query is shorter
 * than 4 characters.
 *
 * @returns {{ name: string, kind: string, file: string, query: string }[]}
 *   One entry per unique symbol name, with the query derived from the name.
 * @throws If the database cannot be opened or the query fails.
 */
function loadSymbols() {
  const db = new Database(dbPath, { readonly: true });
  let rows;
  try {
    rows = db
      .prepare(
        `SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();
  } finally {
    // Release the handle even when prepare()/all() throws.
    db.close();
  }

  const seen = new Set();
  const symbols = [];
  for (const row of rows) {
    if (TEST_PATTERN.test(row.file)) continue;
    if (seen.has(row.name)) continue;
    // Mark the name as seen before the length check so a later duplicate of a
    // too-short name is skipped without recomputing its query.
    seen.add(row.name);
    const query = splitIdentifier(row.name);
    if (query.length < 4) continue;
    symbols.push({ name: row.name, kind: row.kind, file: row.file, query });
  }
  return symbols;
}
| 67 | + |
/**
 * Build embeddings with one model and measure how often a name-derived query
 * finds its own symbol.
 *
 * For each symbol the query is searched with a limit of 10 results; the rank
 * is the 1-based position of the first result whose name equals the symbol's
 * name. A search that returns a falsy value contributes no hits, so it ends up
 * counted in `misses`.
 *
 * @param {string} modelKey - Key into MODELS identifying the embedding model.
 * @param {{ name: string, query: string }[]} symbols - Symbols to look up.
 * @returns {Promise<object>} Model metadata (dim, contextWindow), hit counts at
 *   ranks 1/3/5/10, misses, total, and embed/search wall-clock times in ms.
 */
async function benchmarkModel(modelKey, symbols) {
  const embedT0 = performance.now();
  await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
  const embedTimeMs = Math.round(performance.now() - embedT0);

  // Hit counters keyed by rank cutoff; a rank counts toward every cutoff >= it.
  const cutoffs = [1, 3, 5, 10];
  const hitsAt = new Map(cutoffs.map((cutoff) => [cutoff, 0]));

  const searchT0 = performance.now();
  for (const { name, query } of symbols) {
    const data = await searchData(query, dbPath, { minScore: 0.01, limit: 10 });
    if (data) {
      const position = data.results.findIndex((result) => result.name === name);
      if (position !== -1) {
        const rank = position + 1;
        for (const cutoff of cutoffs) {
          if (rank <= cutoff) hitsAt.set(cutoff, hitsAt.get(cutoff) + 1);
        }
      }
    }
  }
  const searchTimeMs = Math.round(performance.now() - searchT0);

  const total = symbols.length;
  const hits10 = hitsAt.get(10);
  // Key order is preserved because this object is serialised into the output.
  return {
    dim: MODELS[modelKey].dim,
    contextWindow: MODELS[modelKey].contextWindow,
    hits1: hitsAt.get(1),
    hits3: hitsAt.get(3),
    hits5: hitsAt.get(5),
    hits10,
    misses: total - hits10,
    total,
    embedTimeMs,
    searchTimeMs,
  };
}
| 106 | + |
// ── Run benchmarks ──────────────────────────────────────────────────────

// Symbols are loaded once and reused for every model.
const symbols = loadSymbols();
console.error(`Loaded ${symbols.length} symbols for benchmark`);

const hasHfToken = Boolean(process.env.HF_TOKEN);
const results = {};

for (const key of Object.keys(MODELS)) {
  // jina-code is gated, so it is only attempted when HF_TOKEN is set.
  const isGatedWithoutToken = key === 'jina-code' && !hasHfToken;
  if (isGatedWithoutToken) {
    console.error(`Skipping ${key} (HF_TOKEN not set)`);
    continue;
  }

  console.error(`\nBenchmarking model: ${key}...`);
  try {
    const r = await benchmarkModel(key, symbols);
    results[key] = r;
    console.error(
      `  Hit@1=${r.hits1}/${r.total}  Hit@3=${r.hits3}/${r.total}  Hit@5=${r.hits5}/${r.total}  misses=${r.misses}`,
    );
  } catch (err) {
    // A failing model is reported and left out of the results; the remaining
    // models still run.
    console.error(`  FAILED: ${err.message}`);
  }
}

// Restore console.log for JSON output
console.log = origLog;

const report = JSON.stringify(
  {
    version: pkg.version,
    date: new Date().toISOString().slice(0, 10),
    strategy: 'structured',
    symbols: symbols.length,
    models: results,
  },
  null,
  2,
);

console.log(report);
0 commit comments