ops-codegraph-tool/scripts/embedding-benchmark.js at fcc19d67acabead1533edb5ea9c20dab2a30a4d0 · optave/ops-codegraph-tool · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env node

/**
 * Embedding benchmark runner — measures search recall across all models.
 *
 * For every function/method/class in the graph, generates a query from the
 * symbol name (splitIdentifier) and checks if search finds that symbol.
 * Tests all available embedding models, outputs JSON to stdout.
 *
 * Skips jina-code when HF_TOKEN is not set (gated model).
 *
 * Usage: node scripts/embedding-benchmark.js > result.json
 */

import fs from 'node:fs';
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';

// ES modules have no built-in __dirname; derive it from this file's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the directory containing this script.
const root = path.resolve(__dirname, '..');

// package.json is read and parsed once; its `version` is reported in the final JSON output.
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
// Path of the graph database that symbols are loaded from and that is handed
// to buildEmbeddings / searchData.
const dbPath = path.join(root, '.codegraph', 'graph.db');

// Load the embedder with a dynamic import, resolved relative to this file's URL.
const { buildEmbeddings, MODELS, searchData } = await import(
	new URL('../src/embedder.js', import.meta.url).href
);

// Redirect console.log to stderr so only JSON goes to stdout.
// The original function is kept in origLog so it can be restored right before
// the final result is printed.
const origLog = console.log;
console.log = (...args) => console.error(...args);

// Matches file paths containing ".test.", ".spec.", "__test__", "__tests__" or
// ".stories."; rows whose file matches are excluded from the benchmark set.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

/**
 * Turns an identifier into a space-separated phrase usable as a search query.
 *
 * Steps, applied in order:
 *   1. insert a space at every lowercase→uppercase boundary ("fooBar" → "foo Bar");
 *   2. insert a space between an uppercase run and a following capitalised
 *      word ("HTTPResponse" → "HTTP Response");
 *   3. replace each run of underscores/hyphens with a single space;
 *   4. trim leading and trailing whitespace.
 *
 * @param {string} name - Identifier to split.
 * @returns {string} The identifier with word boundaries turned into spaces.
 */
function splitIdentifier(name) {
	const lowerToUpper = /([a-z])([A-Z])/g;
	const acronymToWord = /([A-Z]+)([A-Z][a-z])/g;
	const separatorRun = /[_-]+/g;

	const camelSplit = name.replace(lowerToUpper, '$1 $2');
	const acronymSplit = camelSplit.replace(acronymToWord, '$1 $2');
	const spaced = acronymSplit.replace(separatorRun, ' ');

	return spaced.trim();
}

/**
 * Loads the set of symbols the benchmark will query for.
 *
 * Reads every `function`, `method` and `class` row from the `nodes` table of
 * the graph database (ordered by file, then line), drops rows whose file
 * matches TEST_PATTERN, keeps only the first row seen for each symbol name,
 * and skips symbols whose generated query is shorter than 4 characters.
 *
 * @returns {Array<{name: string, kind: string, file: string, query: string}>}
 *   One entry per unique symbol name, with the search query derived from it.
 * @throws Propagates any error raised while opening or querying the database;
 *   an opened connection is closed before the error propagates.
 */
function loadSymbols() {
	const db = new Database(dbPath, { readonly: true });
	let rows;
	try {
		rows = db
			.prepare(
				`SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
			)
			.all();
	} finally {
		// Release the connection even when prepare()/all() throws.
		db.close();
	}

	const candidates = rows.filter((r) => !TEST_PATTERN.test(r.file));

	const seen = new Set();
	const symbols = [];
	for (const row of candidates) {
		// First occurrence of a name wins; later duplicates are ignored.
		if (seen.has(row.name)) continue;
		seen.add(row.name);
		const query = splitIdentifier(row.name);
		// Queries shorter than 4 characters are skipped.
		if (query.length < 4) continue;
		symbols.push({ name: row.name, kind: row.kind, file: row.file, query });
	}
	return symbols;
}

/**
 * Benchmarks a single embedding model against the given symbols.
 *
 * First rebuilds the embeddings for `modelKey` with the 'structured' strategy,
 * then runs one search per symbol (minScore 0.01, limit 10) and records at
 * which position, if any, the symbol's own name appears in the results.
 * A search that returns a falsy value, or whose results do not contain the
 * name, contributes to `misses`.
 *
 * @param {string} modelKey - Key into MODELS identifying the model to test.
 * @param {Array<{name: string, query: string}>} symbols - Symbols to search for.
 * @returns {Promise<object>} Model `dim` and `contextWindow`, hit counts at
 *   ranks 1/3/5/10, `misses`, `total`, and the rounded embed/search wall-clock
 *   durations in milliseconds.
 */
async function benchmarkModel(modelKey, symbols) {
	const embedBegin = performance.now();
	await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
	const embedTimeMs = Math.round(performance.now() - embedBegin);

	// Hit counters keyed by rank cutoff.
	const cutoffs = [1, 3, 5, 10];
	const hitsAt = { 1: 0, 3: 0, 5: 0, 10: 0 };

	const searchBegin = performance.now();
	for (const symbol of symbols) {
		const data = await searchData(symbol.query, dbPath, { minScore: 0.01, limit: 10 });
		if (!data) continue;

		const resultNames = data.results.map((hit) => hit.name);
		const position = resultNames.indexOf(symbol.name);
		if (position === -1) continue;

		// position is 0-based, so "within the top k" means position < k.
		for (const cutoff of cutoffs) {
			if (position < cutoff) hitsAt[cutoff] += 1;
		}
	}
	const searchTimeMs = Math.round(performance.now() - searchBegin);

	const total = symbols.length;
	const { dim, contextWindow } = MODELS[modelKey];
	return {
		dim,
		contextWindow,
		hits1: hitsAt[1],
		hits3: hitsAt[3],
		hits5: hitsAt[5],
		hits10: hitsAt[10],
		misses: total - hitsAt[10],
		total,
		embedTimeMs,
		searchTimeMs,
	};
}

// ── Run benchmarks ──────────────────────────────────────────────────────

const symbols = loadSymbols();
console.error(`Loaded ${symbols.length} symbols for benchmark`);

const hasHfToken = Boolean(process.env.HF_TOKEN);
const modelKeys = Object.keys(MODELS);
const results = {};

// Models run one after another; a failing model is logged and left out of
// `results` so the remaining models still get benchmarked.
for (const modelKey of modelKeys) {
	// jina-code is gated and needs HF_TOKEN.
	const isGatedWithoutToken = modelKey === 'jina-code' && !hasHfToken;
	if (isGatedWithoutToken) {
		console.error(`Skipping ${modelKey} (HF_TOKEN not set)`);
		continue;
	}

	console.error(`\nBenchmarking model: ${modelKey}...`);
	try {
		const summary = await benchmarkModel(modelKey, symbols);
		results[modelKey] = summary;
		const { hits1, hits3, hits5, misses, total } = summary;
		console.error(
			`  Hit@1=${hits1}/${total} Hit@3=${hits3}/${total} Hit@5=${hits5}/${total} misses=${misses}`,
		);
	} catch (err) {
		console.error(`  FAILED: ${err.message}`);
	}
}

// From here on console.log writes to stdout again, for the JSON report.
console.log = origLog;

const today = new Date().toISOString().slice(0, 10);
const output = {
	version: pkg.version,
	date: today,
	strategy: 'structured',
	symbols: symbols.length,
	models: results,
};

const report = JSON.stringify(output, null, 2);
console.log(report);