Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 87 additions & 12 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
permissions: {}

jobs:
benchmark:
build-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
Expand All @@ -31,16 +31,22 @@ jobs:

- run: npm install

- name: Run benchmark
- name: Run build benchmark
run: node scripts/benchmark.js 2>/dev/null > benchmark-result.json

- name: Update report
- name: Update build report
run: node scripts/update-benchmark-report.js benchmark-result.json

- name: Upload build result
uses: actions/upload-artifact@v4
with:
name: build-benchmark-result
path: benchmark-result.json

- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/BENCHMARKS.md README.md; then
if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
Expand All @@ -54,20 +60,89 @@ jobs:
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/update-$(date +%Y%m%d-%H%M%S)"
BRANCH="benchmark/build-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/BENCHMARKS.md README.md
git commit -m "docs: update performance benchmarks"
git add generated/BUILD-BENCHMARKS.md README.md
git commit -m "docs: update build performance benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update performance benchmarks" \
--body "Automated benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
--title "docs: update build performance benchmarks" \
--body "Automated build benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."

embedding-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.workflow_run.conclusion == 'success'
permissions:
contents: write
pull-requests: write

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
token: ${{ secrets.GITHUB_TOKEN }}

- uses: actions/setup-node@v4
with:
node-version: "22"

- run: npm install

- name: Cache HuggingFace models
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-models-${{ runner.os }}-${{ hashFiles('src/embedder.js') }}
restore-keys: hf-models-${{ runner.os }}-

- name: Build graph
run: node src/cli.js build .

- name: Upload result artifact
- name: Run embedding benchmark
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: node scripts/embedding-benchmark.js 2>/dev/null > embedding-benchmark-result.json

- name: Update embedding report
run: node scripts/update-embedding-report.js embedding-benchmark-result.json

- name: Upload embedding result
uses: actions/upload-artifact@v4
with:
name: benchmark-result
path: benchmark-result.json
name: embedding-benchmark-result
path: embedding-benchmark-result.json

- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
fi

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/embedding-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/EMBEDDING-BENCHMARKS.md
git commit -m "docs: update embedding benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update embedding benchmarks" \
--body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma

## 📊 Performance

Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):

| Metric | Latest |
|---|---|
Expand Down
File renamed without changes.
145 changes: 145 additions & 0 deletions scripts/embedding-benchmark.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env node

/**
* Embedding benchmark runner — measures search recall across all models.
*
* For every function/method/class in the graph, generates a query from the
* symbol name (splitIdentifier) and checks if search finds that symbol.
* Tests all available embedding models, outputs JSON to stdout.
*
* Skips jina-code when HF_TOKEN is not set (gated model).
*
* Usage: node scripts/embedding-benchmark.js > result.json
*/

import fs from 'node:fs';
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';

// ES modules have no built-in __dirname, so derive it from this file's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the directory containing this script.
const root = path.resolve(__dirname, '..');

// package.json is parsed so the package version can be stamped into the result.
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
// Graph database that symbols are read from and that is passed to the embedder.
// NOTE(review): presumably created by a prior `codegraph build` run — confirm.
const dbPath = path.join(root, '.codegraph', 'graph.db');

// Load the embedder via a URL resolved relative to this file (not the
// process working directory).
const { buildEmbeddings, MODELS, searchData } = await import(
new URL('../src/embedder.js', import.meta.url).href
);

// Redirect console.log to stderr so only JSON goes to stdout.
// The original function is kept in origLog so it can be restored right
// before the final JSON result is printed.
const origLog = console.log;
console.log = (...args) => console.error(...args);

// Matches file paths that contain `.test.`, `.spec.`, `__test__`,
// `__tests__` or `.stories.`; loadSymbols() drops symbols from such files.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

/**
 * Convert a code identifier into a space-separated search phrase.
 *
 * Word boundaries are inserted at lower→upper camelCase transitions and
 * between an acronym and the capitalised word that follows it; runs of
 * underscores or hyphens become a single space. Letter case is preserved.
 *
 * @param {string} name - Identifier such as `parseHTMLString` or `snake_case`.
 * @returns {string} The identifier split into words, e.g. `parse HTML String`.
 */
function splitIdentifier(name) {
  // "fooBar" -> "foo Bar"
  const camelSeparated = name.replace(/([a-z])([A-Z])/g, '$1 $2');
  // "HTMLString" -> "HTML String"
  const acronymSeparated = camelSeparated.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  // "snake_case-name" -> "snake case name"
  const delimiterFree = acronymSeparated.replace(/[_-]+/g, ' ');
  return delimiterFree.trim();
}

/**
 * Load the benchmark symbol set from the graph database.
 *
 * Reads every function, method and class node (ordered by file, then line),
 * drops symbols that live in test/spec/stories files, keeps only the first
 * occurrence of each symbol name, and attaches the search query derived from
 * the name. Symbols whose query is shorter than 4 characters are skipped.
 *
 * The database handle is always closed, even when the query itself throws
 * (e.g. the `nodes` table is missing), so a failed run does not leak it.
 *
 * @returns {{ name: string, kind: string, file: string, query: string }[]}
 *   Unique symbols paired with their generated search query.
 * @throws Propagates any error raised while opening or querying the database.
 */
function loadSymbols() {
  const db = new Database(dbPath, { readonly: true });
  let rows;
  try {
    rows = db
      .prepare(
        `SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();
  } finally {
    // Release the handle on both the success and the failure path.
    db.close();
  }

  rows = rows.filter((r) => !TEST_PATTERN.test(r.file));

  // Deduplicate by name: rows are ordered by file/line, so the first
  // occurrence of each name wins.
  const seen = new Set();
  const symbols = [];
  for (const row of rows) {
    if (seen.has(row.name)) continue;
    seen.add(row.name);
    const query = splitIdentifier(row.name);
    // Very short queries are skipped rather than benchmarked.
    if (query.length < 4) continue;
    symbols.push({ name: row.name, kind: row.kind, file: row.file, query });
  }
  return symbols;
}

/**
 * Benchmark one embedding model against the symbol set.
 *
 * Rebuilds the embeddings for `modelKey` with the `structured` strategy, then
 * runs one search per symbol query and records the 1-based rank at which the
 * symbol's own name appears in the (at most 10) results.
 *
 * @param {string} modelKey - Key into `MODELS` selecting the model to test.
 * @param {{ name: string, query: string }[]} symbols - Symbols to search for.
 * @returns {Promise<object>} Model metadata (`dim`, `contextWindow`), hit
 *   counts at ranks 1/3/5/10, `misses` (symbols not found in the top 10),
 *   `total`, and wall-clock `embedTimeMs` / `searchTimeMs` rounded to whole ms.
 */
async function benchmarkModel(modelKey, symbols) {
  const embedBegan = performance.now();
  await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
  const embedTimeMs = Math.round(performance.now() - embedBegan);

  // One running counter per Hit@k cutoff.
  const cutoffs = [1, 3, 5, 10];
  const hitsAt = new Map(cutoffs.map((cutoff) => [cutoff, 0]));

  const searchBegan = performance.now();
  for (const symbol of symbols) {
    const data = await searchData(symbol.query, dbPath, { minScore: 0.01, limit: 10 });
    if (data) {
      // rank is 1-based; 0 means the symbol was not among the results.
      const rank = data.results.map((result) => result.name).indexOf(symbol.name) + 1;
      if (rank >= 1) {
        for (const cutoff of cutoffs) {
          if (rank <= cutoff) hitsAt.set(cutoff, hitsAt.get(cutoff) + 1);
        }
      }
    }
  }
  const searchTimeMs = Math.round(performance.now() - searchBegan);

  const total = symbols.length;
  const model = MODELS[modelKey];
  return {
    dim: model.dim,
    contextWindow: model.contextWindow,
    hits1: hitsAt.get(1),
    hits3: hitsAt.get(3),
    hits5: hitsAt.get(5),
    hits10: hitsAt.get(10),
    misses: total - hitsAt.get(10),
    total,
    embedTimeMs,
    searchTimeMs,
  };
}

// ── Run benchmarks ──────────────────────────────────────────────────────

const symbols = loadSymbols();
console.error(`Loaded ${symbols.length} symbols for benchmark`);

// jina-code is a gated model, so it can only run when an HF token is present.
const hasHfToken = Boolean(process.env.HF_TOKEN);
const modelKeys = Object.keys(MODELS);
const results = {};

for (const key of modelKeys) {
  const isGatedWithoutToken = !hasHfToken && key === 'jina-code';
  if (isGatedWithoutToken) {
    console.error(`Skipping ${key} (HF_TOKEN not set)`);
  } else {
    console.error(`\nBenchmarking model: ${key}...`);
    try {
      const summary = await benchmarkModel(key, symbols);
      results[key] = summary;
      const { hits1, hits3, hits5, misses, total } = summary;
      console.error(
        ` Hit@1=${hits1}/${total} Hit@3=${hits3}/${total} Hit@5=${hits5}/${total} misses=${misses}`,
      );
    } catch (err) {
      // A failing model is reported and left out of the results; the
      // remaining models are still benchmarked.
      console.error(` FAILED: ${err.message}`);
    }
  }
}

// Restore console.log for JSON output
console.log = origLog;

// YYYY-MM-DD portion of the current UTC timestamp.
const today = new Date().toISOString().slice(0, 10);
const output = {
  version: pkg.version,
  date: today,
  strategy: 'structured',
  symbols: symbols.length,
  models: results,
};

console.log(JSON.stringify(output, null, 2));
10 changes: 5 additions & 5 deletions scripts/update-benchmark-report.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

/**
* Update benchmark report — reads benchmark JSON and updates:
* 1. generated/BENCHMARKS.md (historical table + raw JSON in HTML comment)
* 1. generated/BUILD-BENCHMARKS.md (historical table + raw JSON in HTML comment)
* 2. README.md (performance section with latest numbers)
*
* Usage:
Expand All @@ -28,10 +28,10 @@ if (arg) {
const entry = JSON.parse(jsonText);

// ── Paths ────────────────────────────────────────────────────────────────
const benchmarkPath = path.join(root, 'generated', 'BENCHMARKS.md');
const benchmarkPath = path.join(root, 'generated', 'BUILD-BENCHMARKS.md');
const readmePath = path.join(root, 'README.md');

// ── Load existing history from BENCHMARKS.md ─────────────────────────────
// ── Load existing history from BUILD-BENCHMARKS.md ─────────────────────────────
let history = [];
if (fs.existsSync(benchmarkPath)) {
const content = fs.readFileSync(benchmarkPath, 'utf8');
Expand Down Expand Up @@ -96,7 +96,7 @@ function engineRow(h, prev, engineKey) {
);
}

// ── Build BENCHMARKS.md ──────────────────────────────────────────────────
// ── Build BUILD-BENCHMARKS.md ──────────────────────────────────────────────────
let md = '# Codegraph Performance Benchmarks\n\n';
md += 'Self-measured on every release by running codegraph on its own codebase.\n';
md += 'Metrics are normalized per file for cross-version comparability.\n\n';
Expand Down Expand Up @@ -177,7 +177,7 @@ if (fs.existsSync(readmePath)) {

const perfSection = `## 📊 Performance

Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):

| Metric | Latest |
|---|---|
Expand Down
Loading