Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 87 additions & 12 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
permissions: {}

jobs:
benchmark:
build-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
Expand All @@ -31,16 +31,22 @@ jobs:

- run: npm install

- name: Run benchmark
- name: Run build benchmark
run: node scripts/benchmark.js 2>/dev/null > benchmark-result.json

- name: Update report
- name: Update build report
run: node scripts/update-benchmark-report.js benchmark-result.json

- name: Upload build result
uses: actions/upload-artifact@v4
with:
name: build-benchmark-result
path: benchmark-result.json

- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/BENCHMARKS.md README.md; then
if git diff --quiet HEAD -- generated/BUILD-BENCHMARKS.md README.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
Expand All @@ -54,20 +60,89 @@ jobs:
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/update-$(date +%Y%m%d-%H%M%S)"
BRANCH="benchmark/build-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/BENCHMARKS.md README.md
git commit -m "docs: update performance benchmarks"
git add generated/BUILD-BENCHMARKS.md README.md
git commit -m "docs: update build performance benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update performance benchmarks" \
--body "Automated benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
--title "docs: update build performance benchmarks" \
--body "Automated build benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."

embedding-benchmark:
runs-on: ubuntu-latest
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.workflow_run.conclusion == 'success'
permissions:
contents: write
pull-requests: write

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
token: ${{ secrets.GITHUB_TOKEN }}

- uses: actions/setup-node@v4
with:
node-version: "22"

- run: npm install

- name: Cache HuggingFace models
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-models-${{ runner.os }}-${{ hashFiles('src/embedder.js') }}
restore-keys: hf-models-${{ runner.os }}-

- name: Build graph
run: node src/cli.js build .

- name: Upload result artifact
- name: Run embedding benchmark
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: node scripts/embedding-benchmark.js 2>/dev/null > embedding-benchmark-result.json

- name: Update embedding report
run: node scripts/update-embedding-report.js embedding-benchmark-result.json

- name: Upload embedding result
uses: actions/upload-artifact@v4
with:
name: benchmark-result
path: benchmark-result.json
name: embedding-benchmark-result
path: embedding-benchmark-result.json

- name: Check for changes
id: changes
run: |
if git diff --quiet HEAD -- generated/EMBEDDING-BENCHMARKS.md; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
fi

- name: Commit and push via PR
if: steps.changes.outputs.changed == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"

BRANCH="benchmark/embedding-$(date +%Y%m%d-%H%M%S)"
git checkout -b "$BRANCH"
git add generated/EMBEDDING-BENCHMARKS.md
git commit -m "docs: update embedding benchmarks"
git push origin "$BRANCH"

gh pr create \
--base main \
--head "$BRANCH" \
--title "docs: update embedding benchmarks" \
--body "Automated embedding benchmark update from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
41 changes: 41 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,47 @@ tests/
- Parser tests use inline code strings parsed directly with tree-sitter
- Always run the full suite (`npm test`) before submitting a PR

## Regression Benchmarks

Two regression benchmark scripts live in `scripts/`. These are **not** unit
tests — they measure performance metrics that reviewers use to judge whether a
change is acceptable. If your PR touches code covered by a benchmark, you
**must** run it before and after your changes and include the results in the PR
description.

| Benchmark | What it measures | When to run |
|-----------|-----------------|-------------|
| `node scripts/benchmark.js` | Build speed (native vs WASM), query latency | Changes to `builder.js`, `parser.js`, `queries.js`, `resolve.js`, `db.js`, or the native engine |
| `node scripts/embedding-benchmark.js` | Search recall (Hit@1/3/5/10) across models | Changes to `embedder.js` or embedding strategies |

### How to report results

Both scripts output JSON to stdout (progress goes to stderr). Run the relevant
benchmark on `main` (before), then on your branch (after), and paste both in
your PR description:

```bash
git stash && git checkout main
node scripts/benchmark.js > before.json

git checkout - && git stash pop
node scripts/benchmark.js > after.json
```

In the PR, include a table like:

```
## Benchmark results

| Metric | Before | After | Delta |
|--------------|--------|--------|-------|
| Build (ms) | 1200 | 1180 | -20 |
| Hit@1 | 75.5% | 76.2% | +0.7% |
```

Regressions are not automatically blocking, but unexplained drops in speed or
recall will be questioned during review.

## Common Contribution Types

### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma

## 📊 Performance

Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):

| Metric | Latest |
|---|---|
Expand Down
File renamed without changes.
145 changes: 145 additions & 0 deletions scripts/embedding-benchmark.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env node

/**
* Embedding benchmark runner — measures search recall across all models.
*
* For every function/method/class in the graph, generates a query from the
* symbol name (splitIdentifier) and checks if search finds that symbol.
* Tests all available embedding models, outputs JSON to stdout.
*
* Skips jina-code when HF_TOKEN is not set (gated model).
*
* Usage: node scripts/embedding-benchmark.js > result.json
*/

import fs from 'node:fs';
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the directory containing this script.
const root = path.resolve(__dirname, '..');

// package.json is read for its version, which is stamped into the JSON output.
const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
// SQLite graph database the benchmark reads symbols from and searches against.
// NOTE(review): presumably produced by a prior `codegraph build` — confirm.
const dbPath = path.join(root, '.codegraph', 'graph.db');

// Load the embedder relative to this file rather than the working directory.
const { buildEmbeddings, MODELS, searchData } = await import(
new URL('../src/embedder.js', import.meta.url).href
);

// Redirect console.log to stderr so only the final JSON report reaches stdout.
// The original function is kept so it can be restored before that report is printed.
const origLog = console.log;
console.log = (...args) => console.error(...args);

// Matches file paths containing `.test.`, `.spec.`, `__test__`, `__tests__` or
// `.stories.`; symbols from such files are excluded from the benchmark.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

/**
 * Convert a code identifier into a space-separated phrase.
 *
 * Word boundaries are inserted at lower→upper camelCase transitions and
 * between an uppercase run and a following capitalised word; runs of
 * underscores and hyphens become a single space. Letter case is preserved.
 *
 * @param {string} name - Identifier such as `parseHTMLDoc` or `snake_case`.
 * @returns {string} The phrase with surrounding whitespace trimmed.
 */
function splitIdentifier(name) {
  // "fooBar" -> "foo Bar"
  const camelSeparated = name.replace(/([a-z])([A-Z])/g, '$1 $2');
  // "HTMLParser" -> "HTML Parser"
  const acronymSeparated = camelSeparated.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  // "snake_case", "kebab-case" -> "snake case", "kebab case"
  const spaced = acronymSeparated.replace(/[_-]+/g, ' ');
  return spaced.trim();
}

/**
 * Load the set of symbols the benchmark will search for.
 *
 * Reads every `function`, `method` and `class` node from the SQLite database
 * at `dbPath` (ordered by file, then line), discards rows whose file path
 * matches `TEST_PATTERN`, keeps only the first occurrence of each symbol
 * name, and derives a search query from the name via `splitIdentifier`.
 * Symbols whose query is shorter than 4 characters are skipped.
 *
 * @returns {Array<object>} Entries of the form `{ name, kind, file, query }`.
 * @throws Propagates any error raised while opening or querying the database.
 */
function loadSymbols() {
  const db = new Database(dbPath, { readonly: true });
  let rows;
  try {
    rows = db
      .prepare(
        `SELECT name, kind, file FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();
  } finally {
    // Always release the handle, even when the query throws.
    db.close();
  }

  const seen = new Set();
  const symbols = [];
  for (const row of rows) {
    if (TEST_PATTERN.test(row.file)) continue;
    if (seen.has(row.name)) continue;
    // A name is marked as seen before the length check, so a name rejected
    // for being too short is not reconsidered on a later row.
    seen.add(row.name);
    const query = splitIdentifier(row.name);
    if (query.length < 4) continue;
    symbols.push({ name: row.name, kind: row.kind, file: row.file, query });
  }
  return symbols;
}

/**
 * Benchmark one embedding model against the given symbols.
 *
 * Rebuilds embeddings for `modelKey` with the `structured` strategy, then
 * searches for each symbol's query and records the rank at which the symbol's
 * own name appears among the returned results. Searches that return no data
 * count as misses.
 *
 * @param {string} modelKey - Key into `MODELS` identifying the model.
 * @param {Array<object>} symbols - Entries providing at least `name` and `query`.
 * @returns {Promise<object>} Model metadata (`dim`, `contextWindow`), hit
 *   counts (`hits1`, `hits3`, `hits5`, `hits10`), `misses`, `total`, and the
 *   rounded `embedTimeMs` / `searchTimeMs` durations.
 */
async function benchmarkModel(modelKey, symbols) {
  const embedBegin = performance.now();
  await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
  const embedTimeMs = Math.round(performance.now() - embedBegin);

  // Hit counters keyed by rank cutoff: a hit at rank r counts for every cutoff >= r.
  const cutoffs = [1, 3, 5, 10];
  const hitsAt = new Map(cutoffs.map((k) => [k, 0]));

  const searchBegin = performance.now();
  for (const symbol of symbols) {
    const data = await searchData(symbol.query, dbPath, { minScore: 0.01, limit: 10 });
    if (!data) continue;

    const resultNames = data.results.map((r) => r.name);
    const position = resultNames.indexOf(symbol.name);
    if (position === -1) continue;

    const rank = position + 1;
    for (const k of cutoffs) {
      if (rank <= k) hitsAt.set(k, hitsAt.get(k) + 1);
    }
  }
  const searchTimeMs = Math.round(performance.now() - searchBegin);

  const total = symbols.length;
  const hits10 = hitsAt.get(10);
  return {
    dim: MODELS[modelKey].dim,
    contextWindow: MODELS[modelKey].contextWindow,
    hits1: hitsAt.get(1),
    hits3: hitsAt.get(3),
    hits5: hitsAt.get(5),
    hits10,
    misses: total - hits10,
    total,
    embedTimeMs,
    searchTimeMs,
  };
}

// ── Run benchmarks ──────────────────────────────────────────────────────

const symbols = loadSymbols();
console.error(`Loaded ${symbols.length} symbols for benchmark`);

// Per-model results keyed by model name; models that are skipped or fail are omitted.
const results = {};
const hfTokenPresent = Boolean(process.env.HF_TOKEN);

for (const key of Object.keys(MODELS)) {
  // jina-code is skipped unless an HF_TOKEN is available.
  if (!hfTokenPresent && key === 'jina-code') {
    console.error(`Skipping ${key} (HF_TOKEN not set)`);
    continue;
  }

  console.error(`\nBenchmarking model: ${key}...`);
  try {
    const r = await benchmarkModel(key, symbols);
    results[key] = r;
    console.error(
      ` Hit@1=${r.hits1}/${r.total} Hit@3=${r.hits3}/${r.total} Hit@5=${r.hits5}/${r.total} misses=${r.misses}`,
    );
  } catch (err) {
    // Best effort: report the failure on stderr and continue with the next model.
    console.error(` FAILED: ${err.message}`);
  }
}

// Put the real console.log back so the report below is written to stdout.
console.log = origLog;

const reportDate = new Date().toISOString().slice(0, 10);
const output = {
  version: pkg.version,
  date: reportDate,
  strategy: 'structured',
  symbols: symbols.length,
  models: results,
};

console.log(JSON.stringify(output, null, 2));
10 changes: 5 additions & 5 deletions scripts/update-benchmark-report.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

/**
* Update benchmark report — reads benchmark JSON and updates:
* 1. generated/BENCHMARKS.md (historical table + raw JSON in HTML comment)
* 1. generated/BUILD-BENCHMARKS.md (historical table + raw JSON in HTML comment)
* 2. README.md (performance section with latest numbers)
*
* Usage:
Expand All @@ -28,10 +28,10 @@ if (arg) {
const entry = JSON.parse(jsonText);

// ── Paths ────────────────────────────────────────────────────────────────
const benchmarkPath = path.join(root, 'generated', 'BENCHMARKS.md');
const benchmarkPath = path.join(root, 'generated', 'BUILD-BENCHMARKS.md');
const readmePath = path.join(root, 'README.md');

// ── Load existing history from BENCHMARKS.md ─────────────────────────────
// ── Load existing history from BUILD-BENCHMARKS.md ─────────────────────────────
let history = [];
if (fs.existsSync(benchmarkPath)) {
const content = fs.readFileSync(benchmarkPath, 'utf8');
Expand Down Expand Up @@ -96,7 +96,7 @@ function engineRow(h, prev, engineKey) {
);
}

// ── Build BENCHMARKS.md ──────────────────────────────────────────────────
// ── Build BUILD-BENCHMARKS.md ──────────────────────────────────────────────────
let md = '# Codegraph Performance Benchmarks\n\n';
md += 'Self-measured on every release by running codegraph on its own codebase.\n';
md += 'Metrics are normalized per file for cross-version comparability.\n\n';
Expand Down Expand Up @@ -177,7 +177,7 @@ if (fs.existsSync(readmePath)) {

const perfSection = `## 📊 Performance

Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):

| Metric | Latest |
|---|---|
Expand Down
Loading