Skip to content

Commit b7f9451

Browse files
authored
Merge branch 'main' into dependabot/npm_and_yarn/tree-sitter-erlang-0.17
2 parents 36868ce + c7e600a commit b7f9451

8 files changed

Lines changed: 196 additions & 22 deletions

File tree

scripts/benchmark.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import path from 'node:path';
1515
import { performance } from 'node:perf_hooks';
1616
import { fileURLToPath } from 'node:url';
1717
import Database from 'better-sqlite3';
18-
import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
18+
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
1919
import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
2020

2121
// ── Parent process: fork one child per engine, assemble final output ─────
@@ -94,6 +94,7 @@ const INCREMENTAL_RUNS = 3;
9494
const QUERY_RUNS = 5;
9595
const QUERY_WARMUP_RUNS = 3;
9696
const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts');
97+
const BENCH_EXCLUDE = [...resolveBenchmarkExcludes()];
9798

9899
function median(arr) {
99100
const sorted = [...arr].sort((a, b) => a - b);
@@ -131,7 +132,7 @@ console.log = (...args) => console.error(...args);
131132
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
132133

133134
const buildStart = performance.now();
134-
const buildResult = await buildGraph(root, { engine, incremental: false });
135+
const buildResult = await buildGraph(root, { engine, incremental: false, exclude: BENCH_EXCLUDE });
135136
const buildTimeMs = performance.now() - buildStart;
136137

137138
// Warmed median of QUERY_RUNS samples with `noTests: true` to match the
@@ -156,7 +157,7 @@ try {
156157
const noopTimings = [];
157158
for (let i = 0; i < INCREMENTAL_RUNS; i++) {
158159
const start = performance.now();
159-
await buildGraph(root, { engine, incremental: true });
160+
await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE });
160161
noopTimings.push(performance.now() - start);
161162
}
162163
noopRebuildMs = Math.round(median(noopTimings));
@@ -173,7 +174,7 @@ try {
173174
for (let i = 0; i < INCREMENTAL_RUNS; i++) {
174175
fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`);
175176
const start = performance.now();
176-
const res = await buildGraph(root, { engine, incremental: true });
177+
const res = await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE });
177178
oneFileRuns.push({ ms: performance.now() - start, phases: res?.phases || null });
178179
}
179180
oneFileRuns.sort((a, b) => a.ms - b.ms);
@@ -185,7 +186,7 @@ try {
185186
} finally {
186187
fs.writeFileSync(PROBE_FILE, original);
187188
try {
188-
await buildGraph(root, { engine, incremental: true });
189+
await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE });
189190
} catch {
190191
// Cleanup rebuild failed — probe file is already restored, move on
191192
}

scripts/incremental-benchmark.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import fs from 'node:fs';
1414
import path from 'node:path';
1515
import { performance } from 'node:perf_hooks';
1616
import { fileURLToPath } from 'node:url';
17-
import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
17+
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
1818
import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js';
1919

2020
// ── Parent process: fork one child per engine, assemble final output ─────
@@ -181,14 +181,14 @@ const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts');
181181
// CI-amplified false regressions on sub-30ms metrics like No-op rebuild.
182182
const WARMUP_RUNS = 2;
183183

184-
// Resolution-benchmark fixtures live under the repo root and get pulled into
185-
// every self-build sweep. They are hand-annotated test corpora — not real
186-
// codegraph codeand a single heavy grammar (e.g. tree-sitter-verilog) can
187-
// add hundreds of milliseconds to fullBuildMs purely from fixture parsing
188-
// (#1112). Exclude them so adding native support for a new language doesn't
189-
// silently inflate the incremental-benchmark numbers.
190-
const BENCH_EXCLUDE = ['tests/benchmarks/resolution/fixtures/**'];
191-
const BUILD_OPTS = { engine, exclude: BENCH_EXCLUDE };
184+
// Resolution-benchmark fixtures (`BENCHMARK_EXCLUDES` in scripts/lib/bench-config.ts)
185+
// are excluded from every benchmark `buildGraph` call. See that constant for the
186+
// full rationaleshort version: hand-annotated fixtures aren't representative
187+
// of real source, and heavyweight grammars (#1107) silently inflate timings.
188+
// `resolveBenchmarkExcludes` returns `[]` in `--npm` mode so the baseline (an
189+
// older published version that ignores `opts.exclude`) and the dev run sweep
190+
// the same corpus.
191+
const BUILD_OPTS = { engine, exclude: [...resolveBenchmarkExcludes()] };
192192

193193
function median(arr) {
194194
const sorted = [...arr].sort((a, b) => a - b);

scripts/lib/bench-config.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,57 @@ import { pathToFileURL } from 'node:url';
1515

1616
import { getBenchmarkVersion } from '../bench-version.js';
1717

18+
/**
19+
* Globs excluded from every benchmark's `buildGraph(root, ...)` invocation.
20+
*
21+
* Resolution-benchmark fixtures (`tests/benchmarks/resolution/fixtures/**`)
22+
* are hand-annotated scaffolding for the static-resolution test suite, not
23+
* representative source code. They inflate dogfooding timing measurements
24+
* disproportionately whenever a new-language PR lands a heavyweight grammar
25+
* (e.g. tree-sitter-verilog added ~850ms to native fullBuildMs in #1107).
26+
* Excluding them here keeps build/query/incremental benchmarks measuring
27+
* codegraph's own source rather than its test fixtures.
28+
*
29+
* NOTE: callers should generally prefer `resolveBenchmarkExcludes()` instead
30+
* of this constant. The helper returns `[]` in `--npm` mode so the dev-vs-
31+
* baseline corpus stays consistent — published versions before this PR
32+
* silently dropped `opts.exclude`, which would otherwise leave the baseline
33+
* sweeping fixtures while the dev run skipped them.
34+
*/
35+
export const BENCHMARK_EXCLUDES: readonly string[] = [
36+
'tests/benchmarks/resolution/fixtures/**',
37+
];
38+
39+
/**
40+
* `BENCHMARK_EXCLUDES` in local mode; `[]` in `--npm` mode.
41+
*
42+
* `--npm` benchmarks load `buildGraph` from a previously-published version
43+
* via `srcImport(srcDir, ...)`. Releases before `BuildGraphOpts.exclude`
44+
* landed don't recognise the option and silently drop it, so passing the
45+
* excludes to a stale baseline would make it sweep ~745 files while the dev
46+
* run sweeps ~607 — a corpus-mismatch that disguises measurement-shift as a
47+
* perf delta. Emitting `[]` in `--npm` mode keeps the comparison apples-to-
48+
* apples; the warning makes the methodology shift explicit.
49+
*
50+
* Idempotent across calls (the warning is printed on the first invocation
51+
* only — `process.stderr.write` is a no-op but the helper is conceptually
52+
* "compute once, return constant"); intentionally synchronous because
53+
* `parseArgs` is.
54+
*/
55+
let warnedAboutNpmExcludeSkip = false;
56+
export function resolveBenchmarkExcludes(): readonly string[] {
57+
const { npm } = parseArgs();
58+
if (!npm) return BENCHMARK_EXCLUDES;
59+
if (!warnedAboutNpmExcludeSkip) {
60+
console.error(
61+
'Note: --npm mode skips BENCHMARK_EXCLUDES so the baseline and dev runs sweep the same corpus. ' +
62+
'Published versions before #1134 ignore opts.exclude silently; passing it would skew dev timings down by ~138 fewer files.',
63+
);
64+
warnedAboutNpmExcludeSkip = true;
65+
}
66+
return [];
67+
}
68+
1869
// On Windows, `npm` is `npm.cmd` and Node refuses to spawn `.cmd`/`.bat`
1970
// without `shell: true` (since Node 18.20 / 20.15). When `shell: true`, the
2071
// Windows `cmd.exe` shell resolves bare `npm` to `npm.cmd` automatically, so

scripts/query-benchmark.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import path from 'node:path';
1616
import { performance } from 'node:perf_hooks';
1717
import { fileURLToPath } from 'node:url';
1818
import Database from 'better-sqlite3';
19-
import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
19+
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
2020
import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
2121

2222
// ── Parent process: fork one child per engine, assemble final output ─────
@@ -254,7 +254,7 @@ function benchDiffImpact(hubName) {
254254

255255
// Build graph for this engine
256256
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
257-
await buildGraph(root, { engine, incremental: false });
257+
await buildGraph(root, { engine, incremental: false, exclude: [...resolveBenchmarkExcludes()] });
258258

259259
const targets = workerTargets() || selectTargets();
260260
console.error(`Targets: hub=${targets.hub}, mid=${targets.mid}, leaf=${targets.leaf}`);

src/domain/graph/builder/pipeline.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,15 @@ function setupPipeline(ctx: PipelineContext): void {
185185
initSchema(ctx.db);
186186

187187
ctx.config = loadConfig(ctx.rootDir);
188-
if (ctx.opts.exclude && ctx.opts.exclude.length > 0) {
189-
ctx.config = { ...ctx.config, exclude: [...ctx.config.exclude, ...ctx.opts.exclude] };
188+
// Merge caller-supplied excludes on top of the file-config excludes so
189+
// programmatic callers (e.g. benchmark scripts) can extend exclusion
190+
// without mutating .codegraphrc.json. Native orchestrator picks this up
191+
// automatically — it reads exclude off the serialized ctx.config below.
192+
if (ctx.opts.exclude?.length) {
193+
ctx.config = {
194+
...ctx.config,
195+
exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
196+
};
190197
}
191198
ctx.incremental =
192199
ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;

src/types.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,13 +1063,14 @@ export interface BuildGraphOpts {
10631063
complexity?: boolean;
10641064
cfg?: boolean;
10651065
scope?: string[];
1066-
skipRegistry?: boolean;
10671066
/**
1068-
* Extra exclude globs appended to `config.exclude`. Lets benchmark scripts
1069-
* skip fixture directories that bloat self-build timings without permanently
1070-
* affecting `.codegraphrc.json` for normal users.
1067+
* Glob patterns merged on top of `config.exclude` for this build only.
1068+
* Lets callers extend exclusion programmatically without writing a config
1069+
* file — used by the benchmark scripts to skip resolution-benchmark
1070+
* fixtures that aren't representative of real code.
10711071
*/
10721072
exclude?: string[];
1073+
skipRegistry?: boolean;
10731074
}
10741075

10751076
/** Build timing result from buildGraph. */

tests/benchmarks/regression-guard.test.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,20 @@ const SKIP_VERSIONS = new Set(['3.8.0']);
209209
* (24.3 → 45.6ms) is consistent with the other depths. Tracked in #1113
210210
* alongside depth 1, depth 5, and Query time; remove all four once
211211
* 3.11.0+ data confirms the new steady-state.
212+
*
213+
* - 3.10.0:DB bytes/file — one-time per-file methodology shift introduced
214+
* by #1134, which excludes `tests/benchmarks/resolution/fixtures/**`
215+
* from the dogfooding `buildGraph` sweep so heavyweight new grammars
216+
* (e.g. Verilog #1107) no longer inflate timing. The 3.10.0 baseline
217+
* was measured with fixtures in the corpus (~745 files); dev now
218+
* measures the codegraph source alone (~607 files). DB content is
219+
* dominated by `src/`, so total bytes stay roughly constant while the
220+
* denominator drops, inflating `dbSizeBytes/file` from 41614 → ~52211
221+
* (+25%, exactly at the threshold). This is the same shape as the old
222+
* `3.10.0:Full build` exemption (now removed because Full build absolute
223+
* timing actually improved) — a measurement-scope artifact, not a real
224+
* regression in the schema or extraction layer. Exempt this release;
225+
* remove once 3.11.0+ data is captured under the post-#1134 methodology.
212226
*/
213227
const KNOWN_REGRESSIONS = new Set([
214228
'3.9.6:Build ms/file',
@@ -222,6 +236,7 @@ const KNOWN_REGRESSIONS = new Set([
222236
'3.10.0:fnDeps depth 3',
223237
'3.10.0:fnDeps depth 5',
224238
'3.10.0:Query time',
239+
'3.10.0:DB bytes/file',
225240
]);
226241

227242
/**

tests/integration/config-include-exclude.test.ts

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,103 @@ describe('config.include / config.exclude (issue #981)', () => {
178178
// Paths are already relative to each run's own tmpDir so they compare directly.
179179
expect(nativeFiles).toEqual(wasmFiles);
180180
});
181+
182+
// ── opts.exclude (programmatic, no on-disk config) ───────────────
183+
184+
async function buildWithOptsExclude(
185+
root: string,
186+
engine: EngineName,
187+
optsExclude: string[],
188+
): Promise<string[]> {
189+
clearConfigCache();
190+
const dbDir = path.join(root, '.codegraph');
191+
if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true });
192+
await buildGraph(root, { engine, exclude: optsExclude, skipRegistry: true });
193+
const files = readFileRows(path.join(dbDir, 'graph.db'));
194+
return files.map((f) => f.replace(/\\/g, '/')).sort();
195+
}
196+
197+
it('wasm: opts.exclude rejects matching files without writing config', async () => {
198+
const root = fs.mkdtempSync(path.join(tmpDir, 'opts-wasm-'));
199+
writeFixture(root);
200+
const files = await buildWithOptsExclude(root, 'wasm', ['**/*.test.js', '**/*.spec.js']);
201+
expect(files).toContain('src/math.js');
202+
expect(files).not.toContain('src/math.test.js');
203+
expect(files).not.toContain('src/util.spec.js');
204+
});
205+
206+
itNative('native: opts.exclude rejects matching files without writing config', async () => {
207+
const root = fs.mkdtempSync(path.join(tmpDir, 'opts-native-'));
208+
writeFixture(root);
209+
const files = await buildWithOptsExclude(root, 'native', ['**/*.test.js', '**/*.spec.js']);
210+
expect(files).toContain('src/math.js');
211+
expect(files).not.toContain('src/math.test.js');
212+
expect(files).not.toContain('src/util.spec.js');
213+
});
214+
215+
// ── opts.exclude incremental round trip ──────────────────────────
216+
//
217+
// Greptile feedback on PR #1134: the opts.exclude tests above always wipe
218+
// the DB before building, so they only exercise the fresh-build path. The
219+
// scenario where files that were previously indexed become excluded on a
220+
// subsequent incremental run (i.e. opts.exclude changes between builds
221+
// against the same DB) was untested. This round trip locks in the
222+
// collect → detect behaviour: the second build must observe the newly
223+
// excluded files as removals and drop them from file_hashes.
224+
225+
async function buildSameDb(
226+
root: string,
227+
engine: EngineName,
228+
optsExclude: string[] | undefined,
229+
): Promise<string[]> {
230+
clearConfigCache();
231+
await buildGraph(root, {
232+
engine,
233+
...(optsExclude !== undefined ? { exclude: optsExclude } : {}),
234+
skipRegistry: true,
235+
});
236+
const files = readFileRows(path.join(root, '.codegraph', 'graph.db'));
237+
return files.map((f) => f.replace(/\\/g, '/')).sort();
238+
}
239+
240+
it('wasm: opts.exclude introduced on second incremental build drops previously-indexed files', async () => {
241+
const root = fs.mkdtempSync(path.join(tmpDir, 'opts-inc-wasm-'));
242+
writeFixture(root);
243+
// Wipe DB so the first build is a clean baseline that indexes everything.
244+
const dbDir = path.join(root, '.codegraph');
245+
if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true });
246+
247+
// First build: no exclude — every supported file is indexed.
248+
const firstFiles = await buildSameDb(root, 'wasm', undefined);
249+
expect(firstFiles).toContain('src/math.test.js');
250+
expect(firstFiles).toContain('src/util.spec.js');
251+
252+
// Second build against the same DB with exclude — previously-indexed
253+
// test files must be detected as removals and disappear from file_hashes.
254+
const secondFiles = await buildSameDb(root, 'wasm', ['**/*.test.js', '**/*.spec.js']);
255+
expect(secondFiles).toContain('src/math.js');
256+
expect(secondFiles).toContain('src/util.js');
257+
expect(secondFiles).not.toContain('src/math.test.js');
258+
expect(secondFiles).not.toContain('src/util.spec.js');
259+
});
260+
261+
itNative(
262+
'native: opts.exclude introduced on second incremental build drops previously-indexed files',
263+
async () => {
264+
const root = fs.mkdtempSync(path.join(tmpDir, 'opts-inc-native-'));
265+
writeFixture(root);
266+
const dbDir = path.join(root, '.codegraph');
267+
if (fs.existsSync(dbDir)) fs.rmSync(dbDir, { recursive: true, force: true });
268+
269+
const firstFiles = await buildSameDb(root, 'native', undefined);
270+
expect(firstFiles).toContain('src/math.test.js');
271+
expect(firstFiles).toContain('src/util.spec.js');
272+
273+
const secondFiles = await buildSameDb(root, 'native', ['**/*.test.js', '**/*.spec.js']);
274+
expect(secondFiles).toContain('src/math.js');
275+
expect(secondFiles).toContain('src/util.js');
276+
expect(secondFiles).not.toContain('src/math.test.js');
277+
expect(secondFiles).not.toContain('src/util.spec.js');
278+
},
279+
);
181280
});

0 commit comments

Comments
 (0)