Skip to content

Commit 8ade0b3

Browse files
committed
refactor(ci): let tracer-validation reuse benchmark tracer output
The pre-publish-benchmark job's `Run resolution benchmark` step already spawns `run-tracer.mjs` once per language fixture for telemetry (writing `dynamicEdges` / `dynamicConfirmed` counts into resolution-result.json). The `Run tracer validation` step that follows then ran the same per-language tracer subprocess again to compute same-file recall — doubling the tracer cost in the pre-publish job. Extend the script's per-language LangResult with a `tracer` artifact containing status ('ok' | 'skipped') and the raw captured edges. The status distinction mirrors the gate test's `runTracer` semantics (null-return on toolchain-missing → 'skipped'). Refactor the gate test to consume that artifact when RESOLUTION_RESULT_JSON is set, falling back to running run-tracer.mjs directly when unset so devs can still execute `npx vitest run tests/benchmarks/resolution/tracer/...` standalone. Wire the env var through the workflow's tracer-validation step. Verified locally: gate test in artifact mode passes 35/35 in ~160ms against an artifact produced by scripts/resolution-benchmark.ts (vs ~320ms for the standalone path that respawns the JS tracer subprocess). The resolution-benchmark gate test still passes 170/170 with the new `tracer` field present in the artifact. Closes #1166
1 parent 0198c57 commit 8ade0b3

3 files changed

Lines changed: 77 additions & 6 deletions

File tree

.github/workflows/publish.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ jobs:
337337

338338
- name: Run tracer validation (same-file edge recall)
339339
timeout-minutes: 10
340+
# Reuse the tracer subprocess output captured by the resolution benchmark
341+
# step above (#1166) instead of re-running run-tracer.mjs per fixture.
342+
env:
343+
RESOLUTION_RESULT_JSON: ${{ github.workspace }}/resolution-result.json
340344
run: npx vitest run tests/benchmarks/resolution/tracer/tracer-validation.test.ts --reporter=verbose
341345

342346
- name: Merge resolution into build result

scripts/resolution-benchmark.ts

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,20 @@ interface DynamicEdge {
5555
target_file: string;
5656
}
5757

58+
/**
59+
* Per-language tracer artifact consumed by tests/benchmarks/resolution/tracer/tracer-validation.test.ts
60+
* to avoid re-running tracer subprocesses. See #1166.
61+
*
62+
* status:
63+
* - 'ok' — tracer subprocess produced edges (possibly an empty array if the language has no
64+
* same-file calls). Consumers should treat edges as authoritative.
65+
* - 'skipped' — tracer script missing, toolchain not available, or tracer crashed. Consumers should skip recall assertions.
66+
*/
67+
interface TracerArtifact {
68+
status: 'ok' | 'skipped';
69+
edges: DynamicEdge[];
70+
}
71+
5872
interface LangResult {
5973
precision: number;
6074
recall: number;
@@ -66,6 +80,7 @@ interface LangResult {
6680
byMode: Record<string, ModeMetrics>;
6781
dynamicEdges?: number;
6882
dynamicConfirmed?: number;
83+
tracer?: TracerArtifact;
6984
}
7085

7186
// ── Helpers ──────────────────────────────────────────────────────────────
@@ -153,10 +168,17 @@ const TRACER_SCRIPT = path.join(root, 'tests', 'benchmarks', 'resolution', 'trac
153168

154169
/**
155170
* Attempt to run the dynamic call tracer for a language fixture.
156-
* Returns captured edges on success, empty array on failure or unavailability.
171+
*
172+
* Returns a TracerArtifact discriminating between:
173+
* - 'ok' — tracer ran (edges may be empty if the fixture has no captured calls)
174+
* - 'skipped' — tracer script missing, toolchain unavailable, or subprocess crashed
175+
*
176+
* The status distinction mirrors the semantics in
177+
* tests/benchmarks/resolution/tracer/tracer-validation.test.ts so its `runTracer`
178+
* can reuse this artifact directly (#1166).
157179
*/
158-
function runDynamicTracer(lang: string): DynamicEdge[] {
159-
if (!fs.existsSync(TRACER_SCRIPT)) return [];
180+
function runDynamicTracer(lang: string): TracerArtifact {
181+
if (!fs.existsSync(TRACER_SCRIPT)) return { status: 'skipped', edges: [] };
160182

161183
const fixtureDir = path.join(FIXTURES_DIR, lang);
162184
try {
@@ -167,12 +189,15 @@ function runDynamicTracer(lang: string): DynamicEdge[] {
167189
stdio: ['pipe', 'pipe', 'pipe'],
168190
});
169191
const parsed = JSON.parse(result);
192+
const edges = Array.isArray(parsed.edges) ? parsed.edges : [];
170193
if (parsed.error) {
171194
console.error(` Dynamic tracer for ${lang}: ${parsed.error}`);
195+
// Treat "error reported and no edges" as toolchain-missing skip
196+
if (edges.length === 0) return { status: 'skipped', edges: [] };
172197
}
173-
return Array.isArray(parsed.edges) ? parsed.edges : [];
198+
return { status: 'ok', edges };
174199
} catch {
175-
return [];
200+
return { status: 'skipped', edges: [] };
176201
}
177202
}
178203

@@ -276,7 +301,8 @@ try {
276301
const expectedEdges: ExpectedEdge[] = manifest.edges;
277302

278303
// Run dynamic tracer if available
279-
const dynamicEdges = runDynamicTracer(lang);
304+
const tracerArtifact = runDynamicTracer(lang);
305+
const dynamicEdges = tracerArtifact.edges;
280306
const { dynamicConfirmed } = mergeWithDynamic(expectedEdges, dynamicEdges);
281307

282308
// Use only expected edges for metrics (dynamic edges are supplemental)
@@ -285,6 +311,9 @@ try {
285311
metrics.dynamicEdges = dynamicEdges.length;
286312
metrics.dynamicConfirmed = dynamicConfirmed;
287313
}
314+
// Emit raw tracer artifact so the tracer-validation gate test can reuse it
315+
// without spawning a second subprocess per fixture (#1166).
316+
metrics.tracer = tracerArtifact;
288317
results[lang] = metrics;
289318

290319
const dynamicInfo =

tests/benchmarks/resolution/tracer/tracer-validation.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,34 @@ interface ExpectedEdge {
4040
const FIXTURES_DIR = path.join(import.meta.dirname, '..', 'fixtures');
4141
const RUN_TRACER = path.join(import.meta.dirname, 'run-tracer.mjs');
4242

43+
/**
44+
* When set, points to a resolution-result.json artifact produced by
45+
* scripts/resolution-benchmark.ts. The benchmark script already runs each
46+
* language's tracer subprocess and embeds the raw edges + status under
47+
* `<lang>.tracer`. Reading that artifact lets the gate test skip a second
48+
* subprocess per fixture in CI (which would otherwise roughly double the tracer cost — see #1166).
49+
*
50+
* When unset, the test falls back to running run-tracer.mjs directly so devs
51+
* can still execute `npx vitest run tests/benchmarks/resolution/tracer/...`
52+
* standalone.
53+
*/
54+
const RESOLUTION_RESULT_JSON = process.env.RESOLUTION_RESULT_JSON;
55+
56+
interface ArtifactTracerEntry {
57+
status: 'ok' | 'skipped';
58+
edges: TracerEdge[];
59+
}
60+
61+
const artifactResults: Record<string, { tracer?: ArtifactTracerEntry }> | null = (() => {
62+
if (!RESOLUTION_RESULT_JSON) return null;
63+
if (!fs.existsSync(RESOLUTION_RESULT_JSON)) {
64+
throw new Error(
65+
`RESOLUTION_RESULT_JSON=${RESOLUTION_RESULT_JSON} does not exist — produce it with scripts/resolution-benchmark.ts first.`,
66+
);
67+
}
68+
return JSON.parse(fs.readFileSync(RESOLUTION_RESULT_JSON, 'utf-8'));
69+
})();
70+
4371
/**
4472
* Per-language same-file recall thresholds.
4573
*
@@ -113,6 +141,16 @@ function basename(filePath: string): string {
113141
}
114142

115143
function runTracer(lang: string): TracerEdge[] | null {
144+
// Artifact mode: reuse the tracer output already produced by
145+
// scripts/resolution-benchmark.ts during the publish workflow's resolution
146+
// benchmark step (#1166). The script writes status='skipped' for missing
147+
// toolchains, mirroring this function's null-return semantics.
148+
if (artifactResults) {
149+
const entry = artifactResults[lang]?.tracer;
150+
if (!entry || entry.status === 'skipped') return null;
151+
return entry.edges;
152+
}
153+
116154
const fixtureDir = path.join(FIXTURES_DIR, lang);
117155
if (!fs.existsSync(fixtureDir)) return null;
118156

0 commit comments

Comments
 (0)