Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,12 @@ jobs:

- name: Run tracer validation (same-file edge recall)
timeout-minutes: 10
# Reuse the tracer edges captured by Run resolution benchmark instead
# of re-spawning the per-language tracer subprocess (issue #1166). The
# test falls back to spawning run-tracer.mjs when this env var is
# unset, so local runs still work standalone.
env:
RESOLUTION_RESULT_JSON: ${{ github.workspace }}/resolution-result.json
run: npx vitest run tests/benchmarks/resolution/tracer/tracer-validation.test.ts --reporter=verbose

- name: Merge resolution into build result
Expand Down
56 changes: 42 additions & 14 deletions scripts/resolution-benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ interface DynamicEdge {
target_file: string;
}

interface TracerArtifact {
// `skipped` means the tracer's toolchain was unavailable (no edges captured,
// not a recall failure). `ok` means the tracer ran — the edges array may
// still be empty if the language's tracer legitimately produced no calls.
status: 'ok' | 'skipped';
edges: DynamicEdge[];
}

interface LangResult {
precision: number;
recall: number;
Expand All @@ -68,6 +76,10 @@ interface LangResult {
// instead of rebuilding fixtures from scratch (see issue #1052).
falsePositiveEdges: string[];
falseNegativeEdges: string[];
// Raw tracer output so tests/benchmarks/resolution/tracer/tracer-validation.test.ts
// can reuse it instead of re-running the per-language tracer subprocess
// (see issue #1166).
tracer: TracerArtifact;
dynamicEdges?: number;
dynamicConfirmed?: number;
}
Expand Down Expand Up @@ -99,7 +111,10 @@ function edgeKey(sourceName: string, sourceFile: string, targetName: string, tar
return `${sourceName}@${normalizeFile(sourceFile)} -> ${targetName}@${normalizeFile(targetFile)}`;
}

function computeMetrics(resolvedEdges: ResolvedEdge[], expectedEdges: ExpectedEdge[]): LangResult {
function computeMetrics(
resolvedEdges: ResolvedEdge[],
expectedEdges: ExpectedEdge[],
): Omit<LangResult, 'tracer' | 'dynamicEdges' | 'dynamicConfirmed'> {
const resolvedSet = new Set(
resolvedEdges.map((e) => edgeKey(e.source_name, e.source_file, e.target_name, e.target_file)),
);
Expand Down Expand Up @@ -162,10 +177,15 @@ const TRACER_SCRIPT = path.join(root, 'tests', 'benchmarks', 'resolution', 'trac

/**
* Attempt to run the dynamic call tracer for a language fixture.
* Returns captured edges on success, empty array on failure or unavailability.
*
* Returns `{ status: 'ok', edges }` on a successful run (edges may be empty if
* the tracer produced no calls) and `{ status: 'skipped', edges: [] }` when
* the tracer subprocess failed or its toolchain wasn't available. The status
* distinction lets tests reusing this artifact (see issue #1166) preserve the
* tracer-validation suite's skip-on-toolchain-missing behavior.
*/
function runDynamicTracer(lang: string): DynamicEdge[] {
if (!fs.existsSync(TRACER_SCRIPT)) return [];
function runDynamicTracer(lang: string): TracerArtifact {
if (!fs.existsSync(TRACER_SCRIPT)) return { status: 'skipped', edges: [] };

const fixtureDir = path.join(FIXTURES_DIR, lang);
try {
Expand All @@ -176,12 +196,17 @@ function runDynamicTracer(lang: string): DynamicEdge[] {
stdio: ['pipe', 'pipe', 'pipe'],
});
const parsed = JSON.parse(result);
if (parsed.error) {
const edges: DynamicEdge[] = Array.isArray(parsed.edges) ? parsed.edges : [];
// Mirror tracer-validation.test.ts: when run-tracer.mjs reports an error
// and produced no edges, treat the run as a toolchain-missing skip
// rather than a recall failure.
if (parsed.error && edges.length === 0) {
console.error(` Dynamic tracer for ${lang}: ${parsed.error}`);
return { status: 'skipped', edges: [] };
}
return Array.isArray(parsed.edges) ? parsed.edges : [];
return { status: 'ok', edges };
} catch {
return [];
return { status: 'skipped', edges: [] };
}
}

Expand Down Expand Up @@ -285,20 +310,23 @@ try {
const expectedEdges: ExpectedEdge[] = manifest.edges;

// Run dynamic tracer if available
const dynamicEdges = runDynamicTracer(lang);
const { dynamicConfirmed } = mergeWithDynamic(expectedEdges, dynamicEdges);
const tracer = runDynamicTracer(lang);
const { dynamicConfirmed } = mergeWithDynamic(expectedEdges, tracer.edges);

// Use only expected edges for metrics (dynamic edges are supplemental)
const metrics = computeMetrics(resolvedEdges, expectedEdges);
if (dynamicEdges.length > 0) {
metrics.dynamicEdges = dynamicEdges.length;
const metrics: LangResult = {
...computeMetrics(resolvedEdges, expectedEdges),
tracer,
};
if (tracer.edges.length > 0) {
metrics.dynamicEdges = tracer.edges.length;
metrics.dynamicConfirmed = dynamicConfirmed;
}
results[lang] = metrics;

const dynamicInfo =
dynamicEdges.length > 0
? ` dynamic=${dynamicEdges.length} confirmed=${dynamicConfirmed}`
tracer.edges.length > 0
? ` dynamic=${tracer.edges.length} confirmed=${dynamicConfirmed}`
: '';
console.error(
` ${lang}: precision=${(metrics.precision * 100).toFixed(1)}% recall=${(metrics.recall * 100).toFixed(1)}%${dynamicInfo}`,
Expand Down
88 changes: 80 additions & 8 deletions tests/benchmarks/resolution/tracer/tracer-validation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
* 3. Load expected-edges.json and filter to mode === "same-file"
* 4. Compute recall = matched / total same-file expected edges
* 5. Assert recall >= per-language threshold
*
* **Artifact mode (CI):** when `RESOLUTION_RESULT_JSON` points at a result
* file produced by `scripts/resolution-benchmark.ts`, the suite reads the
* pre-captured tracer edges from that artifact instead of re-running the
* per-language tracer subprocess — avoiding the duplicate work that doubled
* pre-publish tracer cost (issue #1166). Local runs without the env var
* fall back to spawning `run-tracer.mjs` directly.
*/

import { execFileSync } from 'node:child_process';
Expand Down Expand Up @@ -135,6 +142,65 @@ function runTracer(lang: string): TracerEdge[] | null {
}
}

// ── Artifact loading (CI dedup, issue #1166) ─────────────────────────────

interface ArtifactTracer {
status: 'ok' | 'skipped';
edges: TracerEdge[];
}

interface ArtifactLangResult {
tracer?: ArtifactTracer;
}

const ARTIFACT_PATH = process.env.RESOLUTION_RESULT_JSON;

function loadArtifact(artifactPath: string): Record<string, ArtifactLangResult> {
if (!fs.existsSync(artifactPath)) {
throw new Error(
`RESOLUTION_RESULT_JSON=${artifactPath} not found — run scripts/resolution-benchmark.ts first.`,
);
}
const raw = fs.readFileSync(artifactPath, 'utf-8');
let parsed: Record<string, ArtifactLangResult>;
try {
parsed = JSON.parse(raw) as Record<string, ArtifactLangResult>;
} catch (err) {
const reason = err instanceof Error ? err.message : String(err);
throw new Error(
`RESOLUTION_RESULT_JSON=${artifactPath} contains malformed JSON (${reason}) — regenerate with scripts/resolution-benchmark.ts.`,
);
}
// Refuse to proceed on an empty artifact: with zero languages, vitest would
// register no test cases and exit 0, silently passing the gate.
if (!parsed || typeof parsed !== 'object' || Object.keys(parsed).length === 0) {
throw new Error(
`RESOLUTION_RESULT_JSON=${artifactPath} contains no language results — regenerate with scripts/resolution-benchmark.ts.`,
);
}
return parsed;
}
Comment on lines +158 to +182

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 JSON parse error not wrapped with a helpful message

If resolution-result.json exists but contains malformed JSON, JSON.parse will throw a raw SyntaxError with no reference to RESOLUTION_RESULT_JSON or what to do next. The existing helpful error messages ("not found", "contains no language results") cover the empty-file and missing-file cases, but a corrupt-file scenario produces a confusing traceback. Wrapping JSON.parse in a try/catch and re-throwing with the same "regenerate with scripts/resolution-benchmark.ts" context message would keep the DX consistent.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — wrapped JSON.parse in try/catch and re-throw with the 'regenerate with scripts/resolution-benchmark.ts' guidance, matching the helpful-error pattern used for the missing-file and empty-file cases. Verified locally that a malformed resolution-result.json now produces a clear error message instead of a raw SyntaxError traceback.


function tracerFromArtifact(
lang: string,
raw: ArtifactLangResult | undefined,
): TracerEdge[] | null {
if (!raw?.tracer) {
throw new Error(
`Resolution artifact for ${lang} is missing the tracer block — regenerate with the current scripts/resolution-benchmark.ts.`,
);
}
// Preserve the runTracer null-as-skipped contract so toolchain-missing
// languages are skipped gracefully in artifact mode too.
if (raw.tracer.status === 'skipped') return null;
if (!Array.isArray(raw.tracer.edges)) {
throw new Error(
`Resolution artifact for ${lang} has malformed tracer.edges — regenerate with the current scripts/resolution-benchmark.ts.`,
);
}
return raw.tracer.edges;
}

function loadExpectedEdges(lang: string): ExpectedEdge[] {
const edgesFile = path.join(FIXTURES_DIR, lang, 'expected-edges.json');
if (!fs.existsSync(edgesFile)) return [];
Expand Down Expand Up @@ -180,13 +246,19 @@ function computeSameFileRecall(

// ── Test Suite ───────────────────────────────────────────────────────────

// Discover all fixture languages that have expected-edges.json
const languages = fs
.readdirSync(FIXTURES_DIR, { withFileTypes: true })
.filter((d) => d.isDirectory())
.map((d) => d.name)
.filter((lang) => fs.existsSync(path.join(FIXTURES_DIR, lang, 'expected-edges.json')))
.sort();
const artifact = ARTIFACT_PATH ? loadArtifact(ARTIFACT_PATH) : null;

// In artifact mode, drive the suite from the keys in the artifact so we never
// silently skip a language the script reported. In local mode, discover from
// the filesystem like before.
const languages = artifact
? Object.keys(artifact).sort()
: fs
.readdirSync(FIXTURES_DIR, { withFileTypes: true })
.filter((d) => d.isDirectory())
.map((d) => d.name)
.filter((lang) => fs.existsSync(path.join(FIXTURES_DIR, lang, 'expected-edges.json')))
.sort();

describe('Dynamic Tracer — Same-File Edge Recall', () => {
// Summary table printed after all tests
Expand All @@ -211,7 +283,7 @@ describe('Dynamic Tracer — Same-File Edge Recall', () => {
return;
}

const tracerEdges = runTracer(lang);
const tracerEdges = artifact ? tracerFromArtifact(lang, artifact[lang]) : runTracer(lang);

// If tracer couldn't run (toolchain missing), skip gracefully.
// Many languages require runtimes (rustc, ghc, ruby, etc.) that aren't
Expand Down
Loading