optave · carlos-alm · May 20, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -343,6 +343,12 @@ jobs:
 
       - name: Run tracer validation (same-file edge recall)
         timeout-minutes: 10
+        # Reuse the tracer edges captured by Run resolution benchmark instead
+        # of re-spawning the per-language tracer subprocess (issue #1166). The
+        # test falls back to spawning run-tracer.mjs when this env var is
+        # unset, so local runs still work standalone.
+        env:
+          RESOLUTION_RESULT_JSON: ${{ github.workspace }}/resolution-result.json
         run: npx vitest run tests/benchmarks/resolution/tracer/tracer-validation.test.ts --reporter=verbose
 
       - name: Merge resolution into build result

diff --git a/scripts/resolution-benchmark.ts b/scripts/resolution-benchmark.ts
@@ -55,6 +55,14 @@ interface DynamicEdge {
 	target_file: string;
 }
 
+interface TracerArtifact {
+	// `skipped` means the tracer's toolchain was unavailable (no edges captured,
+	// not a recall failure). `ok` means the tracer ran — the edges array may
+	// still be empty if the language's tracer legitimately produced no calls.
+	status: 'ok' | 'skipped';
+	edges: DynamicEdge[];
+}
+
 interface LangResult {
 	precision: number;
 	recall: number;
@@ -68,6 +76,10 @@ interface LangResult {
 	// instead of rebuilding fixtures from scratch (see issue #1052).
 	falsePositiveEdges: string[];
 	falseNegativeEdges: string[];
+	// Raw tracer output so tests/benchmarks/resolution/tracer/tracer-validation.test.ts
+	// can reuse it instead of re-running the per-language tracer subprocess
+	// (see issue #1166).
+	tracer: TracerArtifact;
 	dynamicEdges?: number;
 	dynamicConfirmed?: number;
 }
@@ -99,7 +111,10 @@ function edgeKey(sourceName: string, sourceFile: string, targetName: string, tar
 	return `${sourceName}@${normalizeFile(sourceFile)} -> ${targetName}@${normalizeFile(targetFile)}`;
 }
 
-function computeMetrics(resolvedEdges: ResolvedEdge[], expectedEdges: ExpectedEdge[]): LangResult {
+function computeMetrics(
+	resolvedEdges: ResolvedEdge[],
+	expectedEdges: ExpectedEdge[],
+): Omit<LangResult, 'tracer' | 'dynamicEdges' | 'dynamicConfirmed'> {
 	const resolvedSet = new Set(
 		resolvedEdges.map((e) => edgeKey(e.source_name, e.source_file, e.target_name, e.target_file)),
 	);
@@ -162,10 +177,15 @@ const TRACER_SCRIPT = path.join(root, 'tests', 'benchmarks', 'resolution', 'trac
 
 /**
  * Attempt to run the dynamic call tracer for a language fixture.
- * Returns captured edges on success, empty array on failure or unavailability.
+ *
+ * Returns `{ status: 'ok', edges }` on a successful run (edges may be empty if
+ * the tracer produced no calls) and `{ status: 'skipped', edges: [] }` when
+ * the tracer subprocess failed or its toolchain wasn't available. The status
+ * distinction lets tests reusing this artifact (see issue #1166) preserve the
+ * tracer-validation suite's skip-on-toolchain-missing behavior.
  */
-function runDynamicTracer(lang: string): DynamicEdge[] {
-	if (!fs.existsSync(TRACER_SCRIPT)) return [];
+function runDynamicTracer(lang: string): TracerArtifact {
+	if (!fs.existsSync(TRACER_SCRIPT)) return { status: 'skipped', edges: [] };
 
 	const fixtureDir = path.join(FIXTURES_DIR, lang);
 	try {
@@ -176,12 +196,17 @@ function runDynamicTracer(lang: string): DynamicEdge[] {
 			stdio: ['pipe', 'pipe', 'pipe'],
 		});
 		const parsed = JSON.parse(result);
-		if (parsed.error) {
+		const edges: DynamicEdge[] = Array.isArray(parsed.edges) ? parsed.edges : [];
+		// Mirror tracer-validation.test.ts: when run-tracer.mjs reports an error
+		// and produced no edges, treat the run as a toolchain-missing skip
+		// rather than a recall failure.
+		if (parsed.error && edges.length === 0) {
 			console.error(`    Dynamic tracer for ${lang}: ${parsed.error}`);
+			return { status: 'skipped', edges: [] };
 		}
-		return Array.isArray(parsed.edges) ? parsed.edges : [];
+		return { status: 'ok', edges };
 	} catch {
-		return [];
+		return { status: 'skipped', edges: [] };
 	}
 }
 
@@ -285,20 +310,23 @@ try {
 			const expectedEdges: ExpectedEdge[] = manifest.edges;
 
 			// Run dynamic tracer if available
-			const dynamicEdges = runDynamicTracer(lang);
-			const { dynamicConfirmed } = mergeWithDynamic(expectedEdges, dynamicEdges);
+			const tracer = runDynamicTracer(lang);
+			const { dynamicConfirmed } = mergeWithDynamic(expectedEdges, tracer.edges);
 
 			// Use only expected edges for metrics (dynamic edges are supplemental)
-			const metrics = computeMetrics(resolvedEdges, expectedEdges);
-			if (dynamicEdges.length > 0) {
-				metrics.dynamicEdges = dynamicEdges.length;
+			const metrics: LangResult = {
+				...computeMetrics(resolvedEdges, expectedEdges),
+				tracer,
+			};
+			if (tracer.edges.length > 0) {
+				metrics.dynamicEdges = tracer.edges.length;
 				metrics.dynamicConfirmed = dynamicConfirmed;
 			}
 			results[lang] = metrics;
 
 			const dynamicInfo =
-				dynamicEdges.length > 0
-					? ` dynamic=${dynamicEdges.length} confirmed=${dynamicConfirmed}`
+				tracer.edges.length > 0
+					? ` dynamic=${tracer.edges.length} confirmed=${dynamicConfirmed}`
 					: '';
 			console.error(
 				`    ${lang}: precision=${(metrics.precision * 100).toFixed(1)}% recall=${(metrics.recall * 100).toFixed(1)}%${dynamicInfo}`,

diff --git a/tests/benchmarks/resolution/tracer/tracer-validation.test.ts b/tests/benchmarks/resolution/tracer/tracer-validation.test.ts
@@ -11,6 +11,13 @@
  *   3. Load expected-edges.json and filter to mode === "same-file"
  *   4. Compute recall = matched / total same-file expected edges
  *   5. Assert recall >= per-language threshold
+ *
+ * **Artifact mode (CI):** when `RESOLUTION_RESULT_JSON` points at a result
+ * file produced by `scripts/resolution-benchmark.ts`, the suite reads the
+ * pre-captured tracer edges from that artifact instead of re-running the
+ * per-language tracer subprocess — avoiding the duplicate work that doubled
+ * pre-publish tracer cost (issue #1166). Local runs without the env var
+ * fall back to spawning `run-tracer.mjs` directly.
  */
 
 import { execFileSync } from 'node:child_process';
@@ -135,6 +142,65 @@ function runTracer(lang: string): TracerEdge[] | null {
   }
 }
 
+// ── Artifact loading (CI dedup, issue #1166) ─────────────────────────────
+
+interface ArtifactTracer {
+  status: 'ok' | 'skipped';
+  edges: TracerEdge[];
+}
+
+interface ArtifactLangResult {
+  tracer?: ArtifactTracer;
+}
+
+const ARTIFACT_PATH = process.env.RESOLUTION_RESULT_JSON;
+
+function loadArtifact(artifactPath: string): Record<string, ArtifactLangResult> {
+  if (!fs.existsSync(artifactPath)) {
+    throw new Error(
+      `RESOLUTION_RESULT_JSON=${artifactPath} not found — run scripts/resolution-benchmark.ts first.`,
+    );
+  }
+  const raw = fs.readFileSync(artifactPath, 'utf-8');
+  let parsed: Record<string, ArtifactLangResult>;
+  try {
+    parsed = JSON.parse(raw) as Record<string, ArtifactLangResult>;
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err);
+    throw new Error(
+      `RESOLUTION_RESULT_JSON=${artifactPath} contains malformed JSON (${reason}) — regenerate with scripts/resolution-benchmark.ts.`,
+    );
+  }
+  // Refuse to proceed on an empty artifact: with zero languages, vitest would
+  // register no test cases and exit 0, silently passing the gate.
+  if (!parsed || typeof parsed !== 'object' || Object.keys(parsed).length === 0) {
+    throw new Error(
+      `RESOLUTION_RESULT_JSON=${artifactPath} contains no language results — regenerate with scripts/resolution-benchmark.ts.`,
+    );
+  }
+  return parsed;
+}
+
+function tracerFromArtifact(
+  lang: string,
+  raw: ArtifactLangResult | undefined,
+): TracerEdge[] | null {
+  if (!raw?.tracer) {
+    throw new Error(
+      `Resolution artifact for ${lang} is missing the tracer block — regenerate with the current scripts/resolution-benchmark.ts.`,
+    );
+  }
+  // Preserve the runTracer null-as-skipped contract so toolchain-missing
+  // languages are skipped gracefully in artifact mode too.
+  if (raw.tracer.status === 'skipped') return null;
+  if (!Array.isArray(raw.tracer.edges)) {
+    throw new Error(
+      `Resolution artifact for ${lang} has malformed tracer.edges — regenerate with the current scripts/resolution-benchmark.ts.`,
+    );
+  }
+  return raw.tracer.edges;
+}
+
 function loadExpectedEdges(lang: string): ExpectedEdge[] {
   const edgesFile = path.join(FIXTURES_DIR, lang, 'expected-edges.json');
   if (!fs.existsSync(edgesFile)) return [];
@@ -180,13 +246,19 @@ function computeSameFileRecall(
 
 // ── Test Suite ───────────────────────────────────────────────────────────
 
-// Discover all fixture languages that have expected-edges.json
-const languages = fs
-  .readdirSync(FIXTURES_DIR, { withFileTypes: true })
-  .filter((d) => d.isDirectory())
-  .map((d) => d.name)
-  .filter((lang) => fs.existsSync(path.join(FIXTURES_DIR, lang, 'expected-edges.json')))
-  .sort();
+const artifact = ARTIFACT_PATH ? loadArtifact(ARTIFACT_PATH) : null;
+
+// In artifact mode, drive the suite from the keys in the artifact so we never
+// silently skip a language the script reported. In local mode, discover from
+// the filesystem like before.
+const languages = artifact
+  ? Object.keys(artifact).sort()
+  : fs
+      .readdirSync(FIXTURES_DIR, { withFileTypes: true })
+      .filter((d) => d.isDirectory())
+      .map((d) => d.name)
+      .filter((lang) => fs.existsSync(path.join(FIXTURES_DIR, lang, 'expected-edges.json')))
+      .sort();
 
 describe('Dynamic Tracer — Same-File Edge Recall', () => {
   // Summary table printed after all tests
@@ -211,7 +283,7 @@ describe('Dynamic Tracer — Same-File Edge Recall', () => {
         return;
       }
 
-      const tracerEdges = runTracer(lang);
+      const tracerEdges = artifact ? tracerFromArtifact(lang, artifact[lang]) : runTracer(lang);
 
       // If tracer couldn't run (toolchain missing), skip gracefully.
       // Many languages require runtimes (rustc, ghc, ruby, etc.) that aren't