88 * per language and per resolution mode.
99 *
1010 * CI gate: fails if precision or recall drops below per-language thresholds.
11+ *
12+ * **Artifact mode (CI):** when `RESOLUTION_RESULT_JSON` points at a result
13+ * file produced by `scripts/resolution-benchmark.ts`, the suite reads those
14+ * pre-computed metrics and skips the fixture rebuild — avoiding the duplicate
15+ * work that doubled pre-publish CI time (issue #1052). Local runs without
16+ * the env var fall back to the build-from-fixtures path.
1117 */
1218
1319import fs from 'node:fs' ;
@@ -262,6 +268,79 @@ function formatReport(lang: string, metrics: BenchmarkMetrics): string {
262268 return lines . join ( '\n' ) ;
263269}
264270
// ── Artifact loading (CI dedup, issue #1052) ─────────────────────────────

/**
 * Location of the pre-computed benchmark result file, taken from the
 * `RESOLUTION_RESULT_JSON` environment variable. When it is unset (or empty)
 * the suite falls back to building metrics from the fixtures.
 */
const ARTIFACT_PATH: string | undefined = process.env['RESOLUTION_RESULT_JSON'];
274+
/**
 * Shape of a single per-language entry in the result JSON that
 * `loadArtifact` reads. The static type is only a claim about parsed JSON;
 * `metricsFromArtifact` re-checks every field at runtime before use.
 */
interface ArtifactLangResult {
  // Headline ratios compared against the per-language thresholds.
  precision: number;
  recall: number;

  // Raw counts reported alongside the ratios.
  truePositives: number;
  falsePositives: number;
  falseNegatives: number;
  totalResolved: number;
  totalExpected: number;

  // Per-resolution-mode breakdown, keyed by mode name.
  byMode: Record<string, ModeMetrics>;

  // Optional in the type so that an artifact lacking them still parses;
  // `metricsFromArtifact` rejects an entry where either list is absent.
  falsePositiveEdges?: string[];
  falseNegativeEdges?: string[];
}
287+
/**
 * Reads the pre-computed resolution benchmark artifact from disk and checks
 * that it is a non-empty JSON object keyed by language.
 *
 * Per-language field validation is NOT done here; see `metricsFromArtifact`.
 *
 * @param artifactPath - Path to the result JSON (from `RESOLUTION_RESULT_JSON`).
 * @returns The parsed artifact, keyed by language name.
 * @throws Error when the file does not exist, cannot be parsed as JSON, is not
 *   a plain JSON object, or contains no language entries.
 */
function loadArtifact(artifactPath: string): Record<string, ArtifactLangResult> {
  if (!fs.existsSync(artifactPath)) {
    throw new Error(
      `RESOLUTION_RESULT_JSON=${artifactPath} not found — run scripts/resolution-benchmark.ts first.`,
    );
  }

  // Keep the parse result as `unknown` until it has been checked, and wrap
  // read/parse failures so the error names the offending file instead of
  // surfacing a bare SyntaxError.
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(artifactPath, 'utf-8'));
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `RESOLUTION_RESULT_JSON=${artifactPath} could not be parsed as JSON (${reason}) — regenerate with scripts/resolution-benchmark.ts.`,
    );
  }

  // Arrays are `typeof 'object'` too; without the explicit check a non-empty
  // array would pass and its indices would be treated as language names.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(
      `RESOLUTION_RESULT_JSON=${artifactPath} is not a JSON object keyed by language — regenerate with scripts/resolution-benchmark.ts.`,
    );
  }

  // Refuse to proceed on an empty artifact: with zero languages, vitest would
  // register no describe blocks and exit 0, silently passing the gate without
  // evaluating a single threshold.
  if (Object.keys(parsed).length === 0) {
    throw new Error(
      `RESOLUTION_RESULT_JSON=${artifactPath} contains no language results — regenerate with scripts/resolution-benchmark.ts.`,
    );
  }

  return parsed as Record<string, ArtifactLangResult>;
}
308+
/**
 * Validates one per-language artifact entry and converts it to the metrics
 * object the suite works with.
 *
 * The entry originates from `JSON.parse`, so its static type is unverified:
 * every required field is checked at runtime before it is copied across.
 *
 * @param lang - Language name, used only in error messages.
 * @param raw - The artifact entry for `lang`.
 * @returns A metrics object carrying the entry's fields.
 * @throws Error when the entry is not an object, a numeric field is missing or
 *   not a number, `byMode` is not a plain object, or either edge list is not
 *   an array.
 */
function metricsFromArtifact(lang: string, raw: ArtifactLangResult): BenchmarkMetrics {
  const missingFieldsError = (): Error =>
    new Error(
      `Resolution artifact for ${lang} is missing required numeric fields — regenerate with the current resolution-benchmark.ts.`,
    );

  // An entry such as `"go": null` would otherwise crash with an opaque
  // TypeError on the first property read instead of the actionable message.
  const entry: unknown = raw;
  if (entry === null || typeof entry !== 'object' || Array.isArray(entry)) {
    throw missingFieldsError();
  }

  const numericFields = [
    'precision',
    'recall',
    'truePositives',
    'falsePositives',
    'falseNegatives',
    'totalResolved',
    'totalExpected',
  ] as const;
  const allNumeric = numericFields.every((field) => typeof raw[field] === 'number');

  // `byMode` must be a keyed object; arrays are `typeof 'object'` as well.
  const byModeIsObject =
    raw.byMode !== null && typeof raw.byMode === 'object' && !Array.isArray(raw.byMode);

  if (!allNumeric || !byModeIsObject) {
    throw missingFieldsError();
  }

  const { falsePositiveEdges, falseNegativeEdges } = raw;
  if (!Array.isArray(falsePositiveEdges) || !Array.isArray(falseNegativeEdges)) {
    throw new Error(
      `Resolution artifact for ${lang} is missing falsePositiveEdges/falseNegativeEdges — regenerate with the current resolution-benchmark.ts.`,
    );
  }

  return {
    precision: raw.precision,
    recall: raw.recall,
    truePositives: raw.truePositives,
    falsePositives: raw.falsePositives,
    falseNegatives: raw.falseNegatives,
    totalResolved: raw.totalResolved,
    totalExpected: raw.totalExpected,
    byMode: raw.byMode,
    falsePositiveEdges,
    falseNegativeEdges,
  };
}
343+
265344// ── Tests ────────────────────────────────────────────────────────────────
266345
267346function discoverFixtures ( ) : string [ ] {
@@ -276,7 +355,11 @@ function discoverFixtures(): string[] {
276355 return languages ;
277356}
278357
279- const languages = discoverFixtures ( ) ;
358+ const artifact = ARTIFACT_PATH ? loadArtifact ( ARTIFACT_PATH ) : null ;
359+ // In artifact mode, drive the suite from the keys in the artifact so we never
360+ // silently skip a language the script reported. In local mode, discover from
361+ // the filesystem like before.
362+ const languages = artifact ? Object . keys ( artifact ) . sort ( ) : discoverFixtures ( ) ;
280363
281364/** Stores all results for the final summary */
282365const allResults : Record < string , BenchmarkMetrics > = { } ;
@@ -309,22 +392,24 @@ describe('Call Resolution Precision/Recall', () => {
309392
310393 for ( const lang of languages ) {
311394 describe ( lang , ( ) => {
312- let fixtureDir : string ;
313- let resolvedEdges : ResolvedEdge [ ] ;
314- let expectedEdges : ExpectedEdge [ ] ;
395+ let fixtureDir : string | null = null ;
315396 let metrics : BenchmarkMetrics ;
316397
317398 beforeAll ( async ( ) => {
318- fixtureDir = copyFixture ( lang ) ;
319- await buildFixtureGraph ( fixtureDir ) ;
399+ if ( artifact ) {
400+ metrics = metricsFromArtifact ( lang , artifact [ lang ] ) ;
401+ } else {
402+ fixtureDir = copyFixture ( lang ) ;
403+ await buildFixtureGraph ( fixtureDir ) ;
320404
321- resolvedEdges = extractResolvedEdges ( fixtureDir ) ;
405+ const resolvedEdges = extractResolvedEdges ( fixtureDir ) as ResolvedEdge [ ] ;
322406
323- const manifestPath = path . join ( FIXTURES_DIR , lang , 'expected-edges.json' ) ;
324- const manifest = JSON . parse ( fs . readFileSync ( manifestPath , 'utf-8' ) ) ;
325- expectedEdges = manifest . edges ;
407+ const manifestPath = path . join ( FIXTURES_DIR , lang , 'expected-edges.json' ) ;
408+ const manifest = JSON . parse ( fs . readFileSync ( manifestPath , 'utf-8' ) ) ;
409+ const expectedEdges : ExpectedEdge [ ] = manifest . edges ;
326410
327- metrics = computeMetrics ( resolvedEdges , expectedEdges ) ;
411+ metrics = computeMetrics ( resolvedEdges , expectedEdges ) ;
412+ }
328413 allResults [ lang ] = metrics ;
329414 } , 60_000 ) ;
330415
@@ -334,16 +419,13 @@ describe('Call Resolution Precision/Recall', () => {
334419 }
335420 } ) ;
336421
337- test ( 'builds graph successfully' , ( ) => {
338- expect ( resolvedEdges ) . toBeDefined ( ) ;
339- expect ( Array . isArray ( resolvedEdges ) ) . toBe ( true ) ;
340- // Some languages may have 0 resolved call edges if resolution isn't
341- // implemented yet — that's okay, the precision/recall tests will
342- // catch it at the appropriate threshold level.
422+ test ( 'metrics are populated' , ( ) => {
423+ expect ( metrics ) . toBeDefined ( ) ;
424+ expect ( metrics . totalResolved ) . toBeGreaterThanOrEqual ( 0 ) ;
343425 } ) ;
344426
345427 test ( 'expected edges manifest is non-empty' , ( ) => {
346- expect ( expectedEdges . length ) . toBeGreaterThan ( 0 ) ;
428+ expect ( metrics . totalExpected ) . toBeGreaterThan ( 0 ) ;
347429 } ) ;
348430
349431 test ( 'precision meets threshold' , ( ) => {
0 commit comments