Skip to content

Commit d678615

Browse files
authored
feat(results): materialize per-test task bundles
1 parent 02d5763 commit d678615

10 files changed

Lines changed: 1102 additions & 610 deletions

File tree

.beads/issues.jsonl

Lines changed: 7 additions & 7 deletions
Large diffs are not rendered by default.

apps/cli/src/commands/eval/artifact-writer.ts

Lines changed: 233 additions & 382 deletions
Large diffs are not rendered by default.

apps/cli/src/commands/eval/run-eval.ts

Lines changed: 28 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ import {
3939
parseJsonlResults,
4040
writeArtifactsFromResults,
4141
writeInitialBenchmarkArtifact,
42-
writeRunSourceArtifact,
4342
} from './artifact-writer.js';
4443
import { writeBenchmarkJson } from './benchmark-writer.js';
4544
import { loadEnvFromHierarchy } from './env.js';
@@ -60,6 +59,7 @@ import {
6059
formatMatrixSummary,
6160
} from './statistics.js';
6261
import { type TargetSelection, selectMultipleTargets, selectTarget } from './targets.js';
62+
import type { TaskBundleTargetSelection } from './task-bundle.js';
6363

6464
const DEFAULT_WORKERS = 3;
6565

@@ -726,6 +726,27 @@ async function prepareFileMetadata(params: {
726726
};
727727
}
728728

729+
function buildTaskBundleTargetSelections(
730+
activeTestFiles: readonly string[],
731+
fileMetadata: ReadonlyMap<
732+
string,
733+
{ readonly selections: readonly { readonly selection: TargetSelection }[] }
734+
>,
735+
): readonly TaskBundleTargetSelection[] {
736+
return activeTestFiles.flatMap((testFilePath) => {
737+
const meta = fileMetadata.get(testFilePath);
738+
if (!meta) {
739+
return [];
740+
}
741+
return meta.selections.map(({ selection }) => ({
742+
evalFileAbsolutePath: testFilePath,
743+
targetName: selection.targetName,
744+
resolvedTargetName: selection.resolvedTarget.name,
745+
definitions: selection.definitions,
746+
}));
747+
});
748+
}
749+
729750
async function runSingleEvalFile(params: {
730751
readonly testFilePath: string;
731752
readonly cwd: string;
@@ -1671,45 +1692,41 @@ export async function runEvalCommand(
16711692
const sourceTests = activeTestFiles.flatMap(
16721693
(activeTestFile) => fileMetadata.get(activeTestFile)?.testCases ?? [],
16731694
);
1695+
const taskBundleTargets = buildTaskBundleTargetSelections(activeTestFiles, fileMetadata);
16741696
if (isResumeAppend) {
16751697
// Resume mode: write per-test artifacts for newly-run tests, then aggregate
16761698
// from the full index.jsonl (old + new results with deduplication)
16771699
const { writePerTestArtifacts } = await import('./artifact-writer.js');
16781700
await writePerTestArtifacts(allResults, runDir, {
16791701
experiment: normalizeExperimentName(options.experiment),
1702+
cwd,
1703+
repoRoot,
1704+
sourceTests,
1705+
taskBundleTargets,
16801706
});
16811707
const { benchmarkPath: workspaceBenchmarkPath, timingPath } = await aggregateRunDir(
16821708
runDir,
16831709
{ evalFile, experiment: normalizeExperimentName(options.experiment) },
16841710
);
1685-
const runSourcePath = await writeRunSourceArtifact(summaryResults, runDir, {
1686-
evalFile,
1687-
cwd,
1688-
repoRoot,
1689-
sourceTests,
1690-
});
16911711
const indexPath = path.join(runDir, 'index.jsonl');
16921712
console.log(`Artifact workspace updated: ${runDir}`);
16931713
console.log(` Index: ${indexPath}`);
16941714
console.log(` Per-test artifacts: ${runDir} (${allResults.length} new test directories)`);
16951715
console.log(` Timing: ${timingPath}`);
16961716
console.log(` Benchmark: ${workspaceBenchmarkPath}`);
1697-
if (runSourcePath) {
1698-
console.log(` Run source: ${runSourcePath}`);
1699-
}
17001717
} else {
17011718
const {
17021719
testArtifactDir,
17031720
timingPath,
17041721
benchmarkPath: workspaceBenchmarkPath,
17051722
indexPath,
1706-
runSourcePath,
17071723
} = await writeArtifactsFromResults(allResults, runDir, {
17081724
evalFile,
17091725
experiment: normalizeExperimentName(options.experiment),
17101726
cwd,
17111727
repoRoot,
17121728
sourceTests,
1729+
taskBundleTargets,
17131730
});
17141731
console.log(`Artifact workspace written to: ${runDir}`);
17151732
console.log(` Index: ${indexPath}`);
@@ -1718,9 +1735,6 @@ export async function runEvalCommand(
17181735
);
17191736
console.log(` Timing: ${timingPath}`);
17201737
console.log(` Benchmark: ${workspaceBenchmarkPath}`);
1721-
if (runSourcePath) {
1722-
console.log(` Run source: ${runSourcePath}`);
1723-
}
17241738
}
17251739
}
17261740

0 commit comments

Comments
 (0)