@@ -84,10 +84,10 @@ interface NormalizedOptions {
8484 readonly targetsPath ?: string ;
8585 readonly filter ?: string | readonly string [ ] ;
8686 readonly workers ?: number ;
87- /** --output <dir>: artifact directory (new canonical meaning) */
87+ /** --output <dir>: canonical artifact directory */
8888 readonly outputDir ?: string ;
89- /** Legacy --out <path>: deprecated, treated as artifact dir */
90- readonly outPath ?: string ;
89+ /** Removed: use --output for run directories and --export for extra files */
90+ readonly removedOut ?: string ;
9191 /** --export <paths...>: additional output files */
9292 readonly exportPaths : readonly string [ ] ;
9393 readonly dryRun : boolean ;
@@ -115,8 +115,10 @@ interface NormalizedOptions {
115115 readonly keepWorkspaces : boolean ;
116116 /** Deprecated: benchmark.json is always written to artifact dir */
117117 readonly benchmarkJson ?: string ;
118- /** Deprecated : use --output instead */
118+ /** Removed : use --output instead */
119119 readonly artifacts ?: string ;
120+ /** Removed: the run directory always uses index.jsonl */
121+ readonly outputFormat ?: string ;
120122 readonly graderTarget ?: string ;
121123 readonly model ?: string ;
122124 readonly outputMessages : number | 'all' ;
@@ -227,6 +229,43 @@ function normalizeSourceMetadataByEvalFile(
227229 return undefined ;
228230}
229231
/**
 * Lower-cased file extensions (with leading dot) that mark a value as a
 * legacy output *file* path rather than a run directory.
 */
const LEGACY_OUTPUT_FILE_EXTENSIONS: ReadonlySet<string> = new Set([
  '.jsonl',
  '.json',
  '.xml',
  '.yaml',
  '.yml',
  '.html',
  '.htm',
]);

/**
 * Reports whether `value` ends in one of the legacy output file extensions.
 *
 * The comparison is case-insensitive; only the extension is inspected, the
 * filesystem is never consulted.
 *
 * @param value - Path string to classify.
 * @returns `true` when the extension is in LEGACY_OUTPUT_FILE_EXTENSIONS.
 */
function looksLikeLegacyOutputFilePath(value: string): boolean {
  return LEGACY_OUTPUT_FILE_EXTENSIONS.has(path.extname(value).toLowerCase());
}

/**
 * Builds the one-sentence `--export` suggestion for a legacy output file.
 *
 * Shared by both migration messages so the wording for `.xml` (JUnit) and for
 * every other extension is defined in exactly one place.
 *
 * @param filePath - The file path the user supplied.
 * @returns The `--export` hint sentence, without a trailing newline.
 */
function exportHintForLegacyOutputFile(filePath: string): string {
  return path.extname(filePath).toLowerCase() === '.xml'
    ? `Use --export ${filePath} for JUnit XML.`
    : `Use --export ${filePath} if you still need that extra file.`;
}

/**
 * Builds the error text shown when `--output` receives a file path instead of
 * a run directory.
 *
 * @param value - The offending `--output` value.
 * @returns A two-line message: the problem, then the `--export` hint followed
 *   by guidance on `--output <dir>`.
 */
function outputFileMigrationMessage(value: string): string {
  return [
    `--output expects a run directory, not a file path: ${value}`,
    `${exportHintForLegacyOutputFile(value)} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`,
  ].join('\n');
}

/**
 * Builds the error text shown when the removed `--artifacts` flag is used.
 *
 * @param artifactsDir - The value that was passed to `--artifacts`.
 * @param outputDir - The resolved output directory value, if any. When it
 *   looks like a legacy output file path, the message additionally suggests
 *   moving it to `--export` and shows a combined migration example.
 * @returns A newline-joined message of one or three lines.
 */
function artifactsMigrationMessage(artifactsDir: string, outputDir?: string): string {
  const lines = [`--artifacts was removed from agentv eval. Use --output ${artifactsDir} instead.`];
  if (outputDir && looksLikeLegacyOutputFilePath(outputDir)) {
    lines.push(
      exportHintForLegacyOutputFile(outputDir),
      `Migration example: --output ${artifactsDir} --export ${outputDir}`,
    );
  }
  return lines.join('\n');
}
268+
230269/**
231270 * Check whether an eval file's tags satisfy --tag / --exclude-tag filters.
232271 *
@@ -316,7 +355,6 @@ function normalizeOptions(
316355 const configWorkers = config ?. execution ?. workers ;
317356 const workers = cliWorkers ?? configWorkers ?? 0 ;
318357
319- // --output is now a single optional string (artifact directory)
320358 const cliOutputDir = normalizeString ( rawOptions . output ) ;
321359
322360 // --export is the new repeatable flag for additional output files
@@ -354,9 +392,9 @@ function normalizeOptions(
354392 const configCacheEnabled = config ?. cache ?. enabled ;
355393 const configCachePath = normalizeString ( config ?. cache ?. path ) ;
356394
357- // Output dir: CLI --out > config output.dir > auto-generated
395+ // Output dir: CLI --output > config output.dir > auto-generated
358396 const cliOut = normalizeString ( rawOptions . out ) ;
359- const configOut = config ?. output ?. dir ;
397+ const configOutputDir = normalizeString ( config ?. output ?. dir ) ;
360398 const cliWorkspacePath = normalizeString ( rawOptions . workspacePath ) ;
361399 const cliWorkspaceModeRaw = normalizeString ( rawOptions . workspaceMode ) ;
362400 const cliWorkspaceMode = normalizeWorkspaceMode ( rawOptions . workspaceMode ) ;
@@ -376,8 +414,8 @@ function normalizeOptions(
376414 targetsPath : normalizeString ( rawOptions . targets ) ,
377415 filter : normalizeFilter ( rawOptions . filter ) ,
378416 workers : workers > 0 ? workers : undefined ,
379- outputDir : cliOutputDir ,
380- outPath : cliOut ?? configOut ,
417+ outputDir : cliOutputDir ?? configOutputDir ,
418+ removedOut : cliOut ,
381419 exportPaths,
382420 dryRun : normalizeBoolean ( rawOptions . dryRun ) ,
383421 dryRunDelay : normalizeNumber ( rawOptions . dryRunDelay , 0 ) ,
@@ -425,6 +463,7 @@ function normalizeOptions(
425463 config ?. execution ?. keepWorkspaces === true ,
426464 benchmarkJson : normalizeString ( rawOptions . benchmarkJson ) ,
427465 artifacts : normalizeString ( rawOptions . artifacts ) ,
466+ outputFormat : normalizeString ( rawOptions . outputFormat ) ,
428467 graderTarget : normalizeString ( rawOptions . graderTarget ) ,
429468 model : normalizeString ( rawOptions . model ) ,
430469 outputMessages : normalizeOutputMessages ( normalizeString ( rawOptions . outputMessages ) ) ,
@@ -1096,6 +1135,27 @@ export async function runEvalCommand(
10961135 throw new Error ( '--grader-target agentv requires --model (e.g., --model openai:gpt-5-mini)' ) ;
10971136 }
10981137
1138+ if ( options . removedOut ) {
1139+ throw new Error (
1140+ [
1141+ '--out was removed from agentv eval. Use --output <dir> for the canonical run directory.' ,
1142+ 'If you need an additional flat file, add --export <file>.' ,
1143+ `Migration example: --out ${ options . removedOut } -> --output <dir> --export ${ options . removedOut } ` ,
1144+ ] . join ( '\n' ) ,
1145+ ) ;
1146+ }
1147+ if ( options . outputFormat ) {
1148+ throw new Error (
1149+ '--output-format was removed from agentv eval. The run directory always writes index.jsonl; use --export <file> for JSON, XML/JUnit, YAML, or HTML copies.' ,
1150+ ) ;
1151+ }
1152+ if ( options . artifacts ) {
1153+ throw new Error ( artifactsMigrationMessage ( options . artifacts , options . outputDir ) ) ;
1154+ }
1155+ if ( options . outputDir && looksLikeLegacyOutputFilePath ( options . outputDir ) ) {
1156+ throw new Error ( outputFileMigrationMessage ( options . outputDir ) ) ;
1157+ }
1158+
10991159 // --retry-errors: resume from a previous run by re-running execution_error and missing test cases.
11001160 // Uses an exclusion filter to skip already-completed (non-error) cases, which naturally includes
11011161 // both error cases and cases that never ran (e.g., due to a crash or interrupt).
@@ -1125,7 +1185,7 @@ export async function runEvalCommand(
11251185 // last-known run dir for this cwd from .agentv/cache.json. Matches promptfoo's
11261186 // `--resume [evalId]` and OpenCompass's `-r [timestamp]` "latest by default"
11271187 // convention. The cache pointer is written by saveRunCache after every eval.
1128- if ( options . resume && ! options . retryErrors && ! options . outputDir && ! options . artifacts ) {
1188+ if ( options . resume && ! options . retryErrors && ! options . outputDir ) {
11291189 const cachedDir = await resolveCachedRunDir ( cwd ) ;
11301190 if ( cachedDir ) {
11311191 options = { ...options , outputDir : cachedDir } ;
@@ -1140,7 +1200,7 @@ export async function runEvalCommand(
11401200 let resumeSkipKeys : Set < string > | undefined ;
11411201 let isResumeAppend = false ;
11421202 if ( options . resume && ! options . retryErrors ) {
1143- const explicitResumeDir = options . outputDir ?? options . artifacts ;
1203+ const explicitResumeDir = options . outputDir ;
11441204 if ( explicitResumeDir ) {
11451205 const resumeIndexPath = path . join ( path . resolve ( explicitResumeDir ) , 'index.jsonl' ) ;
11461206 if ( existsSync ( resumeIndexPath ) ) {
@@ -1190,50 +1250,27 @@ export async function runEvalCommand(
11901250 console . log ( `Repository root: ${ repoRoot } ` ) ;
11911251 }
11921252
1193- // Emit deprecation warnings for legacy flags
1194- if ( options . outPath ) {
1195- console . warn ( 'Warning: --out is deprecated. Use --output <dir> to set the artifact directory.' ) ;
1196- }
1197- if ( options . artifacts ) {
1198- console . warn (
1199- 'Warning: --artifacts is deprecated. Use --output <dir> to set the artifact directory.' ,
1200- ) ;
1201- }
1253+ // Emit deprecation warnings for remaining legacy flags.
12021254 if ( options . benchmarkJson ) {
12031255 console . warn (
12041256 'Warning: --benchmark-json is deprecated. benchmark.json is always written to the artifact directory.' ,
12051257 ) ;
12061258 }
1207- if ( normalizeString ( input . rawOptions . outputFormat ) ) {
1208- console . warn (
1209- 'Warning: --output-format is deprecated. The artifact directory always uses JSONL.' ,
1210- ) ;
1211- }
12121259
12131260 // Resolve artifact directory (runDir) and primary output path.
1214- // Precedence: --output > --artifacts (deprecated) > --out (deprecated) > default
1215- const explicitDir = options . outputDir ?? options . artifacts ;
1261+ // Precedence: --output > config output.dir > default
1262+ const explicitDir = options . outputDir ;
12161263 let runDir : string ;
12171264 let outputPath : string ;
1218- let usesDefaultArtifactWorkspace : boolean ;
12191265
12201266 if ( explicitDir ) {
1221- // --output <dir> or --artifacts <dir>: use as artifact directory
12221267 runDir = path . resolve ( explicitDir ) ;
12231268 mkdirSync ( runDir , { recursive : true } ) ;
12241269 outputPath = path . join ( runDir , 'index.jsonl' ) ;
1225- usesDefaultArtifactWorkspace = true ;
1226- } else if ( options . outPath ) {
1227- // --out <path> (deprecated): use dirname as artifact dir
1228- outputPath = path . resolve ( options . outPath ) ;
1229- runDir = path . dirname ( outputPath ) ;
1230- mkdirSync ( runDir , { recursive : true } ) ;
1231- usesDefaultArtifactWorkspace = false ;
12321270 } else {
12331271 // Default: .agentv/results/runs/<experiment>/<timestamp>/
12341272 outputPath = buildDefaultOutputPathForExperiment ( cwd , options . experiment ) ;
12351273 runDir = path . dirname ( outputPath ) ;
1236- usesDefaultArtifactWorkspace = true ;
12371274 }
12381275
12391276 // Initialize OTel exporter if --export-otel flag is set or file export flags are used
@@ -1545,7 +1582,7 @@ export async function runEvalCommand(
15451582 // has execution_status: ok. The end-of-run write preserves this value via
15461583 // readPlannedTestCount inside aggregateRunDir / writeArtifactsFromResults.
15471584 // Skip on resume — we want to preserve the *original* planned count.
1548- if ( ! isResumeAppend && usesDefaultArtifactWorkspace && totalEvalCount > 0 ) {
1585+ if ( ! isResumeAppend && totalEvalCount > 0 ) {
15491586 const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
15501587 await writeInitialBenchmarkArtifact ( runDir , {
15511588 evalFile,
@@ -1719,7 +1756,7 @@ export async function runEvalCommand(
17191756
17201757 // When resuming, compute summary from ALL results (old + new, deduplicated)
17211758 let summaryResults = allResults ;
1722- if ( isResumeAppend && usesDefaultArtifactWorkspace ) {
1759+ if ( isResumeAppend ) {
17231760 const content = await readFile ( outputPath , 'utf8' ) ;
17241761 summaryResults = deduplicateByTestIdTarget ( parseJsonlResults ( content ) ) ;
17251762 }
@@ -1747,7 +1784,7 @@ export async function runEvalCommand(
17471784 }
17481785
17491786 // Write artifacts to the run directory (always, not conditional on flags)
1750- if ( usesDefaultArtifactWorkspace && allResults . length > 0 ) {
1787+ if ( allResults . length > 0 ) {
17511788 const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
17521789 const sourceTests = activeTestFiles . flatMap (
17531790 ( activeTestFile ) => fileMetadata . get ( activeTestFile ) ?. testCases ?? [ ] ,
0 commit comments