@@ -84,10 +84,10 @@ interface NormalizedOptions {
8484 readonly targetsPath ?: string ;
8585 readonly filter ?: string | readonly string [ ] ;
8686 readonly workers ?: number ;
87- /** --output <dir>: artifact directory (new canonical meaning) */
87+ /** --output <dir>: canonical artifact directory */
8888 readonly outputDir ?: string ;
89- /** Legacy --out <path>: deprecated, treated as artifact dir */
90- readonly outPath ?: string ;
89+ /** Removed: use --output for run directories and --export for extra files */
90+ readonly removedOut ?: string ;
9191 /** --export <paths...>: additional output files */
9292 readonly exportPaths : readonly string [ ] ;
9393 readonly dryRun : boolean ;
@@ -115,8 +115,10 @@ interface NormalizedOptions {
115115 readonly keepWorkspaces : boolean ;
116116 /** Deprecated: benchmark.json is always written to artifact dir */
117117 readonly benchmarkJson ?: string ;
118- /** Deprecated : use --output instead */
118+ /** Removed : use --output instead */
119119 readonly artifacts ?: string ;
120+ /** Removed: the run directory always uses index.jsonl */
121+ readonly outputFormat ?: string ;
120122 readonly graderTarget ?: string ;
121123 readonly model ?: string ;
122124 readonly outputMessages : number | 'all' ;
@@ -197,6 +199,43 @@ function normalizeFilter(value: unknown): string | readonly string[] | undefined
197199 return normalizeString ( value ) ;
198200}
199201
/**
 * Extensions that mark an `--output` value as a file path rather than a run
 * directory. Entries are lowercase and include the leading dot, which is the
 * shape `path.extname(...).toLowerCase()` produces.
 */
const LEGACY_OUTPUT_FILE_EXTENSIONS = new Set<string>([
  '.jsonl',
  '.json',
  '.xml',
  '.yaml',
  '.yml',
  '.html',
  '.htm',
]);

/**
 * Report whether a path ends in one of the legacy output-file extensions.
 * The comparison is case-insensitive.
 *
 * @param value - Path string to inspect.
 * @returns `true` when the path's extension is listed in
 *   `LEGACY_OUTPUT_FILE_EXTENSIONS`, otherwise `false`.
 */
function looksLikeLegacyOutputFilePath(value: string): boolean {
  const extension = path.extname(value).toLowerCase();
  return LEGACY_OUTPUT_FILE_EXTENSIONS.has(extension);
}
215+
/**
 * Build the `--export` hint for a path that was passed where a run directory
 * is expected. Paths whose extension is `.xml` (case-insensitive) get the
 * JUnit wording; every other path gets the generic "extra file" wording.
 *
 * Shared by both migration messages so their wording cannot drift apart.
 */
function exportHintForLegacyFile(filePath: string): string {
  return path.extname(filePath).toLowerCase() === '.xml'
    ? `Use --export ${filePath} for JUnit XML.`
    : `Use --export ${filePath} if you still need that extra file.`;
}

/**
 * Compose the error text shown when `--output` receives a file path instead
 * of a run directory.
 *
 * @param value - The offending `--output` value, echoed back verbatim.
 * @returns A two-line message: the complaint, then the `--export` hint
 *   followed by a reminder that `<dir>/index.jsonl` is always written.
 */
function outputFileMigrationMessage(value: string): string {
  const exportHint = exportHintForLegacyFile(value);
  return `--output expects a run directory, not a file path: ${value}\n${exportHint} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`;
}

/**
 * Compose the error text shown when the removed `--artifacts` flag is used.
 *
 * @param artifactsDir - Value that was passed to `--artifacts`.
 * @param outputDir - Value of `--output`, if any. When it looks like a legacy
 *   output file path, the message also suggests moving it to `--export` and
 *   appends a combined migration example.
 * @returns The message lines joined with `\n`.
 */
function artifactsMigrationMessage(artifactsDir: string, outputDir?: string): string {
  const lines = [`--artifacts was removed from agentv eval. Use --output ${artifactsDir} instead.`];
  if (outputDir && looksLikeLegacyOutputFilePath(outputDir)) {
    lines.push(exportHintForLegacyFile(outputDir));
    lines.push(`Migration example: --output ${artifactsDir} --export ${outputDir}`);
  }
  return lines.join('\n');
}
238+
200239/**
201240 * Check whether an eval file's tags satisfy --tag / --exclude-tag filters.
202241 *
@@ -286,7 +325,6 @@ function normalizeOptions(
286325 const configWorkers = config ?. execution ?. workers ;
287326 const workers = cliWorkers ?? configWorkers ?? 0 ;
288327
289- // --output is now a single optional string (artifact directory)
290328 const cliOutputDir = normalizeString ( rawOptions . output ) ;
291329
292330 // --export is the new repeatable flag for additional output files
@@ -324,9 +362,9 @@ function normalizeOptions(
324362 const configCacheEnabled = config ?. cache ?. enabled ;
325363 const configCachePath = normalizeString ( config ?. cache ?. path ) ;
326364
327- // Output dir: CLI --out > config output.dir > auto-generated
365+ // Output dir: CLI --output > config output.dir > auto-generated
328366 const cliOut = normalizeString ( rawOptions . out ) ;
329- const configOut = config ?. output ?. dir ;
367+ const configOutputDir = normalizeString ( config ?. output ?. dir ) ;
330368 const cliWorkspacePath = normalizeString ( rawOptions . workspacePath ) ;
331369 const cliWorkspaceModeRaw = normalizeString ( rawOptions . workspaceMode ) ;
332370 const cliWorkspaceMode = normalizeWorkspaceMode ( rawOptions . workspaceMode ) ;
@@ -346,8 +384,8 @@ function normalizeOptions(
346384 targetsPath : normalizeString ( rawOptions . targets ) ,
347385 filter : normalizeFilter ( rawOptions . filter ) ,
348386 workers : workers > 0 ? workers : undefined ,
349- outputDir : cliOutputDir ,
350- outPath : cliOut ?? configOut ,
387+ outputDir : cliOutputDir ?? configOutputDir ,
388+ removedOut : cliOut ,
351389 exportPaths,
352390 dryRun : normalizeBoolean ( rawOptions . dryRun ) ,
353391 dryRunDelay : normalizeNumber ( rawOptions . dryRunDelay , 0 ) ,
@@ -395,6 +433,7 @@ function normalizeOptions(
395433 config ?. execution ?. keepWorkspaces === true ,
396434 benchmarkJson : normalizeString ( rawOptions . benchmarkJson ) ,
397435 artifacts : normalizeString ( rawOptions . artifacts ) ,
436+ outputFormat : normalizeString ( rawOptions . outputFormat ) ,
398437 graderTarget : normalizeString ( rawOptions . graderTarget ) ,
399438 model : normalizeString ( rawOptions . model ) ,
400439 outputMessages : normalizeOutputMessages ( normalizeString ( rawOptions . outputMessages ) ) ,
@@ -1044,6 +1083,27 @@ export async function runEvalCommand(
10441083 throw new Error ( '--grader-target agentv requires --model (e.g., --model openai:gpt-5-mini)' ) ;
10451084 }
10461085
1086+ if ( options . removedOut ) {
1087+ throw new Error (
1088+ [
1089+ '--out was removed from agentv eval. Use --output <dir> for the canonical run directory.' ,
1090+ 'If you need an additional flat file, add --export <file>.' ,
1091+ `Migration example: --out ${ options . removedOut } -> --output <dir> --export ${ options . removedOut } ` ,
1092+ ] . join ( '\n' ) ,
1093+ ) ;
1094+ }
1095+ if ( options . outputFormat ) {
1096+ throw new Error (
1097+ '--output-format was removed from agentv eval. The run directory always writes index.jsonl; use --export <file> for JSON, XML/JUnit, YAML, or HTML copies.' ,
1098+ ) ;
1099+ }
1100+ if ( options . artifacts ) {
1101+ throw new Error ( artifactsMigrationMessage ( options . artifacts , options . outputDir ) ) ;
1102+ }
1103+ if ( options . outputDir && looksLikeLegacyOutputFilePath ( options . outputDir ) ) {
1104+ throw new Error ( outputFileMigrationMessage ( options . outputDir ) ) ;
1105+ }
1106+
10471107 // --retry-errors: resume from a previous run by re-running execution_error and missing test cases.
10481108 // Uses an exclusion filter to skip already-completed (non-error) cases, which naturally includes
10491109 // both error cases and cases that never ran (e.g., due to a crash or interrupt).
@@ -1073,7 +1133,7 @@ export async function runEvalCommand(
10731133 // last-known run dir for this cwd from .agentv/cache.json. Matches promptfoo's
10741134 // `--resume [evalId]` and OpenCompass's `-r [timestamp]` "latest by default"
10751135 // convention. The cache pointer is written by saveRunCache after every eval.
1076- if ( options . resume && ! options . retryErrors && ! options . outputDir && ! options . artifacts ) {
1136+ if ( options . resume && ! options . retryErrors && ! options . outputDir ) {
10771137 const cachedDir = await resolveCachedRunDir ( cwd ) ;
10781138 if ( cachedDir ) {
10791139 options = { ...options , outputDir : cachedDir } ;
@@ -1088,7 +1148,7 @@ export async function runEvalCommand(
10881148 let resumeSkipKeys : Set < string > | undefined ;
10891149 let isResumeAppend = false ;
10901150 if ( options . resume && ! options . retryErrors ) {
1091- const explicitResumeDir = options . outputDir ?? options . artifacts ;
1151+ const explicitResumeDir = options . outputDir ;
10921152 if ( explicitResumeDir ) {
10931153 const resumeIndexPath = path . join ( path . resolve ( explicitResumeDir ) , 'index.jsonl' ) ;
10941154 if ( existsSync ( resumeIndexPath ) ) {
@@ -1138,50 +1198,27 @@ export async function runEvalCommand(
11381198 console . log ( `Repository root: ${ repoRoot } ` ) ;
11391199 }
11401200
1141- // Emit deprecation warnings for legacy flags
1142- if ( options . outPath ) {
1143- console . warn ( 'Warning: --out is deprecated. Use --output <dir> to set the artifact directory.' ) ;
1144- }
1145- if ( options . artifacts ) {
1146- console . warn (
1147- 'Warning: --artifacts is deprecated. Use --output <dir> to set the artifact directory.' ,
1148- ) ;
1149- }
1201+ // Emit deprecation warnings for remaining legacy flags.
11501202 if ( options . benchmarkJson ) {
11511203 console . warn (
11521204 'Warning: --benchmark-json is deprecated. benchmark.json is always written to the artifact directory.' ,
11531205 ) ;
11541206 }
1155- if ( normalizeString ( input . rawOptions . outputFormat ) ) {
1156- console . warn (
1157- 'Warning: --output-format is deprecated. The artifact directory always uses JSONL.' ,
1158- ) ;
1159- }
11601207
11611208 // Resolve artifact directory (runDir) and primary output path.
1162- // Precedence: --output > --artifacts (deprecated) > --out (deprecated) > default
1163- const explicitDir = options . outputDir ?? options . artifacts ;
1209+ // Precedence: --output > config output.dir > default
1210+ const explicitDir = options . outputDir ;
11641211 let runDir : string ;
11651212 let outputPath : string ;
1166- let usesDefaultArtifactWorkspace : boolean ;
11671213
11681214 if ( explicitDir ) {
1169- // --output <dir> or --artifacts <dir>: use as artifact directory
11701215 runDir = path . resolve ( explicitDir ) ;
11711216 mkdirSync ( runDir , { recursive : true } ) ;
11721217 outputPath = path . join ( runDir , 'index.jsonl' ) ;
1173- usesDefaultArtifactWorkspace = true ;
1174- } else if ( options . outPath ) {
1175- // --out <path> (deprecated): use dirname as artifact dir
1176- outputPath = path . resolve ( options . outPath ) ;
1177- runDir = path . dirname ( outputPath ) ;
1178- mkdirSync ( runDir , { recursive : true } ) ;
1179- usesDefaultArtifactWorkspace = false ;
11801218 } else {
11811219 // Default: .agentv/results/runs/<experiment>/<timestamp>/
11821220 outputPath = buildDefaultOutputPathForExperiment ( cwd , options . experiment ) ;
11831221 runDir = path . dirname ( outputPath ) ;
1184- usesDefaultArtifactWorkspace = true ;
11851222 }
11861223
11871224 // Initialize OTel exporter if --export-otel flag is set or file export flags are used
@@ -1493,7 +1530,7 @@ export async function runEvalCommand(
14931530 // has execution_status: ok. The end-of-run write preserves this value via
14941531 // readPlannedTestCount inside aggregateRunDir / writeArtifactsFromResults.
14951532 // Skip on resume — we want to preserve the *original* planned count.
1496- if ( ! isResumeAppend && usesDefaultArtifactWorkspace && totalEvalCount > 0 ) {
1533+ if ( ! isResumeAppend && totalEvalCount > 0 ) {
14971534 const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
14981535 await writeInitialBenchmarkArtifact ( runDir , {
14991536 evalFile,
@@ -1659,7 +1696,7 @@ export async function runEvalCommand(
16591696
16601697 // When resuming, compute summary from ALL results (old + new, deduplicated)
16611698 let summaryResults = allResults ;
1662- if ( isResumeAppend && usesDefaultArtifactWorkspace ) {
1699+ if ( isResumeAppend ) {
16631700 const content = await readFile ( outputPath , 'utf8' ) ;
16641701 summaryResults = deduplicateByTestIdTarget ( parseJsonlResults ( content ) ) ;
16651702 }
@@ -1687,7 +1724,7 @@ export async function runEvalCommand(
16871724 }
16881725
16891726 // Write artifacts to the run directory (always, not conditional on flags)
1690- if ( usesDefaultArtifactWorkspace && allResults . length > 0 ) {
1727+ if ( allResults . length > 0 ) {
16911728 const evalFile = activeTestFiles . length === 1 ? activeTestFiles [ 0 ] : '' ;
16921729 const sourceTests = activeTestFiles . flatMap (
16931730 ( activeTestFile ) => fileMetadata . get ( activeTestFile ) ?. testCases ?? [ ] ,
0 commit comments