@@ -126,6 +126,7 @@ interface NormalizedOptions {
126126 readonly transcript ?: string ;
127127 readonly experiment ?: string ;
128128 readonly budgetUsd ?: number ;
129+ readonly sourceMetadataByEvalFile ?: ReadonlyMap < string , Record < string , unknown > > ;
129130}
130131
131132function normalizeBoolean ( value : unknown ) : boolean {
@@ -197,6 +198,35 @@ function normalizeFilter(value: unknown): string | readonly string[] | undefined
197198 return normalizeString ( value ) ;
198199}
199200
/**
 * Type guard: true for any non-null value whose `typeof` is `'object'` and
 * which is not an array.
 */
function isSourceMetadataRecord(candidate: unknown): candidate is Record<string, unknown> {
  return typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);
}

/**
 * Normalizes the raw `sourceMetadataByEvalFile` option into a map keyed by
 * resolved eval-file path.
 *
 * Accepts either a `Map` or a plain object. An entry is kept only when its key
 * is a string and its value is a non-null, non-array object; everything else is
 * silently dropped. Kept keys are passed through `path.resolve`, so relative
 * keys are resolved against the current working directory. If two keys resolve
 * to the same path, the later entry wins.
 *
 * @param value - Raw, unvalidated option value.
 * @returns A map from resolved path to metadata object, or `undefined` when
 *   `value` is neither a `Map` nor a plain object, or when no entry survives
 *   filtering.
 */
function normalizeSourceMetadataByEvalFile(
  value: unknown,
): ReadonlyMap<string, Record<string, unknown>> | undefined {
  let rawEntries: Iterable<readonly [unknown, unknown]>;
  if (value instanceof Map) {
    rawEntries = value.entries();
  } else if (isSourceMetadataRecord(value)) {
    // Object.entries only yields string keys, so the key check below is a no-op here.
    rawEntries = Object.entries(value);
  } else {
    return undefined;
  }

  const normalized = new Map<string, Record<string, unknown>>();
  for (const [key, metadata] of rawEntries) {
    if (typeof key === 'string' && isSourceMetadataRecord(metadata)) {
      normalized.set(path.resolve(key), metadata);
    }
  }

  // An empty result is reported as "no metadata" rather than as an empty map.
  return normalized.size > 0 ? normalized : undefined;
}
229+
200230/**
201231 * Check whether an eval file's tags satisfy --tag / --exclude-tag filters.
202232 *
@@ -404,9 +434,30 @@ function normalizeOptions(
404434 transcript : normalizeString ( rawOptions . transcript ) ,
405435 experiment : normalizeString ( rawOptions . experiment ) ,
406436 budgetUsd : normalizeOptionalNumber ( rawOptions . budgetUsd ) ,
437+ sourceMetadataByEvalFile : normalizeSourceMetadataByEvalFile (
438+ rawOptions . sourceMetadataByEvalFile ,
439+ ) ,
407440 } satisfies NormalizedOptions ;
408441}
409442
/**
 * Returns `result` with the source metadata registered for `testFilePath`
 * merged into `result.metadata`.
 *
 * The lookup key is `path.resolve(testFilePath)`. When `options` carries no
 * metadata map, or the map has no (truthy) entry for that path, the original
 * `result` object is returned unchanged. Otherwise a new object is returned;
 * on key collisions the source metadata overrides the existing
 * `result.metadata` values.
 *
 * @param result - Evaluation result to decorate.
 * @param testFilePath - Path of the eval file the result belongs to.
 * @param options - Normalized options holding `sourceMetadataByEvalFile`.
 * @returns The original result, or a shallow copy with merged metadata.
 */
function withSourceMetadata(
  result: EvaluationResult,
  testFilePath: string,
  options: NormalizedOptions,
): EvaluationResult {
  const metadataByFile = options.sourceMetadataByEvalFile;
  if (metadataByFile == null) {
    return result;
  }

  const extraMetadata = metadataByFile.get(path.resolve(testFilePath));
  if (!extraMetadata) {
    return result;
  }

  // Spread order matters: source metadata is applied last so it wins on conflicts.
  const mergedMetadata = { ...result.metadata, ...extraMetadata };
  return { ...result, metadata: mergedMetadata };
}
460+
410461async function ensureFileExists ( filePath : string , description : string ) : Promise < void > {
411462 try {
412463 await access ( filePath , constants . F_OK ) ;
@@ -919,9 +970,10 @@ async function runSingleEvalFile(params: {
919970 // Trim output messages for results JSONL based on --output-messages.
920971 // Each message is trimmed to { role, content } only (no toolCalls, startTime, etc.).
921972 // Full output with tool calls goes to OTel.
922- const trimmedOutput = trimOutputMessages ( result . output , options . outputMessages ) ;
973+ const resultWithMetadata = withSourceMetadata ( result , testFilePath , options ) ;
974+ const trimmedOutput = trimOutputMessages ( resultWithMetadata . output , options . outputMessages ) ;
923975 const trimmedResult : EvaluationResult = {
924- ...result ,
976+ ...resultWithMetadata ,
925977 output : trimmedOutput ,
926978 } ;
927979 await outputWriter . append ( trimmedResult ) ;
@@ -976,7 +1028,7 @@ async function runSingleEvalFile(params: {
9761028 } ,
9771029 } ) ;
9781030
979- return { results : [ ... results ] } ;
1031+ return { results : results . map ( ( result ) => withSourceMetadata ( result , testFilePath , options ) ) } ;
9801032}
9811033
9821034export interface RunEvalResult {
@@ -1529,9 +1581,11 @@ export async function runEvalCommand(
15291581 target : selection . targetName ,
15301582 } ) ) ;
15311583 for ( const r of skippedResults ) {
1532- await outputWriter . append ( r ) ;
1584+ await outputWriter . append ( withSourceMetadata ( r , testFilePath , options ) ) ;
15331585 }
1534- allResults . push ( ...skippedResults ) ;
1586+ allResults . push (
1587+ ...skippedResults . map ( ( r ) => withSourceMetadata ( r , testFilePath , options ) ) ,
1588+ ) ;
15351589 }
15361590 continue ;
15371591 }
@@ -1614,21 +1668,27 @@ export async function runEvalCommand(
16141668 console . error (
16151669 `\n[ERROR] ⚠ Eval file failed: ${ path . basename ( testFilePath ) } — ${ message } \n` ,
16161670 ) ;
1617- const errorResults : EvaluationResult [ ] = filteredTestCases . map ( ( testCase ) => ( {
1618- timestamp : new Date ( ) . toISOString ( ) ,
1619- testId : testCase . id ,
1620- score : 0 ,
1621- assertions : [ ] ,
1622- output : [ ] ,
1623- scores : [ ] ,
1624- error : message ,
1625- executionStatus : 'execution_error' as const ,
1626- failureStage : 'setup' as const ,
1627- failureReasonCode : 'setup_error' as const ,
1628- durationMs : 0 ,
1629- tokenUsage : { input : 0 , output : 0 , inputTokens : 0 , outputTokens : 0 } ,
1630- target : selection . targetName ,
1631- } ) ) ;
1671+ const errorResults : EvaluationResult [ ] = filteredTestCases . map ( ( testCase ) =>
1672+ withSourceMetadata (
1673+ {
1674+ timestamp : new Date ( ) . toISOString ( ) ,
1675+ testId : testCase . id ,
1676+ score : 0 ,
1677+ assertions : [ ] ,
1678+ output : [ ] ,
1679+ scores : [ ] ,
1680+ error : message ,
1681+ executionStatus : 'execution_error' as const ,
1682+ failureStage : 'setup' as const ,
1683+ failureReasonCode : 'setup_error' as const ,
1684+ durationMs : 0 ,
1685+ tokenUsage : { input : 0 , output : 0 } ,
1686+ target : selection . targetName ,
1687+ } ,
1688+ testFilePath ,
1689+ options ,
1690+ ) ,
1691+ ) ;
16321692 for ( const errResult of errorResults ) {
16331693 await outputWriter . append ( errResult ) ;
16341694 }
0 commit comments