@@ -23,7 +23,7 @@ export const EvalModule = {
2323
2424interface Options {
2525 environment ?: string ;
26- model : string ;
26+ model : string [ ] ;
2727 runner : RunnerName ;
2828 local : boolean ;
2929 limit : number ;
@@ -57,8 +57,9 @@ function builder(argv: Argv): Argv<Options> {
5757 } )
5858 . option ( 'model' , {
5959 type : 'string' ,
60- default : DEFAULT_MODEL_NAME ,
61- descript : 'Model to use when generating code' ,
60+ array : true ,
61+ default : [ DEFAULT_MODEL_NAME ] ,
62+ describe : 'Model(s) to use when generating code' ,
6263 } )
6364 // Option is a noop right now when using a remote environment.
6465 . option ( 'runner' , {
@@ -206,46 +207,60 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
206207 process . on ( 'SIGTERM' , ( ) => abortCtrl . abort ( ) ) ;
207208 process . on ( 'exit' , ( ) => abortCtrl . abort ( ) ) ;
208209
209- try {
210- const runInfo = await generateCodeAndAssess ( {
211- runner : cliArgs . runner ,
212- model : cliArgs . model ,
213- environment : {
214- configPath : BUILT_IN_ENVIRONMENTS . get ( cliArgs . environment ) || cliArgs . environment ,
215- } ,
216- localMode : cliArgs . local ,
217- limit : cliArgs . limit ,
218- concurrency : cliArgs . concurrency as number ,
219- reportName : cliArgs . reportName ! ,
220- skipScreenshots : ! ! cliArgs . skipScreenshots ,
221- startMcp : cliArgs . mcp ,
222- ragEndpoint : cliArgs . ragEndpoint ,
223- outputDirectory : cliArgs . outputDirectory ,
224- promptFilter : cliArgs . promptFilter ,
225- labels : cliArgs . labels || [ ] ,
226- skipAxeTesting : ! ! cliArgs . skipAxeTesting ,
227- enableUserJourneyTesting : cliArgs . enableUserJourneyTesting ,
228- enableAutoCsp : cliArgs . enableAutoCsp ,
229- logging : cliArgs . logging ,
230- autoraterModel : cliArgs . autoraterModel ,
231- skipAiSummary : cliArgs . skipAiSummary ,
232- skipLighthouse : cliArgs . skipLighthouse ,
233- maxBuildRepairAttempts : cliArgs . maxBuildRepairAttempts ,
234- maxTestRepairAttempts : cliArgs . maxTestRepairAttempts ,
235- promptTimeoutRetries : cliArgs . promptTimeoutRetries ,
236- abortSignal : abortCtrl . signal ,
237- } ) ;
210+ const models = cliArgs . model ;
211+ const baseReportName = cliArgs . reportName ! ;
212+
213+ for ( const model of models ) {
214+ const reportName =
215+ models . length > 1
216+ ? `${baseReportName}--${model.replace(/[^a-zA-Z0-9-]/g, '-')}`
217+ : baseReportName ;
218+
219+ if ( models . length > 1 ) {
220+ console . log ( chalk . cyan ( `\nStarting evaluation with model: ${ model } \n` ) ) ;
221+ }
222+
223+ try {
224+ const runInfo = await generateCodeAndAssess ( {
225+ runner : cliArgs . runner ,
226+ model,
227+ environment : {
228+ configPath : BUILT_IN_ENVIRONMENTS . get ( cliArgs . environment ) || cliArgs . environment ,
229+ } ,
230+ localMode : cliArgs . local ,
231+ limit : cliArgs . limit ,
232+ concurrency : cliArgs . concurrency as number ,
233+ reportName,
234+ skipScreenshots : ! ! cliArgs . skipScreenshots ,
235+ startMcp : cliArgs . mcp ,
236+ ragEndpoint : cliArgs . ragEndpoint ,
237+ outputDirectory : cliArgs . outputDirectory ,
238+ promptFilter : cliArgs . promptFilter ,
239+ labels : cliArgs . labels || [ ] ,
240+ skipAxeTesting : ! ! cliArgs . skipAxeTesting ,
241+ enableUserJourneyTesting : cliArgs . enableUserJourneyTesting ,
242+ enableAutoCsp : cliArgs . enableAutoCsp ,
243+ logging : cliArgs . logging ,
244+ autoraterModel : cliArgs . autoraterModel ,
245+ skipAiSummary : cliArgs . skipAiSummary ,
246+ skipLighthouse : cliArgs . skipLighthouse ,
247+ maxBuildRepairAttempts : cliArgs . maxBuildRepairAttempts ,
248+ maxTestRepairAttempts : cliArgs . maxTestRepairAttempts ,
249+ promptTimeoutRetries : cliArgs . promptTimeoutRetries ,
250+ abortSignal : abortCtrl . signal ,
251+ } ) ;
238252
239- logReportToConsole ( runInfo ) ;
240- await writeReportToDisk ( runInfo , runInfo . details . summary . environmentId , REPORTS_ROOT_DIR ) ;
241- } catch ( error : unknown ) {
242- if ( error instanceof UserFacingError ) {
243- console . error ( chalk . red ( error . message ) ) ;
244- } else {
245- console . error ( chalk . red ( 'An error occurred during the assessment process:' ) ) ;
246- console . error ( chalk . red ( error ) ) ;
247- if ( process . env . DEBUG === '1' && ( error as Partial < Error > ) . stack ) {
248- console . error ( chalk . red ( ( error as Error ) . stack ) ) ;
253+ logReportToConsole ( runInfo ) ;
254+ await writeReportToDisk ( runInfo , runInfo . details . summary . environmentId , REPORTS_ROOT_DIR ) ;
255+ } catch ( error : unknown ) {
256+ if ( error instanceof UserFacingError ) {
257+ console . error ( chalk . red ( error . message ) ) ;
258+ } else {
259+ console . error ( chalk . red ( 'An error occurred during the assessment process:' ) ) ;
260+ console . error ( chalk . red ( error ) ) ;
261+ if ( process . env . DEBUG === '1' && ( error as Partial < Error > ) . stack ) {
262+ console . error ( chalk . red ( ( error as Error ) . stack ) ) ;
263+ }
249264 }
250265 }
251266 }
0 commit comments