@@ -3,6 +3,7 @@ import type { EvaluationLevel, Evaluator, EvaluatorConfig } from '../../schema';
33import { EvaluationLevelSchema , EvaluatorSchema } from '../../schema' ;
44import { getErrorMessage } from '../errors' ;
55import type { RemovalPreview , RemovalResult , SchemaChange } from '../operations/remove/types' ;
6+ import { renderCodeBasedEvaluatorTemplate } from '../templates/EvaluatorRenderer' ;
67import {
78 LEVEL_PLACEHOLDERS ,
89 RATING_SCALE_PRESETS ,
@@ -12,6 +13,9 @@ import {
1213import { BasePrimitive } from './BasePrimitive' ;
1314import type { AddResult , AddScreenComponent , RemovableResource } from './types' ;
1415import type { Command } from '@commander-js/extra-typings' ;
16+ import { existsSync } from 'node:fs' ;
17+ import { rm } from 'node:fs/promises' ;
18+ import { dirname , join } from 'node:path' ;
1519
1620export interface AddEvaluatorOptions {
1721 name : string ;
@@ -22,6 +26,9 @@ export interface AddEvaluatorOptions {
2226
2327export type RemovableEvaluator = RemovableResource ;
2428
// Entrypoint written into the `managed` config of a code-based evaluator built from CLI flags.
29+ const DEFAULT_CODE_ENTRYPOINT = 'lambda_function.handler' ;
// Timeout (seconds) used for a managed code-based evaluator when no --timeout value is supplied.
30+ const DEFAULT_CODE_TIMEOUT = 60 ;
31+
2532/**
2633 * EvaluatorPrimitive handles all evaluator add/remove operations.
2734 */
@@ -31,9 +38,20 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
3138 override readonly article = 'an' ;
3239 readonly primitiveSchema = EvaluatorSchema ;
3340
34- async add ( options : AddEvaluatorOptions ) : Promise < AddResult < { evaluatorName : string } > > {
41+ async add ( options : AddEvaluatorOptions ) : Promise < AddResult < { evaluatorName : string ; codePath ?: string } > > {
3542 try {
3643 const evaluator = await this . createEvaluator ( options ) ;
44+
45+ // Scaffold code for managed code-based evaluators
46+ if ( options . config . codeBased ?. managed ) {
47+ const configRoot = findConfigRoot ( ) ! ;
48+ const projectRoot = dirname ( configRoot ) ;
49+ const codeLocation = options . config . codeBased . managed . codeLocation ;
50+ const targetDir = join ( projectRoot , codeLocation ) ;
51+ await renderCodeBasedEvaluatorTemplate ( options . name , targetDir ) ;
52+ return { success : true , evaluatorName : evaluator . name , codePath : codeLocation } ;
53+ }
54+
3755 return { success : true , evaluatorName : evaluator . name } ;
3856 } catch ( err ) {
3957 return { success : false , error : getErrorMessage ( err ) } ;
@@ -59,6 +77,17 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
5977 } ;
6078 }
6179
80+ // Delete scaffolded code directory for managed code-based evaluators
81+ const evaluator = project . evaluators [ index ] ! ;
82+ if ( evaluator . config . codeBased ?. managed ) {
83+ const configRoot = findConfigRoot ( ) ! ;
84+ const projectRoot = dirname ( configRoot ) ;
85+ const codeDir = join ( projectRoot , evaluator . config . codeBased . managed . codeLocation ) ;
86+ if ( existsSync ( codeDir ) ) {
87+ await rm ( codeDir , { recursive : true , force : true } ) ;
88+ }
89+ }
90+
6291 project . evaluators . splice ( index , 1 ) ;
6392 await this . writeProjectSpec ( project ) ;
6493
@@ -77,6 +106,7 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
77106 }
78107
79108 const summary : string [ ] = [ `Removing evaluator: ${ evaluatorName } ` ] ;
109+ const directoriesToDelete : string [ ] = [ ] ;
80110 const schemaChanges : SchemaChange [ ] = [ ] ;
81111
82112 const referencingConfigs = project . onlineEvalConfigs . filter ( c => c . evaluators . includes ( evaluatorName ) ) ;
@@ -86,6 +116,18 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
86116 ) ;
87117 }
88118
119+ // Preview code directory deletion for managed code-based evaluators
120+ if ( evaluator . config . codeBased ?. managed ) {
121+ const configRoot = findConfigRoot ( ) ! ;
122+ const projectRoot = dirname ( configRoot ) ;
123+ const codeLocation = evaluator . config . codeBased . managed . codeLocation ;
124+ const codeDir = join ( projectRoot , codeLocation ) ;
125+ if ( existsSync ( codeDir ) ) {
126+ directoriesToDelete . push ( codeLocation ) ;
127+ summary . push ( `Will delete directory: ${ codeLocation } ` ) ;
128+ }
129+ }
130+
89131 const afterSpec = {
90132 ...project ,
91133 evaluators : project . evaluators . filter ( e => e . name !== evaluatorName ) ,
@@ -97,7 +139,7 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
97139 after : afterSpec ,
98140 } ) ;
99141
100- return { summary, directoriesToDelete : [ ] , schemaChanges } ;
142+ return { summary, directoriesToDelete, schemaChanges } ;
101143 }
102144
103145 async getRemovable ( ) : Promise < RemovableEvaluator [ ] > {
@@ -124,17 +166,17 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
124166 addCmd
125167 . command ( this . kind )
126168 . description ( 'Add a custom evaluator to the project' )
127- . option ( '--name <name>' , 'Evaluator name [non-interactive]' )
128- . option ( '--level <level>' , 'Evaluation level: SESSION, TRACE, TOOL_CALL [non-interactive]' )
129- . option ( '--model <model>' , 'Bedrock model ID for LLM-as-a-Judge [non-interactive]' )
169+ . option ( '--name <name>' , 'Evaluator name' )
170+ . option ( '--level <level>' , 'Evaluation level: SESSION, TRACE, TOOL_CALL' )
171+ . option ( '--type <type>' , 'Evaluator type: llm-as-a-judge (default) or code-based' )
172+ . option ( '--model <model>' , '[LLM] Bedrock model ID for LLM-as-a-Judge' )
130173 . option (
131174 '--instructions <text>' ,
132- 'Evaluation prompt instructions (must include level-appropriate placeholders, e.g. {context}) [non-interactive]'
133- )
134- . option (
135- '--rating-scale <preset>' ,
136- `Rating scale preset: ${ presetIds . join ( ', ' ) } (default: 1-5-quality) [non-interactive]`
175+ '[LLM] Evaluation prompt instructions (must include level-appropriate placeholders, e.g. {context})'
137176 )
177+ . option ( '--rating-scale <preset>' , `[LLM] Rating scale preset: ${ presetIds . join ( ', ' ) } (default: 1-5-quality)` )
178+ . option ( '--lambda-arn <arn>' , '[Code-based] Existing Lambda function ARN (external)' )
179+ . option ( '--timeout <seconds>' , '[Code-based] Lambda timeout in seconds, 1-300 (default: 60)' )
138180 . option (
139181 '--config <path>' ,
140182 'Path to evaluator config JSON file (overrides --model, --instructions, --rating-scale) [non-interactive]'
@@ -144,9 +186,12 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
144186 async ( cliOptions : {
145187 name ?: string ;
146188 level ?: string ;
189+ type ?: string ;
147190 model ?: string ;
148191 instructions ?: string ;
149192 ratingScale ?: string ;
193+ lambdaArn ?: string ;
194+ timeout ?: string ;
150195 config ?: string ;
151196 json ?: boolean ;
152197 } ) => {
@@ -170,21 +215,40 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
170215 fail ( '--name and --level are required in non-interactive mode' ) ;
171216 }
172217
173- if ( ! cliOptions . config && ! cliOptions . model ) {
174- fail ( 'Either --config or --model is required' ) ;
175- }
176-
177218 const levelResult = EvaluationLevelSchema . safeParse ( cliOptions . level ) ;
178219 if ( ! levelResult . success ) {
179220 fail ( `Invalid --level "${ cliOptions . level } ". Must be one of: SESSION, TRACE, TOOL_CALL` ) ;
180221 }
181222
223+ const evalType = cliOptions . type ?? 'llm-as-a-judge' ;
224+ if ( evalType !== 'llm-as-a-judge' && evalType !== 'code-based' ) {
225+ fail ( `Invalid --type "${ evalType } ". Must be one of: llm-as-a-judge, code-based` ) ;
226+ }
227+
228+ // Cross-validate flags against evaluator type
229+ if ( evalType !== 'code-based' ) {
230+ if ( cliOptions . lambdaArn ) fail ( '--lambda-arn requires --type code-based' ) ;
231+ if ( cliOptions . timeout ) fail ( '--timeout requires --type code-based' ) ;
232+ }
233+ if ( evalType === 'code-based' ) {
234+ if ( cliOptions . model ) fail ( '--model cannot be used with --type code-based' ) ;
235+ if ( cliOptions . instructions ) fail ( '--instructions cannot be used with --type code-based' ) ;
236+ if ( cliOptions . ratingScale ) fail ( '--rating-scale cannot be used with --type code-based' ) ;
237+ }
238+
182239 let configJson : EvaluatorConfig ;
240+
183241 if ( cliOptions . config ) {
184242 const { readFileSync } = await import ( 'fs' ) ;
185243 configJson = JSON . parse ( readFileSync ( cliOptions . config , 'utf-8' ) ) as EvaluatorConfig ;
244+ } else if ( evalType === 'code-based' ) {
245+ configJson = this . buildCodeBasedConfig ( cliOptions . name ! , cliOptions . lambdaArn , cliOptions . timeout ) ;
186246 } else {
187- // --instructions is required when not using --config
247+ // LLM-as-a-Judge flow
248+ if ( ! cliOptions . model ) {
249+ fail ( 'Either --config or --model is required for LLM-as-a-Judge evaluators' ) ;
250+ }
251+
188252 if ( ! cliOptions . instructions ) {
189253 const level = levelResult . data ! ;
190254 const placeholders = LEVEL_PLACEHOLDERS [ level ] . map ( p => `{${ p } }` ) . join ( ', ' ) ;
@@ -194,21 +258,18 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
194258 ) ;
195259 }
196260
197- // Validate placeholders
198261 const placeholderCheck = validateInstructionPlaceholders ( cliOptions . instructions ! , levelResult . data ! ) ;
199262 if ( placeholderCheck !== true ) {
200263 fail ( placeholderCheck ) ;
201264 }
202265
203- // Resolve rating scale
204- let ratingScale : EvaluatorConfig [ 'llmAsAJudge' ] [ 'ratingScale' ] ;
266+ let ratingScale : NonNullable < EvaluatorConfig [ 'llmAsAJudge' ] > [ 'ratingScale' ] ;
205267 const scaleInput = cliOptions . ratingScale ?? '1-5-quality' ;
206268
207269 const preset = RATING_SCALE_PRESETS . find ( p => p . id === scaleInput ) ;
208270 if ( preset ) {
209271 ratingScale = preset . ratingScale ;
210272 } else {
211- // Try parsing as custom format: "1:Poor:Fails, 2:Fair:Partially meets" or "Pass:Meets, Fail:Does not"
212273 const isNumerical = / ^ \d / . test ( scaleInput . trim ( ) ) ;
213274 const parsed = parseCustomRatingScale ( scaleInput , isNumerical ? 'numerical' : 'categorical' ) ;
214275 if ( ! parsed . success ) {
@@ -239,7 +300,16 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
239300 if ( cliOptions . json ) {
240301 console . log ( JSON . stringify ( result ) ) ;
241302 } else if ( result . success ) {
242- console . log ( `Added evaluator '${ result . evaluatorName } '` ) ;
303+ if ( result . codePath ) {
304+ console . log ( `Created evaluator '${ result . evaluatorName } '` ) ;
305+ console . log ( ` Code: ${ result . codePath } lambda_function.py` ) ;
306+ console . log ( ` IAM: ${ result . codePath } execution-role-policy.json` ) ;
307+ console . log (
308+ `\n Next: Edit lambda_function.py with your evaluation logic, then run \`agentcore deploy\``
309+ ) ;
310+ } else {
311+ console . log ( `Added evaluator '${ result . evaluatorName } '` ) ;
312+ }
243313 } else {
244314 console . error ( result . error ) ;
245315 }
@@ -280,6 +350,28 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
280350 return null ;
281351 }
282352
/**
 * Builds the evaluator config for a code-based evaluator from CLI flag values.
 *
 * When `lambdaArn` is given, the result is an `external` config carrying only
 * that ARN; `timeoutStr` is not read in that case. Otherwise the result is a
 * `managed` config whose code lives under `app/<name>/` and which uses the
 * default entrypoint.
 *
 * @param name - Evaluator name; used to derive the managed `codeLocation`.
 * @param lambdaArn - ARN of an existing Lambda function, if one was supplied.
 * @param timeoutStr - Raw `--timeout` value in seconds. Must be a whole number
 *   from 1 to 300. Falls back to `DEFAULT_CODE_TIMEOUT` when omitted or empty.
 * @returns The `codeBased` evaluator config (external or managed).
 * @throws Error when `timeoutStr` is present but is not a whole number in 1-300.
 */
private buildCodeBasedConfig(name: string, lambdaArn?: string, timeoutStr?: string): EvaluatorConfig {
  if (lambdaArn) {
    return {
      codeBased: {
        external: { lambdaArn },
      },
    };
  }

  let timeoutSeconds = DEFAULT_CODE_TIMEOUT;
  if (timeoutStr) {
    const trimmed = timeoutStr.trim();
    // parseInt alone would accept "12abc" as 12 and turn "abc" into NaN, so
    // require the whole value to be digits before converting.
    const parsed = /^\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) : Number.NaN;
    if (!Number.isInteger(parsed) || parsed < 1 || parsed > 300) {
      throw new Error(`Invalid --timeout "${timeoutStr}". Must be a whole number of seconds between 1 and 300`);
    }
    timeoutSeconds = parsed;
  }

  return {
    codeBased: {
      managed: {
        codeLocation: `app/${name}/`,
        entrypoint: DEFAULT_CODE_ENTRYPOINT,
        timeoutSeconds,
        additionalPolicies: ['execution-role-policy.json'],
      },
    },
  };
}
374+
283375 private async createEvaluator ( options : AddEvaluatorOptions ) : Promise < Evaluator > {
284376 const project = await this . readProjectSpec ( ) ;
285377
0 commit comments