@@ -3,6 +3,12 @@ import type { EvaluationLevel, Evaluator, EvaluatorConfig } from '../../schema';
33import { EvaluationLevelSchema , EvaluatorSchema } from '../../schema' ;
44import { getErrorMessage } from '../errors' ;
55import type { RemovalPreview , RemovalResult , SchemaChange } from '../operations/remove/types' ;
6+ import {
7+ LEVEL_PLACEHOLDERS ,
8+ RATING_SCALE_PRESETS ,
9+ parseCustomRatingScale ,
10+ validateInstructionPlaceholders ,
11+ } from '../tui/screens/evaluator/types' ;
612import { BasePrimitive } from './BasePrimitive' ;
713import type { AddResult , AddScreenComponent , RemovableResource } from './types' ;
814import type { Command } from '@commander-js/extra-typings' ;
@@ -113,21 +119,31 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
113119 }
114120
115121 registerCommands ( addCmd : Command , removeCmd : Command ) : void {
122+ const presetIds = RATING_SCALE_PRESETS . map ( p => p . id ) ;
123+
116124 addCmd
117125 . command ( this . kind )
118126 . description ( 'Add a custom evaluator to the project' )
119127 . option ( '--name <name>' , 'Evaluator name' )
120128 . option ( '--level <level>' , 'Evaluation level: SESSION, TRACE, TOOL_CALL' )
121129 . option ( '--model <model>' , 'Bedrock model ID for LLM-as-a-Judge' )
122- . option ( '--instructions <text>' , 'Evaluation prompt instructions' )
123- . option ( '--config <path>' , 'Path to evaluator config JSON file (overrides --model, --instructions)' )
130+ . option (
131+ '--instructions <text>' ,
132+ 'Evaluation prompt instructions (must include level-appropriate placeholders, e.g. {context})'
133+ )
134+ . option ( '--rating-scale <preset>' , `Rating scale preset: ${ presetIds . join ( ', ' ) } (default: 1-5-quality)` )
135+ . option (
136+ '--config <path>' ,
137+ 'Path to evaluator config JSON file (overrides --model, --instructions, --rating-scale)'
138+ )
124139 . option ( '--json' , 'Output as JSON' )
125140 . action (
126141 async ( cliOptions : {
127142 name ?: string ;
128143 level ?: string ;
129144 model ?: string ;
130145 instructions ?: string ;
146+ ratingScale ?: string ;
131147 config ?: string ;
132148 json ?: boolean ;
133149 } ) => {
@@ -138,62 +154,82 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
138154 }
139155
140156 if ( cliOptions . name || cliOptions . json ) {
141- if ( ! cliOptions . name || ! cliOptions . level ) {
142- const error = '--name and --level are required in non-interactive mode' ;
/**
 * Report a fatal CLI validation error and terminate the process.
 *
 * When `--json` was passed, prints `{ success: false, error }` as JSON via
 * `console.log`; otherwise prints the plain message via `console.error`.
 * In both cases the process then exits with status code 1.
 *
 * The binding carries an explicit `=> never` function type on purpose:
 * TypeScript only treats a call as never-returning for control-flow
 * narrowing when the callee's declaration has an explicit type annotation.
 * Annotating just the arrow function's return type would not let the
 * compiler narrow values after a guarded `fail(...)` call.
 *
 * @param error - Human-readable description of the invalid CLI input.
 */
const fail: (error: string) => never = error => {
  if (cliOptions.json) {
    console.log(JSON.stringify({ success: false, error }));
  } else {
    console.error(error);
  }
  // process.exit never returns, which is what satisfies the `never` type.
  process.exit(1);
};
165+
166+ if ( ! cliOptions . name || ! cliOptions . level ) {
167+ fail ( '--name and --level are required in non-interactive mode' ) ;
149168 }
150169
151170 if ( ! cliOptions . config && ! cliOptions . model ) {
152- const error = 'Either --config or --model is required' ;
153- if ( cliOptions . json ) {
154- console . log ( JSON . stringify ( { success : false , error } ) ) ;
155- } else {
156- console . error ( error ) ;
157- }
158- process . exit ( 1 ) ;
171+ fail ( 'Either --config or --model is required' ) ;
159172 }
160173
161174 const levelResult = EvaluationLevelSchema . safeParse ( cliOptions . level ) ;
162175 if ( ! levelResult . success ) {
163- const error = `Invalid --level "${ cliOptions . level } ". Must be one of: SESSION, TRACE, TOOL_CALL` ;
164- if ( cliOptions . json ) {
165- console . log ( JSON . stringify ( { success : false , error } ) ) ;
166- } else {
167- console . error ( error ) ;
168- }
169- process . exit ( 1 ) ;
176+ fail ( `Invalid --level "${ cliOptions . level } ". Must be one of: SESSION, TRACE, TOOL_CALL` ) ;
170177 }
171178
172179 let configJson : EvaluatorConfig ;
173180 if ( cliOptions . config ) {
174181 const { readFileSync } = await import ( 'fs' ) ;
175182 configJson = JSON . parse ( readFileSync ( cliOptions . config , 'utf-8' ) ) as EvaluatorConfig ;
176183 } else {
184+ // --instructions is required when not using --config
185+ if ( ! cliOptions . instructions ) {
186+ const level = levelResult . data ! ;
187+ const placeholders = LEVEL_PLACEHOLDERS [ level ] . map ( p => `{${ p } }` ) . join ( ', ' ) ;
188+ fail (
189+ `--instructions is required in non-interactive mode (or use --config). ` +
190+ `Must include at least one placeholder for ${ level } : ${ placeholders } `
191+ ) ;
192+ }
193+
194+ // Validate placeholders
195+ const placeholderCheck = validateInstructionPlaceholders ( cliOptions . instructions ! , levelResult . data ! ) ;
196+ if ( placeholderCheck !== true ) {
197+ fail ( placeholderCheck ) ;
198+ }
199+
200+ // Resolve rating scale
201+ let ratingScale : EvaluatorConfig [ 'llmAsAJudge' ] [ 'ratingScale' ] ;
202+ const scaleInput = cliOptions . ratingScale ?? '1-5-quality' ;
203+
204+ const preset = RATING_SCALE_PRESETS . find ( p => p . id === scaleInput ) ;
205+ if ( preset ) {
206+ ratingScale = preset . ratingScale ;
207+ } else {
208+ // Try parsing as custom format: "1:Poor:Fails, 2:Fair:Partially meets" or "Pass:Meets, Fail:Does not"
209+ const isNumerical = / ^ \d / . test ( scaleInput . trim ( ) ) ;
210+ const parsed = parseCustomRatingScale ( scaleInput , isNumerical ? 'numerical' : 'categorical' ) ;
211+ if ( ! parsed . success ) {
212+ fail (
213+ `Invalid --rating-scale "${ scaleInput } ". Use a preset (${ presetIds . join ( ', ' ) } ) ` +
214+ `or custom format: "1:Label:Definition, 2:Label:Definition" (numerical) ` +
215+ `or "Label:Definition, Label:Definition" (categorical)`
216+ ) ;
217+ }
218+ ratingScale = parsed . success ? parsed . ratingScale : undefined ! ;
219+ }
220+
177221 configJson = {
178222 llmAsAJudge : {
179223 model : cliOptions . model ! ,
180- instructions : cliOptions . instructions ?? `Evaluate the quality. Context: {context}` ,
181- ratingScale : {
182- numerical : [
183- { value : 1 , label : 'Poor' , definition : 'Fails to meet expectations' } ,
184- { value : 2 , label : 'Fair' , definition : 'Partially meets expectations' } ,
185- { value : 3 , label : 'Good' , definition : 'Meets expectations' } ,
186- { value : 4 , label : 'Very Good' , definition : 'Exceeds expectations' } ,
187- { value : 5 , label : 'Excellent' , definition : 'Far exceeds expectations' } ,
188- ] ,
189- } ,
224+ instructions : cliOptions . instructions ! ,
225+ ratingScale,
190226 } ,
191227 } ;
192228 }
193229
194230 const result = await this . add ( {
195- name : cliOptions . name ,
196- level : levelResult . data ,
231+ name : cliOptions . name ! ,
232+ level : levelResult . data ! ,
197233 config : configJson ,
198234 } ) ;
199235
0 commit comments