feat(evaluator): Add kmsKeyArn support for custom evaluator

aws-aditya21 · aws-aditya21 · commit 9bc2d7065c0f · 2026-04-29T16:41:16.000-07:00
diff --git a/src/cli/aws/agentcore-control.ts b/src/cli/aws/agentcore-control.ts
@@ -463,6 +463,7 @@ export interface GetEvaluatorResult {
     llmAsAJudge?: GetEvaluatorLlmConfig;
     codeBased?: GetEvaluatorCodeBasedConfig;
   };
+  kmsKeyArn?: string;
   tags?: Record<string, string>;
 }
 
@@ -541,6 +542,7 @@ export async function getEvaluator(options: GetEvaluatorOptions): Promise<GetEva
     status: response.status ?? 'UNKNOWN',
     description: response.description,
     evaluatorConfig,
+    kmsKeyArn: response.kmsKeyArn,
     tags,
   };
 }
diff --git a/src/cli/commands/import/__tests__/import-evaluator.test.ts b/src/cli/commands/import/__tests__/import-evaluator.test.ts
@@ -210,6 +210,49 @@ describe('toEvaluatorSpec', () => {
 
     expect(result.tags).toBeUndefined();
   });
+
+  it('forwards kmsKeyArn when present', () => { // toEvaluatorSpec must carry the key ARN from the fetched evaluator onto the local spec
+    const detail: GetEvaluatorResult = { // minimal LLM-as-a-judge evaluator detail; only kmsKeyArn matters to this test
+      evaluatorId: 'eval-kms',
+      evaluatorArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:evaluator/eval-kms',
+      evaluatorName: 'kms_eval',
+      level: 'SESSION',
+      status: 'ACTIVE',
+      evaluatorConfig: {
+        llmAsAJudge: {
+          model: 'anthropic.claude-3-5-sonnet-20241022-v2:0',
+          instructions: 'Evaluate',
+          ratingScale: { numerical: [{ value: 1, label: 'Low', definition: 'Low' }] },
+        },
+      },
+      kmsKeyArn: 'arn:aws:kms:us-west-2:123456789012:key/12345678-1234-1234-1234-123456789012', // value under test
+    };
+
+    const result = toEvaluatorSpec(detail, 'kms_eval');
+
+    expect(result.kmsKeyArn).toBe('arn:aws:kms:us-west-2:123456789012:key/12345678-1234-1234-1234-123456789012'); // expected to match the input ARN exactly
+  });
+
+  it('omits kmsKeyArn when not present', () => { // counterpart to the forwarding test: no key on the detail means no kmsKeyArn value on the spec
+    const detail: GetEvaluatorResult = { // same minimal LLM-as-a-judge detail, deliberately without a kmsKeyArn field
+      evaluatorId: 'eval-no-kms',
+      evaluatorArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:evaluator/eval-no-kms',
+      evaluatorName: 'no_kms_eval',
+      level: 'SESSION',
+      status: 'ACTIVE',
+      evaluatorConfig: {
+        llmAsAJudge: {
+          model: 'anthropic.claude-3-5-sonnet-20241022-v2:0',
+          instructions: 'Evaluate',
+          ratingScale: { numerical: [{ value: 1, label: 'Low', definition: 'Low' }] },
+        },
+      },
+    };
+
+    const result = toEvaluatorSpec(detail, 'no_kms_eval');
+
+    expect(result.kmsKeyArn).toBeUndefined(); // only checks the value is undefined; does not assert the key is absent from the object
+  });
 });
 
 // ============================================================================
diff --git a/src/cli/commands/import/import-evaluator.ts b/src/cli/commands/import/import-evaluator.ts
@@ -49,6 +49,7 @@ export function toEvaluatorSpec(detail: GetEvaluatorResult, localName: string):
     level,
     ...(detail.description && { description: detail.description }),
     config,
+    ...(detail.kmsKeyArn && { kmsKeyArn: detail.kmsKeyArn }),
     ...(detail.tags && Object.keys(detail.tags).length > 0 && { tags: detail.tags }),
   };
 }
diff --git a/src/cli/primitives/EvaluatorPrimitive.ts b/src/cli/primitives/EvaluatorPrimitive.ts
@@ -23,6 +23,7 @@ export interface AddEvaluatorOptions {
   level: EvaluationLevel;
   description?: string;
   config: EvaluatorConfig;
+  kmsKeyArn?: string;
 }
 
 export type RemovableEvaluator = RemovableResource;
@@ -182,6 +183,7 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
         '--config <path>',
         'Path to evaluator config JSON file (overrides --model, --instructions, --rating-scale) [non-interactive]'
       )
+      .option('--kms-key-arn <arn>', 'KMS key ARN for evaluator encryption (optional)')
       .option('--json', 'Output as JSON [non-interactive]')
       .action(
         async (cliOptions: {
@@ -194,6 +196,7 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
           lambdaArn?: string;
           timeout?: string;
           config?: string;
+          kmsKeyArn?: string;
           json?: boolean;
         }) => {
           try {
@@ -292,10 +295,22 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
                 };
               }
 
+              if (
+                cliOptions.kmsKeyArn &&
+                !/^arn:aws(?:|-cn|-us-gov):kms:[a-zA-Z0-9-]*:[0-9]{12}:key\/[a-zA-Z0-9-]{36}$/.test(
+                  cliOptions.kmsKeyArn
+                )
+              ) {
+                fail(
+                  '--kms-key-arn must be a valid KMS key ARN (e.g. arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012)'
+                );
+              }
+
               const result = await this.add({
                 name: cliOptions.name!,
                 level: levelResult.data!,
                 config: configJson,
+                kmsKeyArn: cliOptions.kmsKeyArn,
               });
 
               if (cliOptions.json) {
@@ -385,6 +400,7 @@ export class EvaluatorPrimitive extends BasePrimitive<AddEvaluatorOptions, Remov
       level: options.level,
       ...(options.description && { description: options.description }),
       config: options.config,
+      ...(options.kmsKeyArn && { kmsKeyArn: options.kmsKeyArn }),
     };
 
     project.evaluators.push(evaluator);
diff --git a/src/cli/tui/hooks/useCreateEvaluator.ts b/src/cli/tui/hooks/useCreateEvaluator.ts
@@ -6,6 +6,7 @@ interface CreateEvaluatorConfig {
   name: string;
   level: string;
   config: EvaluatorConfig;
+  kmsKeyArn?: string;
 }
 
 export function useCreateEvaluator() {
@@ -20,6 +21,7 @@ export function useCreateEvaluator() {
         name: config.name,
         level: config.level as 'SESSION' | 'TRACE' | 'TOOL_CALL',
         config: config.config,
+        kmsKeyArn: config.kmsKeyArn,
       });
       if (!addResult.success) {
         throw new Error(addResult.error ?? 'Failed to create evaluator');
diff --git a/src/cli/tui/screens/evaluator/AddEvaluatorScreen.tsx b/src/cli/tui/screens/evaluator/AddEvaluatorScreen.tsx
@@ -91,6 +91,7 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
   const isRatingScaleCustomStep = wizard.step === 'ratingScale-custom';
   const isLambdaArnStep = wizard.step === 'lambda-arn';
   const isTimeoutStep = wizard.step === 'timeout';
+  const isKmsKeyArnStep = wizard.step === 'kms-key-arn';
   const isConfirmStep = wizard.step === 'confirm';
 
   const evaluatorTypeNav = useListNavigation({
@@ -163,6 +164,8 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
 
   // Build confirm fields based on evaluator type
   const confirmFields = useMemo(() => {
+    const kmsField = wizard.config.kmsKeyArn ? [{ label: 'KMS Key ARN', value: wizard.config.kmsKeyArn }] : [];
+
     if (wizard.evaluatorType === 'llm-as-a-judge') {
       const llm = wizard.config.config.llmAsAJudge!;
       return [
@@ -175,6 +178,7 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
           value: llm.instructions.length > 60 ? llm.instructions.slice(0, 60) + '...' : llm.instructions,
         },
         { label: 'Rating Scale', value: formatRatingScale(llm.ratingScale) },
+        ...kmsField,
       ];
     }
 
@@ -187,6 +191,7 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
         { label: 'Code', value: managed.codeLocation },
         { label: 'Entrypoint', value: managed.entrypoint },
         { label: 'Timeout', value: `${managed.timeoutSeconds}s` },
+        ...kmsField,
       ];
     }
 
@@ -197,6 +202,7 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
       { label: 'Name', value: wizard.config.name },
       { label: 'Level', value: wizard.config.level },
       { label: 'Lambda ARN', value: external.lambdaArn },
+      ...kmsField,
     ];
   }, [wizard.evaluatorType, wizard.codeBasedType, wizard.config]);
 
@@ -374,6 +380,21 @@ export function AddEvaluatorScreen({ onComplete, onExit, existingEvaluatorNames
           />
         )}
 
+        {isKmsKeyArnStep && (
+          <TextInput
+            key="kms-key-arn"
+            prompt="KMS key ARN for encryption (optional, press Enter to skip)"
+            initialValue=""
+            onSubmit={wizard.setKmsKeyArn}
+            onCancel={() => wizard.goBack()}
+            customValidation={value =>
+              value === '' ||
+              /^arn:aws(?:|-cn|-us-gov):kms:[a-zA-Z0-9-]*:[0-9]{12}:key\/[a-zA-Z0-9-]{36}$/.test(value) ||
+              'Must be a valid KMS key ARN (e.g. arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012)'
+            }
+          />
+        )}
+
         {isConfirmStep && <ConfirmReview fields={confirmFields} />}
       </Panel>
     </Screen>
diff --git a/src/cli/tui/screens/evaluator/types.ts b/src/cli/tui/screens/evaluator/types.ts
@@ -20,12 +20,14 @@ export type AddEvaluatorStep =
   | 'ratingScale-custom'
   | 'lambda-arn'
   | 'timeout'
+  | 'kms-key-arn'
   | 'confirm';
 
 export interface AddEvaluatorConfig {
   name: string;
   level: EvaluationLevel;
   config: EvaluatorConfig;
+  kmsKeyArn?: string; // optional KMS key ARN from the 'kms-key-arn' wizard step; left off when the user skips the step with an empty value
 }
 
 export const EVALUATOR_STEP_LABELS: Record<AddEvaluatorStep, string> = {
@@ -41,6 +43,7 @@ export const EVALUATOR_STEP_LABELS: Record<AddEvaluatorStep, string> = {
   'ratingScale-custom': 'Scale',
   'lambda-arn': 'Lambda',
   timeout: 'Timeout',
+  'kms-key-arn': 'KMS Key',
   confirm: 'Confirm',
 };
 
diff --git a/src/cli/tui/screens/evaluator/useAddEvaluatorWizard.ts b/src/cli/tui/screens/evaluator/useAddEvaluatorWizard.ts
@@ -22,6 +22,7 @@ const LLM_STEPS: AddEvaluatorStep[] = [
   'model',
   'instructions',
   'ratingScale',
+  'kms-key-arn',
   'confirm',
 ];
 const CODE_MANAGED_STEPS: AddEvaluatorStep[] = [
@@ -30,6 +31,7 @@ const CODE_MANAGED_STEPS: AddEvaluatorStep[] = [
   'name',
   'level',
   'timeout',
+  'kms-key-arn',
   'confirm',
 ];
 const CODE_EXTERNAL_STEPS: AddEvaluatorStep[] = [
@@ -38,6 +40,7 @@ const CODE_EXTERNAL_STEPS: AddEvaluatorStep[] = [
   'name',
   'level',
   'lambda-arn',
+  'kms-key-arn',
   'confirm',
 ];
 
@@ -80,6 +83,7 @@ export function useAddEvaluatorWizard() {
   const [lambdaArn, setLambdaArnState] = useState('');
   const [timeout, setTimeoutState] = useState(DEFAULT_CODE_TIMEOUT);
   const [customRatingScaleType, setCustomRatingScaleType] = useState<CustomRatingScaleType>('numerical');
+  const [kmsKeyArn, setKmsKeyArnState] = useState('');
   const [step, setStep] = useState<AddEvaluatorStep>('evaluator-type');
 
   const steps = useMemo(() => getSteps(evaluatorType, codeBasedType), [evaluatorType, codeBasedType]);
@@ -109,11 +113,13 @@ export function useAddEvaluatorWizard() {
 
   // Build the final config based on current state
   const config: AddEvaluatorConfig = useMemo(() => {
+    const kms = kmsKeyArn || undefined;
     if (evaluatorType === 'llm-as-a-judge') {
       return {
         name,
         level,
         config: { llmAsAJudge: llmConfig },
+        ...(kms && { kmsKeyArn: kms }),
       };
     }
 
@@ -126,6 +132,7 @@ export function useAddEvaluatorWizard() {
             external: { lambdaArn },
           },
         },
+        ...(kms && { kmsKeyArn: kms }),
       };
     }
 
@@ -143,8 +150,9 @@ export function useAddEvaluatorWizard() {
           },
         },
       },
+      ...(kms && { kmsKeyArn: kms }),
     };
-  }, [evaluatorType, codeBasedType, name, level, llmConfig, lambdaArn, timeout]);
+  }, [evaluatorType, codeBasedType, name, level, llmConfig, lambdaArn, timeout, kmsKeyArn]);
 
   const selectEvaluatorType = useCallback((type: EvaluatorTypeId) => {
     setEvaluatorType(type);
@@ -256,6 +264,15 @@ export function useAddEvaluatorWizard() {
     [nextStep]
   );
 
+  const setKmsKeyArn = useCallback( // submit handler for the 'kms-key-arn' step: record the ARN, then move the wizard on
+    (arn: string) => {
+      setKmsKeyArnState(arn); // stored verbatim; an empty string (step skipped) is turned into undefined when the config memo is built
+      const next = nextStep('kms-key-arn'); // presumably the step that follows 'kms-key-arn' in the active step list (nextStep is defined outside this hunk)
+      if (next) setStep(next); // only advance when nextStep yields a step; otherwise the wizard stays where it is
+    },
+    [nextStep]
+  );
+
   const reset = useCallback(() => {
     setEvaluatorType('code-based');
     setCodeBasedType('managed');
@@ -264,6 +281,7 @@ export function useAddEvaluatorWizard() {
     setLlmConfig(getDefaultLlmConfig().llmAsAJudge!);
     setLambdaArnState('');
     setTimeoutState(DEFAULT_CODE_TIMEOUT);
+    setKmsKeyArnState('');
     setStep('evaluator-type');
   }, []);
 
@@ -288,6 +306,7 @@ export function useAddEvaluatorWizard() {
     setCustomRatingScale,
     setLambdaArn,
     setTimeout,
+    setKmsKeyArn,
     reset,
   };
 }
diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts
@@ -200,6 +200,7 @@ export const EvaluatorSchema = z.object({
   level: EvaluationLevelSchema,
   description: z.string().optional(),
   config: EvaluatorConfigSchema,
+  kmsKeyArn: z.string().optional(),
   tags: TagsSchema.optional(),
 });
 

Original file line number	Diff line number	Diff line change
`@@ -463,6 +463,7 @@ export interface GetEvaluatorResult {`
`463`	`463`	`llmAsAJudge?: GetEvaluatorLlmConfig;`
`464`	`464`	`codeBased?: GetEvaluatorCodeBasedConfig;`
`465`	`465`	`};`
	`466`	`+ kmsKeyArn?: string;`
`466`	`467`	`tags?: Record<string, string>;`
`467`	`468`	`}`
`468`	`469`
`@@ -541,6 +542,7 @@ export async function getEvaluator(options: GetEvaluatorOptions): Promise<GetEva`
`541`	`542`	`status: response.status ?? 'UNKNOWN',`
`542`	`543`	`description: response.description,`
`543`	`544`	`evaluatorConfig,`
	`545`	`+ kmsKeyArn: response.kmsKeyArn,`
`544`	`546`	`tags,`
`545`	`547`	`};`
`546`	`548`	`}`
Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,7 @@ export function toEvaluatorSpec(detail: GetEvaluatorResult, localName: string):`
`49`	`49`	`level,`
`50`	`50`	`...(detail.description && { description: detail.description }),`
`51`	`51`	`config,`
	`52`	`+ ...(detail.kmsKeyArn && { kmsKeyArn: detail.kmsKeyArn }),`
`52`	`53`	`...(detail.tags && Object.keys(detail.tags).length > 0 && { tags: detail.tags }),`
`53`	`54`	`};`
`54`	`55`	`}`