fix(eval): accept structured code grader content (#1360)

christso · web-flow · commit 0ace6fd8ad68 · 2026-06-12T09:39:19.000+10:00
diff --git a/packages/eval/src/schemas.ts b/packages/eval/src/schemas.ts
@@ -4,9 +4,11 @@
  *
  * ## Content model
  *
- * `Message.content` accepts `string | Content[]`:
+ * `Message.content` accepts `string | object[] | object`:
  * - `string` — backward-compatible plain text (most common case)
- * - `Content[]` — typed content blocks for multimodal messages
+ * - `object[]` — typed content blocks for multimodal messages, plus AgentV
+ *   eval input blocks such as `{ type: 'file', value, path, text }`
+ * - `object` — structured YAML/JSON content such as expected outputs
  *
  * Content variants:
  * - `ContentText`  — `{ type: 'text', text: string }`
@@ -245,15 +247,20 @@ export const ContentSchema = z.discriminatedUnion('type', [
   ContentFileSchema,
 ]);
 
+const MessageContentBlockSchema = z.union([ContentSchema, z.record(z.unknown())]);
+
 /**
  * Unified message schema for input, expected, and output messages.
  *
- * `content` is either a plain string or a `Content[]` array of typed blocks.
- * Use `getTextContent()` from `@agentv/core` to extract plain text from either form.
+ * `content` is a plain string, an array of blocks (typed `Content` blocks or
+ * free-form objects), or a structured object from YAML/JSON eval files. Use
+ * `getTextContent()` from `@agentv/core` to extract plain text when it is textual.
  */
 export const MessageSchema = z.object({
   role: z.enum(['assistant', 'user', 'system', 'tool']),
-  content: z.union([z.string(), z.array(ContentSchema)]).optional(),
+  content: z
+    .union([z.string(), z.array(MessageContentBlockSchema), z.record(z.unknown())])
+    .optional(),
   toolCalls: z.array(ToolCallSchema).optional(),
   name: z.string().optional(),
   startTime: z.string().optional(),
diff --git a/packages/eval/test/define-code-grader.test.ts b/packages/eval/test/define-code-grader.test.ts
@@ -127,6 +127,37 @@ describe('MessageSchema content variants', () => {
     expect(content.map((c) => c.type)).toEqual(['text', 'image', 'file']);
   });
 
+  it('accepts AgentV eval file/text input blocks', () => {
+    const msg = MessageSchema.parse({
+      role: 'user',
+      content: [
+        {
+          type: 'file',
+          value: '../skills/export-risk-assessment.md',
+          path: '../skills/export-risk-assessment.md',
+          text: '# instructions',
+          resolved_path: '/repo/examples/skills/export-risk-assessment.md',
+        },
+        {
+          type: 'text',
+          value: 'Assess export risk for this shipment',
+        },
+      ],
+    });
+    const content = msg.content as Record<string, unknown>[];
+    expect(content).toHaveLength(2);
+    expect(content[0].value).toBe('../skills/export-risk-assessment.md');
+    expect(content[1].value).toBe('Assess export risk for this shipment');
+  });
+
+  it('accepts structured object content from eval YAML', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: { riskLevel: 'High', reasoning: 'CHPL Tier 1 item' },
+    });
+    expect(msg.content).toEqual({ riskLevel: 'High', reasoning: 'CHPL Tier 1 item' });
+  });
+
   it('accepts undefined content', () => {
     const msg = MessageSchema.parse({ role: 'tool' });
     expect(msg.content).toBeUndefined();
@@ -230,6 +261,20 @@ describe('CodeGraderInputSchema', () => {
     const content = result.input[0].content as { type: string }[];
     expect(content).toHaveLength(2);
   });
+
+  it('accepts structured expectedOutput content objects', () => {
+    const inputWithStructuredExpectedOutput = {
+      ...validInput,
+      expectedOutput: [
+        {
+          role: 'assistant',
+          content: { riskLevel: 'High' },
+        },
+      ],
+    };
+    const result = CodeGraderInputSchema.parse(inputWithStructuredExpectedOutput);
+    expect(result.expectedOutput[0].content).toEqual({ riskLevel: 'High' });
+  });
 });
 
 describe('CodeGraderResultSchema', () => {