diff --git a/packages/eval/src/schemas.ts b/packages/eval/src/schemas.ts
index 4d2eb340e..ea454ba67 100644
--- a/packages/eval/src/schemas.ts
+++ b/packages/eval/src/schemas.ts
@@ -4,9 +4,11 @@
  *
  * ## Content model
  *
- * `Message.content` accepts `string | Content[]`:
+ * `Message.content` accepts `string | object[] | object`:
  * - `string` — backward-compatible plain text (most common case)
- * - `Content[]` — typed content blocks for multimodal messages
+ * - `object[]` — typed content blocks for multimodal messages, plus AgentV
+ *   eval input blocks such as `{ type: "file", value, path, text }`
+ * - `object` — structured YAML/JSON content such as expected outputs
  *
  * Content variants:
  * - `ContentText` — `{ type: 'text', text: string }`
@@ -245,15 +247,27 @@ export const ContentSchema = z.discriminatedUnion('type', [
   ContentFileSchema,
 ]);
 
+/**
+ * One element of an array-valued `Message.content`.
+ *
+ * Typed `Content` blocks are tried first; any other plain object (for example
+ * an AgentV eval input block such as `{ type: 'file', value, path, text }`)
+ * falls through to the open record branch.
+ */
+const MessageContentBlockSchema = z.union([ContentSchema, z.record(z.string(), z.unknown())]);
+
 /**
  * Unified message schema for input, expected, and output messages.
  *
- * `content` is either a plain string or a `Content[]` array of typed blocks.
- * Use `getTextContent()` from `@agentv/core` to extract plain text from either form.
+ * `content` is a plain string, an array of structured blocks, or a
+ * structured object from YAML/JSON eval files. Use `getTextContent()` from
+ * `@agentv/core` to extract plain text when the content is textual.
  */
 export const MessageSchema = z.object({
   role: z.enum(['assistant', 'user', 'system', 'tool']),
-  content: z.union([z.string(), z.array(ContentSchema)]).optional(),
+  content: z
+    .union([z.string(), z.array(MessageContentBlockSchema), z.record(z.string(), z.unknown())])
+    .optional(),
   toolCalls: z.array(ToolCallSchema).optional(),
   name: z.string().optional(),
   startTime: z.string().optional(),
diff --git a/packages/eval/test/define-code-grader.test.ts b/packages/eval/test/define-code-grader.test.ts
index 2db1be399..e09c0ba49 100644
--- a/packages/eval/test/define-code-grader.test.ts
+++ b/packages/eval/test/define-code-grader.test.ts
@@ -127,6 +127,37 @@ describe('MessageSchema content variants', () => {
     expect(content.map((c) => c.type)).toEqual(['text', 'image', 'file']);
   });
 
+  it('accepts AgentV eval file/text input blocks', () => {
+    const msg = MessageSchema.parse({
+      role: 'user',
+      content: [
+        {
+          type: 'file',
+          value: '../skills/export-risk-assessment.md',
+          path: '../skills/export-risk-assessment.md',
+          text: '# instructions',
+          resolved_path: '/repo/examples/skills/export-risk-assessment.md',
+        },
+        {
+          type: 'text',
+          value: 'Assess export risk for this shipment',
+        },
+      ],
+    });
+    const content = msg.content as Record<string, unknown>[];
+    expect(content).toHaveLength(2);
+    expect(content[0].value).toBe('../skills/export-risk-assessment.md');
+    expect(content[1].value).toBe('Assess export risk for this shipment');
+  });
+
+  it('accepts structured object content from eval YAML', () => {
+    const msg = MessageSchema.parse({
+      role: 'assistant',
+      content: { riskLevel: 'High', reasoning: 'CHPL Tier 1 item' },
+    });
+    expect(msg.content).toEqual({ riskLevel: 'High', reasoning: 'CHPL Tier 1 item' });
+  });
+
   it('accepts undefined content', () => {
     const msg = MessageSchema.parse({ role: 'tool' });
     expect(msg.content).toBeUndefined();
@@ -230,6 +261,20 @@ describe('CodeGraderInputSchema', () => {
     const content = result.input[0].content as { type: string }[];
     expect(content).toHaveLength(2);
   });
+
+  it('accepts structured expectedOutput content objects', () => {
+    const inputWithStructuredExpectedOutput = {
+      ...validInput,
+      expectedOutput: [
+        {
+          role: 'assistant',
+          content: { riskLevel: 'High' },
+        },
+      ],
+    };
+    const result = CodeGraderInputSchema.parse(inputWithStructuredExpectedOutput);
+    expect(result.expectedOutput[0].content).toEqual({ riskLevel: 'High' });
+  });
 });
 
 describe('CodeGraderResultSchema', () => {