Skip to content

Commit 0ace6fd

Browse files
authored
fix(eval): accept structured code grader content (#1360)
1 parent 3c83b51 commit 0ace6fd

2 files changed

Lines changed: 57 additions & 5 deletions

File tree

packages/eval/src/schemas.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
*
55
* ## Content model
66
*
7-
* `Message.content` accepts `string | Content[]`:
7+
* `Message.content` accepts `string | object[] | object`:
88
* - `string` — backward-compatible plain text (most common case)
9-
* - `Content[]` — typed content blocks for multimodal messages
9+
* - `object[]` — typed content blocks for multimodal messages, plus AgentV
10+
* eval input blocks such as `{ type: "file", value, path, text }`
11+
* - `object` — structured YAML/JSON content such as expected outputs
1012
*
1113
* Content variants:
1214
* - `ContentText` — `{ type: 'text', text: string }`
@@ -245,15 +247,20 @@ export const ContentSchema = z.discriminatedUnion('type', [
245247
ContentFileSchema,
246248
]);
247249

250+
/**
 * Schema for a single element of an array-valued `Message.content`.
 *
 * A block is validated against the typed `ContentSchema` variants first; if
 * none of them accepts it, it is still allowed as an open record with
 * arbitrary keys. This fallback is what admits eval-file input blocks such as
 * `{ type: 'text', value }` or `{ type: 'file', value, path, text }`.
 *
 * NOTE(review): when a block does satisfy a `ContentSchema` variant, the
 * parsed result comes from that variant. If the variants are plain
 * `z.object(...)` schemas (defined outside this view — confirm), Zod's default
 * behaviour would drop any keys the variant does not declare, so extra
 * eval-block fields could be lost on such blocks.
 */
const MessageContentBlockSchema = z.union([ContentSchema, z.record(z.unknown())]);
251+
248252
/**
249253
* Unified message schema for input, expected, and output messages.
250254
*
251-
* `content` is either a plain string or a `Content[]` array of typed blocks.
252-
* Use `getTextContent()` from `@agentv/core` to extract plain text from either form.
255+
* `content` is a plain string, an array of structured blocks, or a
256+
* structured object from YAML/JSON eval files. Use `getTextContent()` from
257+
* `@agentv/core` to extract plain text when the content is textual.
253258
*/
254259
export const MessageSchema = z.object({
255260
role: z.enum(['assistant', 'user', 'system', 'tool']),
256-
content: z.union([z.string(), z.array(ContentSchema)]).optional(),
261+
content: z
262+
.union([z.string(), z.array(MessageContentBlockSchema), z.record(z.unknown())])
263+
.optional(),
257264
toolCalls: z.array(ToolCallSchema).optional(),
258265
name: z.string().optional(),
259266
startTime: z.string().optional(),

packages/eval/test/define-code-grader.test.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,37 @@ describe('MessageSchema content variants', () => {
127127
expect(content.map((c) => c.type)).toEqual(['text', 'image', 'file']);
128128
});
129129

130+
it('accepts AgentV eval file/text input blocks', () => {
131+
const msg = MessageSchema.parse({
132+
role: 'user',
133+
content: [
134+
{
135+
type: 'file',
136+
value: '../skills/export-risk-assessment.md',
137+
path: '../skills/export-risk-assessment.md',
138+
text: '# instructions',
139+
resolved_path: '/repo/examples/skills/export-risk-assessment.md',
140+
},
141+
{
142+
type: 'text',
143+
value: 'Assess export risk for this shipment',
144+
},
145+
],
146+
});
147+
const content = msg.content as Record<string, unknown>[];
148+
expect(content).toHaveLength(2);
149+
expect(content[0].value).toBe('../skills/export-risk-assessment.md');
150+
expect(content[1].value).toBe('Assess export risk for this shipment');
151+
});
152+
153+
it('accepts structured object content from eval YAML', () => {
154+
const msg = MessageSchema.parse({
155+
role: 'assistant',
156+
content: { riskLevel: 'High', reasoning: 'CHPL Tier 1 item' },
157+
});
158+
expect(msg.content).toEqual({ riskLevel: 'High', reasoning: 'CHPL Tier 1 item' });
159+
});
160+
130161
it('accepts undefined content', () => {
131162
const msg = MessageSchema.parse({ role: 'tool' });
132163
expect(msg.content).toBeUndefined();
@@ -230,6 +261,20 @@ describe('CodeGraderInputSchema', () => {
230261
const content = result.input[0].content as { type: string }[];
231262
expect(content).toHaveLength(2);
232263
});
264+
265+
it('accepts structured expectedOutput content objects', () => {
266+
const inputWithStructuredExpectedOutput = {
267+
...validInput,
268+
expectedOutput: [
269+
{
270+
role: 'assistant',
271+
content: { riskLevel: 'High' },
272+
},
273+
],
274+
};
275+
const result = CodeGraderInputSchema.parse(inputWithStructuredExpectedOutput);
276+
expect(result.expectedOutput[0].content).toEqual({ riskLevel: 'High' });
277+
});
233278
});
234279

235280
describe('CodeGraderResultSchema', () => {

0 commit comments

Comments
 (0)