Skip to content

Commit 9eba0fe

Browse files
cpinn and claude authored
Allow autoevals to support both zod 3 and zod 4 (#155)
## Changes Allow autoevals to be installed alongside either zod 3 or zod 4. The TypeScript SDK was updated to make zod a peer dependency so that it works with either zod 3 or zod 4. This PR makes a similar update to the autoevals package and runs the existing tests in a matrix over zod 3 and zod 4. Our internal uses of autoevals do not allow for a direct upgrade to zod v4 at this time, but the peer dependency should unblock users who want to use autoevals with zod 4. --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 74aafa3 commit 9eba0fe

9 files changed

Lines changed: 1472 additions & 189 deletions

File tree

.github/workflows/js.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ on:
77

88
jobs:
99
build:
10+
name: node ${{ matrix.node-version }} / zod ${{ matrix.zod }}
1011
runs-on: ubuntu-latest
1112

1213
strategy:
14+
fail-fast: false
1315
matrix:
1416
node-version: [20.x, 22.x, 24.x]
17+
# zod is a peerDependency ("^3.25.0 || ^4.0.0"); test both majors.
18+
zod: ["3", "4"]
1519

1620
steps:
1721
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -25,6 +29,14 @@ jobs:
2529
node-version: ${{ matrix.node-version }}
2630
cache: pnpm
2731
- run: pnpm install --frozen-lockfile
32+
# The lockfile pins zod 3, so override across the whole tree
33+
- name: Force zod 4
34+
if: matrix.zod == '4'
35+
run: |
36+
printf '\noverrides:\n zod: ^4\n' >> pnpm-workspace.yaml
37+
pnpm install --no-frozen-lockfile
38+
- name: Verify resolved zod major
39+
run: node -e "const v=require('zod/package.json').version; console.log('zod', v); if (!v.startsWith('${{ matrix.zod }}.')) { console.error('expected zod ${{ matrix.zod }}.x'); process.exit(1); }"
2840
- run: pnpm run test
2941
env:
3042
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

evals/package.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313
"license": "ISC",
1414
"dependencies": {
1515
"autoevals": "workspace:*",
16-
"braintrust": "^0.0.140",
17-
"zod": "^3.22.4"
16+
"braintrust": "2.0.1"
1817
},
1918
"devDependencies": {
2019
"@types/node": "^20.10.5",
2120
"duckdb": "^1.0.0",
22-
"tsx": "^3.14.0"
21+
"tsx": "^3.14.0",
22+
"zod": "3.25.67"
23+
},
24+
"peerDependencies": {
25+
"zod": "^3.25.0 || ^4.0.0"
2326
}
2427
}

evals/src/autoevals.eval.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import {
77
coqaCaseSchema,
88
dataDir,
99
} from "./datasets";
10-
import { z } from "zod";
10+
import { z } from "zod/v3";
1111
import {
1212
AnswerCorrectness,
1313
ClosedQA,

evals/src/datasets.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { z } from "zod";
1+
import { z } from "zod/v3";
22

33
import path from "path";
44

evals/src/sync_datasets.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { duckq, getDuckDBConn } from "./duckdb";
22

3-
import { z } from "zod";
3+
import { z } from "zod/v3";
44
import {
55
coqaSchema,
66
dataDir,

js/ragas.ts

Lines changed: 43 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ import { LLMArgs } from "./llm";
106106
import { getDefaultModel, getDefaultEmbeddingModel } from "./oai";
107107
import { buildOpenAIClient, extractOpenAIArgs } from "./oai";
108108
import OpenAI from "openai";
109+
import { zodFunction } from "openai/helpers/zod";
109110
import { ListContains } from "./list";
110111
import { EmbeddingSimilarity } from "./string";
111-
import { z } from "zod";
112-
import zodToJsonSchema from "zod-to-json-schema";
112+
import { z } from "zod/v3";
113113
import { makePartial, ScorerWithPartial } from "./partial";
114114

115115
type RagasArgs = {
@@ -188,14 +188,11 @@ export const ContextEntityRecall: ScorerWithPartial<
188188
},
189189
],
190190
tools: [
191-
{
192-
type: "function",
193-
function: {
194-
name: "extract_entities",
195-
description: "Extract unique entities from a given text",
196-
parameters: zodToJsonSchema(entitySchema),
197-
},
198-
},
191+
zodFunction({
192+
name: "extract_entities",
193+
description: "Extract unique entities from a given text",
194+
parameters: entitySchema,
195+
}),
199196
],
200197
tool_choice: { type: "function", function: { name: "extract_entities" } },
201198
});
@@ -268,14 +265,11 @@ export const ContextRelevancy: ScorerWithPartial<string, RagasArgs> =
268265
},
269266
],
270267
tools: [
271-
{
272-
type: "function",
273-
function: {
274-
name: "extract_sentences",
275-
description: "Extract relevant sentences from a given context",
276-
parameters: zodToJsonSchema(relevantSentencesSchema),
277-
},
278-
},
268+
zodFunction({
269+
name: "extract_sentences",
270+
description: "Extract relevant sentences from a given context",
271+
parameters: relevantSentencesSchema,
272+
}),
279273
],
280274
tool_choice: {
281275
type: "function",
@@ -371,13 +365,10 @@ export const ContextRecall: ScorerWithPartial<string, RagasArgs> = makePartial(
371365
},
372366
],
373367
tools: [
374-
{
375-
type: "function",
376-
function: {
377-
name: "extract_statements",
378-
parameters: zodToJsonSchema(contextRecallSchema),
379-
},
380-
},
368+
zodFunction({
369+
name: "extract_statements",
370+
parameters: contextRecallSchema,
371+
}),
381372
],
382373
tool_choice: {
383374
type: "function",
@@ -473,15 +464,11 @@ export const ContextPrecision: ScorerWithPartial<string, RagasArgs> =
473464
},
474465
],
475466
tools: [
476-
{
477-
type: "function",
478-
function: {
479-
name: "verify",
480-
description:
481-
"Verify if context was useful in arriving at the answer",
482-
parameters: zodToJsonSchema(contextPrecisionSchema),
483-
},
484-
},
467+
zodFunction({
468+
name: "verify",
469+
description: "Verify if context was useful in arriving at the answer",
470+
parameters: contextPrecisionSchema,
471+
}),
485472
],
486473
tool_choice: { type: "function", function: { name: "verify" } },
487474
});
@@ -600,14 +587,11 @@ export const Faithfulness: ScorerWithPartial<string, RagasArgs> = makePartial(
600587
},
601588
],
602589
tools: [
603-
{
604-
type: "function",
605-
function: {
606-
name: "extract_statements",
607-
description: "Extract statements from an answer given a question",
608-
parameters: zodToJsonSchema(extractedStatementsSchema),
609-
},
610-
},
590+
zodFunction({
591+
name: "extract_statements",
592+
description: "Extract statements from an answer given a question",
593+
parameters: extractedStatementsSchema,
594+
}),
611595
],
612596
tool_choice: {
613597
type: "function",
@@ -631,15 +615,12 @@ export const Faithfulness: ScorerWithPartial<string, RagasArgs> = makePartial(
631615
},
632616
],
633617
tools: [
634-
{
635-
type: "function",
636-
function: {
637-
name: "judge_statements",
638-
description:
639-
"Judge whether the statements are faithful to the context",
640-
parameters: zodToJsonSchema(statementFaithfulnessSchema),
641-
},
642-
},
618+
zodFunction({
619+
name: "judge_statements",
620+
description:
621+
"Judge whether the statements are faithful to the context",
622+
parameters: statementFaithfulnessSchema,
623+
}),
643624
],
644625
tool_choice: { type: "function", function: { name: "judge_statements" } },
645626
});
@@ -741,15 +722,12 @@ export const AnswerRelevancy: ScorerWithPartial<
741722
},
742723
],
743724
tools: [
744-
{
745-
type: "function",
746-
function: {
747-
name: "generate_question",
748-
description:
749-
"Generate a question for the given answer and identify if the answer is noncommittal",
750-
parameters: zodToJsonSchema(questionGenSchema),
751-
},
752-
},
725+
zodFunction({
726+
name: "generate_question",
727+
description:
728+
"Generate a question for the given answer and identify if the answer is noncommittal",
729+
parameters: questionGenSchema,
730+
}),
753731
],
754732
tool_choice: {
755733
type: "function",
@@ -920,14 +898,11 @@ export const AnswerCorrectness: ScorerWithPartial<
920898
},
921899
],
922900
tools: [
923-
{
924-
type: "function",
925-
function: {
926-
name: "classify_statements",
927-
description: "Classify statements as TP, FP, or FN",
928-
parameters: zodToJsonSchema(answerCorrectnessClassificationSchema),
929-
},
930-
},
901+
zodFunction({
902+
name: "classify_statements",
903+
description: "Classify statements as TP, FP, or FN",
904+
parameters: answerCorrectnessClassificationSchema,
905+
}),
931906
],
932907
tool_choice: {
933908
type: "function",

js/templates.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { z } from "zod";
1+
import { z } from "zod/v3";
22
import * as yaml from "js-yaml";
33

44
import battle from "../templates/battle.yaml";

package.json

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@
4848
"typedoc": "^0.25.13",
4949
"typedoc-plugin-markdown": "^3.17.1",
5050
"typescript": "^5.9.2",
51-
"vitest": "^2.1.9"
51+
"vitest": "^2.1.9",
52+
"zod": "3.25.67"
53+
},
54+
"peerDependencies": {
55+
"zod": "^3.25.0 || ^4.0.0"
5256
},
5357
"dependencies": {
5458
"ajv": "^8.17.1",
@@ -57,9 +61,8 @@
5761
"js-yaml": "^4.1.0",
5862
"linear-sum-assignment": "^1.0.7",
5963
"mustache": "^4.2.0",
60-
"openai": "^6.3.0",
61-
"zod": "^3.25.76",
62-
"zod-to-json-schema": "^3.24.6"
64+
"openai": "^6.7.0",
65+
"zod-to-json-schema": "3.25.0"
6366
},
6467
"packageManager": "pnpm@10.33.0"
6568
}

0 commit comments

Comments
 (0)