Skip to content

Commit a98801a

Browse files
authored
Fix schema parsing bug for Pydantic .model_json_schema() (browserbase#1873)
# why JSON schema parsing for schemas generated from Pydantic `.model_json_schema()` was dropping nested `$ref` references # what changed Before (from test script) with schema: ```python class RequiredInputType(StrEnum): USERNAME = "username" EMAIL = "email" PHONE = "phone" OTP = "otp" PASSWORD = "password" CARD_LAST_8 = "card_last_8" SOMETHING_ELSE = "something_else" class PageState(StrEnum): LOGIN_FORM = "login_form" ERROR = "error" AUTHENTICATED = "authenticated" UNKNOWN = "unknown" class PageAnalysis(BaseModel): page_state: PageState required_input: RequiredInputType | None = None contact_id_of_required_input: str | None = None error_message: str | None = None reasoning: str = "" ``` <img width="654" height="373" alt="Screenshot 2026-03-23 at 1 09 01 PM" src="https://github.com/user-attachments/assets/29a24eea-cfda-40a8-8e43-10f959c71ad2" /> After: <img width="650" height="310" alt="Screenshot 2026-03-23 at 1 06 17 PM" src="https://github.com/user-attachments/assets/9f62d1ad-04bb-41e4-9dbc-85d7b68c84aa" /> 1. Added resolveRefs() function that inlines $ref/$defs before Zod conversion (Pydantic v2's model_json_schema() generates these) 2. Changed z.string().refine() → z.enum() so enum constraints are expressed in the schema sent to the LLM, not just validated post-hoc # test plan <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Fixes missing nested `$ref` when converting Pydantic v2 `.model_json_schema()` to Zod in `@browserbasehq/stagehand-server-v3`, and emits proper `z.enum()` types so LLM-facing schemas are accurate. Adds a `test:unit` script, moves tests to `tests/`, and updates the runner to scan directories or single files. - **Bug Fixes** - Resolves `$ref` from `$defs` before conversion via `resolveRefs()` with a cycle guard, handling refs in objects, arrays, `anyOf`/`oneOf`/`allOf`, and root-level refs (unknown refs fall back safely). 
- Replaces `z.string().refine()` with `z.enum()` so enum values are surfaced in generated schemas and enforced. <sup>Written for commit ae9e2e5. Summary will update on new commits. <a href="https://cubic.dev/pr/browserbase/stagehand/pull/1873">Review in cubic</a></sup> <!-- End of auto-generated description by cubic. -->
1 parent f3fe7ce commit a98801a

17 files changed

Lines changed: 890 additions & 12 deletions

.changeset/tough-buttons-return.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand-server-v3": patch
3+
---
4+
5+
Fix schema parsing bug that dropped nested `$ref` references in schemas generated by Pydantic `.model_json_schema()`

packages/server-v3/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"test:server": "tsx scripts/test-server.ts",
1818
"test:integration": "pnpm run test:server -- packages/server-v3/dist/tests/integration",
1919
"test:integration:local": "STAGEHAND_SERVER_TARGET=local pnpm run test:server -- packages/server-v3/dist/tests/integration",
20+
"test:unit": "pnpm run test:server -- packages/server-v3/dist/tests/unit",
2021
"test:integration:sea": "STAGEHAND_SERVER_TARGET=sea pnpm run test:server -- packages/server-v3/dist/tests/integration",
2122
"gen:openapi": "tsx scripts/gen-openapi.ts"
2223
},

packages/server-v3/scripts/test-server.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ const writeCtrfFromJunit = (junitPath: string, tool: string) => {
9393
}
9494
};
9595

96-
const sourceTestsDir = `${repoRoot}/packages/server-v3/test`;
96+
const sourceTestsDir = `${repoRoot}/packages/server-v3/tests`;
9797
const sourceUnitDir = `${sourceTestsDir}/unit`;
9898
const sourceIntegrationDir = `${sourceTestsDir}/integration`;
9999
const unitDir = `${repoRoot}/packages/server-v3/dist/tests/unit`;
@@ -224,7 +224,12 @@ const nodeOptions = [process.env.NODE_OPTIONS, baseNodeOptions]
224224

225225
const allPaths =
226226
paths.length > 0
227-
? paths.map(resolveRepoRelative)
227+
? paths.flatMap((p) => {
228+
const abs = resolveRepoRelative(p);
229+
return fs.existsSync(abs) && fs.statSync(abs).isDirectory()
230+
? collectFiles(abs, ".test.js")
231+
: [abs];
232+
})
228233
: [
229234
...collectFiles(unitDir, ".test.js"),
230235
...collectFiles(integrationDir, ".test.js"),

packages/server-v3/src/lib/utils.ts

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,62 @@ interface JSONSchema {
1818
anyOf?: JSONSchema[];
1919
oneOf?: JSONSchema[];
2020
allOf?: JSONSchema[];
21+
$defs?: Record<string, JSONSchema>;
22+
$ref?: string;
23+
}
24+
25+
/**
 * Resolves all $ref pointers in a JSON Schema against its $defs, returning
 * a self-contained schema with no $ref or $defs remaining.
 *
 * Only local pointers of the form `#/$defs/<name>` are understood. A pointer
 * that is malformed, names a definition that does not exist, or would re-enter
 * a definition that is already being expanded (a cycle) contributes nothing.
 *
 * Keywords written next to a `$ref` (for example `description` or `default`)
 * are kept and take precedence over the same keyword in the referenced
 * definition, so per-field annotations are not lost when the ref is inlined.
 *
 * @param schema - The schema (or sub-schema) to resolve. It is not mutated.
 * @param defs - The `$defs` table that `$ref` pointers are looked up in.
 * @param seen - Names of definitions currently being expanded on this path;
 *   used as the cycle guard. It is not mutated.
 * @returns A copy of `schema` with every resolvable `$ref` inlined.
 */
function resolveRefs(
  schema: JSONSchema,
  defs: Record<string, JSONSchema>,
  seen: Set<string> = new Set(),
): JSONSchema {
  if (schema.$ref) {
    // Resolve whatever sits beside the $ref on its own; with no siblings this
    // yields an empty schema, which is also the fallback for unusable refs.
    const siblings: JSONSchema = { ...schema };
    delete siblings.$ref;
    const overrides = resolveRefs(siblings, defs, new Set(seen));

    const name = /^#\/\$defs\/(.+)$/.exec(schema.$ref)?.[1];
    // Own-property check so names such as "constructor" or "toString" never
    // resolve to members inherited from Object.prototype.
    const target =
      name !== undefined && Object.prototype.hasOwnProperty.call(defs, name)
        ? defs[name]
        : undefined;
    if (name === undefined || !target || seen.has(name)) {
      return overrides;
    }

    // Track the definition on a fresh set so the caller's set is untouched.
    const inlined = resolveRefs(target, defs, new Set(seen).add(name));
    return { ...inlined, ...overrides };
  }

  // Each branch gets its own copy of the guard set: the same definition may
  // legitimately appear in sibling branches without that being a cycle.
  const descend = (sub: JSONSchema): JSONSchema =>
    resolveRefs(sub, defs, new Set(seen));

  const resolved: JSONSchema = { ...schema };

  if (resolved.properties) {
    const props: Record<string, JSONSchema> = {};
    for (const [key, val] of Object.entries(resolved.properties)) {
      props[key] = descend(val);
    }
    resolved.properties = props;
  }
  if (resolved.items) {
    resolved.items = descend(resolved.items);
  }
  if (resolved.anyOf) {
    resolved.anyOf = resolved.anyOf.map(descend);
  }
  if (resolved.oneOf) {
    resolved.oneOf = resolved.oneOf.map(descend);
  }
  if (resolved.allOf) {
    resolved.allOf = resolved.allOf.map(descend);
  }

  delete resolved.$defs;
  delete resolved.$ref;
  return resolved;
}
2278

2379
/**
@@ -26,10 +82,19 @@ interface JSONSchema {
2682
* @returns A Zod schema equivalent to the input JSON Schema
2783
*/
2884
export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
  // Schemas that carry neither a $defs table nor a root-level $ref are
  // converted as they are.
  const needsResolution = Boolean(schema.$defs || schema.$ref);
  if (!needsResolution) {
    return _jsonSchemaToZod(schema);
  }

  // Inline every $ref against the root $defs first, so the converter only
  // ever sees a self-contained schema.
  const inlined = resolveRefs(schema, schema.$defs ?? {});
  return _jsonSchemaToZod(inlined);
}
92+
93+
function _jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
2994
if (Array.isArray(schema.type)) {
3095
const subSchemas = schema.type.map((singleType) => {
3196
const sub = { ...schema, type: singleType };
32-
return jsonSchemaToZod(sub);
97+
return _jsonSchemaToZod(sub);
3398
});
3499

35100
if (subSchemas.length === 0) {
@@ -45,7 +110,7 @@ export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
45110
}
46111

47112
if (schema.anyOf && Array.isArray(schema.anyOf)) {
48-
const subSchemas = schema.anyOf.map((sub) => jsonSchemaToZod(sub));
113+
const subSchemas = schema.anyOf.map((sub) => _jsonSchemaToZod(sub));
49114
if (subSchemas.length === 0) {
50115
return z.any();
51116
} else if (subSchemas.length === 1) {
@@ -59,7 +124,7 @@ export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
59124
}
60125

61126
if (schema.oneOf && Array.isArray(schema.oneOf)) {
62-
const subSchemas = schema.oneOf.map((sub) => jsonSchemaToZod(sub));
127+
const subSchemas = schema.oneOf.map((sub) => _jsonSchemaToZod(sub));
63128
if (subSchemas.length === 0) {
64129
return z.any();
65130
} else if (subSchemas.length === 1) {
@@ -84,7 +149,7 @@ export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
84149
StatusCodes.BAD_REQUEST,
85150
);
86151
}
87-
shape[key] = jsonSchemaToZod(subSchema);
152+
shape[key] = _jsonSchemaToZod(subSchema);
88153
}
89154
let zodObject = z.object(shape);
90155

@@ -109,7 +174,7 @@ export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
109174

110175
case "array":
111176
if (schema.items) {
112-
let zodArray = z.array(jsonSchemaToZod(schema.items));
177+
let zodArray = z.array(_jsonSchemaToZod(schema.items));
113178
if (schema.description) {
114179
zodArray = zodArray.describe(schema.description);
115180
}
@@ -118,8 +183,8 @@ export function jsonSchemaToZod(schema: JSONSchema): ZodTypeAny {
118183
return z.array(z.any());
119184

120185
case "string": {
121-
if (schema.enum) {
122-
return z.string().refine((val) => schema.enum?.includes(val) ?? false);
186+
if (schema.enum && schema.enum.length > 0) {
187+
return z.enum(schema.enum as [string, ...string[]]);
123188
}
124189
let zodString = z.string();
125190

packages/server-v3/test/integration/api-server-cache.test.ts renamed to packages/server-v3/tests/integration/api-server-cache.test.ts

File renamed without changes.
File renamed without changes.
File renamed without changes.

packages/server-v3/test/integration/v3/agentExecute.test.ts renamed to packages/server-v3/tests/integration/v3/agentExecute.test.ts

File renamed without changes.
File renamed without changes.

packages/server-v3/test/integration/v3/extract.test.ts renamed to packages/server-v3/tests/integration/v3/extract.test.ts

File renamed without changes.

0 commit comments

Comments
 (0)