|
42 | 42 |
|
43 | 43 | import hashlib |
44 | 44 | import json |
| 45 | +import re |
45 | 46 | import uuid |
46 | 47 | from collections.abc import Sequence |
47 | 48 | from typing import Any, Literal, cast |
@@ -237,9 +238,8 @@ async def complete( |
237 | 238 | # On the fallback path, the wire-side messages list is an |
238 | 239 | # augmented COPY of the caller's messages — original messages |
239 | 240 | # MUST NOT be mutated. _augment_messages_with_schema_directive |
240 | | - # builds a fresh list; the original instances are reused |
241 | | - # (immutable Pydantic models) so the caller's sequence is |
242 | | - # untouched. |
| 241 | + # builds a fresh list and does not modify the reused Message |
| 242 | + # instances in place; the caller's sequence is untouched. |
243 | 243 | wire_messages: Sequence[Message] = messages |
244 | 244 | if schema_dict is not None and self._force_prompt_augmentation_fallback: |
245 | 245 | wire_messages = _augment_messages_with_schema_directive(messages, schema_dict) |
@@ -461,24 +461,38 @@ def _normalize_response_schema( |
461 | 461 | if response_schema is None: |
462 | 462 | return None, None |
463 | 463 | if isinstance(response_schema, type): |
464 | | - # Per the Protocol signature, the only class form accepted is |
465 | | - # a BaseModel subclass; non-BaseModel classes will AttributeError |
466 | | - # on model_json_schema below. |
| 464 | + # Defensive runtime check: the Protocol signature accepts |
| 465 | + # type[BaseModel], but Python doesn't enforce that at the call |
| 466 | + # boundary. Reject non-BaseModel classes with a canonical error |
| 467 | + # instead of letting AttributeError leak from model_json_schema. |
| 468 | + if not issubclass(response_schema, BaseModel): # pyright: ignore[reportUnnecessaryIsInstance] |
| 469 | + raise ProviderInvalidRequest( |
| 470 | + f"response_schema: class form MUST be a Pydantic BaseModel subclass " |
| 471 | + f"(got {response_schema.__name__})" |
| 472 | + ) |
467 | 473 | schema_dict = response_schema.model_json_schema() |
468 | 474 | validate_response_schema(schema_dict) |
469 | 475 | return schema_dict, response_schema |
470 | 476 | validate_response_schema(response_schema) |
471 | 477 | return response_schema, None |
472 | 478 |
|
473 | 479 |
|
| 480 | +# OpenAI's response_format.json_schema.name field is restricted to |
| 481 | +# letters, digits, underscores, and dashes with a max length of 64 |
| 482 | +# characters. A JSON Schema title can be any string ("Person Record", |
| 483 | +# "User's Profile", etc.), so verbatim use risks a 400 on the wire. |
| 484 | +_OPENAI_SCHEMA_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$") |
| 485 | + |
| 486 | + |
474 | 487 | # Derive a stable identifier for the JSON Schema for OpenAI's |
475 | 488 | # response_format.json_schema.name field. Uses the schema's `title` |
476 | | -# when present (and a valid identifier-shaped string); otherwise |
477 | | -# derives a deterministic short hash so the same schema always |
478 | | -# produces the same name across calls. |
| 489 | +# when it satisfies the provider's name constraints; otherwise derives |
| 490 | +# a deterministic short hash so the same schema always produces the |
| 491 | +# same name across calls. Sanitizing-in-place would silently mutate |
| 492 | +# user intent; the hash is a more honest fallback. |
479 | 493 | def _derive_schema_name(schema: dict[str, Any]) -> str: |
480 | 494 | title = schema.get("title") |
481 | | - if isinstance(title, str) and title: |
| 495 | + if isinstance(title, str) and _OPENAI_SCHEMA_NAME_RE.match(title): |
482 | 496 | return title |
483 | 497 | canonical = json.dumps(schema, sort_keys=True).encode("utf-8") |
484 | 498 | return f"oa_schema_{hashlib.sha256(canonical).hexdigest()[:16]}" |
@@ -546,9 +560,11 @@ def _parse_and_validate( |
546 | 560 | # Construct a fresh message list with a schema directive added. The |
547 | 561 | # directive is appended to the existing system message's content when |
548 | 562 | # present, or prepended as a new system message otherwise. The caller's |
549 | | -# original list is never mutated; Message instances are reused because |
550 | | -# they are immutable Pydantic models. The serialized schema appears |
551 | | -# verbatim in the directive so callers that need to verify the directive |
| 563 | +# original list is never mutated; Message instances are reused, and |
| 564 | +# this helper does not modify them in place (the message models are |
| 565 | +# not frozen Pydantic models, so the safety is structural, not |
| 566 | +# enforced by the type). The serialized schema appears verbatim in |
| 567 | +# the directive so callers that need to verify the directive |
552 | 568 | # references the schema (conformance harnesses, observability spans) |
553 | 569 | # can substring-match the canonical JSON form. |
554 | 570 | def _augment_messages_with_schema_directive( |
@@ -585,7 +601,10 @@ def _message_to_wire(msg: Message) -> dict[str, Any]: |
585 | 601 | "type": "function", |
586 | 602 | "function": { |
587 | 603 | "name": tc.name, |
588 | | - "arguments": json.dumps(tc.arguments or {}), |
| 604 | + # Canonical compact form (no inter-token spaces). Matches |
| 605 | + # the spec's wire-mapping fixture (005, cases shape) and |
| 606 | + # the form OpenAI itself emits. |
| 607 | + "arguments": json.dumps(tc.arguments or {}, separators=(",", ":")), |
589 | 608 | }, |
590 | 609 | } |
591 | 610 | for tc in msg.tool_calls |
|
0 commit comments