Merge pull request #364 from CyberAgentAILab/fix/assert-genai-edit-square-output

proboscis · web-flow · commit a3dc83ba8554 · 2025-11-04T21:02:53.000+09:00
Add OpenRouter GPT-5 structured output e2e tests and bump to 0.3.1-beta
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 0.3.1-beta (2025-11-04)
+
+### Added
+- Added end-to-end OpenRouter GPT-5 structured output tests covering both JSON schema metadata checks and BaseModel response_format usage.
+
 ## 0.3.0-beta (2025-06-25)
 
 ### Breaking Changes
diff --git a/packages/openai_support/tests/test_openrouter_gpt5_structured_basemodel_e2e.py b/packages/openai_support/tests/test_openrouter_gpt5_structured_basemodel_e2e.py
@@ -0,0 +1,121 @@
+"""End-to-end test: GPT-5 structured output using a Pydantic BaseModel schema."""
+
+import json
+
+import pytest
+from httpx import AsyncClient
+from pinjected import design, injected
+from pinjected.test import injected_pytest
+from pydantic import BaseModel
+
+from packages.openai_support.conftest import apikey_skip_if_needed
+from pinjected_openai.openrouter.util import build_openrouter_response_format
+
+
+pytestmark = pytest.mark.e2e
+apikey_skip_if_needed()
+
+
+class SimpleResponse(BaseModel):
+    """Minimal schema for GPT-5 structured output checks."""
+
+    answer: str
+    confidence: float
+
+
+def _raw_error_code(error):
+    """Return ``error["metadata"]["raw"]["code"]``, or None if that path is absent.
+
+    Each level is type-checked so a null ``metadata`` or a non-mapping ``raw``
+    value cannot raise ``AttributeError`` and mask the error being classified.
+    """
+    if not isinstance(error, dict):
+        return None
+    metadata = error.get("metadata")
+    raw = metadata.get("raw") if isinstance(metadata, dict) else None
+    return raw.get("code") if isinstance(raw, dict) else None
+
+
+@pytest.mark.asyncio
+@injected_pytest(design(openrouter_api_key=injected("openrouter_api_key__personal")))
+async def test_openrouter_gpt5_structured_with_basemodel(
+    openrouter_api_key: str,
+    logger,
+    /,
+) -> None:
+    """Send a BaseModel-derived response_format to OpenRouter GPT-5.
+
+    Posts one chat completion whose ``response_format`` is built from
+    ``SimpleResponse`` and validates the returned content against that model.
+    Error payloads at the top level or on the first choice are reported with
+    ``pytest.fail``, using a dedicated message for ``insufficient_quota``.
+    """
+    headers = {
+        "Authorization": f"Bearer {openrouter_api_key}",
+        "Content-Type": "application/json",
+    }
+
+    # Build response_format from the BaseModel using the same helper as production code
+    response_format = build_openrouter_response_format(SimpleResponse)
+
+    payload = {
+        "model": "openai/gpt-5-nano",
+        "messages": [
+            {
+                "role": "user",
+                "content": (
+                    "Return the capital city of France as JSON with a confidence score."
+                ),
+            }
+        ],
+        "max_completion_tokens": 512,
+        "temperature": 0,
+        "response_format": response_format,
+    }
+
+    async with AsyncClient(timeout=60.0) as client:
+        completion_resp = await client.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            json=payload,
+        )
+        completion_resp.raise_for_status()
+        body = completion_resp.json()
+        logger.info(
+            "OpenRouter GPT-5 BaseModel response metadata: %s"
+            % json.dumps(
+                {
+                    "id": body.get("id"),
+                    "provider": body.get("provider"),
+                    "usage": body.get("usage"),
+                }
+            )
+        )
+
+    if "error" in body:
+        error = body["error"]
+        if _raw_error_code(error) == "insufficient_quota":
+            pytest.fail("GPT-5 structured BaseModel request failed: insufficient quota")
+        pytest.fail(f"OpenRouter returned error: {json.dumps(error, indent=2)}")
+
+    choices = body.get("choices", [])
+    if not choices:
+        pytest.fail("No choices returned from GPT-5 structured BaseModel call")
+
+    choice = choices[0]
+
+    if "error" in choice:
+        if _raw_error_code(choice["error"]) == "insufficient_quota":
+            pytest.fail("GPT-5 structured BaseModel choice failed: insufficient quota")
+        pytest.fail(f"Error in choice payload: {json.dumps(choice, indent=2)}")
+
+    # ``message`` / ``content`` may be present but null; ``.get(key, default)``
+    # would return that None, so coalesce explicitly before calling ``.strip()``.
+    message = choice.get("message") or {}
+    content = message.get("content") or ""
+    if not content.strip():
+        pytest.fail("GPT-5 returned empty content for structured BaseModel output")
+
+    parsed = SimpleResponse.model_validate_json(content)
+    assert "paris" in parsed.answer.lower(), parsed
+    assert parsed.confidence >= 0, parsed
diff --git a/packages/openai_support/tests/test_openrouter_gpt5_structured_e2e.py b/packages/openai_support/tests/test_openrouter_gpt5_structured_e2e.py
@@ -0,0 +1,155 @@
+"""End-to-end verification that GPT-5 models accept structured output via OpenRouter."""
+
+import json
+
+import pytest
+from httpx._client import (
+    AsyncClient as RealAsyncClient,
+)  # bypass test-time httpx patching
+from pinjected import design, injected
+from pinjected.test import injected_pytest
+from pydantic import BaseModel
+
+from packages.openai_support.conftest import apikey_skip_if_needed
+
+
+pytestmark = pytest.mark.e2e
+apikey_skip_if_needed()
+
+
+class SimpleResponse(BaseModel):
+    """Minimal schema to validate structured JSON output."""
+
+    answer: str
+    confidence: float
+
+
+def _raw_error_code(error):
+    """Return ``error["metadata"]["raw"]["code"]``, or None if that path is absent.
+
+    Each level is type-checked so a null ``metadata`` or a non-mapping ``raw``
+    value cannot raise ``AttributeError`` and mask the error being classified.
+    """
+    if not isinstance(error, dict):
+        return None
+    metadata = error.get("metadata")
+    raw = metadata.get("raw") if isinstance(metadata, dict) else None
+    return raw.get("code") if isinstance(raw, dict) else None
+
+
+@pytest.mark.asyncio
+@injected_pytest(design(openrouter_api_key=injected("openrouter_api_key__company")))
+async def test_openrouter_gpt5_structured_response(
+    openrouter_api_key: str,
+    logger,
+    /,
+) -> None:
+    """Ensure GPT-5 models report and honour structured output support.
+
+    First asserts that every ``openai/gpt-5*`` entry from the ``/models``
+    endpoint lists both ``response_format`` and ``structured_outputs``, then
+    sends a JSON-schema request to ``openai/gpt-5-nano`` and validates the
+    returned content against ``SimpleResponse``.
+    """
+    headers = {
+        "Authorization": f"Bearer {openrouter_api_key}",
+        "Content-Type": "application/json",
+    }
+
+    async with RealAsyncClient(timeout=60.0) as client:
+        # Check capability metadata first
+        models_resp = await client.get("https://openrouter.ai/api/v1/models")
+        models_resp.raise_for_status()
+        models_data = models_resp.json().get("data", [])
+
+        gpt5_models = {
+            m["id"]: m.get("supported_parameters", [])
+            for m in models_data
+            if m.get("id", "").startswith("openai/gpt-5")
+        }
+        assert gpt5_models, "No GPT-5 models returned from OpenRouter /models endpoint"
+
+        for model_id, params in gpt5_models.items():
+            assert "response_format" in params, (
+                f"{model_id} missing response_format support flag"
+            )
+            assert "structured_outputs" in params, (
+                f"{model_id} missing structured_outputs support flag"
+            )
+
+        # Run a live structured-output request against the smallest GPT-5 tier
+        payload = {
+            "model": "openai/gpt-5-nano",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": (
+                        "Return the capital city of France and your confidence as JSON."
+                        " Ensure the answer field contains 'Paris'."
+                    ),
+                }
+            ],
+            "max_completion_tokens": 512,
+            "temperature": 0,
+            "response_format": {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "SimpleResponse",
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "answer": {"type": "string"},
+                            "confidence": {"type": "number"},
+                        },
+                        "required": ["answer", "confidence"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+        }
+
+        completion_resp = await client.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            json=payload,
+        )
+        completion_resp.raise_for_status()
+        body = completion_resp.json()
+        logger.info(
+            "OpenRouter GPT-5 structured response metadata: %s"
+            % json.dumps(
+                {
+                    "id": body.get("id"),
+                    "provider": body.get("provider"),
+                    "usage": body.get("usage"),
+                }
+            )
+        )
+
+        # Handle top-level errors
+        if "error" in body:
+            pytest.fail(f"OpenRouter returned error: {body['error']}")
+
+        choices = body.get("choices", [])
+        assert choices, (
+            f"No choices in OpenRouter response: {json.dumps(body, indent=2)}"
+        )
+        choice = choices[0]
+
+        # An error on the choice fails the test; insufficient quota gets its own
+        # message so the environmental cause is obvious from the report.
+        if "error" in choice:
+            if _raw_error_code(choice["error"]) == "insufficient_quota":
+                pytest.fail("OpenAI quota exhausted for GPT-5 structured output test")
+            pytest.fail(f"Error in choice payload: {json.dumps(choice, indent=2)}")
+
+        # ``message`` / ``content`` may be present but null; ``.get(key, default)``
+        # would return that None, so coalesce explicitly before calling ``.strip()``.
+        message = choice.get("message") or {}
+        content = message.get("content") or ""
+        if not content.strip():
+            pytest.fail("GPT-5 returned empty content for structured output")
+
+        parsed = SimpleResponse.model_validate_json(content)
+        assert "paris" in parsed.answer.lower(), parsed
+        assert parsed.confidence >= 0, parsed
diff --git a/pinjected/__init__.py b/pinjected/__init__.py
@@ -24,7 +24,7 @@
 
 # I want to use IProxy() as constructor. and also type check. what can i do?
 
-__version__ = "0.3.0-beta"
+__version__ = "0.3.1-beta"
 
 __all__ = [
     "AsyncResolver",
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "pinjected"
-version = "0.3.0-beta"
+version = "0.3.1-beta"
 description = "Immutable Dependency Injection for Python."
 authors = [
     { name = "proboscis", email = "nameissoap@gmail.com" }