feat: retry invalid structured output in LLMClient and validate structured_output_retries in LLMConfig

duartebarbosadev · duartebarbosadev · commit ad0bf3fda90c · 2026-06-02T22:58:51.000+10:00
diff --git a/app/llm_provider.py b/app/llm_provider.py
@@ -13,12 +13,13 @@
 import re
 from dataclasses import dataclass
 from typing import Optional, Dict, Any, List
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ValidationError
 import litellm
 
 logger = logging.getLogger(__name__)
 DEFAULT_LLM_TIMEOUT_SECONDS = 60
 DEFAULT_LLM_RETRIES = 2
+DEFAULT_STRUCTURED_OUTPUT_RETRIES = 1
 
 # Suppress noisy logging from litellm/openai unless error/warning
 litellm.set_verbose = False
@@ -120,13 +121,16 @@ class LLMConfig:
     send_site_info: bool = True
     timeout_seconds: int = DEFAULT_LLM_TIMEOUT_SECONDS
     num_retries: int = DEFAULT_LLM_RETRIES
+    structured_output_retries: int = DEFAULT_STRUCTURED_OUTPUT_RETRIES
 
     def __post_init__(self):
         """Validate configuration after initialization."""
         if not self.model:
             raise ValueError("Model name is required")
         if self.num_retries < 0:
             raise ValueError("Number of retries cannot be negative")
+        if self.structured_output_retries < 0:
+            raise ValueError("Number of structured output retries cannot be negative")
 
 
 class LLMClient:
@@ -190,6 +194,31 @@ def _get_message_value(message: Any, key: str) -> str:
             value = getattr(message, key, None)
         return value if isinstance(value, str) else ""
 
+    def _completion_content(self, api_params: Dict[str, Any]) -> str:
+        """Run LiteLLM completion and return the model text content."""
+        response = litellm.completion(**api_params)
+        message = response.choices[0].message
+        content = self._get_message_value(message, "content").strip()
+        reasoning_content = self._get_message_value(
+            message, "reasoning_content"
+        ).strip()
+
+        if not content and reasoning_content:
+            logger.info(
+                "Content is empty but reasoning_content is present. "
+                "Falling back to reasoning_content for structured output parsing."
+            )
+            content = reasoning_content
+
+        return content
+
+    def _parse_structured_response(
+        self, content: str, response_model: type[BaseModel]
+    ) -> BaseModel:
+        """Normalize and validate structured model output."""
+        content = self._coerce_structured_payload(content, response_model)
+        return response_model.model_validate_json(content)
+
     def chat_completion(
         self,
         messages: list,
@@ -200,6 +229,12 @@ def chat_completion(
         """
         Send a chat completion request to the LLM API using LiteLLM.
         """
+        structured_output_retries = kwargs.pop(
+            "structured_output_retries", self.config.structured_output_retries
+        )
+        if structured_output_retries < 0:
+            raise ValueError("Number of structured output retries cannot be negative")
+
         # Build payload parameters
         api_params = {
             "model": self.config.model,
@@ -236,25 +271,23 @@ def chat_completion(
         )
 
         try:
-            response = litellm.completion(**api_params)
-            message = response.choices[0].message
-            content = self._get_message_value(message, "content").strip()
-            reasoning_content = self._get_message_value(
-                message, "reasoning_content"
-            ).strip()
-
-            if not content and reasoning_content:
-                logger.info(
-                    "Content is empty but reasoning_content is present. "
-                    "Falling back to reasoning_content for structured output parsing."
-                )
-                content = reasoning_content
-
-            if response_model:
-                # Natively parse and validate the JSON string into the Pydantic model
-                content = self._coerce_structured_payload(content, response_model)
-                return response_model.model_validate_json(content)
-            return content
+            if not response_model:
+                return self._completion_content(api_params)
+
+            validation_attempts = structured_output_retries + 1
+            for attempt in range(1, validation_attempts + 1):
+                content = self._completion_content(api_params)
+                try:
+                    return self._parse_structured_response(content, response_model)
+                except ValidationError as e:
+                    if attempt >= validation_attempts:
+                        raise
+                    logger.warning(
+                        "LLM returned invalid structured output on attempt "
+                        f"{attempt}/{validation_attempts}; retrying. Error: {e}"
+                    )
+
+            raise RuntimeError("Structured output retry loop exited unexpectedly")
 
         except Exception as e:
             logger.error(f"Error during LLM API call: {e}")
diff --git a/app/tests/test_translation.py b/app/tests/test_translation.py
@@ -697,6 +697,87 @@ def test_llm_config_rejects_negative_retries(self):
                 num_retries=-1,
             )
 
+    def test_llm_client_retries_invalid_structured_output(self):
+        """Malformed model JSON should trigger a fresh structured-output attempt."""
+
+        bad_response = SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(
+                        content='{\n  "translations":',
+                        reasoning_content=None,
+                    )
+                )
+            ]
+        )
+        good_response = SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(
+                        content='{"translations": [{"key": "hello", "translation": "Hola"}]}',
+                        reasoning_content=None,
+                    )
+                )
+            ]
+        )
+        llm_config = LLMConfig(provider="openrouter", model="openrouter/owl-alpha")
+
+        with patch(
+            "llm_provider.litellm.completion",
+            side_effect=[bad_response, good_response],
+        ) as mock_completion:
+            result = LLMClient(llm_config).chat_completion(
+                messages=[],
+                response_model=StringBatchTranslation,
+                temperature=0,
+            )
+
+        self.assertEqual(mock_completion.call_count, 2)
+        self.assertEqual(
+            [(item.key, item.translation) for item in result.translations],
+            [("hello", "Hola")],
+        )
+
+    def test_llm_client_allows_structured_output_retry_override(self):
+        """Callers can disable app-level structured output retries per request."""
+
+        bad_response = SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(
+                        content='{\n  "translations":',
+                        reasoning_content=None,
+                    )
+                )
+            ]
+        )
+        llm_config = LLMConfig(provider="openrouter", model="openrouter/owl-alpha")
+
+        with patch(
+            "llm_provider.litellm.completion", return_value=bad_response
+        ) as mock_completion:
+            with self.assertRaisesRegex(ValueError, "Invalid JSON"):
+                LLMClient(llm_config).chat_completion(
+                    messages=[],
+                    response_model=StringBatchTranslation,
+                    temperature=0,
+                    structured_output_retries=0,
+                )
+
+        self.assertEqual(mock_completion.call_count, 1)
+
+    def test_llm_config_rejects_negative_structured_output_retries(self):
+        """Structured output retry count must not be negative."""
+
+        with self.assertRaisesRegex(
+            ValueError, "Number of structured output retries cannot be negative"
+        ):
+            LLMConfig(
+                provider="openrouter",
+                model="openrouter/owl-alpha",
+                structured_output_retries=-1,
+            )
+
     def test_llm_client_accepts_dict_style_message(self):
         """LiteLLM responses can expose message data with dict-style access."""