1313import re
1414from dataclasses import dataclass
1515from typing import Optional , Dict , Any , List
16- from pydantic import BaseModel , Field
16+ from pydantic import BaseModel , Field , ValidationError
1717import litellm
1818
1919logger = logging .getLogger (__name__ )
2020DEFAULT_LLM_TIMEOUT_SECONDS = 60
2121DEFAULT_LLM_RETRIES = 2
22+ DEFAULT_STRUCTURED_OUTPUT_RETRIES = 1
2223
2324# Suppress noisy logging from litellm/openai unless error/warning
2425litellm .set_verbose = False
@@ -120,13 +121,16 @@ class LLMConfig:
120121 send_site_info : bool = True
121122 timeout_seconds : int = DEFAULT_LLM_TIMEOUT_SECONDS
122123 num_retries : int = DEFAULT_LLM_RETRIES
124+ structured_output_retries : int = DEFAULT_STRUCTURED_OUTPUT_RETRIES
123125
def __post_init__(self):
    """Check the configuration values once the instance is initialised.

    Raises:
        ValueError: If ``model`` is empty, or if ``num_retries`` or
            ``structured_output_retries`` is below zero.
    """
    if not self.model:
        raise ValueError("Model name is required")

    # Attribute name -> message; walked in order so the first offending
    # field is the one reported.
    non_negative_rules = (
        ("num_retries", "Number of retries cannot be negative"),
        (
            "structured_output_retries",
            "Number of structured output retries cannot be negative",
        ),
    )
    for attribute, message in non_negative_rules:
        if getattr(self, attribute) < 0:
            raise ValueError(message)
130134
131135
132136class LLMClient :
@@ -190,6 +194,31 @@ def _get_message_value(message: Any, key: str) -> str:
190194 value = getattr (message , key , None )
191195 return value if isinstance (value , str ) else ""
192196
def _completion_content(self, api_params: Dict[str, Any]) -> str:
    """Call ``litellm.completion`` and return the first choice's text.

    Args:
        api_params: Keyword arguments passed unchanged to
            ``litellm.completion``.

    Returns:
        The stripped ``content`` of the first choice's message. If that is
        empty, the stripped ``reasoning_content`` is returned instead; the
        result is an empty string when both are empty.
    """
    first_message = litellm.completion(**api_params).choices[0].message
    text = self._get_message_value(first_message, "content").strip()
    reasoning_text = self._get_message_value(
        first_message, "reasoning_content"
    ).strip()

    if text:
        return text

    if reasoning_text:
        # Some responses carry their only text in reasoning_content.
        logger.info(
            "Content is empty but reasoning_content is present. "
            "Falling back to reasoning_content for structured output parsing."
        )
        return reasoning_text

    return text
214+
def _parse_structured_response(
    self, content: str, response_model: type[BaseModel]
) -> BaseModel:
    """Turn raw model text into a validated ``response_model`` instance.

    Args:
        content: Text returned by the LLM.
        response_model: Pydantic model class the text must validate against.

    Returns:
        The instance produced by ``response_model.model_validate_json``.

    Raises:
        pydantic.ValidationError: If the normalized payload does not
            validate against ``response_model``.
    """
    # The payload is passed through _coerce_structured_payload before
    # validation; presumably this cleans up the raw text into plain JSON
    # (helper not shown here -- confirm).
    normalized_payload = self._coerce_structured_payload(content, response_model)
    validated = response_model.model_validate_json(normalized_payload)
    return validated
221+
193222 def chat_completion (
194223 self ,
195224 messages : list ,
@@ -200,6 +229,12 @@ def chat_completion(
200229 """
201230 Send a chat completion request to the LLM API using LiteLLM.
202231 """
232+ structured_output_retries = kwargs .pop (
233+ "structured_output_retries" , self .config .structured_output_retries
234+ )
235+ if structured_output_retries < 0 :
236+ raise ValueError ("Number of structured output retries cannot be negative" )
237+
203238 # Build payload parameters
204239 api_params = {
205240 "model" : self .config .model ,
@@ -236,25 +271,23 @@ def chat_completion(
236271 )
237272
238273 try :
239- response = litellm .completion (** api_params )
240- message = response .choices [0 ].message
241- content = self ._get_message_value (message , "content" ).strip ()
242- reasoning_content = self ._get_message_value (
243- message , "reasoning_content"
244- ).strip ()
245-
246- if not content and reasoning_content :
247- logger .info (
248- "Content is empty but reasoning_content is present. "
249- "Falling back to reasoning_content for structured output parsing."
250- )
251- content = reasoning_content
252-
253- if response_model :
254- # Natively parse and validate the JSON string into the Pydantic model
255- content = self ._coerce_structured_payload (content , response_model )
256- return response_model .model_validate_json (content )
257- return content
274+ if not response_model :
275+ return self ._completion_content (api_params )
276+
277+ validation_attempts = structured_output_retries + 1
278+ for attempt in range (1 , validation_attempts + 1 ):
279+ content = self ._completion_content (api_params )
280+ try :
281+ return self ._parse_structured_response (content , response_model )
282+ except ValidationError as e :
283+ if attempt >= validation_attempts :
284+ raise
285+ logger .warning (
286+ "LLM returned invalid structured output on attempt "
287+ f"{ attempt } /{ validation_attempts } ; retrying. Error: { e } "
288+ )
289+
290+ raise RuntimeError ("Structured output retry loop exited unexpectedly" )
258291
259292 except Exception as e :
260293 logger .error (f"Error during LLM API call: { e } " )
0 commit comments