4646
4747# Default values when RH Identity auth is not configured
4848AUTH_DISABLED = "auth_disabled"
49+ # Keep this tuple centralized so infer_endpoint can catch all expected backend
50+ # failures in one place while preserving a single telemetry/error-mapping path.
51+ _INFER_HANDLED_EXCEPTIONS = (
52+ RuntimeError ,
53+ APIConnectionError ,
54+ RateLimitError ,
55+ APIStatusError ,
56+ OpenAIAPIStatusError ,
57+ )
4958
5059
5160def _get_rh_identity_context (request : Request ) -> tuple [str , str ]:
@@ -291,7 +300,8 @@ def _map_inference_error_to_http_exception(
291300 errors.
292301 """
293302 if isinstance (error , RuntimeError ):
294- if "context_length" in str (error ).lower ():
303+ error_message = str (error ).lower ()
304+ if "context_length" in error_message or "context length" in error_message :
295305 logger .error ("Prompt too long for request %s: %s" , request_id , error )
296306 error_response = PromptTooLongResponse (model = model_id )
297307 return HTTPException (** error_response .model_dump ())
@@ -361,7 +371,7 @@ async def infer_endpoint(
361371 input_source = infer_request .get_input_source ()
362372 instructions = _build_instructions (infer_request .context .systeminfo )
363373 model_id = _get_default_model_id ()
364- mcp_tools = await get_mcp_tools (request_headers = request .headers )
374+ mcp_tools : list [ Any ] = await get_mcp_tools (request_headers = request .headers )
365375 logger .debug (
366376 "Request %s: Combined input source length: %d" , request_id , len (input_source )
367377 )
@@ -375,13 +385,7 @@ async def infer_endpoint(
375385 model_id = model_id ,
376386 )
377387 inference_time = time .monotonic () - start_time
378- except (
379- RuntimeError ,
380- APIConnectionError ,
381- RateLimitError ,
382- APIStatusError ,
383- OpenAIAPIStatusError ,
384- ) as error :
388+ except _INFER_HANDLED_EXCEPTIONS as error :
385389 _record_inference_failure (
386390 background_tasks ,
387391 infer_request ,
0 commit comments