Skip to content

Commit 0c8bdb9

Browse files
committed
refactor(rlsapi): tighten infer exception typing
Signed-off-by: Major Hayden <major@redhat.com>
1 parent 8500949 commit 0c8bdb9

1 file changed

Lines changed: 13 additions & 9 deletions

File tree

src/app/endpoints/rlsapi_v1.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,15 @@
4646

4747
# Default values when RH Identity auth is not configured
4848
AUTH_DISABLED = "auth_disabled"
49+
# Keep this tuple centralized so infer_endpoint can catch all expected backend
50+
# failures in one place while preserving a single telemetry/error-mapping path.
51+
_INFER_HANDLED_EXCEPTIONS = (
52+
RuntimeError,
53+
APIConnectionError,
54+
RateLimitError,
55+
APIStatusError,
56+
OpenAIAPIStatusError,
57+
)
4958

5059

5160
def _get_rh_identity_context(request: Request) -> tuple[str, str]:
@@ -291,7 +300,8 @@ def _map_inference_error_to_http_exception(
291300
errors.
292301
"""
293302
if isinstance(error, RuntimeError):
294-
if "context_length" in str(error).lower():
303+
error_message = str(error).lower()
304+
if "context_length" in error_message or "context length" in error_message:
295305
logger.error("Prompt too long for request %s: %s", request_id, error)
296306
error_response = PromptTooLongResponse(model=model_id)
297307
return HTTPException(**error_response.model_dump())
@@ -361,7 +371,7 @@ async def infer_endpoint(
361371
input_source = infer_request.get_input_source()
362372
instructions = _build_instructions(infer_request.context.systeminfo)
363373
model_id = _get_default_model_id()
364-
mcp_tools = await get_mcp_tools(request_headers=request.headers)
374+
mcp_tools: list[Any] = await get_mcp_tools(request_headers=request.headers)
365375
logger.debug(
366376
"Request %s: Combined input source length: %d", request_id, len(input_source)
367377
)
@@ -375,13 +385,7 @@ async def infer_endpoint(
375385
model_id=model_id,
376386
)
377387
inference_time = time.monotonic() - start_time
378-
except (
379-
RuntimeError,
380-
APIConnectionError,
381-
RateLimitError,
382-
APIStatusError,
383-
OpenAIAPIStatusError,
384-
) as error:
388+
except _INFER_HANDLED_EXCEPTIONS as error:
385389
_record_inference_failure(
386390
background_tasks,
387391
infer_request,

0 commit comments

Comments
 (0)