@@ -126,6 +126,26 @@ def mock_api_connection_error_fixture(mocker: MockerFixture) -> None:
126126 )
127127
128128
@pytest.fixture(name="mock_runtime_error_context_length")
def mock_runtime_error_context_length_fixture(mocker: MockerFixture) -> None:
    """Make the mocked responses.create() raise a context_length RuntimeError.

    The error message contains the ``context_length`` marker, so tests using
    this fixture exercise the context-length-specific RuntimeError path.
    """
    context_length_error = RuntimeError("context_length exceeded maximum tokens")
    failing_create = mocker.AsyncMock(side_effect=context_length_error)
    _setup_responses_mock(mocker, failing_create)
138+
139+
@pytest.fixture(name="mock_runtime_error_other")
def mock_runtime_error_other_fixture(mocker: MockerFixture) -> None:
    """Make the mocked responses.create() raise an unrelated RuntimeError.

    The error message deliberately lacks the ``context_length`` marker, so
    tests using this fixture exercise the generic RuntimeError path.
    """
    unrelated_error = RuntimeError("Some other runtime error")
    failing_create = mocker.AsyncMock(side_effect=unrelated_error)
    _setup_responses_mock(mocker, failing_create)
147+
148+
129149# --- Test _build_instructions ---
130150
131151
@@ -400,6 +420,50 @@ async def test_infer_api_connection_error_returns_503(
400420 assert exc_info .value .status_code == status .HTTP_503_SERVICE_UNAVAILABLE
401421
402422
@pytest.mark.asyncio
async def test_infer_runtime_error_context_length_returns_413(
    mocker: MockerFixture,
    mock_configuration: AppConfig,
    mock_runtime_error_context_length: None,
    mock_auth_resolvers: None,
) -> None:
    """Verify /infer answers 413 when the LLM raises a context_length RuntimeError."""
    # Arrange: a minimal request plus mocked FastAPI collaborators.
    payload = RlsapiV1InferRequest(question="Test question")
    http_request = _create_mock_request(mocker)
    background_tasks = _create_mock_background_tasks(mocker)

    # Act: the endpoint is expected to surface the failure as an HTTPException.
    with pytest.raises(HTTPException) as raised:
        await infer_endpoint(
            infer_request=payload,
            request=http_request,
            background_tasks=background_tasks,
            auth=MOCK_AUTH,
        )

    # Assert: the HTTPException carries the 413 status code.
    http_error = raised.value
    assert http_error.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
444+
445+
@pytest.mark.asyncio
async def test_infer_runtime_error_other_reraises(
    mocker: MockerFixture,
    mock_configuration: AppConfig,
    mock_runtime_error_other: None,
    mock_auth_resolvers: None,
) -> None:
    """Verify /infer propagates a RuntimeError unrelated to context_length."""
    # Arrange: a minimal request plus mocked FastAPI collaborators.
    payload = RlsapiV1InferRequest(question="Test question")
    http_request = _create_mock_request(mocker)
    background_tasks = _create_mock_background_tasks(mocker)

    # Act / assert: the original RuntimeError must escape the endpoint as-is,
    # i.e. it is not converted into an HTTPException.
    with pytest.raises(RuntimeError, match="Some other runtime error"):
        await infer_endpoint(
            infer_request=payload,
            request=http_request,
            background_tasks=background_tasks,
            auth=MOCK_AUTH,
        )
465+
466+
403467@pytest .mark .asyncio
404468async def test_infer_empty_llm_response_returns_fallback (
405469 mocker : MockerFixture ,
0 commit comments