Skip to content

Commit 1643a62

Browse files
authored
fix: openai responses api error handling (#1931)
1 parent 83ff4e0 commit 1643a62

3 files changed

Lines changed: 27 additions & 8 deletions

File tree

src/strands/models/openai_responses.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,14 +294,14 @@ async def stream(
294294
if hasattr(event, "response") and hasattr(event.response, "usage"):
295295
final_usage = event.response.usage
296296
break
297-
except openai.BadRequestError as e:
297+
except openai.APIError as e:
298298
if hasattr(e, "code") and e.code == "context_length_exceeded":
299299
logger.warning(_CONTEXT_WINDOW_OVERFLOW_MSG)
300300
raise ContextWindowOverflowException(str(e)) from e
301+
if isinstance(e, openai.RateLimitError):
302+
logger.warning(_RATE_LIMIT_MSG)
303+
raise ModelThrottledException(str(e)) from e
301304
raise
302-
except openai.RateLimitError as e:
303-
logger.warning(_RATE_LIMIT_MSG)
304-
raise ModelThrottledException(str(e)) from e
305305

306306
# Close current content block if we had any
307307
if data_type:

tests/strands/models/test_openai_responses.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,26 @@ async def test_stream_context_overflow_exception(openai_client, model, messages)
653653
assert exc_info.value.__cause__ == mock_error
654654

655655

656+
@pytest.mark.asyncio
657+
async def test_stream_context_overflow_exception_api_error_type(openai_client, model, messages):
658+
"""Test that OpenAI context overflow errors are properly converted to ContextWindowOverflowException."""
659+
mock_error = openai.APIError(
660+
message="This model's maximum context length is 4096 tokens.",
661+
request=unittest.mock.MagicMock(),
662+
body={"error": {"code": "context_length_exceeded"}},
663+
)
664+
mock_error.code = "context_length_exceeded"
665+
666+
openai_client.responses.create.side_effect = mock_error
667+
668+
with pytest.raises(ContextWindowOverflowException) as exc_info:
669+
async for _ in model.stream(messages):
670+
pass
671+
672+
assert "maximum context length" in str(exc_info.value)
673+
assert exc_info.value.__cause__ == mock_error
674+
675+
656676
@pytest.mark.asyncio
657677
async def test_stream_rate_limit_as_throttle(openai_client, model, messages):
658678
"""Test that rate limit errors are converted to ModelThrottledException."""

tests_integ/models/test_model_openai.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,16 +225,15 @@ def _rate_limit_params():
225225
return params
226226

227227

228-
@pytest.mark.parametrize("model_class,model_id", _rate_limit_params())
229-
def test_rate_limit_throttling_integration_no_retries(model_class, model_id):
228+
def test_rate_limit_throttling_integration_no_retries():
230229
"""Integration test for rate limit handling with retries disabled.
231230
232231
This test verifies that when a request exceeds OpenAI's rate limits,
233232
the model properly raises a ModelThrottledException. We disable retries
234233
to avoid waiting for the exponential backoff during testing.
235234
"""
236-
model = model_class(
237-
model_id=model_id,
235+
model = OpenAIModel(
236+
model_id="gpt-4o",
238237
client_args={
239238
"api_key": os.getenv("OPENAI_API_KEY"),
240239
},

0 commit comments

Comments
 (0)