Skip to content

Commit e267a64

Browse files
authored
fix: handle reasoning content in OpenAIResponsesModel request formatting (#2013)
1 parent de9b149 commit e267a64

File tree

3 files changed

+79
-6
lines changed

3 files changed

+79
-6
lines changed

src/strands/models/openai_responses.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,13 @@ async def stream(
240240
if model_state is not None and response_id:
241241
model_state["response_id"] = response_id
242242

243-
elif event.type == "response.reasoning_text.delta":
244-
# Reasoning content streaming (for o1/o3 reasoning models)
243+
elif event.type in (
244+
"response.reasoning_text.delta",
245+
"response.reasoning_summary_text.delta",
246+
):
247+
# Reasoning content streaming:
248+
# - reasoning_text: full chain-of-thought (gpt-oss models)
249+
# - reasoning_summary_text: condensed summary (o-series models)
245250
chunks, data_type = self._stream_switch_content("reasoning_content", data_type)
246251
for chunk in chunks:
247252
yield chunk
@@ -510,10 +515,15 @@ def _format_request_messages(cls, messages: Messages) -> list[dict[str, Any]]:
510515
role = message["role"]
511516
contents = message["content"]
512517

518+
if any("reasoningContent" in content for content in contents):
519+
logger.warning(
520+
"reasoningContent is not yet supported in multi-turn conversations with the Responses API"
521+
)
522+
513523
formatted_contents = [
514524
cls._format_request_message_content(content, role=role)
515525
for content in contents
516-
if not any(block_type in content for block_type in ["toolResult", "toolUse"])
526+
if not any(block_type in content for block_type in ["toolResult", "toolUse", "reasoningContent"])
517527
]
518528

519529
formatted_tool_calls = [

tests/strands/models/test_openai_responses.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -596,9 +596,16 @@ async def test_stream_response_incomplete(openai_client, model, agenerator, alis
596596

597597

598598
@pytest.mark.asyncio
599-
async def test_stream_reasoning_content(openai_client, model, agenerator, alist):
600-
"""Test that reasoning content (o1/o3 models) is streamed correctly."""
601-
mock_reasoning_event = unittest.mock.Mock(type="response.reasoning_text.delta", delta="Let me think...")
599+
@pytest.mark.parametrize(
600+
"event_type",
601+
[
602+
"response.reasoning_text.delta",
603+
"response.reasoning_summary_text.delta",
604+
],
605+
)
606+
async def test_stream_reasoning_content(openai_client, model, agenerator, alist, event_type):
607+
"""Test that reasoning content is streamed correctly for both full and summary reasoning events."""
608+
mock_reasoning_event = unittest.mock.Mock(type=event_type, delta="Let me think...")
602609
mock_text_event = unittest.mock.Mock(type="response.output_text.delta", delta="The answer is 42")
603610
mock_complete_event = unittest.mock.Mock(
604611
type="response.completed",
@@ -1152,3 +1159,34 @@ async def test_stream_stateful(openai_client, model_id, agenerator, alist):
11521159
"usage": {"inputTokens": 10, "outputTokens": 5, "totalTokens": 15},
11531160
"metrics": {"latencyMs": 0},
11541161
}
1162+
1163+
1164+
def test_format_request_messages_excludes_reasoning_content(caplog):
1165+
"""Test that reasoningContent blocks are filtered from messages with a warning."""
1166+
messages = [
1167+
{
1168+
"content": [{"text": "Hello"}],
1169+
"role": "user",
1170+
},
1171+
{
1172+
"content": [
1173+
{"reasoningContent": {"reasoningText": {"text": "Let me think..."}}},
1174+
{"text": "The answer is 42"},
1175+
],
1176+
"role": "assistant",
1177+
},
1178+
{
1179+
"content": [{"text": "Thanks"}],
1180+
"role": "user",
1181+
},
1182+
]
1183+
1184+
with caplog.at_level("WARNING"):
1185+
result = OpenAIResponsesModel._format_request_messages(messages)
1186+
1187+
assert result == [
1188+
{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
1189+
{"role": "assistant", "content": [{"type": "output_text", "text": "The answer is 42"}]},
1190+
{"role": "user", "content": [{"type": "input_text", "text": "Thanks"}]},
1191+
]
1192+
assert "reasoningContent is not yet supported" in caplog.text

tests_integ/models/test_model_mantle.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,28 @@ def test_responses_server_side_conversation(stateful_model):
7272

7373
result = agent("What is my name?")
7474
assert "alice" in str(result).lower()
75+
76+
77+
def test_reasoning_content_multi_turn(client_args):
78+
"""Test that reasoning content from gpt-oss models doesn't break multi-turn conversations."""
79+
model = OpenAIResponsesModel(
80+
model_id="openai.gpt-oss-120b",
81+
client_args=client_args,
82+
params={"reasoning": {"effort": "low"}},
83+
)
84+
agent = Agent(model=model, system_prompt="Reply in one short sentence.", callback_handler=None)
85+
86+
result1 = agent("What is 2+2?")
87+
assert "4" in str(result1)
88+
89+
# Verify reasoning content was produced
90+
has_reasoning = any(
91+
"reasoningContent" in block
92+
for msg in agent.messages
93+
if msg["role"] == "assistant"
94+
for block in msg["content"]
95+
)
96+
assert has_reasoning
97+
98+
# Second turn should not raise despite reasoningContent in message history
99+
agent("What about 3+3?")

0 commit comments

Comments (0)