Skip to content

Commit b66534b

Browse files
giulio-leone and giulio-leone authored
fix(openai): always use string content for tool messages (#1878)
Co-authored-by: giulio-leone <giulio.leone@users.noreply.github.com>
1 parent 2e4c82b commit b66534b

2 files changed

Lines changed: 151 additions & 7 deletions

File tree

src/strands/models/openai.py

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -204,13 +204,33 @@ def format_request_tool_message(cls, tool_result: ToolResult, **kwargs: Any) ->
204 204
],
205 205
)
206 206

207-
formatted_contents = [cls.format_request_message_content(content) for content in contents]
208-
209-
# If single text content, use string format for better model compatibility
210-
if len(formatted_contents) == 1 and formatted_contents[0].get("type") == "text":
211-
content: str | list[dict[str, Any]] = formatted_contents[0]["text"]
207+
# Merge adjacent text blocks while preserving the order of non-text
208+
# (image/document) content. When all content is text, join into a
209+
# single string for broad compatibility with OpenAI-compatible
210+
# endpoints (e.g., Kimi K2.5, vLLM, Ollama).
211+
# See https://github.com/strands-agents/sdk-python/issues/1696
212+
merged: list[dict[str, Any]] = []
213+
has_non_text = False
214+
for content_block in contents:
215+
if "text" in content_block:
216+
# Merge with the previous entry if it is also text (adjacent)
217+
if merged and merged[-1].get("type") == "text":
218+
merged[-1]["text"] += "\n" + content_block["text"]
219+
else:
220+
merged.append({"type": "text", "text": content_block["text"]})
221+
elif "image" in content_block or "document" in content_block:
222+
has_non_text = True
223+
merged.append(cls.format_request_message_content(content_block))
224+
225+
content: str | list[dict[str, Any]]
226+
if has_non_text:
227+
# Keep array format when images/documents are present so that
228+
# _split_tool_message_images can extract them into a user message.
229+
content = merged
212 230
else:
213-
content = formatted_contents
231+
# All text — the loop already merged adjacent blocks with "\n",
232+
# so extract the single resulting entry.
233+
content = merged[0]["text"] if merged else ""
214 234

215 235
return {
216 236
"role": "tool",

tests/strands/models/test_openai.py

Lines changed: 125 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def test_format_request_tool_message():
173 173

174 174
tru_result = OpenAIModel.format_request_tool_message(tool_result)
175 175
exp_result = {
176-
"content": [{"text": "4", "type": "text"}, {"text": '["4"]', "type": "text"}],
176+
"content": '4\n["4"]',
177 177
"role": "tool",
178 178
"tool_call_id": "c1",
179 179
}
@@ -197,6 +197,130 @@ def test_format_request_tool_message_single_text_returns_string():
197 197
assert tru_result == exp_result
198 198

199 199

200+
def test_format_request_tool_message_multi_text_returns_joined_string():
201+
"""Test that multi-content text results are joined into a single string.
202+
203+
Regression test for https://github.com/strands-agents/sdk-python/issues/1696.
204+
OpenAI-compatible endpoints (e.g., Kimi K2.5, vLLM, Ollama) only correctly
205+
parse string content for tool messages; array format causes hallucinated results.
206+
"""
207+
tool_result = {
208+
"content": [
209+
{"text": "Temperature: 72°F"},
210+
{"json": {"humidity": 45, "unit": "%"}},
211+
{"text": "Wind: 5 mph"},
212+
],
213+
"status": "success",
214+
"toolUseId": "c1",
215+
}
216+
217+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
218+
exp_result = {
219+
"content": 'Temperature: 72°F\n{"humidity": 45, "unit": "%"}\nWind: 5 mph',
220+
"role": "tool",
221+
"tool_call_id": "c1",
222+
}
223+
assert tru_result == exp_result
224+
225+
226+
def test_format_request_tool_message_mixed_text_image_preserves_order():
227+
"""Test that text and image content blocks preserve their original order."""
228+
tool_result = {
229+
"content": [
230+
{"text": "Before image"},
231+
{"image": {"format": "png", "source": {"bytes": b"PNG"}}},
232+
{"text": "After image"},
233+
],
234+
"status": "success",
235+
"toolUseId": "c1",
236+
}
237+
238+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
239+
content = tru_result["content"]
240+
# Array format since images are present
241+
assert isinstance(content, list)
242+
assert len(content) == 3
243+
# Order preserved: text, image, text
244+
assert content[0] == {"type": "text", "text": "Before image"}
245+
assert content[1]["type"] == "image_url"
246+
assert content[2] == {"type": "text", "text": "After image"}
247+
248+
249+
def test_format_request_tool_message_merges_adjacent_text():
250+
"""Test that adjacent text blocks are merged while non-text order is preserved."""
251+
tool_result = {
252+
"content": [
253+
{"text": "Line 1"},
254+
{"text": "Line 2"},
255+
{"image": {"format": "png", "source": {"bytes": b"PNG"}}},
256+
{"text": "Line 3"},
257+
],
258+
"status": "success",
259+
"toolUseId": "c1",
260+
}
261+
262+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
263+
content = tru_result["content"]
264+
assert isinstance(content, list)
265+
assert len(content) == 3
266+
# Adjacent text merged, image order preserved
267+
assert content[0] == {"type": "text", "text": "Line 1\nLine 2"}
268+
assert content[1]["type"] == "image_url"
269+
assert content[2] == {"type": "text", "text": "Line 3"}
270+
271+
272+
def test_format_request_tool_message_image_only():
273+
"""Test tool message with only non-text content."""
274+
tool_result = {
275+
"content": [
276+
{"image": {"format": "png", "source": {"bytes": b"PNG"}}},
277+
],
278+
"status": "success",
279+
"toolUseId": "c1",
280+
}
281+
282+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
283+
content = tru_result["content"]
284+
assert isinstance(content, list)
285+
assert len(content) == 1
286+
assert content[0]["type"] == "image_url"
287+
288+
289+
def test_format_request_tool_message_document_mixed():
290+
"""Test tool message with document content mixed with text."""
291+
tool_result = {
292+
"content": [
293+
{"text": "Summary"},
294+
{"document": {"format": "pdf", "name": "report.pdf", "source": {"bytes": b"PDF"}}},
295+
{"text": "Footer"},
296+
],
297+
"status": "success",
298+
"toolUseId": "c1",
299+
}
300+
301+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
302+
content = tru_result["content"]
303+
assert isinstance(content, list)
304+
assert len(content) == 3
305+
assert content[0] == {"type": "text", "text": "Summary"}
306+
assert content[1]["type"] == "file"
307+
assert content[2] == {"type": "text", "text": "Footer"}
308+
309+
310+
def test_format_request_tool_message_empty_content():
311+
"""Test tool message with empty content list returns empty string."""
312+
tool_result = {
313+
"content": [],
314+
"status": "success",
315+
"toolUseId": "c1",
316+
}
317+
318+
tru_result = OpenAIModel.format_request_tool_message(tool_result)
319+
assert tru_result["content"] == ""
320+
assert tru_result["role"] == "tool"
321+
assert tru_result["tool_call_id"] == "c1"
322+
323+
200 324
def test_split_tool_message_images_with_image():
201 325
"""Test that images are extracted from tool messages."""
202 326
tool_message = {

0 commit comments

Comments (0)