From 319c18e7bb76171a06ed632d1f8b8a8978864656 Mon Sep 17 00:00:00 2001 From: Zelys Date: Sun, 5 Apr 2026 15:30:33 -0500 Subject: [PATCH] fix: repair orphaned toolUse in last session message during restore _fix_broken_tool_use skipped the last message with an explicit guard, relying on the agent-class fallback (_has_tool_use_in_latest_message). That fallback only works within the same process. When a new process restores a session that ended with an orphaned toolUse (e.g. after a runtime timeout), the guard causes the broken history to be sent to the model, producing a ValidationException. Remove the guard and handle the last-message case by appending a synthetic toolResult with status 'error'. The tool execution context is already lost at restore time, so letting the model decide how to proceed is the correct behavior. Updates the test that asserted the old (incorrect) behavior. --- .../session/repository_session_manager.py | 73 +++++++++++-------- .../test_repository_session_manager.py | 13 +++- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/strands/session/repository_session_manager.py b/src/strands/session/repository_session_manager.py index c1032a85e..86e426c64 100644 --- a/src/strands/session/repository_session_manager.py +++ b/src/strands/session/repository_session_manager.py @@ -273,37 +273,48 @@ def _fix_broken_tool_use(self, messages: list[Message]) -> list[Message]: # Then check for orphaned toolUse messages for index, message in enumerate(messages): - # Check all but the latest message in the messages array - # The latest message being orphaned is handled in the agent class - if index + 1 < len(messages): - if any("toolUse" in content for content in message["content"]): - tool_use_ids = [ - content["toolUse"]["toolUseId"] for content in message["content"] if "toolUse" in content - ] - - # Check if there are more messages after the current toolUse message - tool_result_ids = [ - content["toolResult"]["toolUseId"] - for content in messages[index + 1]["content"] - if "toolResult" in content - ] - - missing_tool_use_ids = list(set(tool_use_ids) - set(tool_result_ids)) - # If there are missing tool use ids, that means the messages history is broken - if missing_tool_use_ids: - logger.warning( - "Session message history has an orphaned toolUse with no toolResult. " - "Adding toolResult content blocks to create valid conversation." - ) - # Create the missing toolResult content blocks - missing_content_blocks = generate_missing_tool_result_content(missing_tool_use_ids) - - if tool_result_ids: - # If there were any toolResult ids, that means only some of the content blocks are missing - messages[index + 1]["content"].extend(missing_content_blocks) - else: - # The message following the toolUse was not a toolResult, so lets insert it - messages.insert(index + 1, {"role": "user", "content": missing_content_blocks}) + if not any("toolUse" in content for content in message["content"]): + continue + + tool_use_ids = [ + content["toolUse"]["toolUseId"] for content in message["content"] if "toolUse" in content + ] + + if index + 1 >= len(messages): + # The last message has an orphaned toolUse. The in-process fallback + # (_has_tool_use_in_latest_message) only works within the same process. + # On cross-process restore the tool execution context is lost, so report + # an error to the model and let it decide how to proceed. + logger.warning( + "Session message history ends with an orphaned toolUse with no toolResult. " + "Adding toolResult content blocks to create valid conversation." + ) + missing_content_blocks = generate_missing_tool_result_content(tool_use_ids) + messages.append({"role": "user", "content": missing_content_blocks}) + else: + # Check if the next message already has tool results + tool_result_ids = [ + content["toolResult"]["toolUseId"] + for content in messages[index + 1]["content"] + if "toolResult" in content + ] + + missing_tool_use_ids = list(set(tool_use_ids) - set(tool_result_ids)) + # If there are missing tool use ids, that means the messages history is broken + if missing_tool_use_ids: + logger.warning( + "Session message history has an orphaned toolUse with no toolResult. " + "Adding toolResult content blocks to create valid conversation." + ) + # Create the missing toolResult content blocks + missing_content_blocks = generate_missing_tool_result_content(missing_tool_use_ids) + + if tool_result_ids: + # If there were any toolResult ids, that means only some of the content blocks are missing + messages[index + 1]["content"].extend(missing_content_blocks) + else: + # The message following the toolUse was not a toolResult, so lets insert it + messages.insert(index + 1, {"role": "user", "content": missing_content_blocks}) return messages def sync_multi_agent(self, source: "MultiAgentBase", **kwargs: Any) -> None: diff --git a/tests/strands/session/test_repository_session_manager.py b/tests/strands/session/test_repository_session_manager.py index 1d5048113..57f7c301a 100644 --- a/tests/strands/session/test_repository_session_manager.py +++ b/tests/strands/session/test_repository_session_manager.py @@ -416,8 +416,8 @@ def test_fix_broken_tool_use_handles_multiple_orphaned_tools(existing_session_ma assert tool_use_ids == {"orphaned-123", "orphaned-456"} -def test_fix_broken_tool_use_ignores_last_message(session_manager): - """Test that orphaned toolUse in the last message is not fixed.""" +def test_fix_broken_tool_use_repairs_orphaned_last_message(session_manager): + """Test that orphaned toolUse in the last message is repaired with a synthetic toolResult.""" messages = [ {"role": "user", "content": [{"text": "Hello"}]}, { @@ -430,8 +430,13 @@ def test_fix_broken_tool_use_ignores_last_message(session_manager): fixed_messages = session_manager._fix_broken_tool_use(messages) - # Should remain unchanged since toolUse is in last message - assert fixed_messages == messages + # A synthetic toolResult should be appended for the orphaned toolUse + assert len(fixed_messages) == 3 + assert fixed_messages[2]["role"] == "user" + tool_result_ids = [ + c["toolResult"]["toolUseId"] for c in fixed_messages[2]["content"] if "toolResult" in c + ] + assert "last-message-123" in tool_result_ids def test_fix_broken_tool_use_does_not_change_valid_message(session_manager):