Skip to content

Commit 65b06d9

Browse files
fix: enforce that the first message is a user message in the sliding window conversation manager (#2087)
1 parent e7a2174 commit 65b06d9

File tree

5 files changed

+118
-34
lines changed

5 files changed

+118
-34
lines changed

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,9 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
167167
**kwargs: Additional keyword arguments for future extensibility.
168168
169169
Raises:
170-
ContextWindowOverflowException: If the context cannot be reduced further.
171-
Such as when the conversation is already minimal or when tool result messages cannot be properly
172-
converted.
170+
ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
171+
error was provided (e is not None). When called during routine window management (e is None),
172+
logs a warning and returns without modification.
173173
"""
174174
messages = agent.messages
175175

@@ -188,24 +188,43 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
188188
# If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size
189189
trim_index = 2 if len(messages) <= self.window_size else len(messages) - self.window_size
190190

191-
# Find the next valid trim_index
191+
# Find the next valid trim point that:
192+
# 1. Starts with a user message (required by most model providers)
193+
# 2. Does not start with an orphaned toolResult
194+
# 3. Does not start with a toolUse unless its toolResult immediately follows
192195
while trim_index < len(messages):
196+
# Must start with a user message
197+
if messages[trim_index]["role"] != "user":
198+
trim_index += 1
199+
continue
200+
193201
if (
194202
# Oldest message cannot be a toolResult because it needs a toolUse preceding it
195203
any("toolResult" in content for content in messages[trim_index]["content"])
196204
or (
197205
# Oldest message can be a toolUse only if a toolResult immediately follows it.
206+
# Note: toolUse content normally appears only in assistant messages, but this
207+
# check is kept as a defensive safeguard for non-standard message formats.
198208
any("toolUse" in content for content in messages[trim_index]["content"])
199-
and trim_index + 1 < len(messages)
200-
and not any("toolResult" in content for content in messages[trim_index + 1]["content"])
209+
and not (
210+
trim_index + 1 < len(messages)
211+
and any("toolResult" in content for content in messages[trim_index + 1]["content"])
212+
)
201213
)
202214
):
203215
trim_index += 1
204216
else:
205217
break
206218
else:
207-
# If we didn't find a valid trim_index, then we throw
208-
raise ContextWindowOverflowException("Unable to trim conversation context!") from e
219+
# If we didn't find a valid trim_index
220+
if e is not None:
221+
raise ContextWindowOverflowException("Unable to trim conversation context!") from e
222+
logger.warning(
223+
"window_size=<%s>, message_count=<%s> | unable to trim conversation context, no valid trim point found",
224+
self.window_size,
225+
len(messages),
226+
)
227+
return
209228

210229
# trim_index represents the number of messages being removed from the agents messages array
211230
self.removed_message_count += trim_index

src/strands/models/anthropic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,10 +410,12 @@ async def stream(
410410
if event.type == "message_stop":
411411
# Build dict directly to avoid Pydantic serialization warnings
412412
# when the message contains ParsedTextBlock objects (issue #1746)
413-
yield self.format_chunk({
414-
"type": "message_stop",
415-
"message": {"stop_reason": event.message.stop_reason},
416-
})
413+
yield self.format_chunk(
414+
{
415+
"type": "message_stop",
416+
"message": {"stop_reason": event.message.stop_reason},
417+
}
418+
)
417419
else:
418420
yield self.format_chunk(event.model_dump())
419421

tests/strands/agent/test_agent.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,10 +1615,15 @@ def test_agent_restored_from_session_management_with_correct_index():
16151615

16161616

16171617
def test_agent_with_session_and_conversation_manager():
1618-
mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
1618+
mock_model = MockedModelProvider(
1619+
[
1620+
{"role": "assistant", "content": [{"text": "first"}]},
1621+
{"role": "assistant", "content": [{"text": "second"}]},
1622+
]
1623+
)
16191624
mock_session_repository = MockedSessionRepository()
16201625
session_manager = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
1621-
conversation_manager = SlidingWindowConversationManager(window_size=1)
1626+
conversation_manager = SlidingWindowConversationManager(window_size=2)
16221627
# Create an agent with a mocked model and session repository
16231628
agent = Agent(
16241629
session_manager=session_manager,
@@ -1633,22 +1638,28 @@ def test_agent_with_session_and_conversation_manager():
16331638

16341639
agent("Hello!")
16351640

1636-
# After invoking, assert that the messages were persisted
1641+
# After first invocation: [user, assistant] — fits in window, no trimming
16371642
assert len(mock_session_repository.list_messages("123", agent.agent_id)) == 2
1638-
# Assert conversation manager reduced the messages
1639-
assert len(agent.messages) == 1
1643+
assert len(agent.messages) == 2
1644+
1645+
agent("Second question")
1646+
1647+
# After second invocation: [user, assistant, user, assistant] exceeds window_size=2
1648+
# Conversation manager trims to 2 messages starting with a user message
1649+
assert len(agent.messages) == 2
1650+
assert agent.messages[0]["role"] == "user"
16401651

16411652
# Initialize another agent using the same session
16421653
session_manager_2 = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
1643-
conversation_manager_2 = SlidingWindowConversationManager(window_size=1)
1654+
conversation_manager_2 = SlidingWindowConversationManager(window_size=2)
16441655
agent_2 = Agent(
16451656
session_manager=session_manager_2,
16461657
conversation_manager=conversation_manager_2,
16471658
model=mock_model,
16481659
)
16491660
# Assert that the second agent was initialized properly, and that the messages of both agents are equal
16501661
assert agent.messages == agent_2.messages
1651-
# Asser the conversation manager was initialized properly
1662+
# Assert the conversation manager was initialized properly
16521663
assert agent.conversation_manager.removed_message_count == agent_2.conversation_manager.removed_message_count
16531664

16541665

tests/strands/agent/test_conversation_manager.py

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def conversation_manager(request):
7878
],
7979
),
8080
# 5 - Remove dangling assistant message with tool use and user message without tool result
81+
# Must start with a user message, so we skip the assistant message
8182
(
8283
{"window_size": 3},
8384
[
@@ -87,7 +88,6 @@ def conversation_manager(request):
8788
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
8889
],
8990
[
90-
{"role": "assistant", "content": [{"text": "First response"}]},
9191
{"role": "user", "content": [{"text": "Use a tool"}]},
9292
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
9393
],
@@ -107,34 +107,37 @@ def conversation_manager(request):
107107
],
108108
),
109109
# 7 - Message count above max window size - Preserve tool use/tool result pairs
110+
# Cannot start with assistant or orphaned toolResult, so trim advances to next plain user message
110111
(
111112
{"window_size": 2},
112113
[
113-
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
114+
{"role": "user", "content": [{"text": "Hello"}]},
114115
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
115-
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
116+
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
117+
{"role": "assistant", "content": [{"text": "Done"}]},
118+
{"role": "user", "content": [{"text": "Next"}]},
116119
],
117120
[
118-
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
119-
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
121+
{"role": "user", "content": [{"text": "Next"}]},
120122
],
121123
),
122124
# 8 - Test sliding window behavior - preserve tool use/result pairs across cut boundary
125+
# Must start with user message (not assistant, not orphaned toolResult), so trim advances to plain user msg
123126
(
124127
{"window_size": 3},
125128
[
126129
{"role": "user", "content": [{"text": "First message"}]},
127130
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
128131
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
129132
{"role": "assistant", "content": [{"text": "Response after tool use"}]},
133+
{"role": "user", "content": [{"text": "Follow up"}]},
130134
],
131135
[
132-
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
133-
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
134-
{"role": "assistant", "content": [{"text": "Response after tool use"}]},
136+
{"role": "user", "content": [{"text": "Follow up"}]},
135137
],
136138
),
137139
# 9 - Test sliding window with multiple tool pairs that need preservation
140+
# Must start with user message; orphaned toolResult is skipped, lands on plain user text
138141
(
139142
{"window_size": 4},
140143
[
@@ -144,11 +147,10 @@ def conversation_manager(request):
144147
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
145148
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
146149
{"role": "assistant", "content": [{"text": "Final response"}]},
150+
{"role": "user", "content": [{"text": "Another question"}]},
147151
],
148152
[
149-
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
150-
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
151-
{"role": "assistant", "content": [{"text": "Final response"}]},
153+
{"role": "user", "content": [{"text": "Another question"}]},
152154
],
153155
),
154156
],
@@ -161,6 +163,43 @@ def test_apply_management(conversation_manager, messages, expected_messages):
161163
assert messages == expected_messages
162164

163165

166+
def test_sliding_window_forces_user_message_start():
167+
"""Test that trimmed conversation always starts with a user message (GitHub #2085)."""
168+
manager = SlidingWindowConversationManager(window_size=3, should_truncate_results=False)
169+
messages = [
170+
{"role": "user", "content": [{"text": "Hello"}]},
171+
{"role": "assistant", "content": [{"text": "Hi"}]},
172+
{"role": "user", "content": [{"text": "How are you?"}]},
173+
{"role": "assistant", "content": [{"text": "Good"}]},
174+
{"role": "user", "content": [{"text": "Great"}]},
175+
]
176+
test_agent = Agent(messages=messages)
177+
manager.apply_management(test_agent)
178+
179+
assert len(messages) == 3
180+
assert messages[0]["role"] == "user"
181+
assert messages[0]["content"] == [{"text": "How are you?"}]
182+
183+
184+
def test_sliding_window_happy_path_preserves_window_size():
185+
"""In a typical user/assistant conversation, trimming preserves close to window_size messages."""
186+
manager = SlidingWindowConversationManager(window_size=4, should_truncate_results=False)
187+
messages = [
188+
{"role": "user", "content": [{"text": "First"}]},
189+
{"role": "assistant", "content": [{"text": "First response"}]},
190+
{"role": "user", "content": [{"text": "Second"}]},
191+
{"role": "assistant", "content": [{"text": "Second response"}]},
192+
{"role": "user", "content": [{"text": "Third"}]},
193+
{"role": "assistant", "content": [{"text": "Third response"}]},
194+
]
195+
test_agent = Agent(messages=messages)
196+
manager.apply_management(test_agent)
197+
198+
assert len(messages) == 4
199+
assert messages[0]["role"] == "user"
200+
assert messages[0]["content"] == [{"text": "Second"}]
201+
202+
164203
def test_sliding_window_conversation_manager_with_untrimmable_history_raises_context_window_overflow_exception():
165204
manager = SlidingWindowConversationManager(1, False)
166205
messages = [
@@ -171,7 +210,22 @@ def test_sliding_window_conversation_manager_with_untrimmable_history_raises_con
171210
test_agent = Agent(messages=messages)
172211

173212
with pytest.raises(ContextWindowOverflowException):
174-
manager.apply_management(test_agent)
213+
manager.reduce_context(test_agent, e=RuntimeError("context overflow"))
214+
215+
assert messages == original_messages
216+
217+
218+
def test_sliding_window_no_valid_trim_point_without_error_does_not_raise():
219+
"""When no valid trim point exists during routine management (no error), messages are left unchanged."""
220+
manager = SlidingWindowConversationManager(1, False)
221+
messages = [
222+
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool1", "input": {}}}]},
223+
{"role": "user", "content": [{"toolResult": {"toolUseId": "789", "content": [], "status": "success"}}]},
224+
]
225+
original_messages = messages.copy()
226+
test_agent = Agent(messages=messages)
227+
228+
manager.apply_management(test_agent)
175229

176230
assert messages == original_messages
177231

tests/strands/models/test_anthropic.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -982,9 +982,7 @@ def model_dump_with_warning():
982982
events = await alist(response)
983983

984984
# Verify no Pydantic serialization warnings were emitted
985-
pydantic_warnings = [
986-
w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)
987-
]
985+
pydantic_warnings = [w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)]
988986
assert len(pydantic_warnings) == 0, f"Unexpected Pydantic warnings: {pydantic_warnings}"
989987

990988
# Verify the message_stop event was still processed correctly

0 commit comments

Comments
 (0)