Skip to content

Commit c964926

Browse files
authored
fix(compaction): #2727 strip orphaned assistant message IDs after reasoning removal (#2728)
1 parent aa483fe commit c964926

2 files changed

Lines changed: 171 additions & 0 deletions

File tree

src/agents/memory/openai_responses_compaction_session.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ async def run_compaction(self, args: OpenAIResponsesCompactionArgs | None = None
224224
item.model_dump(exclude_unset=True, warnings=False) # type: ignore
225225
)
226226

227+
output_items = _strip_orphaned_assistant_ids(output_items)
228+
227229
if output_items:
228230
await self.underlying_session.add_items(output_items)
229231

@@ -305,6 +307,35 @@ async def _ensure_compaction_candidates(
305307
return (candidates[:], history[:])
306308

307309

310+
def _strip_orphaned_assistant_ids(
311+
items: list[TResponseInputItem],
312+
) -> list[TResponseInputItem]:
313+
"""Remove ``id`` from assistant messages when their paired reasoning items are missing.
314+
315+
Some models (e.g. gpt-5.4) return compacted output that retains assistant
316+
message IDs even after stripping the reasoning items those IDs reference.
317+
Sending these orphaned IDs back to ``responses.create`` causes a 400 error
318+
because the API expects the paired reasoning item for each assistant message
319+
ID. This function detects and removes those orphaned IDs so the compacted
320+
history can be used safely.
321+
"""
322+
if not items:
323+
return items
324+
325+
has_reasoning = any(
326+
isinstance(item, dict) and item.get("type") == "reasoning" for item in items
327+
)
328+
if has_reasoning:
329+
return items
330+
331+
cleaned: list[TResponseInputItem] = []
332+
for item in items:
333+
if isinstance(item, dict) and item.get("role") == "assistant" and "id" in item:
334+
item = {k: v for k, v in item.items() if k != "id"} # type: ignore[assignment]
335+
cleaned.append(item)
336+
return cleaned
337+
338+
308339
_ResolvedCompactionMode = Literal["previous_response_id", "input"]
309340

310341

tests/memory/test_openai_responses_compaction_session.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from agents.memory.openai_responses_compaction_session import (
1818
DEFAULT_COMPACTION_THRESHOLD,
19+
_strip_orphaned_assistant_ids,
1920
is_openai_model_name,
2021
select_compaction_candidate_items,
2122
)
@@ -613,6 +614,145 @@ def should_trigger_compaction(context: dict[str, Any]) -> bool:
613614
mock_client.responses.compact.assert_awaited_once()
614615

615616

617+
class TestStripOrphanedAssistantIds:
    """Unit tests for ``_strip_orphaned_assistant_ids`` on hand-built item lists."""

    def test_noop_when_empty(self) -> None:
        # An empty history has nothing to strip.
        assert _strip_orphaned_assistant_ids([]) == []

    def test_strips_id_from_assistant_when_no_reasoning(self) -> None:
        assistant_msg = cast(
            TResponseInputItem,
            {"type": "message", "role": "assistant", "id": "msg_abc", "content": "hi"},
        )
        user_msg = cast(
            TResponseInputItem,
            {"type": "message", "role": "user", "content": "hello"},
        )
        history: list[TResponseInputItem] = [assistant_msg, user_msg]

        stripped = _strip_orphaned_assistant_ids(history)

        assert "id" not in stripped[0]
        # The user message must come back unchanged.
        assert stripped[1] == history[1]

    def test_preserves_id_when_reasoning_present(self) -> None:
        reasoning = cast(
            TResponseInputItem,
            {"type": "reasoning", "id": "rs_123", "content": "..."},
        )
        assistant_msg = cast(
            TResponseInputItem,
            {"type": "message", "role": "assistant", "id": "msg_abc", "content": "hi"},
        )
        history: list[TResponseInputItem] = [reasoning, assistant_msg]

        kept = _strip_orphaned_assistant_ids(history)

        assert kept[1].get("id") == "msg_abc"

    def test_preserves_assistant_without_id(self) -> None:
        history: list[TResponseInputItem] = [
            cast(
                TResponseInputItem,
                {"type": "message", "role": "assistant", "content": "hi"},
            ),
        ]

        assert _strip_orphaned_assistant_ids(history) == history

    def test_strips_multiple_assistant_ids(self) -> None:
        history: list[TResponseInputItem] = [
            cast(
                TResponseInputItem,
                {"type": "message", "role": "assistant", "id": msg_id, "content": text},
            )
            for msg_id, text in (("msg_1", "a"), ("msg_2", "b"), ("msg_3", "c"))
        ]

        stripped = _strip_orphaned_assistant_ids(history)

        assert all("id" not in entry for entry in stripped)
676+
677+
678+
class TestCompactionStripsOrphanedIds:
    """Regression coverage for #2727.

    gpt-5.4 compact output can keep assistant message IDs after the reasoning
    items were stripped, which causes 400 errors on the next
    ``responses.create`` call. These tests drive ``run_compaction`` end to end
    against mocks and inspect what is handed to the underlying session.
    """

    def create_mock_session(self) -> MagicMock:
        session_mock = MagicMock(spec=Session)
        session_mock.session_id = "test-session"
        session_mock.get_items = AsyncMock(return_value=[])
        session_mock.add_items = AsyncMock()
        session_mock.pop_item = AsyncMock(return_value=None)
        session_mock.clear_session = AsyncMock()
        return session_mock

    def _session_with_full_history(self) -> MagicMock:
        # Mock session whose stored history holds DEFAULT_COMPACTION_THRESHOLD assistant messages.
        underlying = self.create_mock_session()
        underlying.get_items.return_value = [
            cast(TResponseInputItem, {"type": "message", "role": "assistant", "content": f"m{i}"})
            for i in range(DEFAULT_COMPACTION_THRESHOLD)
        ]
        return underlying

    def _compaction_session(
        self, underlying: MagicMock, compact_output: list[dict[str, str]]
    ) -> OpenAIResponsesCompactionSession:
        # Wire a compaction session to a fake client whose compact() yields ``compact_output``.
        compact_response = MagicMock()
        compact_response.output = compact_output

        client = MagicMock()
        client.responses.compact = AsyncMock(return_value=compact_response)

        return OpenAIResponsesCompactionSession(
            session_id="test",
            underlying_session=underlying,
            client=client,
        )

    @pytest.mark.asyncio
    async def test_run_compaction_strips_orphaned_assistant_ids(self) -> None:
        """Assistant IDs in compacted output without reasoning items are dropped before storage."""
        underlying = self._session_with_full_history()

        # Simulate gpt-5.4 compact output: assistant msgs WITH ids, NO reasoning items
        session = self._compaction_session(
            underlying,
            [
                {"type": "message", "role": "assistant", "id": "msg_aaa", "content": "summary 1"},
                {"type": "message", "role": "assistant", "id": "msg_bbb", "content": "summary 2"},
                {"type": "message", "role": "assistant", "id": "msg_ccc", "content": "summary 3"},
            ],
        )

        await session.run_compaction({"response_id": "resp-123"})

        # First positional argument of add_items is the list that was stored.
        stored = underlying.add_items.call_args[0][0]
        for item in stored:
            assert "id" not in item, f"orphaned id not stripped: {item}"

    @pytest.mark.asyncio
    async def test_run_compaction_keeps_ids_when_reasoning_present(self) -> None:
        """Assistant IDs are kept when the compacted output still includes a reasoning item."""
        underlying = self._session_with_full_history()

        session = self._compaction_session(
            underlying,
            [
                {"type": "reasoning", "id": "rs_111", "content": "thinking..."},
                {"type": "message", "role": "assistant", "id": "msg_aaa", "content": "answer"},
            ],
        )

        await session.run_compaction({"response_id": "resp-123"})

        stored = underlying.add_items.call_args[0][0]
        assistant_entries = [entry for entry in stored if entry.get("role") == "assistant"]
        assert assistant_entries[0]["id"] == "msg_aaa"
754+
755+
616756
class TestTypeGuard:
617757
def test_is_compaction_aware_session_true(self) -> None:
618758
mock_underlying = MagicMock(spec=Session)

0 commit comments

Comments
 (0)