diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index 249b7ab8a..67d93070e 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -44,14 +44,6 @@ func (c *Client) createBetaStream( slog.Error("Failed to convert messages for Anthropic Beta request", "error", err) return nil, err } - if err := validateAnthropicSequencingBeta(converted); err != nil { - slog.Warn("Invalid message sequencing for Anthropic Beta API detected, attempting self-repair", "error", err) - converted = repairAnthropicSequencingBeta(converted) - if err2 := validateAnthropicSequencingBeta(converted); err2 != nil { - slog.Error("Failed to self-repair Anthropic Beta sequencing", "error", err2) - return nil, err - } - } if len(converted) == 0 { return nil, errors.New("no messages to send after conversion: all messages were filtered out") } @@ -148,35 +140,6 @@ func (c *Client) createBetaStream( return ad, nil } -// validateAnthropicSequencingBeta performs the same validation as standard API but for Beta payloads -func validateAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) error { - return validateSequencing(msgs) -} - -// repairAnthropicSequencingBeta inserts a synthetic user message with tool_result blocks -// for any assistant tool_use blocks that don't have corresponding tool_result blocks -// in the immediate next user message. -func repairAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) []anthropic.BetaMessageParam { - return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.BetaMessageParam { - blocks := make([]anthropic.BetaContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - slog.Debug("Creating synthetic tool_result", "tool_use_id", id) - blocks = append(blocks, anthropic.BetaContentBlockParamUnion{ - OfToolResult: &anthropic.BetaToolResultBlockParam{ - ToolUseID: id, - Content: []anthropic.BetaToolResultBlockParamContentUnion{ - {OfText: &anthropic.BetaTextBlockParam{Text: "(tool execution failed)"}}, - }, - }, - }) - } - return anthropic.BetaMessageParam{ - Role: anthropic.BetaMessageParamRoleUser, - Content: blocks, - } - }) -} - // countAnthropicTokensBeta calls Anthropic's Count Tokens API for the provided Beta API payload // and returns the number of input tokens. func countAnthropicTokensBeta( diff --git a/pkg/model/provider/anthropic/beta_converter_test.go b/pkg/model/provider/anthropic/beta_converter_test.go index 5a8474112..746849fcc 100644 --- a/pkg/model/provider/anthropic/beta_converter_test.go +++ b/pkg/model/provider/anthropic/beta_converter_test.go @@ -78,14 +78,6 @@ func TestConvertBetaMessages_MergesConsecutiveToolMessages(t *testing.T) { require.True(t, ok) content := contentArray(userMsg2Map) require.Len(t, content, 2, "User message should have 2 tool_result blocks") - - toolResultIDs := collectToolResultIDs(content) - assert.Contains(t, toolResultIDs, "tool_call_1") - assert.Contains(t, toolResultIDs, "tool_call_2") - - // Most importantly: validate that the sequence is valid for Anthropic API - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err, "Converted messages should pass Anthropic sequencing validation") } func TestConvertBetaMessages_SingleToolMessage(t *testing.T) { @@ -123,68 +115,4 @@ func TestConvertBetaMessages_SingleToolMessage(t *testing.T) { betaMessages, err := testClient().convertBetaMessages(t.Context(), messages) require.NoError(t, err) require.Len(t, betaMessages, 4) - - // Validate sequence - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err) -} - -func TestConvertBetaMessages_NonConsecutiveToolMessages(t *testing.T) { - // When tool messages are separated by other messages (edge case) - // Each tool message group should be handled independently - messages := []chat.Message{ - { - Role: chat.MessageRoleUser, - Content: "First request", - }, - { - Role: chat.MessageRoleAssistant, - Content: "", - ToolCalls: []tools.ToolCall{ - { - ID: "tool_1", - Type: "function", - Function: tools.FunctionCall{ - Name: "test_tool", - Arguments: `{}`, - }, - }, - }, - }, - { - Role: chat.MessageRoleTool, - Content: "Tool result 1", - ToolCallID: "tool_1", - }, - { - Role: chat.MessageRoleAssistant, - Content: "Intermediate response", - ToolCalls: []tools.ToolCall{ - { - ID: "tool_2", - Type: "function", - Function: tools.FunctionCall{ - Name: "test_tool", - Arguments: `{}`, - }, - }, - }, - }, - { - Role: chat.MessageRoleTool, - Content: "Tool result 2", - ToolCallID: "tool_2", - }, - { - Role: chat.MessageRoleAssistant, - Content: "Final response", - }, - } - - betaMessages, err := testClient().convertBetaMessages(t.Context(), messages) - require.NoError(t, err) - - // Validate the entire sequence - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err, "Messages with non-consecutive tool calls should still validate") } diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 4194a4c19..fc3153069 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -271,15 +271,6 @@ func (c *Client) CreateChatCompletionStream( slog.Error("Failed to convert messages for Anthropic request", "error", err) return nil, err } - // Preflight validation to ensure tool_use/tool_result sequencing is valid - if err := validateAnthropicSequencing(converted); err != nil { - slog.Warn("Invalid message sequencing for Anthropic detected, attempting self-repair", "error", err) - converted = repairAnthropicSequencing(converted) - if err2 := validateAnthropicSequencing(converted); err2 != nil { - slog.Error("Failed to self-repair Anthropic sequencing", "error", err2) - return nil, err - } - } if len(converted) == 0 { return nil, errors.New("no messages to send after conversion: all messages were filtered out") } @@ -735,27 +726,6 @@ func (c *Client) FileManager() *FileManager { return c.fileManager } -// validateAnthropicSequencing verifies that for every assistant message that includes -// one or more tool_use blocks, the immediately following message is a user message -// that includes tool_result blocks for all those tool_use IDs (grouped into that single message). -func validateAnthropicSequencing(msgs []anthropic.MessageParam) error { - return validateSequencing(msgs) -} - -// repairAnthropicSequencing inserts a synthetic user message containing tool_result blocks -// immediately after any assistant message that has tool_use blocks missing a corresponding -// tool_result in the next user message. This is a best-effort local repair to keep the -// conversation valid for Anthropic while preserving original messages, to keep the agent loop running. -func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.MessageParam { - return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.MessageParam { - blocks := make([]anthropic.ContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - blocks = append(blocks, anthropic.NewToolResultBlock(id, "(tool execution failed)", false)) - } - return anthropic.NewUserMessage(blocks...) - }) -} - // marshalToMap is a helper that converts any value to a map[string]any via JSON marshaling. // This is used to inspect SDK union types without depending on their internal structure. // It's shared by both standard and Beta API validation/repair code. @@ -780,125 +750,6 @@ func contentArray(m map[string]any) []any { return nil } -// validateSequencing generically validates that every assistant message with tool_use blocks -// is immediately followed by a user message with corresponding tool_result blocks. -// It works on both standard (MessageParam) and Beta (BetaMessageParam) types by -// marshaling to map[string]any for inspection. -func validateSequencing[T any](msgs []T) error { - for i := range msgs { - m, ok := marshalToMap(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArray(m)) - if len(toolUseIDs) == 0 { - continue - } - - if i+1 >= len(msgs) { - slog.Warn("Anthropic sequencing invalid: assistant tool_use present but no next user tool_result message", "assistant_index", i) - return errors.New("assistant tool_use present but no subsequent user message with tool_result blocks") - } - - next, ok := marshalToMap(msgs[i+1]) - if !ok || next["role"] != "user" { - slog.Warn("Anthropic sequencing invalid: next message after assistant tool_use is not user", "assistant_index", i, "next_role", next["role"]) - return errors.New("assistant tool_use must be followed by a user message containing corresponding tool_result blocks") - } - - toolResultIDs := collectToolResultIDs(contentArray(next)) - missing := differenceIDs(toolUseIDs, toolResultIDs) - if len(missing) > 0 { - slog.Warn("Anthropic sequencing invalid: missing tool_result for tool_use id in next user message", "assistant_index", i, "tool_use_id", missing[0], "missing_count", len(missing)) - return fmt.Errorf("missing tool_result for tool_use id %s in the next user message", missing[0]) - } - } - return nil -} - -// repairSequencing generically inserts a synthetic user message after any assistant -// tool_use message that is missing corresponding tool_result blocks. The makeSynthetic -// callback builds the appropriate user message type for the remaining tool_use IDs. -func repairSequencing[T any](msgs []T, makeSynthetic func(toolUseIDs map[string]struct{}) T) []T { - if len(msgs) == 0 { - return msgs - } - repaired := make([]T, 0, len(msgs)+2) - for i := range msgs { - repaired = append(repaired, msgs[i]) - - m, ok := marshalToMap(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArray(m)) - if len(toolUseIDs) == 0 { - continue - } - - // Remove any IDs that already have results in the next user message - if i+1 < len(msgs) { - if next, ok := marshalToMap(msgs[i+1]); ok && next["role"] == "user" { - toolResultIDs := collectToolResultIDs(contentArray(next)) - for id := range toolResultIDs { - delete(toolUseIDs, id) - } - } - } - - if len(toolUseIDs) > 0 { - slog.Debug("Inserting synthetic user message for missing tool_results", - "assistant_index", i, - "missing_count", len(toolUseIDs)) - repaired = append(repaired, makeSynthetic(toolUseIDs)) - } - } - return repaired -} - -func collectToolUseIDs(content []any) map[string]struct{} { - ids := make(map[string]struct{}) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - if t, _ := cb["type"].(string); t == "tool_use" { - if id, _ := cb["id"].(string); id != "" { - ids[id] = struct{}{} - } - } - } - } - return ids -} - -func collectToolResultIDs(content []any) map[string]struct{} { - ids := make(map[string]struct{}) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - if t, _ := cb["type"].(string); t == "tool_result" { - if id, _ := cb["tool_use_id"].(string); id != "" { - ids[id] = struct{}{} - } - } - } - } - return ids -} - -func differenceIDs(a, b map[string]struct{}) []string { - if len(a) == 0 { - return nil - } - var missing []string - for id := range a { - if _, ok := b[id]; !ok { - missing = append(missing, id) - } - } - return missing -} - // validThinkingTokens validates that the token budget is within the // acceptable range for Anthropic (>= 1024 and < maxTokens). // Returns (tokens, true) if valid, or (0, false) with a warning log if not. diff --git a/pkg/model/provider/anthropic/client_test.go b/pkg/model/provider/anthropic/client_test.go index 3e53a15a4..8a116baef 100644 --- a/pkg/model/provider/anthropic/client_test.go +++ b/pkg/model/provider/anthropic/client_test.go @@ -235,80 +235,6 @@ func TestSystemMessages_InterspersedExtractedAndExcluded(t *testing.T) { } } -func TestSequencingRepair_Standard(t *testing.T) { - msgs := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "start"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}}, - }, - }, - // Intentionally missing the user/tool_result message here - {Role: chat.MessageRoleUser, Content: "continue"}, - } - - converted, err := testClient().convertMessages(t.Context(), msgs) - require.NoError(t, err) - err = validateAnthropicSequencing(converted) - require.Error(t, err) - - repaired := repairAnthropicSequencing(converted) - err = validateAnthropicSequencing(repaired) - require.NoError(t, err) -} - -func TestSequencingRepair_Beta(t *testing.T) { - msgs := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "start"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}}, - }, - }, - // Intentionally missing the user/tool_result message here - {Role: chat.MessageRoleUser, Content: "continue"}, - } - - converted, err := testClient().convertBetaMessages(t.Context(), msgs) - require.NoError(t, err) - err = validateAnthropicSequencingBeta(converted) - require.Error(t, err) - - repaired := repairAnthropicSequencingBeta(converted) - err = validateAnthropicSequencingBeta(repaired) - require.NoError(t, err) -} - -func TestConvertMessages_DropOrphanToolResults_NoPrecedingToolUse(t *testing.T) { - msgs := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "start"}, - // Orphan tool result (no assistant tool_use immediately before) - {Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "result-1"}, - {Role: chat.MessageRoleUser, Content: "continue"}, - } - - converted, err := testClient().convertMessages(t.Context(), msgs) - require.NoError(t, err) - // Expect only the two user text messages to appear - require.Len(t, converted, 2) - - // Ensure none of the converted messages contain tool_result blocks - for i := range converted { - b, err := json.Marshal(converted[i]) - require.NoError(t, err) - var m map[string]any - require.NoError(t, json.Unmarshal(b, &m)) - content, _ := m["content"].([]any) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - assert.NotEqual(t, "tool_result", cb["type"], "unexpected orphan tool_result included in payload") - } - } - } -} - func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) { msgs := []chat.Message{ {Role: chat.MessageRoleUser, Content: "start"}, @@ -329,9 +255,6 @@ func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) { // Expect: user(start), assistant(tool_use), user(grouped tool_result), user(ok) require.Len(t, converted, 4) - // Validate sequencing is acceptable to Anthropic - require.NoError(t, validateAnthropicSequencing(converted)) - b, err := json.Marshal(converted[2]) require.NoError(t, err) var m map[string]any diff --git a/pkg/session/session.go b/pkg/session/session.go index 9af60713e..475bc5dd6 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -914,6 +914,8 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message { messages = truncateOldToolContent(messages, maxOldToolCallTokens) } + messages = sanitizeToolCalls(messages) + systemCount := 0 conversationCount := 0 for i := range messages { @@ -1009,6 +1011,59 @@ func trimMessages(messages []chat.Message, maxItems int) []chat.Message { return result } +// sanitizeToolCalls ensures every tool call in assistant messages has a +// corresponding tool-result message. It walks the message list tracking +// pending tool calls; when a tool-result message arrives its ID is marked +// fulfilled. When the next assistant or user message is encountered (or the +// end of the list is reached), any still-pending tool calls receive synthetic +// error results injected just before that boundary. This guarantees the +// provider always sees a valid request/response pair for every tool call. +func sanitizeToolCalls(messages []chat.Message) []chat.Message { + var ( + out []chat.Message + pendingToolCalls []tools.ToolCall + resultIDs = make(map[string]bool) + ) + + flushPending := func() { + for _, tc := range pendingToolCalls { + if tc.ID != "" && !resultIDs[tc.ID] { + out = append(out, chat.Message{ + Role: chat.MessageRoleTool, + ToolCallID: tc.ID, + Content: "No result provided", + IsError: true, + }) + } + } + pendingToolCalls = nil + resultIDs = make(map[string]bool) + } + + for _, msg := range messages { + switch { + case msg.Role == chat.MessageRoleTool: + if msg.ToolCallID != "" { + resultIDs[msg.ToolCallID] = true + } + + case msg.Role == chat.MessageRoleAssistant && len(msg.ToolCalls) > 0: + flushPending() + out = append(out, msg) + pendingToolCalls = msg.ToolCalls + continue + + case msg.Role == chat.MessageRoleUser || msg.Role == chat.MessageRoleAssistant: + flushPending() + } + + out = append(out, msg) + } + + flushPending() + return out +} + // truncateOldToolContent replaces tool results with placeholders for older // messages that exceed the token budget. It processes messages from newest to // oldest, keeping recent tool content intact while truncating older content diff --git a/pkg/session/session_test.go b/pkg/session/session_test.go index e326c4a3e..55bf1076c 100644 --- a/pkg/session/session_test.go +++ b/pkg/session/session_test.go @@ -339,6 +339,203 @@ func TestEvalCriteriaUnmarshalJSON(t *testing.T) { } } +func TestSanitizeToolCalls(t *testing.T) { + t.Parallel() + + t.Run("no-op when all tool calls have results", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "tc1", Function: tools.FunctionCall{Name: "shell"}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "ok"}, + {Role: chat.MessageRoleAssistant, Content: "done"}, + } + result := sanitizeToolCalls(messages) + assert.Equal(t, messages, result) + }) + + t.Run("injects synthetic result for missing tool result", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "tc1", Function: tools.FunctionCall{Name: "shell"}}, + }, + }, + } + result := sanitizeToolCalls(messages) + + require.Len(t, result, 3) + assert.Equal(t, chat.MessageRoleTool, result[2].Role) + assert.Equal(t, "tc1", result[2].ToolCallID) + assert.True(t, result[2].IsError) + assert.Equal(t, "No result provided", result[2].Content) + }) + + t.Run("handles multiple tool calls with partial results", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "tc1", Function: tools.FunctionCall{Name: "read_file"}}, + {ID: "tc2", Function: tools.FunctionCall{Name: "write_file"}}, + {ID: "tc3", Function: tools.FunctionCall{Name: "shell"}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "file contents"}, + // tc2 and tc3 are missing + } + result := sanitizeToolCalls(messages) + + // Original 3 messages + 2 synthetic results + require.Len(t, result, 5) + + // assistant, then existing tc1 result, then synthetics for tc2/tc3 flushed at end + assert.Equal(t, chat.MessageRoleAssistant, result[1].Role) + assert.Equal(t, "tc1", result[2].ToolCallID) + assert.False(t, result[2].IsError) + assert.Equal(t, "tc2", result[3].ToolCallID) + assert.True(t, result[3].IsError) + assert.Equal(t, "tc3", result[4].ToolCallID) + assert.True(t, result[4].IsError) + }) + + t.Run("no tool calls at all is a no-op", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hello"}, + {Role: chat.MessageRoleAssistant, Content: "hi there"}, + } + result := sanitizeToolCalls(messages) + assert.Equal(t, messages, result) + }) + + t.Run("multiple assistant messages with missing results", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{{ID: "tc1"}}, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "ok"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{{ID: "tc2"}}, + }, + // tc2 result missing (crash) + } + result := sanitizeToolCalls(messages) + + require.Len(t, result, 5) + assert.Equal(t, "tc2", result[4].ToolCallID) + assert.True(t, result[4].IsError) + }) + + t.Run("flushes synthetics before next user message", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{{ID: "tc1", Function: tools.FunctionCall{Name: "shell"}}}, + }, + // no tool result — user responds before result arrives + {Role: chat.MessageRoleUser, Content: "never mind"}, + {Role: chat.MessageRoleAssistant, Content: "ok"}, + } + result := sanitizeToolCalls(messages) + + // synthetic tc1 result should be injected before the second user message + require.Len(t, result, 5) + assert.Equal(t, chat.MessageRoleAssistant, result[1].Role) + assert.Equal(t, "tc1", result[2].ToolCallID) + assert.True(t, result[2].IsError) + assert.Equal(t, chat.MessageRoleUser, result[3].Role) + assert.Equal(t, chat.MessageRoleAssistant, result[4].Role) + }) + + t.Run("flushes synthetics before next assistant with tool calls", func(t *testing.T) { + t.Parallel() + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{{ID: "tc1"}}, + }, + // no result for tc1, model immediately issues another tool call + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{{ID: "tc2"}}, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tc2", Content: "ok"}, + } + result := sanitizeToolCalls(messages) + + require.Len(t, result, 5) + // synthetic for tc1 inserted before the second assistant message + assert.Equal(t, "tc1", result[2].ToolCallID) + assert.True(t, result[2].IsError) + assert.Len(t, result[3].ToolCalls, 1) + assert.Equal(t, "tc2", result[3].ToolCalls[0].ID) + assert.Equal(t, "tc2", result[4].ToolCallID) + assert.False(t, result[4].IsError) + }) + + t.Run("empty messages returns empty", func(t *testing.T) { + t.Parallel() + result := sanitizeToolCalls(nil) + assert.Nil(t, result) + + result = sanitizeToolCalls([]chat.Message{}) + assert.Empty(t, result) + }) +} + +func TestGetMessages_SanitizesOrphanedToolCalls(t *testing.T) { + testAgent := &agent.Agent{} + + s := New() + s.AddMessage(NewAgentMessage("", &chat.Message{ + Role: chat.MessageRoleUser, + Content: "do something", + })) + s.AddMessage(NewAgentMessage("", &chat.Message{ + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "orphan1", Function: tools.FunctionCall{Name: "shell"}}, + {ID: "orphan2", Function: tools.FunctionCall{Name: "read_file"}}, + }, + })) + // No tool result messages — simulating a crash mid-run + + messages := s.GetMessages(testAgent) + + // Verify every tool call ID has a matching tool result + callIDs := make(map[string]bool) + resultIDs := make(map[string]bool) + for _, msg := range messages { + for _, tc := range msg.ToolCalls { + callIDs[tc.ID] = true + } + if msg.Role == chat.MessageRoleTool { + resultIDs[msg.ToolCallID] = true + } + } + for id := range callIDs { + assert.True(t, resultIDs[id], "tool call %s should have a matching result", id) + } +} + func TestTransferTaskPromptExcludesParents(t *testing.T) { t.Parallel()