diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go
index 249b7ab8a..67d93070e 100644
--- a/pkg/model/provider/anthropic/beta_client.go
+++ b/pkg/model/provider/anthropic/beta_client.go
@@ -44,14 +44,6 @@ func (c *Client) createBetaStream(
 		slog.Error("Failed to convert messages for Anthropic Beta request", "error", err)
 		return nil, err
 	}
-	if err := validateAnthropicSequencingBeta(converted); err != nil {
-		slog.Warn("Invalid message sequencing for Anthropic Beta API detected, attempting self-repair", "error", err)
-		converted = repairAnthropicSequencingBeta(converted)
-		if err2 := validateAnthropicSequencingBeta(converted); err2 != nil {
-			slog.Error("Failed to self-repair Anthropic Beta sequencing", "error", err2)
-			return nil, err
-		}
-	}
 	if len(converted) == 0 {
 		return nil, errors.New("no messages to send after conversion: all messages were filtered out")
 	}
@@ -148,35 +140,6 @@ func (c *Client) createBetaStream(
 	return ad, nil
 }
 
-// validateAnthropicSequencingBeta performs the same validation as standard API but for Beta payloads
-func validateAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) error {
-	return validateSequencing(msgs)
-}
-
-// repairAnthropicSequencingBeta inserts a synthetic user message with tool_result blocks
-// for any assistant tool_use blocks that don't have corresponding tool_result blocks
-// in the immediate next user message.
-func repairAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) []anthropic.BetaMessageParam {
-	return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.BetaMessageParam {
-		blocks := make([]anthropic.BetaContentBlockParamUnion, 0, len(toolUseIDs))
-		for id := range toolUseIDs {
-			slog.Debug("Creating synthetic tool_result", "tool_use_id", id)
-			blocks = append(blocks, anthropic.BetaContentBlockParamUnion{
-				OfToolResult: &anthropic.BetaToolResultBlockParam{
-					ToolUseID: id,
-					Content: []anthropic.BetaToolResultBlockParamContentUnion{
-						{OfText: &anthropic.BetaTextBlockParam{Text: "(tool execution failed)"}},
-					},
-				},
-			})
-		}
-		return anthropic.BetaMessageParam{
-			Role:    anthropic.BetaMessageParamRoleUser,
-			Content: blocks,
-		}
-	})
-}
-
 // countAnthropicTokensBeta calls Anthropic's Count Tokens API for the provided Beta API payload
 // and returns the number of input tokens.
 func countAnthropicTokensBeta(
diff --git a/pkg/model/provider/anthropic/beta_converter_test.go b/pkg/model/provider/anthropic/beta_converter_test.go
index 5a8474112..746849fcc 100644
--- a/pkg/model/provider/anthropic/beta_converter_test.go
+++ b/pkg/model/provider/anthropic/beta_converter_test.go
@@ -78,14 +78,6 @@ func TestConvertBetaMessages_MergesConsecutiveToolMessages(t *testing.T) {
 	require.True(t, ok)
 	content := contentArray(userMsg2Map)
 	require.Len(t, content, 2, "User message should have 2 tool_result blocks")
-
-	toolResultIDs := collectToolResultIDs(content)
-	assert.Contains(t, toolResultIDs, "tool_call_1")
-	assert.Contains(t, toolResultIDs, "tool_call_2")
-
-	// Most importantly: validate that the sequence is valid for Anthropic API
-	err = validateAnthropicSequencingBeta(betaMessages)
-	require.NoError(t, err, "Converted messages should pass Anthropic sequencing validation")
 }
 
 func TestConvertBetaMessages_SingleToolMessage(t *testing.T) {
@@ -123,68 +115,4 @@ func TestConvertBetaMessages_SingleToolMessage(t *testing.T) {
 	betaMessages, err := testClient().convertBetaMessages(t.Context(), messages)
 	require.NoError(t, err)
 	require.Len(t, betaMessages, 4)
-
-	// Validate sequence
-	err = validateAnthropicSequencingBeta(betaMessages)
-	require.NoError(t, err)
-}
-
-func TestConvertBetaMessages_NonConsecutiveToolMessages(t *testing.T) {
-	// When tool messages are separated by other messages (edge case)
-	// Each tool message group should be handled independently
-	messages := []chat.Message{
-		{
-			Role:    chat.MessageRoleUser,
-			Content: "First request",
-		},
-		{
-			Role:    chat.MessageRoleAssistant,
-			Content: "",
-			ToolCalls: []tools.ToolCall{
-				{
-					ID:   "tool_1",
-					Type: "function",
-					Function: tools.FunctionCall{
-						Name:      "test_tool",
-						Arguments: `{}`,
-					},
-				},
-			},
-		},
-		{
-			Role:       chat.MessageRoleTool,
-			Content:    "Tool result 1",
-			ToolCallID: "tool_1",
-		},
-		{
-			Role:    chat.MessageRoleAssistant,
-			Content: "Intermediate response",
-			ToolCalls: []tools.ToolCall{
-				{
-					ID:   "tool_2",
-					Type: "function",
-					Function: tools.FunctionCall{
-						Name:      "test_tool",
-						Arguments: `{}`,
-					},
-				},
-			},
-		},
-		{
-			Role:       chat.MessageRoleTool,
-			Content:    "Tool result 2",
-			ToolCallID: "tool_2",
-		},
-		{
-			Role:    chat.MessageRoleAssistant,
-			Content: "Final response",
-		},
-	}
-
-	betaMessages, err := testClient().convertBetaMessages(t.Context(), messages)
-	require.NoError(t, err)
-
-	// Validate the entire sequence
-	err = validateAnthropicSequencingBeta(betaMessages)
-	require.NoError(t, err, "Messages with non-consecutive tool calls should still validate")
 }
diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go
index 4194a4c19..fc3153069 100644
--- a/pkg/model/provider/anthropic/client.go
+++ b/pkg/model/provider/anthropic/client.go
@@ -271,15 +271,6 @@ func (c *Client) CreateChatCompletionStream(
 		slog.Error("Failed to convert messages for Anthropic request", "error", err)
 		return nil, err
 	}
-	// Preflight validation to ensure tool_use/tool_result sequencing is valid
-	if err := validateAnthropicSequencing(converted); err != nil {
-		slog.Warn("Invalid message sequencing for Anthropic detected, attempting self-repair", "error", err)
-		converted = repairAnthropicSequencing(converted)
-		if err2 := validateAnthropicSequencing(converted); err2 != nil {
-			slog.Error("Failed to self-repair Anthropic sequencing", "error", err2)
-			return nil, err
-		}
-	}
 	if len(converted) == 0 {
 		return nil, errors.New("no messages to send after conversion: all messages were filtered out")
 	}
@@ -735,27 +726,6 @@ func (c *Client) FileManager() *FileManager {
 	return c.fileManager
 }
 
-// validateAnthropicSequencing verifies that for every assistant message that includes
-// one or more tool_use blocks, the immediately following message is a user message
-// that includes tool_result blocks for all those tool_use IDs (grouped into that single message).
-func validateAnthropicSequencing(msgs []anthropic.MessageParam) error {
-	return validateSequencing(msgs)
-}
-
-// repairAnthropicSequencing inserts a synthetic user message containing tool_result blocks
-// immediately after any assistant message that has tool_use blocks missing a corresponding
-// tool_result in the next user message. This is a best-effort local repair to keep the
-// conversation valid for Anthropic while preserving original messages, to keep the agent loop running.
-func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.MessageParam {
-	return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.MessageParam {
-		blocks := make([]anthropic.ContentBlockParamUnion, 0, len(toolUseIDs))
-		for id := range toolUseIDs {
-			blocks = append(blocks, anthropic.NewToolResultBlock(id, "(tool execution failed)", false))
-		}
-		return anthropic.NewUserMessage(blocks...)
-	})
-}
-
 // marshalToMap is a helper that converts any value to a map[string]any via JSON marshaling.
 // This is used to inspect SDK union types without depending on their internal structure.
 // It's shared by both standard and Beta API validation/repair code.
@@ -780,125 +750,6 @@ func contentArray(m map[string]any) []any {
 	return nil
 }
 
-// validateSequencing generically validates that every assistant message with tool_use blocks
-// is immediately followed by a user message with corresponding tool_result blocks.
-// It works on both standard (MessageParam) and Beta (BetaMessageParam) types by
-// marshaling to map[string]any for inspection.
-func validateSequencing[T any](msgs []T) error {
-	for i := range msgs {
-		m, ok := marshalToMap(msgs[i])
-		if !ok || m["role"] != "assistant" {
-			continue
-		}
-
-		toolUseIDs := collectToolUseIDs(contentArray(m))
-		if len(toolUseIDs) == 0 {
-			continue
-		}
-
-		if i+1 >= len(msgs) {
-			slog.Warn("Anthropic sequencing invalid: assistant tool_use present but no next user tool_result message", "assistant_index", i)
-			return errors.New("assistant tool_use present but no subsequent user message with tool_result blocks")
-		}
-
-		next, ok := marshalToMap(msgs[i+1])
-		if !ok || next["role"] != "user" {
-			slog.Warn("Anthropic sequencing invalid: next message after assistant tool_use is not user", "assistant_index", i, "next_role", next["role"])
-			return errors.New("assistant tool_use must be followed by a user message containing corresponding tool_result blocks")
-		}
-
-		toolResultIDs := collectToolResultIDs(contentArray(next))
-		missing := differenceIDs(toolUseIDs, toolResultIDs)
-		if len(missing) > 0 {
-			slog.Warn("Anthropic sequencing invalid: missing tool_result for tool_use id in next user message", "assistant_index", i, "tool_use_id", missing[0], "missing_count", len(missing))
-			return fmt.Errorf("missing tool_result for tool_use id %s in the next user message", missing[0])
-		}
-	}
-	return nil
-}
-
-// repairSequencing generically inserts a synthetic user message after any assistant
-// tool_use message that is missing corresponding tool_result blocks. The makeSynthetic
-// callback builds the appropriate user message type for the remaining tool_use IDs.
-func repairSequencing[T any](msgs []T, makeSynthetic func(toolUseIDs map[string]struct{}) T) []T {
-	if len(msgs) == 0 {
-		return msgs
-	}
-	repaired := make([]T, 0, len(msgs)+2)
-	for i := range msgs {
-		repaired = append(repaired, msgs[i])
-
-		m, ok := marshalToMap(msgs[i])
-		if !ok || m["role"] != "assistant" {
-			continue
-		}
-
-		toolUseIDs := collectToolUseIDs(contentArray(m))
-		if len(toolUseIDs) == 0 {
-			continue
-		}
-
-		// Remove any IDs that already have results in the next user message
-		if i+1 < len(msgs) {
-			if next, ok := marshalToMap(msgs[i+1]); ok && next["role"] == "user" {
-				toolResultIDs := collectToolResultIDs(contentArray(next))
-				for id := range toolResultIDs {
-					delete(toolUseIDs, id)
-				}
-			}
-		}
-
-		if len(toolUseIDs) > 0 {
-			slog.Debug("Inserting synthetic user message for missing tool_results",
-				"assistant_index", i,
-				"missing_count", len(toolUseIDs))
-			repaired = append(repaired, makeSynthetic(toolUseIDs))
-		}
-	}
-	return repaired
-}
-
-func collectToolUseIDs(content []any) map[string]struct{} {
-	ids := make(map[string]struct{})
-	for _, c := range content {
-		if cb, ok := c.(map[string]any); ok {
-			if t, _ := cb["type"].(string); t == "tool_use" {
-				if id, _ := cb["id"].(string); id != "" {
-					ids[id] = struct{}{}
-				}
-			}
-		}
-	}
-	return ids
-}
-
-func collectToolResultIDs(content []any) map[string]struct{} {
-	ids := make(map[string]struct{})
-	for _, c := range content {
-		if cb, ok := c.(map[string]any); ok {
-			if t, _ := cb["type"].(string); t == "tool_result" {
-				if id, _ := cb["tool_use_id"].(string); id != "" {
-					ids[id] = struct{}{}
-				}
-			}
-		}
-	}
-	return ids
-}
-
-func differenceIDs(a, b map[string]struct{}) []string {
-	if len(a) == 0 {
-		return nil
-	}
-	var missing []string
-	for id := range a {
-		if _, ok := b[id]; !ok {
-			missing = append(missing, id)
-		}
-	}
-	return missing
-}
-
 // validThinkingTokens validates that the token budget is within the
 // acceptable range for Anthropic (>= 1024 and < maxTokens).
 // Returns (tokens, true) if valid, or (0, false) with a warning log if not.
diff --git a/pkg/model/provider/anthropic/client_test.go b/pkg/model/provider/anthropic/client_test.go
index 3e53a15a4..8a116baef 100644
--- a/pkg/model/provider/anthropic/client_test.go
+++ b/pkg/model/provider/anthropic/client_test.go
@@ -235,80 +235,6 @@ func TestSystemMessages_InterspersedExtractedAndExcluded(t *testing.T) {
 	}
 }
 
-func TestSequencingRepair_Standard(t *testing.T) {
-	msgs := []chat.Message{
-		{Role: chat.MessageRoleUser, Content: "start"},
-		{
-			Role: chat.MessageRoleAssistant,
-			ToolCalls: []tools.ToolCall{
-				{ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}},
-			},
-		},
-		// Intentionally missing the user/tool_result message here
-		{Role: chat.MessageRoleUser, Content: "continue"},
-	}
-
-	converted, err := testClient().convertMessages(t.Context(), msgs)
-	require.NoError(t, err)
-	err = validateAnthropicSequencing(converted)
-	require.Error(t, err)
-
-	repaired := repairAnthropicSequencing(converted)
-	err = validateAnthropicSequencing(repaired)
-	require.NoError(t, err)
-}
-
-func TestSequencingRepair_Beta(t *testing.T) {
-	msgs := []chat.Message{
-		{Role: chat.MessageRoleUser, Content: "start"},
-		{
-			Role: chat.MessageRoleAssistant,
-			ToolCalls: []tools.ToolCall{
-				{ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}},
-			},
-		},
-		// Intentionally missing the user/tool_result message here
-		{Role: chat.MessageRoleUser, Content: "continue"},
-	}
-
-	converted, err := testClient().convertBetaMessages(t.Context(), msgs)
-	require.NoError(t, err)
-	err = validateAnthropicSequencingBeta(converted)
-	require.Error(t, err)
-
-	repaired := repairAnthropicSequencingBeta(converted)
-	err = validateAnthropicSequencingBeta(repaired)
-	require.NoError(t, err)
-}
-
-func TestConvertMessages_DropOrphanToolResults_NoPrecedingToolUse(t *testing.T) {
-	msgs := []chat.Message{
-		{Role: chat.MessageRoleUser, Content: "start"},
-		// Orphan tool result (no assistant tool_use immediately before)
-		{Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "result-1"},
-		{Role: chat.MessageRoleUser, Content: "continue"},
-	}
-
-	converted, err := testClient().convertMessages(t.Context(), msgs)
-	require.NoError(t, err)
-	// Expect only the two user text messages to appear
-	require.Len(t, converted, 2)
-
-	// Ensure none of the converted messages contain tool_result blocks
-	for i := range converted {
-		b, err := json.Marshal(converted[i])
-		require.NoError(t, err)
-		var m map[string]any
-		require.NoError(t, json.Unmarshal(b, &m))
-		content, _ := m["content"].([]any)
-		for _, c := range content {
-			if cb, ok := c.(map[string]any); ok {
-				assert.NotEqual(t, "tool_result", cb["type"], "unexpected orphan tool_result included in payload")
-			}
-		}
-	}
-}
-
 func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) {
 	msgs := []chat.Message{
 		{Role: chat.MessageRoleUser, Content: "start"},
@@ -329,9 +255,6 @@ func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) {
 	// Expect: user(start), assistant(tool_use), user(grouped tool_result), user(ok)
 	require.Len(t, converted, 4)
 
-	// Validate sequencing is acceptable to Anthropic
-	require.NoError(t, validateAnthropicSequencing(converted))
-
 	b, err := json.Marshal(converted[2])
 	require.NoError(t, err)
 	var m map[string]any
diff --git a/pkg/session/session.go b/pkg/session/session.go
index 9af60713e..475bc5dd6 100644
--- a/pkg/session/session.go
+++ b/pkg/session/session.go
@@ -914,6 +914,8 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message {
 		messages = truncateOldToolContent(messages, maxOldToolCallTokens)
 	}
 
+	messages = sanitizeToolCalls(messages)
+
 	systemCount := 0
 	conversationCount := 0
 	for i := range messages {
@@ -1009,6 +1011,59 @@ func trimMessages(messages []chat.Message, maxItems int) []chat.Message {
 	return result
 }
 
+// sanitizeToolCalls returns messages with a synthetic error tool result added
+// for every assistant tool call that never received one. Tool-result messages
+// mark their ToolCallID as answered; at the next assistant or user message, or
+// at the end of the list, each still-unanswered call with a non-empty ID gets
+// a "No result provided" error result inserted just before that boundary.
+// All input messages are kept, unchanged and in order; an empty or nil input
+// yields nil.
+func sanitizeToolCalls(messages []chat.Message) []chat.Message {
+	if len(messages) == 0 {
+		return nil
+	}
+
+	// Every input message is kept, so len(messages) is the minimum output size.
+	out := make([]chat.Message, 0, len(messages))
+	var pendingToolCalls []tools.ToolCall
+	resultIDs := make(map[string]bool)
+
+	// flushPending emits results for unanswered calls, then resets the tracking state.
+	flushPending := func() {
+		for _, tc := range pendingToolCalls {
+			if tc.ID == "" || resultIDs[tc.ID] {
+				continue
+			}
+			out = append(out, chat.Message{
+				Role:       chat.MessageRoleTool,
+				ToolCallID: tc.ID,
+				Content:    "No result provided",
+				IsError:    true,
+			})
+		}
+		pendingToolCalls = nil
+		clear(resultIDs) // reuse the map rather than allocating one per boundary
+	}
+
+	for _, msg := range messages {
+		switch msg.Role {
+		case chat.MessageRoleTool:
+			if msg.ToolCallID != "" {
+				resultIDs[msg.ToolCallID] = true
+			}
+		case chat.MessageRoleAssistant, chat.MessageRoleUser:
+			flushPending()
+			if msg.Role == chat.MessageRoleAssistant {
+				pendingToolCalls = msg.ToolCalls // may be empty: nothing to track
+			}
+		}
+		out = append(out, msg)
+	}
+
+	flushPending()
+	return out
+}
+
 // truncateOldToolContent replaces tool results with placeholders for older
 // messages that exceed the token budget. It processes messages from newest to
 // oldest, keeping recent tool content intact while truncating older content
diff --git a/pkg/session/session_test.go b/pkg/session/session_test.go
index e326c4a3e..55bf1076c 100644
--- a/pkg/session/session_test.go
+++ b/pkg/session/session_test.go
@@ -339,6 +339,203 @@ func TestEvalCriteriaUnmarshalJSON(t *testing.T) {
 	}
 }
 
+func TestSanitizeToolCalls(t *testing.T) {
+	t.Parallel()
+	// Each subtest passes a message history to sanitizeToolCalls and checks the returned slice.
+	t.Run("no-op when all tool calls have results", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role: chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{
+					{ID: "tc1", Function: tools.FunctionCall{Name: "shell"}},
+				},
+			},
+			{Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "ok"},
+			{Role: chat.MessageRoleAssistant, Content: "done"},
+		}
+		result := sanitizeToolCalls(messages)
+		assert.Equal(t, messages, result)
+	})
+
+	t.Run("injects synthetic result for missing tool result", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role: chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{
+					{ID: "tc1", Function: tools.FunctionCall{Name: "shell"}},
+				},
+			},
+		}
+		result := sanitizeToolCalls(messages)
+
+		require.Len(t, result, 3) // user, assistant, synthetic tc1 result
+		assert.Equal(t, chat.MessageRoleTool, result[2].Role)
+		assert.Equal(t, "tc1", result[2].ToolCallID)
+		assert.True(t, result[2].IsError)
+		assert.Equal(t, "No result provided", result[2].Content)
+	})
+
+	t.Run("handles multiple tool calls with partial results", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role: chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{
+					{ID: "tc1", Function: tools.FunctionCall{Name: "read_file"}},
+					{ID: "tc2", Function: tools.FunctionCall{Name: "write_file"}},
+					{ID: "tc3", Function: tools.FunctionCall{Name: "shell"}},
+				},
+			},
+			{Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "file contents"},
+			// No tool-result messages exist for tc2 and tc3.
+		}
+		result := sanitizeToolCalls(messages)
+
+		// The 3 original messages plus 2 synthetic results.
+		require.Len(t, result, 5)
+
+		// Order: assistant, the real tc1 result, then synthetics for tc2 and tc3 in call order.
+		assert.Equal(t, chat.MessageRoleAssistant, result[1].Role)
+		assert.Equal(t, "tc1", result[2].ToolCallID)
+		assert.False(t, result[2].IsError)
+		assert.Equal(t, "tc2", result[3].ToolCallID)
+		assert.True(t, result[3].IsError)
+		assert.Equal(t, "tc3", result[4].ToolCallID)
+		assert.True(t, result[4].IsError)
+	})
+
+	t.Run("no tool calls at all is a no-op", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hello"},
+			{Role: chat.MessageRoleAssistant, Content: "hi there"},
+		}
+		result := sanitizeToolCalls(messages)
+		assert.Equal(t, messages, result)
+	})
+
+	t.Run("multiple assistant messages with missing results", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role:      chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{{ID: "tc1"}},
+			},
+			{Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "ok"},
+			{
+				Role:      chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{{ID: "tc2"}},
+			},
+			// The tc2 result is missing, as after a crash mid-run.
+		}
+		result := sanitizeToolCalls(messages)
+
+		require.Len(t, result, 5) // 4 originals + synthetic tc2 result at the end
+		assert.Equal(t, "tc2", result[4].ToolCallID)
+		assert.True(t, result[4].IsError)
+	})
+
+	t.Run("flushes synthetics before next user message", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role:      chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{{ID: "tc1", Function: tools.FunctionCall{Name: "shell"}}},
+			},
+			// No tool result — the user responds before any result arrives.
+			{Role: chat.MessageRoleUser, Content: "never mind"},
+			{Role: chat.MessageRoleAssistant, Content: "ok"},
+		}
+		result := sanitizeToolCalls(messages)
+
+		// The synthetic tc1 result must be injected before the second user message.
+		require.Len(t, result, 5)
+		assert.Equal(t, chat.MessageRoleAssistant, result[1].Role)
+		assert.Equal(t, "tc1", result[2].ToolCallID)
+		assert.True(t, result[2].IsError)
+		assert.Equal(t, chat.MessageRoleUser, result[3].Role)
+		assert.Equal(t, chat.MessageRoleAssistant, result[4].Role)
+	})
+
+	t.Run("flushes synthetics before next assistant with tool calls", func(t *testing.T) {
+		t.Parallel()
+		messages := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "hi"},
+			{
+				Role:      chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{{ID: "tc1"}},
+			},
+			// No result for tc1; the next message is another assistant tool call.
+			{
+				Role:      chat.MessageRoleAssistant,
+				ToolCalls: []tools.ToolCall{{ID: "tc2"}},
+			},
+			{Role: chat.MessageRoleTool, ToolCallID: "tc2", Content: "ok"},
+		}
+		result := sanitizeToolCalls(messages)
+
+		require.Len(t, result, 5) // 4 originals + synthetic tc1 result
+		// The synthetic for tc1 is inserted before the second assistant message.
+		assert.Equal(t, "tc1", result[2].ToolCallID)
+		assert.True(t, result[2].IsError)
+		assert.Len(t, result[3].ToolCalls, 1)
+		assert.Equal(t, "tc2", result[3].ToolCalls[0].ID)
+		assert.Equal(t, "tc2", result[4].ToolCallID)
+		assert.False(t, result[4].IsError)
+	})
+
+	t.Run("empty messages returns empty", func(t *testing.T) {
+		t.Parallel()
+		result := sanitizeToolCalls(nil)
+		assert.Nil(t, result) // nil input must come back as nil
+
+		result = sanitizeToolCalls([]chat.Message{})
+		assert.Empty(t, result)
+	})
+}
+
+// TestGetMessages_SanitizesOrphanedToolCalls checks GetMessages pairs tool calls left without results.
+func TestGetMessages_SanitizesOrphanedToolCalls(t *testing.T) {
+	s := New()
+	s.AddMessage(NewAgentMessage("", &chat.Message{
+		Role:    chat.MessageRoleUser,
+		Content: "do something",
+	}))
+	s.AddMessage(NewAgentMessage("", &chat.Message{
+		Role: chat.MessageRoleAssistant,
+		ToolCalls: []tools.ToolCall{
+			{ID: "orphan1", Function: tools.FunctionCall{Name: "shell"}},
+			{ID: "orphan2", Function: tools.FunctionCall{Name: "read_file"}},
+		},
+	}))
+	// No tool result messages — simulating a crash mid-run
+
+	messages := s.GetMessages(&agent.Agent{})
+	// Collect the tool call IDs and tool result IDs present in the output.
+	callIDs := make(map[string]bool)
+	resultIDs := make(map[string]bool)
+	for _, msg := range messages {
+		for _, tc := range msg.ToolCalls {
+			callIDs[tc.ID] = true
+		}
+		if msg.Role == chat.MessageRoleTool {
+			resultIDs[msg.ToolCallID] = true
+		}
+	}
+	// Without this guard the loop below passes vacuously if the calls were dropped.
+	require.Len(t, callIDs, 2, "both orphaned tool calls should be returned")
+	for id := range callIDs {
+		assert.True(t, resultIDs[id], "tool call %s should have a matching result", id)
+	}
+}
+
 func TestTransferTaskPromptExcludesParents(t *testing.T) {
 	t.Parallel()