From de6c0e736492495b1864345c261e414c58016f06 Mon Sep 17 00:00:00 2001
From: Eden Reich <eden.reich@gmail.com>
Date: Sun, 5 Jul 2026 00:47:45 +0200
Subject: [PATCH 1/3] fix(claude-code): make subscription mode a true
 pass-through

- No infer system prompt, context blocks, or system reminders are injected
  in claude_code mode; claude uses its own system prompt and native tools
- Stop double-executing claude's already-executed tool calls: their results
  are captured from the stream (domain.ToolCallResultProvider) and replayed
  verbatim instead of re-running them through infer's registry/approval gate
- Move the TaskCreate/TaskUpdate -> TodoWrite mapping to the output layer
  only: headless stdout shows one synthesized TodoWrite with the full
  accumulated todo list; the stored/replayed conversation stays verbatim
- Add prompts.agent.system_prompt_claude_code (empty default) passed via
  --append-system-prompt, settable with
  INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE
- Add claude_code.extra_args (config, --claude-code-extra-args flag,
  INFER_CLAUDE_CODE_EXTRA_ARGS env) appended before the trailing -p
- Accept tool_result content as string or content-block array (fixes
  unmarshal errors in logs)
- Quote claude CLI args in the debug log and keep -p as the last argument

Fixes the todo-mirroring and denied-writes behavior seen in
inference-gateway/inference-gateway#412
---
 cmd/agent.go                                  | 190 ++++++++++++-
 cmd/agent_test.go                             | 155 +++++++++++
 cmd/config.go                                 |   1 +
 cmd/root.go                                   |  15 +-
 config/config.go                              |  11 +-
 config/prompts.go                             |  13 +-
 internal/agent/agent.go                       |  10 +-
 internal/agent/agent_test.go                  |  27 ++
 internal/agent/agent_utils.go                 |  22 +-
 internal/container/container.go               |   2 +-
 internal/domain/interfaces.go                 |  15 +
 internal/infra/adapters/claude_code_client.go | 188 ++++++-------
 .../infra/adapters/claude_code_client_test.go | 260 +++++++++++-------
 13 files changed, 681 insertions(+), 228 deletions(-)

diff --git a/cmd/agent.go b/cmd/agent.go
index 8c1257d4..75eb6d1d 100644
--- a/cmd/agent.go
+++ b/cmd/agent.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"regexp"
 	"slices"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -109,6 +110,15 @@ type AgentSession struct {
 	totalCompletionTokens int
 	totalTokens           int
 	requestCount          int
+	claudeTasks           []claudeTask
+}
+
+// claudeTask mirrors one entry of Claude Code's native task list (TaskCreate/
+// TaskUpdate). Claude assigns sequential ids, so index+1 == task id.
+type claudeTask struct {
+	Content string
+	Status  string
+	Deleted bool
 }
 
 // inheritedSubagentMode returns the coding mode a subagent should start in, read
@@ -702,6 +712,9 @@ func (s *AgentSession) processSyncResponse(response *domain.ChatSyncResponse, re
 
 	if len(response.ToolCalls) > 0 {
 		assistantMsg.ToolCalls = &response.ToolCalls
+		if s.config.IsClaudeCodeMode() {
+			s.feedTaskAccumulator(response.ToolCalls)
+		}
 	}
 
 	s.addMessage(assistantMsg)
@@ -724,7 +737,12 @@ func (s *AgentSession) processSyncResponse(response *domain.ChatSyncResponse, re
 		return nil
 	}
 
-	toolResults := s.executeToolCalls(response.ToolCalls)
+	var toolResults []ConversationMessage
+	if s.config.IsClaudeCodeMode() {
+		toolResults = s.claudeToolResultMessages(response.ToolCalls, response.ToolResults)
+	} else {
+		toolResults = s.executeToolCalls(response.ToolCalls)
+	}
 	s.lastToolFailed = anyToolResultFailed(toolResults)
 
 	for _, result := range toolResults {
@@ -838,6 +856,163 @@ func (s *AgentSession) readApprovalResponses() {
 	}
 }
 
+// claudeToolResultMessages builds tool messages from the results claude
+// reported for tool calls it executed itself (Claude Code mode). A call with
+// no reported result, or an empty one, gets a neutral placeholder so the
+// tool_call is never left unanswered in the replayed conversation.
+func (s *AgentSession) claudeToolResultMessages(
+	toolCalls []sdk.ChatCompletionMessageToolCall,
+	results map[string]domain.ToolCallResult,
+) []ConversationMessage {
+	messages := make([]ConversationMessage, 0, len(toolCalls))
+	for _, tc := range toolCalls {
+		result, ok := results[tc.ID]
+		content := result.Content
+		if !ok || content == "" {
+			content = "Executed by Claude Code (no result reported)."
+		}
+		execution := &domain.ToolExecutionResult{
+			ToolName: tc.Function.Name,
+			Success:  !result.IsError,
+		}
+		if result.IsError {
+			execution.Error = result.Content
+		}
+		messages = append(messages, ConversationMessage{
+			Role:          "tool",
+			Content:       content,
+			ToolCallID:    tc.ID,
+			ToolExecution: execution,
+			Timestamp:     time.Now(),
+		})
+	}
+	return messages
+}
+
+// feedTaskAccumulator tracks Claude Code's native task list from TaskCreate/
+// TaskUpdate tool calls so the headless output can mirror it as a TodoWrite
+// view. Claude assigns sequential task ids, so creation order == id.
+func (s *AgentSession) feedTaskAccumulator(toolCalls []sdk.ChatCompletionMessageToolCall) {
+	for _, tc := range toolCalls {
+		switch tc.Function.Name {
+		case "TaskCreate":
+			var input struct {
+				Subject string `json:"subject"`
+			}
+			if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err != nil || input.Subject == "" {
+				logger.Debug("skipping unparsable TaskCreate input", "error", err)
+				continue
+			}
+			s.claudeTasks = append(s.claudeTasks, claudeTask{Content: input.Subject, Status: "pending"})
+		case "TaskUpdate":
+			s.applyTaskUpdate(tc.Function.Arguments)
+		}
+	}
+}
+
+// applyTaskUpdate applies a single TaskUpdate call to the accumulated task
+// list. The schema is parsed defensively (taskId may arrive as "1", "#1" or a
+// number; status/subject are optional) so drift in claude's tool schema
+// degrades to a no-op rather than corrupting the mirror.
+func (s *AgentSession) applyTaskUpdate(arguments string) {
+	var input struct {
+		TaskID  any    `json:"taskId"`
+		Status  string `json:"status"`
+		Subject string `json:"subject"`
+	}
+	if err := json.Unmarshal([]byte(arguments), &input); err != nil {
+		logger.Debug("skipping unparsable TaskUpdate input", "error", err)
+		return
+	}
+
+	var id int
+	switch v := input.TaskID.(type) {
+	case string:
+		id, _ = strconv.Atoi(strings.TrimPrefix(v, "#"))
+	case float64:
+		id = int(v)
+	}
+	if id < 1 || id > len(s.claudeTasks) {
+		logger.Debug("TaskUpdate references unknown task", "task_id", input.TaskID)
+		return
+	}
+
+	task := &s.claudeTasks[id-1]
+	if input.Subject != "" {
+		task.Content = input.Subject
+	}
+	switch input.Status {
+	case "pending", "in_progress", "completed":
+		task.Status = input.Status
+	case "deleted":
+		task.Deleted = true
+	}
+}
+
+// isClaudeTaskTool reports whether the tool name is one of Claude Code's
+// native task tools mirrored into the TodoWrite output view.
+func isClaudeTaskTool(name string) bool {
+	return name == "TaskCreate" || name == "TaskUpdate"
+}
+
+// renderTodoWriteView returns a copy of the message where Claude Code's
+// TaskCreate/TaskUpdate tool calls are replaced by a single synthesized
+// TodoWrite call carrying the full accumulated todo list, so downstream
+// consumers (infer-action) can mirror progress. Non-task tool calls are
+// preserved; messages without task calls are returned unchanged.
+func (s *AgentSession) renderTodoWriteView(msg ConversationMessage) ConversationMessage {
+	if msg.ToolCalls == nil {
+		return msg
+	}
+
+	hasTaskCall := false
+	for _, tc := range *msg.ToolCalls {
+		if isClaudeTaskTool(tc.Function.Name) {
+			hasTaskCall = true
+			break
+		}
+	}
+	if !hasTaskCall {
+		return msg
+	}
+
+	todos := make([]map[string]any, 0, len(s.claudeTasks))
+	for _, task := range s.claudeTasks {
+		if task.Deleted {
+			continue
+		}
+		todos = append(todos, map[string]any{"content": task.Content, "status": task.Status})
+	}
+	arguments, err := json.Marshal(map[string]any{"todos": todos})
+	if err != nil {
+		return msg
+	}
+
+	rendered := make([]sdk.ChatCompletionMessageToolCall, 0, len(*msg.ToolCalls))
+	todoWriteAdded := false
+	for _, tc := range *msg.ToolCalls {
+		if !isClaudeTaskTool(tc.Function.Name) {
+			rendered = append(rendered, tc)
+			continue
+		}
+		if todoWriteAdded {
+			continue
+		}
+		todoWriteAdded = true
+		rendered = append(rendered, sdk.ChatCompletionMessageToolCall{
+			ID:   tc.ID,
+			Type: tc.Type,
+			Function: sdk.ChatCompletionMessageToolCallFunction{
+				Name:      "TodoWrite",
+				Arguments: string(arguments),
+			},
+		})
+	}
+
+	msg.ToolCalls = &rendered
+	return msg
+}
+
 // toolResultMessage builds the conversation message for a finished tool call,
 // formatting either the successful result or the execution error.
 func (s *AgentSession) toolResultMessage(tc sdk.ChatCompletionMessageToolCall, result *domain.ToolExecutionResult, err error) ConversationMessage {
@@ -1143,6 +1318,10 @@ func (s *AgentSession) dispatchHooks(hook domain.HookPoint, turn int) {
 // pending tool_calls) - that guard is reminder-specific and must not block
 // command hooks, which is why it lives here rather than in dispatchHooks.
 func (s *AgentSession) injectDueReminders(hook domain.HookPoint, turn int) {
+	if s.config != nil && s.config.IsClaudeCodeMode() {
+		return
+	}
+
 	provider := s.reminderProvider
 	if provider == nil && s.config != nil {
 		provider = s.config.Reminders
@@ -1248,10 +1427,13 @@ func (s *AgentSession) outputMessage(msg ConversationMessage) {
 	}
 
 	logMsg := msg
+	if s.config.IsClaudeCodeMode() {
+		logMsg = s.renderTodoWriteView(logMsg)
+	}
 
-	if !s.config.Agent.VerboseTools && msg.ToolCalls != nil && len(*msg.ToolCalls) > 0 {
-		summaries := make([]string, len(*msg.ToolCalls))
-		for i, toolCall := range *msg.ToolCalls {
+	if !s.config.Agent.VerboseTools && logMsg.ToolCalls != nil && len(*logMsg.ToolCalls) > 0 {
+		summaries := make([]string, len(*logMsg.ToolCalls))
+		for i, toolCall := range *logMsg.ToolCalls {
 			summaries[i] = formatToolCallSummary(toolCall.Function.Name, toolCall.Function.Arguments)
 		}
 		logMsg.Tools = summaries
diff --git a/cmd/agent_test.go b/cmd/agent_test.go
index 191ed494..c94df359 100644
--- a/cmd/agent_test.go
+++ b/cmd/agent_test.go
@@ -1562,3 +1562,158 @@ func TestAgentSession_DispatchHooks_SkipsOffListCommandHook(t *testing.T) {
 		t.Fatal("off-list command hook must not run headless (secure-by-default)")
 	}
 }
+
+func TestInjectDueReminders_SkippedInClaudeCodeMode(t *testing.T) {
+	s := &AgentSession{
+		config: &config.Config{
+			ClaudeCode: config.ClaudeCodeConfig{Enabled: true},
+			Reminders: config.RemindersConfig{
+				Enabled: true,
+				Reminders: []config.ReminderConfig{
+					{Name: "todo", Text: "remember to push", Hook: domain.HookPreStream, Trigger: config.ReminderTriggerInterval, Interval: 1},
+				},
+			},
+		},
+		conversation:   []ConversationMessage{},
+		firedReminders: map[string]bool{},
+		maxTurns:       10,
+	}
+
+	s.injectDueReminders(domain.HookPreStream, 1)
+
+	if len(s.conversation) != 0 {
+		t.Fatalf("expected no reminder injected in claude_code mode, got %d messages", len(s.conversation))
+	}
+}
+
+func TestFeedTaskAccumulator(t *testing.T) {
+	s := &AgentSession{config: &config.Config{}}
+
+	s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{
+		{ID: "t1", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task one"}`}},
+		{ID: "t2", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task two","description":"details"}`}},
+		{ID: "b1", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{"command":"ls"}`}},
+	})
+
+	if len(s.claudeTasks) != 2 {
+		t.Fatalf("expected 2 tasks, got %d", len(s.claudeTasks))
+	}
+	if s.claudeTasks[0].Content != "Task one" || s.claudeTasks[0].Status != "pending" {
+		t.Errorf("unexpected first task: %+v", s.claudeTasks[0])
+	}
+
+	for _, args := range []string{
+		`{"taskId":"1","status":"in_progress"}`,
+		`{"taskId":"#2","status":"completed"}`,
+	} {
+		s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{
+			{Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: args}},
+		})
+	}
+	if s.claudeTasks[0].Status != "in_progress" {
+		t.Errorf("task 1 status = %q, want in_progress", s.claudeTasks[0].Status)
+	}
+	if s.claudeTasks[1].Status != "completed" {
+		t.Errorf("task 2 status = %q, want completed", s.claudeTasks[1].Status)
+	}
+
+	s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{
+		{Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":2,"status":"deleted"}`}},
+		{Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":"99","status":"completed"}`}},
+		{Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `not json`}},
+	})
+	if !s.claudeTasks[1].Deleted {
+		t.Error("task 2 should be marked deleted (numeric taskId)")
+	}
+}
+
+func TestRenderTodoWriteView(t *testing.T) {
+	s := &AgentSession{
+		config: &config.Config{ClaudeCode: config.ClaudeCodeConfig{Enabled: true}},
+		claudeTasks: []claudeTask{
+			{Content: "Task one", Status: "completed"},
+			{Content: "Task two", Status: "in_progress"},
+			{Content: "gone", Status: "pending", Deleted: true},
+		},
+	}
+
+	toolCalls := []sdk.ChatCompletionMessageToolCall{
+		{ID: "t1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task one"}`}},
+		{ID: "u1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":"1","status":"completed"}`}},
+		{ID: "b1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{"command":"ls"}`}},
+	}
+	msg := ConversationMessage{Role: "assistant", ToolCalls: &toolCalls}
+
+	rendered := s.renderTodoWriteView(msg)
+
+	if len(*rendered.ToolCalls) != 2 {
+		t.Fatalf("expected 2 rendered tool calls (TodoWrite + Bash), got %d", len(*rendered.ToolCalls))
+	}
+	todoWrite := (*rendered.ToolCalls)[0]
+	if todoWrite.Function.Name != "TodoWrite" || todoWrite.ID != "t1" {
+		t.Errorf("unexpected first rendered call: %+v", todoWrite)
+	}
+	var args struct {
+		Todos []struct {
+			Content string `json:"content"`
+			Status  string `json:"status"`
+		} `json:"todos"`
+	}
+	if err := json.Unmarshal([]byte(todoWrite.Function.Arguments), &args); err != nil {
+		t.Fatalf("TodoWrite arguments invalid: %v", err)
+	}
+	if len(args.Todos) != 2 {
+		t.Fatalf("expected 2 todos (deleted excluded), got %d", len(args.Todos))
+	}
+	if args.Todos[1].Content != "Task two" || args.Todos[1].Status != "in_progress" {
+		t.Errorf("unexpected second todo: %+v", args.Todos[1])
+	}
+	if (*rendered.ToolCalls)[1].Function.Name != "Bash" {
+		t.Errorf("non-task tool call not preserved: %+v", (*rendered.ToolCalls)[1])
+	}
+
+	// original message must be untouched (stored conversation keeps TaskCreate)
+	if (*msg.ToolCalls)[0].Function.Name != "TaskCreate" {
+		t.Error("original tool calls were mutated")
+	}
+
+	// messages without task calls pass through unchanged
+	plain := ConversationMessage{Role: "assistant", ToolCalls: &[]sdk.ChatCompletionMessageToolCall{
+		{ID: "b2", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{}`}},
+	}}
+	if got := s.renderTodoWriteView(plain); (*got.ToolCalls)[0].Function.Name != "Bash" {
+		t.Error("message without task calls should be unchanged")
+	}
+}
+
+func TestClaudeToolResultMessages(t *testing.T) {
+	s := &AgentSession{config: &config.Config{ClaudeCode: config.ClaudeCodeConfig{Enabled: true}}}
+
+	toolCalls := []sdk.ChatCompletionMessageToolCall{
+		{ID: "ok", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash"}},
+		{ID: "err", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Edit"}},
+		{ID: "missing", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Read"}},
+	}
+	results := map[string]domain.ToolCallResult{
+		"ok":  {Content: "done"},
+		"err": {Content: "boom", IsError: true},
+	}
+
+	messages := s.claudeToolResultMessages(toolCalls, results)
+
+	if len(messages) != 3 {
+		t.Fatalf("expected 3 tool messages, got %d", len(messages))
+	}
+	if messages[0].Content != "done" || !messages[0].ToolExecution.Success || messages[0].ToolCallID != "ok" {
+		t.Errorf("unexpected ok message: %+v", messages[0])
+	}
+	if messages[1].ToolExecution.Success || messages[1].ToolExecution.Error != "boom" {
+		t.Errorf("unexpected err message: %+v", messages[1])
+	}
+	if messages[2].Content == "" || messages[2].ToolCallID != "missing" {
+		t.Errorf("missing-result call must get a placeholder: %+v", messages[2])
+	}
+	if !anyToolResultFailed(messages) {
+		t.Error("anyToolResultFailed should report the failed claude tool result")
+	}
+}
diff --git a/cmd/config.go b/cmd/config.go
index 9d190e15..1b7d724b 100644
--- a/cmd/config.go
+++ b/cmd/config.go
@@ -506,6 +506,7 @@ func applyPromptsEnvOverrides(cfg *config.Config) {
 		"INFER_PROMPTS_AGENT_SYSTEM_PROMPT_PLAN":                    &cfg.Prompts.Agent.SystemPromptPlan,
 		"INFER_PROMPTS_AGENT_SYSTEM_PROMPT_REMOTE":                  &cfg.Prompts.Agent.SystemPromptRemote,
 		"INFER_PROMPTS_AGENT_SYSTEM_PROMPT_HEARTBEAT":               &cfg.Prompts.Agent.SystemPromptHeartbeat,
+		"INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE":             &cfg.Prompts.Agent.SystemPromptClaudeCode,
 		"INFER_PROMPTS_AGENT_CUSTOM_INSTRUCTIONS":                   &cfg.Prompts.Agent.CustomInstructions,
 		"INFER_PROMPTS_GIT_COMMIT_MESSAGE_SYSTEM_PROMPT":            &cfg.Prompts.Git.CommitMessage.SystemPrompt,
 		"INFER_PROMPTS_CONVERSATION_TITLE_GENERATION_SYSTEM_PROMPT": &cfg.Prompts.Conversation.TitleGeneration.SystemPrompt,
diff --git a/cmd/root.go b/cmd/root.go
index d9bd9a75..42fb02a6 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -49,6 +49,9 @@ func init() {
 	rootCmd.PersistentFlags().String("tools-bash-allow-append", "",
 		"comma/newline-separated commands added to the bash allow-list in every mode "+
 			"(standard, plan, auto); INFER_TOOLS_BASH_ALLOW_APPEND takes precedence")
+	rootCmd.PersistentFlags().String("claude-code-extra-args", "",
+		"comma/newline-separated extra arguments appended to the claude CLI invocation "+
+			"in Claude Code mode; INFER_CLAUDE_CODE_EXTRA_ARGS takes precedence")
 	rootCmd.PersistentFlags().String("reminders-file", "",
 		"path to a reminders YAML file, overriding project .infer/ and ~/.infer reminders.yaml "+
 			"(INFER_REMINDERS_CONFIG inline YAML takes precedence)")
@@ -72,10 +75,10 @@ func parseDelimitedList(value string) []string {
 	return out
 }
 
-// resolveBashAllowOverride returns the override value for a bash allow-list,
+// resolveFlagEnvOverride returns the override value for a flag/env pair,
 // preferring the env var over the matching persistent flag (per the documented
 // flags < env layering). Empty means neither was provided.
-func resolveBashAllowOverride(flagName, envName string) string {
+func resolveFlagEnvOverride(flagName, envName string) string {
 	if env := os.Getenv(envName); env != "" {
 		return env
 	}
@@ -103,7 +106,7 @@ func applyBashAllowAppends(v *viper.Viper) {
 	}
 
 	for _, a := range appends {
-		if override := resolveBashAllowOverride(a.appendFlag, a.appendEnv); override != "" {
+		if override := resolveFlagEnvOverride(a.appendFlag, a.appendEnv); override != "" {
 			v.Set(a.key, append(v.GetStringSlice(a.key), parseDelimitedList(override)...))
 		}
 	}
@@ -184,6 +187,12 @@ func initConfig() {
 
 	applyBashAllowAppends(v)
 
+	// claude_code.extra_args is a slice, which viper can't parse from a single
+	// env var generically - same special-casing as INFER_A2A_AGENTS above.
+	if extra := resolveFlagEnvOverride("claude-code-extra-args", "INFER_CLAUDE_CODE_EXTRA_ARGS"); extra != "" {
+		v.Set("claude_code.extra_args", parseDelimitedList(extra))
+	}
+
 	cfg, err := loadConfigFromViper()
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Error loading config: %v\n", err)
diff --git a/config/config.go b/config/config.go
index 922ee5a2..42957129 100644
--- a/config/config.go
+++ b/config/config.go
@@ -75,11 +75,12 @@ type GatewayConfig struct {
 
 // ClaudeCodeConfig contains Claude Code CLI integration settings
 type ClaudeCodeConfig struct {
-	Enabled         bool   `yaml:"enabled" mapstructure:"enabled"`
-	CLIPath         string `yaml:"cli_path" mapstructure:"cli_path"`
-	Timeout         int    `yaml:"timeout" mapstructure:"timeout"`
-	MaxOutputTokens int    `yaml:"max_output_tokens" mapstructure:"max_output_tokens"`
-	ThinkingBudget  int    `yaml:"thinking_budget" mapstructure:"thinking_budget"`
+	Enabled         bool     `yaml:"enabled" mapstructure:"enabled"`
+	CLIPath         string   `yaml:"cli_path" mapstructure:"cli_path"`
+	Timeout         int      `yaml:"timeout" mapstructure:"timeout"`
+	MaxOutputTokens int      `yaml:"max_output_tokens" mapstructure:"max_output_tokens"`
+	ThinkingBudget  int      `yaml:"thinking_budget" mapstructure:"thinking_budget"`
+	ExtraArgs       []string `yaml:"extra_args,omitempty" mapstructure:"extra_args"`
 }
 
 // SpeechToTextConfig contains speech-to-text (Whisper) integration settings.
diff --git a/config/prompts.go b/config/prompts.go
index 19fe6c6a..40316d26 100644
--- a/config/prompts.go
+++ b/config/prompts.go
@@ -121,12 +121,13 @@ type PromptsConfig struct {
 }
 
 type PromptsAgentConfig struct {
-	SystemPrompt          string `yaml:"system_prompt" mapstructure:"system_prompt"`
-	SystemPromptPlan      string `yaml:"system_prompt_plan" mapstructure:"system_prompt_plan"`
-	SystemPromptAuto      string `yaml:"system_prompt_auto" mapstructure:"system_prompt_auto"`
-	SystemPromptRemote    string `yaml:"system_prompt_remote" mapstructure:"system_prompt_remote"`
-	SystemPromptHeartbeat string `yaml:"system_prompt_heartbeat" mapstructure:"system_prompt_heartbeat"`
-	CustomInstructions    string `yaml:"custom_instructions" mapstructure:"custom_instructions"`
+	SystemPrompt           string `yaml:"system_prompt" mapstructure:"system_prompt"`
+	SystemPromptPlan       string `yaml:"system_prompt_plan" mapstructure:"system_prompt_plan"`
+	SystemPromptAuto       string `yaml:"system_prompt_auto" mapstructure:"system_prompt_auto"`
+	SystemPromptRemote     string `yaml:"system_prompt_remote" mapstructure:"system_prompt_remote"`
+	SystemPromptHeartbeat  string `yaml:"system_prompt_heartbeat" mapstructure:"system_prompt_heartbeat"`
+	SystemPromptClaudeCode string `yaml:"system_prompt_claude_code" mapstructure:"system_prompt_claude_code"`
+	CustomInstructions     string `yaml:"custom_instructions" mapstructure:"custom_instructions"`
 }
 
 type PromptsGitConfig struct {
diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index 5ce8c3be..ebb74dd0 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -440,14 +440,20 @@ func (s *AgentServiceImpl) Run(ctx context.Context, req *domain.AgentRequest) (*
 
 	content, reasoningContent, toolCalls := extractFirstChoice(response)
 
-	return &domain.ChatSyncResponse{
+	syncResponse := &domain.ChatSyncResponse{
 		RequestID:        req.RequestID,
 		Content:          content,
 		ReasoningContent: reasoningContent,
 		ToolCalls:        toolCalls,
 		Usage:            response.Usage,
 		Duration:         duration,
-	}, nil
+	}
+
+	if provider, ok := s.client.(domain.ToolCallResultProvider); ok {
+		syncResponse.ToolResults = provider.TakeToolCallResults()
+	}
+
+	return syncResponse, nil
 }
 
 // extractFirstChoice pulls content, reasoning, and tool calls from the first
diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go
index 4e9c592a..e9c46048 100644
--- a/internal/agent/agent_test.go
+++ b/internal/agent/agent_test.go
@@ -1282,6 +1282,33 @@ func TestAgentServiceImpl_AddSystemPrompt(t *testing.T) {
 	assert.Equal(t, sdk.User, result[1].Role)
 }
 
+func TestAgentServiceImpl_AddSystemPrompt_ClaudeCodeModePassthrough(t *testing.T) {
+	cfg := &config.Config{
+		ClaudeCode: config.ClaudeCodeConfig{Enabled: true},
+		Agent: config.AgentConfig{
+			SystemPromptWithDefaults: true,
+		},
+		Prompts: config.PromptsConfig{
+			Agent: config.PromptsAgentConfig{
+				SystemPrompt: "You are a helpful assistant.",
+			},
+		},
+	}
+
+	agentService := &AgentServiceImpl{
+		config: cfg,
+	}
+
+	inputMessages := []sdk.Message{
+		{Role: sdk.User, Content: sdk.NewMessageContent("Hello")},
+	}
+
+	result := agentService.addSystemPrompt(inputMessages)
+
+	assert.Len(t, result, 1)
+	assert.Equal(t, sdk.User, result[0].Role)
+}
+
 func TestAgentServiceImpl_BuildSystemPrompt(t *testing.T) {
 	cfg := &config.Config{
 		Agent: config.AgentConfig{
diff --git a/internal/agent/agent_utils.go b/internal/agent/agent_utils.go
index 4ee87add..c51b714e 100644
--- a/internal/agent/agent_utils.go
+++ b/internal/agent/agent_utils.go
@@ -162,13 +162,29 @@ func (s *AgentServiceImpl) buildSystemPromptText(messages []sdk.Message) string
 
 // BuildSystemPrompt returns the system prompt a fresh session (turn 0) would
 // send to the LLM. Exposed for the `infer debug agent system_prompt` command.
+// In Claude Code mode infer's prompt is never sent - claude uses its own; only
+// the optional append (prompts.agent.system_prompt_claude_code) is reported.
 func (s *AgentServiceImpl) BuildSystemPrompt() string {
+	if s.config != nil && s.config.IsClaudeCodeMode() {
+		if appendPrompt := s.config.Prompts.Agent.SystemPromptClaudeCode; appendPrompt != "" {
+			return fmt.Sprintf("(claude_code mode: pass-through - appended to Claude Code's own system prompt via --append-system-prompt)\n\n%s", appendPrompt)
+		}
+		return "(claude_code mode: pass-through - no system prompt is sent; Claude Code uses its own)"
+	}
 	return s.buildSystemPromptText(nil)
 }
 
 // addSystemPrompt prepends the assembled system prompt (with dynamic sandbox
-// info) to messages.
+// info) to messages. In Claude Code mode the conversation is passed through
+// untouched: claude uses its own system prompt (an optional append lives in
+// prompts.agent.system_prompt_claude_code, applied via --append-system-prompt
+// by the adapter). BuildSystemPrompt reports that pass-through state (and the
+// optional append) for the debug command instead of a gateway-mode prompt.
 func (s *AgentServiceImpl) addSystemPrompt(messages []sdk.Message) []sdk.Message {
+	if s.config != nil && s.config.IsClaudeCodeMode() {
+		return messages
+	}
+
 	prompt := s.buildSystemPromptText(messages)
 	if prompt == "" {
 		return messages
@@ -833,6 +849,10 @@ func conversationAwaitsToolResults(conv []sdk.Message) bool {
 // guards the fired-set because the streaming goroutine (pre_session/pre_stream)
 // and the event-loop goroutine (the other points) can both reach here.
 func (s *AgentServiceImpl) injectDueReminders(agentCtx *domain.AgentContext, hook domain.HookPoint) {
+	if s.config != nil && s.config.IsClaudeCodeMode() {
+		return
+	}
+
 	provider := s.reminderProvider
 	if provider == nil && s.config != nil {
 		provider = s.config.Reminders
diff --git a/internal/container/container.go b/internal/container/container.go
index 8600dc13..5f9b7933 100644
--- a/internal/container/container.go
+++ b/internal/container/container.go
@@ -772,7 +772,7 @@ func (c *ServiceContainer) createAgentSDKClient() domain.SDKClient {
 
 	if c.config.IsClaudeCodeMode() {
 		logger.Info("using Claude Code CLI mode (subscription-based)")
-		return adapters.NewClaudeCodeClient(&c.config.ClaudeCode, c.stateManager)
+		return adapters.NewClaudeCodeClient(&c.config.ClaudeCode, c.stateManager, c.config.Prompts.Agent.SystemPromptClaudeCode)
 	}
 
 	logger.Debug("using gateway mode (API-based)")
diff --git a/internal/domain/interfaces.go b/internal/domain/interfaces.go
index daaed8b8..10827737 100644
--- a/internal/domain/interfaces.go
+++ b/internal/domain/interfaces.go
@@ -266,10 +266,25 @@ type ChatSyncResponse struct {
 	Content          string                              `json:"content"`
 	ReasoningContent string                              `json:"reasoning_content,omitempty"`
 	ToolCalls        []sdk.ChatCompletionMessageToolCall `json:"tool_calls,omitempty"`
+	ToolResults      map[string]ToolCallResult           `json:"tool_results,omitempty"`
 	Usage            *sdk.CompletionUsage                `json:"usage,omitempty"`
 	Duration         time.Duration                       `json:"duration"`
 }
 
+// ToolCallResult is the outcome of a tool call executed by the backend itself
+// (e.g. inside the Claude Code CLI) rather than by the local tool registry.
+type ToolCallResult struct {
+	Content string `json:"content"`
+	IsError bool   `json:"is_error"`
+}
+
+// ToolCallResultProvider is implemented by SDK clients that execute tools
+// themselves and can report per-call results after a GenerateContent call.
+// Take semantics: the returned map is drained from the client.
+type ToolCallResultProvider interface {
+	TakeToolCallResults() map[string]ToolCallResult
+}
+
 // ChatService handles chat completion operations
 type ChatService interface {
 	CancelRequest(requestID string) error
diff --git a/internal/infra/adapters/claude_code_client.go b/internal/infra/adapters/claude_code_client.go
index 99087479..e4380e75 100644
--- a/internal/infra/adapters/claude_code_client.go
+++ b/internal/infra/adapters/claude_code_client.go
@@ -7,6 +7,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"maps"
 	"os"
 	"os/exec"
 	"strings"
@@ -23,25 +24,39 @@ import (
 // ClaudeCodeClient is a wrapper around the official Claude Code CLI
 // It implements the SDKClient interface by spawning the claude process
 type ClaudeCodeClient struct {
-	config         *config.ClaudeCodeConfig
-	stateManager   domain.StateManager
-	tools          *[]sdk.ChatCompletionTool
-	options        *sdk.CreateChatCompletionRequest
-	middlewareOpts *sdk.MiddlewareOptions
-	wg             *sync.WaitGroup
-	taskCreateIDs  map[string]string
+	config             *config.ClaudeCodeConfig
+	stateManager       domain.StateManager
+	tools              *[]sdk.ChatCompletionTool
+	options            *sdk.CreateChatCompletionRequest
+	middlewareOpts     *sdk.MiddlewareOptions
+	wg                 *sync.WaitGroup
+	appendSystemPrompt string
+	toolResults        map[string]domain.ToolCallResult
 }
 
 // NewClaudeCodeClient creates a new Claude Code CLI client
-func NewClaudeCodeClient(cfg *config.ClaudeCodeConfig, stateManager domain.StateManager) domain.SDKClient {
+func NewClaudeCodeClient(cfg *config.ClaudeCodeConfig, stateManager domain.StateManager, appendSystemPrompt string) domain.SDKClient {
 	return &ClaudeCodeClient{
-		config:        cfg,
-		stateManager:  stateManager,
-		wg:            &sync.WaitGroup{},
-		taskCreateIDs: make(map[string]string),
+		config:             cfg,
+		stateManager:       stateManager,
+		wg:                 &sync.WaitGroup{},
+		appendSystemPrompt: appendSystemPrompt,
+		toolResults:        make(map[string]domain.ToolCallResult),
 	}
 }
 
+// TakeToolCallResults returns and clears the tool results claude reported
+// during the last GenerateContent call. Implements domain.ToolCallResultProvider.
+func (c *ClaudeCodeClient) TakeToolCallResults() map[string]domain.ToolCallResult {
+	if len(c.toolResults) == 0 {
+		return nil
+	}
+	out := make(map[string]domain.ToolCallResult, len(c.toolResults))
+	maps.Copy(out, c.toolResults)
+	clear(c.toolResults)
+	return out
+}
+
 // WithOptions sets the chat completion request options
 func (c *ClaudeCodeClient) WithOptions(opts *sdk.CreateChatCompletionRequest) domain.SDKClient {
 	clone := *c
@@ -70,6 +85,8 @@ func (c *ClaudeCodeClient) GenerateContent(
 	model string,
 	messages []sdk.Message,
 ) (*sdk.CreateChatCompletionResponse, error) {
+	clear(c.toolResults)
+
 	eventChan, err := c.GenerateContentStream(ctx, provider, model, messages)
 	if err != nil {
 		return nil, err
@@ -94,6 +111,12 @@ func (c *ClaudeCodeClient) GenerateContentStream(
 ) (<-chan sdk.SSEvent, error) {
 	args := c.buildArgs(model)
 
+	logger.Debug("executing claude code cli",
+		"path", c.config.CLIPath,
+		"args", fmt.Sprintf("%q", args),
+		"messages", len(messages),
+	)
+
 	cmd := exec.CommandContext(ctx, c.config.CLIPath, args...)
 	cmd.Env = c.buildEnv()
 
@@ -122,6 +145,8 @@ func (c *ClaudeCodeClient) GenerateContentStream(
 		return nil, fmt.Errorf("failed to marshal messages: %w", err)
 	}
 
+	logger.Debug("writing conversation to claude code stdin", "bytes", len(messagesJSON))
+
 	if _, err := stdin.Write(messagesJSON); err != nil {
 		return nil, fmt.Errorf("failed to write to stdin: %w", err)
 	}
@@ -152,13 +177,16 @@ func (c *ClaudeCodeClient) buildArgs(model string) []string {
 		"--include-hook-events",
 		"--model", model,
 		"--permission-mode", permissionMode,
-		"-p",
 	}
 
-	if c.tools != nil && len(*c.tools) > 0 {
-		args = append(args, "--disallowedTools", "all")
+	if c.appendSystemPrompt != "" {
+		args = append(args, "--append-system-prompt", c.appendSystemPrompt)
 	}
 
+	args = append(args, c.config.ExtraArgs...)
+
+	args = append(args, "-p")
+
 	return args
 }
 
@@ -386,7 +414,7 @@ func (c *ClaudeCodeClient) transformAssistantMessage(msg ClaudeCodeMessage, mode
 				},
 			}, model))
 		case "tool_use":
-			name, args := c.maybeMapTaskCreateToTodoWrite(block)
+			name, args := block.Name, string(block.Input)
 			events = append(events, c.createDeltaEvent(map[string]any{
 				"choices": []map[string]any{
 					{
@@ -415,7 +443,7 @@ func (c *ClaudeCodeClient) transformAssistantMessage(msg ClaudeCodeMessage, mode
 
 // transformUserMessage converts tool_result blocks into tool-call delta chunks,
 // plus a typed tool_failure event when the result carries is_error=true.
-// TaskCreate results are mapped to TodoWrite results.
+// Results are forwarded verbatim - claude executed the tool itself.
 func (c *ClaudeCodeClient) transformUserMessage(msg ClaudeCodeMessage, model string) []sdk.SSEvent {
 	var events []sdk.SSEvent
 
@@ -430,7 +458,7 @@ func (c *ClaudeCodeClient) transformUserMessage(msg ClaudeCodeMessage, model str
 			continue
 		}
 
-		result, isError := c.maybeMapTaskCreateResult(content.ToolUseID, content.Content, content.IsError)
+		result, isError := string(content.Content), content.IsError
 
 		events = append(events, c.createDeltaEvent(map[string]any{
 			"choices": []map[string]any{
@@ -549,85 +577,6 @@ func (c *ClaudeCodeClient) createToolFailureEvent(toolUseID, errorMsg string) sd
 	}
 }
 
-// maybeMapTaskCreateToTodoWrite checks if a tool_use block is a TaskCreate
-// call and if so, maps it to a TodoWrite call. Returns the tool name and
-// serialized arguments to use. For non-TaskCreate tools, returns the original
-// values unchanged.
-func (c *ClaudeCodeClient) maybeMapTaskCreateToTodoWrite(block ContentBlock) (string, string) {
-	if block.Name != "TaskCreate" {
-		return block.Name, string(block.Input)
-	}
-
-	var taskInput struct {
-		Subject     string `json:"subject"`
-		Description string `json:"description,omitempty"`
-	}
-	if err := json.Unmarshal(block.Input, &taskInput); err != nil {
-		logger.Error(fmt.Sprintf("Failed to parse TaskCreate input: %v", err))
-		return block.Name, string(block.Input)
-	}
-
-	c.taskCreateIDs[block.ID] = taskInput.Subject
-
-	todoInput := map[string]any{
-		"todos": []map[string]any{
-			{
-				"content": taskInput.Subject,
-				"status":  "in_progress",
-			},
-		},
-	}
-	todoInputBytes, err := json.Marshal(todoInput)
-	if err != nil {
-		logger.Error(fmt.Sprintf("Failed to marshal TodoWrite input: %v", err))
-		return block.Name, string(block.Input)
-	}
-
-	return "TodoWrite", string(todoInputBytes)
-}
-
-// maybeMapTaskCreateResult checks if a tool result corresponds to a previously
-// tracked TaskCreate call and if so, maps the result to a TodoWrite result.
-// Returns the result content and is_error flag to use. For non-TaskCreate
-// results, returns the original values unchanged.
-func (c *ClaudeCodeClient) maybeMapTaskCreateResult(toolUseID, result string, isError bool) (string, bool) {
-	subject, ok := c.taskCreateIDs[toolUseID]
-	if !ok {
-		return result, isError
-	}
-
-	delete(c.taskCreateIDs, toolUseID)
-
-	status := "completed"
-	if isError {
-		status = "pending"
-	}
-
-	todoResult := map[string]any{
-		"todos": []map[string]any{
-			{
-				"content": subject,
-				"status":  status,
-			},
-		},
-		"total_tasks":      1,
-		"completed_tasks":  0,
-		"in_progress_task": "",
-		"validation_ok":    true,
-	}
-	if !isError {
-		todoResult["completed_tasks"] = 1
-	}
-
-	resultBytes, err := json.Marshal(todoResult)
-	if err != nil {
-		logger.Error(fmt.Sprintf("Failed to marshal TodoWrite result: %v", err))
-		return result, isError
-	}
-
-	return string(resultBytes), isError
-}
-
 type AssistantMessage struct {
 	Content []ContentBlock `json:"content"`
 	Role    string         `json:"role"`
@@ -639,10 +588,42 @@ type ToolResultMessage struct {
 }
 
 type ToolResultContent struct {
-	Type      string `json:"type"`
-	ToolUseID string `json:"tool_use_id"`
-	IsError   bool   `json:"is_error"`
-	Content   string `json:"content"`
+	Type      string            `json:"type"`
+	ToolUseID string            `json:"tool_use_id"`
+	IsError   bool              `json:"is_error"`
+	Content   toolResultPayload `json:"content"`
+}
+
+// toolResultPayload accepts the two shapes claude uses for tool_result
+// content on the wire: a plain string, or an array of content blocks
+// ([{"type":"text","text":"..."}]). Block arrays are flattened to their
+// concatenated text; non-text blocks are skipped.
+type toolResultPayload string
+
+func (p *toolResultPayload) UnmarshalJSON(data []byte) error {
+	var s string
+	if err := json.Unmarshal(data, &s); err == nil {
+		*p = toolResultPayload(s)
+		return nil
+	}
+
+	var blocks []struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	}
+	if err := json.Unmarshal(data, &blocks); err != nil {
+		*p = toolResultPayload(data)
+		return nil
+	}
+
+	var sb strings.Builder
+	for _, b := range blocks {
+		if b.Type == "text" {
+			sb.WriteString(b.Text)
+		}
+	}
+	*p = toolResultPayload(sb.String())
+	return nil
 }
 
 // ContentBlock represents a content block in the assistant message
@@ -858,6 +839,11 @@ func (c *ClaudeCodeClient) processToolCalls(toolCallsRaw []any, toolCallsMap map
 		}
 
 		c.processToolCallFunction(tc, toolCall)
+
+		if result, ok := tc["result"].(string); ok {
+			isError, _ := tc["is_error"].(bool)
+			c.toolResults[id] = domain.ToolCallResult{Content: result, IsError: isError}
+		}
 	}
 }
 
diff --git a/internal/infra/adapters/claude_code_client_test.go b/internal/infra/adapters/claude_code_client_test.go
index e9ccf214..0bd96103 100644
--- a/internal/infra/adapters/claude_code_client_test.go
+++ b/internal/infra/adapters/claude_code_client_test.go
@@ -5,11 +5,13 @@ import (
 	"encoding/json"
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 
 	sdk "github.com/inference-gateway/sdk"
 
 	config "github.com/inference-gateway/cli/config"
+	domain "github.com/inference-gateway/cli/internal/domain"
 )
 
 func transform(t *testing.T, c *ClaudeCodeClient, msg ClaudeCodeMessage) []sdk.SSEvent {
@@ -705,12 +707,6 @@ func TestToolResultContentWithIsError(t *testing.T) {
 	}
 }
 
-// todoWriteCounts tracks the counts collected during processing.
-type todoWriteCounts struct {
-	ToolUse int
-	Result  int
-}
-
 // extractToolCallsFromEvent extracts tool call maps from an SSE event's delta.
 // Returns nil if the event has no tool calls.
 func extractToolCallsFromEvent(ev sdk.SSEvent) []map[string]any {
@@ -746,104 +742,20 @@ func extractToolCallsFromEvent(ev sdk.SSEvent) []map[string]any {
 	return toolCalls
 }
 
-// todoWriteArgs holds the expected shape of TodoWrite arguments.
-type todoWriteArgs struct {
-	Todos []struct {
-		Content string `json:"content"`
-		Status  string `json:"status"`
-	} `json:"todos"`
-}
-
-// todoWriteResult holds the expected shape of a TodoWrite result.
-type todoWriteResult struct {
-	Todos          []any `json:"todos"`
-	TotalTasks     int   `json:"total_tasks"`
-	CompletedTasks int   `json:"completed_tasks"`
-	ValidationOK   bool  `json:"validation_ok"`
-}
-
-// validateTodoWriteArgs checks the shape of a TodoWrite arguments JSON string.
-func validateTodoWriteArgs(t *testing.T, args string) {
-	t.Helper()
-
-	var ta todoWriteArgs
-	if err := json.Unmarshal([]byte(args), &ta); err != nil {
-		t.Errorf("TodoWrite arguments not valid JSON: %v", err)
-		return
-	}
-	if len(ta.Todos) != 1 {
-		t.Errorf("TodoWrite arguments: got %d todos, want 1", len(ta.Todos))
-		return
-	}
-	if ta.Todos[0].Status != "in_progress" {
-		t.Errorf("TodoWrite todo status = %q, want in_progress", ta.Todos[0].Status)
-		return
-	}
-	if ta.Todos[0].Content == "" {
-		t.Error("TodoWrite todo content is empty")
-	}
-}
-
-// processToolCall inspects a single tool call map and updates counts.
-// It returns true if the tool call was a TodoWrite (for result matching).
-func processToolCall(t *testing.T, tc map[string]any, c *ClaudeCodeClient, counts *todoWriteCounts) bool {
-	t.Helper()
-
-	funcRaw, ok := tc["function"].(map[string]any)
-	if !ok {
-		return false
-	}
-	name, _ := funcRaw["name"].(string)
-	if name != "TodoWrite" {
-		return false
-	}
-	counts.ToolUse++
-	args, _ := funcRaw["arguments"].(string)
-	validateTodoWriteArgs(t, args)
-	return true
-}
-
-// processToolResult inspects a tool result and updates counts.
-func processToolResult(t *testing.T, tc map[string]any, c *ClaudeCodeClient, counts *todoWriteCounts) {
-	t.Helper()
-
-	resultStr, ok := tc["result"].(string)
-	if !ok {
-		return
-	}
-	if _, isTaskCreate := c.taskCreateIDs[tc["id"].(string)]; isTaskCreate {
-		// This was a mapped TaskCreate result; it should have been remapped.
-		// If we see it here, the mapping didn't work.
-		return
-	}
-	var tr todoWriteResult
-	if err := json.Unmarshal([]byte(resultStr), &tr); err != nil || !tr.ValidationOK {
-		return
-	}
-	counts.Result++
-	if tr.TotalTasks != 1 {
-		t.Errorf("TodoWrite result: total_tasks = %d, want 1", tr.TotalTasks)
-	}
-	if len(tr.Todos) != 1 {
-		t.Errorf("TodoWrite result: got %d todos, want 1", len(tr.Todos))
-	}
-}
-
-// TestTaskCreateToTodoWriteMapping uses the real claude-run.jsonl fixture to
-// verify that TaskCreate tool_use and tool_result events are mapped to TodoWrite
-// equivalents through the full transform pipeline.
-func TestTaskCreateToTodoWriteMapping(t *testing.T) {
+// TestTaskCreatePassthrough uses the real claude-run fixture to verify that
+// TaskCreate tool_use blocks and their results flow through the transform
+// pipeline verbatim - the stream is no longer rewritten to TodoWrite (the
+// rename now happens at the headless output layer in cmd/agent.go).
+func TestTaskCreatePassthrough(t *testing.T) {
 	f, err := os.Open(filepath.Join("testdata", "todos_write.jsonl"))
 	if err != nil {
 		t.Fatalf("open fixture: %v", err)
 	}
 	defer func() { _ = f.Close() }()
 
-	c := &ClaudeCodeClient{
-		taskCreateIDs: make(map[string]string),
-	}
+	c := &ClaudeCodeClient{}
 	var model string
-	var counts todoWriteCounts
+	var taskCreateCalls, taskCreateResults, todoWriteCalls int
 
 	scanner := bufio.NewScanner(f)
 	scanner.Buffer(make([]byte, 0, 256*1024), 10*1024*1024)
@@ -858,8 +770,25 @@ func TestTaskCreateToTodoWriteMapping(t *testing.T) {
 		}
 		for _, ev := range c.transformMessage(msg, line, model) {
 			for _, tc := range extractToolCallsFromEvent(ev) {
-				processToolCall(t, tc, c, &counts)
-				processToolResult(t, tc, c, &counts)
+				if funcRaw, ok := tc["function"].(map[string]any); ok {
+					name, _ := funcRaw["name"].(string)
+					switch name {
+					case "TaskCreate":
+						taskCreateCalls++
+						args, _ := funcRaw["arguments"].(string)
+						var input struct {
+							Subject string `json:"subject"`
+						}
+						if err := json.Unmarshal([]byte(args), &input); err != nil || input.Subject == "" {
+							t.Errorf("TaskCreate arguments not passed through verbatim: %q", args)
+						}
+					case "TodoWrite":
+						todoWriteCalls++
+					}
+				}
+				if result, ok := tc["result"].(string); ok && strings.Contains(result, "created successfully") {
+					taskCreateResults++
+				}
 			}
 		}
 	}
@@ -867,13 +796,134 @@ func TestTaskCreateToTodoWriteMapping(t *testing.T) {
 		t.Fatalf("scanner: %v", err)
 	}
 
-	if counts.ToolUse != 3 {
-		t.Errorf("got %d TodoWrite tool_use events, want 3", counts.ToolUse)
+	if taskCreateCalls != 3 {
+		t.Errorf("got %d TaskCreate tool_use events, want 3", taskCreateCalls)
+	}
+	if taskCreateResults != 3 {
+		t.Errorf("got %d verbatim TaskCreate results, want 3", taskCreateResults)
+	}
+	if todoWriteCalls != 0 {
+		t.Errorf("got %d TodoWrite tool_use events, want 0 (stream must not be rewritten)", todoWriteCalls)
+	}
+}
+
+func TestBuildArgs_AppendSystemPrompt(t *testing.T) {
+	base := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{}}
+	args := base.buildArgs("anthropic/claude-sonnet-4-6")
+	for _, a := range args {
+		if a == "--append-system-prompt" {
+			t.Fatal("--append-system-prompt must be omitted when no prompt is configured")
+		}
+	}
+
+	withPrompt := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{}, appendSystemPrompt: "extra context"}
+	args = withPrompt.buildArgs("claude-sonnet-4-6")
+	found := false
+	for i, a := range args {
+		if a == "--append-system-prompt" {
+			found = true
+			if i+1 >= len(args) || args[i+1] != "extra context" {
+				t.Fatalf("--append-system-prompt value missing, args: %v", args)
+			}
+		}
+	}
+	if !found {
+		t.Fatalf("--append-system-prompt not present, args: %v", args)
+	}
+}
+
+func TestBuildArgs_ExtraArgsAndTrailingP(t *testing.T) {
+	c := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{
+		ExtraArgs: []string{"--max-turns", "5"},
+	}, appendSystemPrompt: "extra context"}
+	args := c.buildArgs("claude-sonnet-4-6")
+
+	if args[len(args)-1] != "-p" {
+		t.Fatalf("-p must be the last argument, args: %v", args)
+	}
+	joined := strings.Join(args, "\x00")
+	if !strings.Contains(joined, "--max-turns\x005") {
+		t.Fatalf("extra args not appended in order, args: %v", args)
+	}
+
+	noExtra := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{}}
+	args = noExtra.buildArgs("claude-sonnet-4-6")
+	if args[len(args)-1] != "-p" {
+		t.Fatalf("-p must be the last argument without extra args, args: %v", args)
+	}
+}
+
+func TestProcessToolCalls_CapturesClaudeResults(t *testing.T) {
+	c := &ClaudeCodeClient{toolResults: map[string]domain.ToolCallResult{}}
+	toolCallsMap := map[string]*sdk.ChatCompletionMessageToolCall{}
+
+	c.processToolCalls([]any{
+		map[string]any{
+			"id": "call_1",
+			"function": map[string]any{
+				"name":      "Bash",
+				"arguments": `{"command":"ls"}`,
+			},
+		},
+	}, toolCallsMap)
+	c.processToolCalls([]any{
+		map[string]any{"id": "call_1", "result": "file.txt", "is_error": false},
+		map[string]any{"id": "call_2", "result": "boom", "is_error": true},
+	}, toolCallsMap)
+
+	results := c.TakeToolCallResults()
+	if len(results) != 2 {
+		t.Fatalf("expected 2 captured results, got %d", len(results))
+	}
+	if r := results["call_1"]; r.Content != "file.txt" || r.IsError {
+		t.Errorf("unexpected call_1 result: %+v", r)
+	}
+	if r := results["call_2"]; r.Content != "boom" || !r.IsError {
+		t.Errorf("unexpected call_2 result: %+v", r)
+	}
+	if again := c.TakeToolCallResults(); again != nil {
+		t.Errorf("TakeToolCallResults must drain, got %v", again)
+	}
+}
+
+// tool_result content arrives either as a plain string or as an array of
+// content blocks; the previous string typing made the whole user message fail
+// to unmarshal, silently dropping the tool results.
+func TestTransformUserMessage_BlockArrayContent(t *testing.T) {
+	rawJSON := `{
+		"type": "user",
+		"message": {
+			"role": "user",
+			"content": [
+				{"type":"tool_result","tool_use_id":"toolu_arr","is_error":false,"content":[{"type":"text","text":"line one"},{"type":"text","text":" line two"}]},
+				{"type":"tool_result","tool_use_id":"toolu_str","is_error":false,"content":"plain"}
+			]
+		}
+	}`
+
+	var msg ClaudeCodeMessage
+	if err := json.Unmarshal([]byte(rawJSON), &msg); err != nil {
+		t.Fatalf("unmarshal message: %v", err)
+	}
+
+	c := &ClaudeCodeClient{}
+	events := c.transformUserMessage(msg, "claude-haiku-4-5")
+	if len(events) != 2 {
+		t.Fatalf("got %d events, want 2", len(events))
+	}
+
+	results := map[string]string{}
+	for _, ev := range events {
+		for _, tc := range extractToolCallsFromEvent(ev) {
+			id, _ := tc["id"].(string)
+			result, _ := tc["result"].(string)
+			results[id] = result
+		}
 	}
-	if counts.Result != 3 {
-		t.Errorf("got %d TodoWrite result events, want 3", counts.Result)
+	if results["toolu_arr"] != "line one line two" {
+		t.Errorf("block-array content = %q, want flattened text", results["toolu_arr"])
 	}
-	if len(c.taskCreateIDs) != 0 {
-		t.Errorf("taskCreateIDs map not empty after processing, got %d entries", len(c.taskCreateIDs))
+	if results["toolu_str"] != "plain" {
+		t.Errorf("string content = %q, want plain", results["toolu_str"])
 	}
 }

From 7d6bc5ac8538455f14c8aacaf01715ac8b3952a9 Mon Sep 17 00:00:00 2001
From: Eden Reich <eden.reich@gmail.com>
Date: Sun, 5 Jul 2026 00:54:43 +0200
Subject: [PATCH 2/3] docs(readme): document Claude Code pass-through behavior,
 system_prompt_claude_code and extra_args

---
 README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/README.md b/README.md
index 8a318557..88e6e7e5 100644
--- a/README.md
+++ b/README.md
@@ -328,6 +328,9 @@ claude_code:
   timeout: 600                   # Command timeout in seconds
   max_output_tokens: 32000       # Maximum output tokens per request
   thinking_budget: 10000         # Token budget for extended thinking
+  extra_args:                    # Extra arguments appended verbatim to the claude CLI invocation
+    - --max-turns
+    - "5"
 ```
 
 **Environment Variables:**
@@ -336,6 +339,27 @@ claude_code:
 export INFER_CLAUDE_CODE_ENABLED=true
 export INFER_CLAUDE_CODE_CLI_PATH=/usr/local/bin/claude
 export INFER_CLAUDE_CODE_TIMEOUT=600
+export INFER_CLAUDE_CODE_EXTRA_ARGS="--max-turns,5"  # comma/newline-separated; wins over --claude-code-extra-args
+```
+
+**Pass-through behavior:**
+
+Claude Code mode is a pure pass-through: infer does not inject its system prompt, context blocks, or
+system reminders, and does not re-execute claude's tool calls locally — claude runs with its own
+defaults and native tools. Infer's `prompts.yaml` and `reminders.yaml` do not apply in this mode.
+
+To add instructions on top of claude's built-in system prompt (passed via `--append-system-prompt`),
+set the dedicated prompt in `.infer/prompts.yaml` (empty by default):
+
+```yaml
+agent:
+  system_prompt_claude_code: "Always answer in English."
+```
+
+Or via environment variable:
+
+```bash
+export INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE="Always answer in English."
 ```
 
 ### Features and Limitations

From 094acba2d4a54841ed786b1547d4d64620be9bb4 Mon Sep 17 00:00:00 2001
From: Eden Reich <eden.reich@gmail.com>
Date: Sun, 5 Jul 2026 01:01:30 +0200
Subject: [PATCH 3/3] refactor: replace em dashes with regular dashes

---
 README.md                                          | 2 +-
 internal/infra/adapters/testdata/todos_write.jsonl | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 88e6e7e5..64d88869 100644
--- a/README.md
+++ b/README.md
@@ -345,7 +345,7 @@ export INFER_CLAUDE_CODE_EXTRA_ARGS="--max-turns,5"  # comma/newline-separated;
 **Pass-through behavior:**
 
 Claude Code mode is a pure pass-through: infer does not inject its system prompt, context blocks, or
-system reminders, and does not re-execute claude's tool calls locally — claude runs with its own
+system reminders, and does not re-execute claude's tool calls locally - claude runs with its own
 defaults and native tools. Infer's `prompts.yaml` and `reminders.yaml` do not apply in this mode.
 
 To add instructions on top of claude's built-in system prompt (passed via `--append-system-prompt`),
diff --git a/internal/infra/adapters/testdata/todos_write.jsonl b/internal/infra/adapters/testdata/todos_write.jsonl
index c2b9818d..2bf3b58a 100644
--- a/internal/infra/adapters/testdata/todos_write.jsonl
+++ b/internal/infra/adapters/testdata/todos_write.jsonl
@@ -10,5 +10,5 @@
 {"type":"user","message":{"role":"user","content":[{"tool_use_id":"xxx","type":"tool_result","content":"Task #2 created successfully: Review the release notes draft"}]},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","timestamp":"xxx","tool_use_result":{"task":{"id":"2","subject":"Review the release notes draft"}}}
 {"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"tool_use","id":"xxx","name":"TaskCreate","input":{"subject":"Organize the downloads folder","description":"Sort and clean up files in the downloads folder."},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":18150,"cache_read_input_tokens":14930,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":18150},"output_tokens":60,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":{"cache_miss_reason":{"type":"tools_changed","cache_missed_input_tokens":23524}},"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"}
 {"type":"user","message":{"role":"user","content":[{"tool_use_id":"xxx","type":"tool_result","content":"Task #3 created successfully: Organize the downloads folder"}]},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","timestamp":"xxx","tool_use_result":{"task":{"id":"3","subject":"Organize the downloads folder"}}}
-{"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"text","text":"Done — created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\"."}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":413,"cache_read_input_tokens":33080,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"output_tokens":2,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":null,"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"}
-{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11504,"duration_api_ms":12519,"ttft_ms":4359,"ttft_stream_ms":3880,"time_to_request_ms":40,"num_turns":5,"result":"Done — created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\".","stop_reason":"end_turn","session_id":"xxx","total_cost_usd":0.7333969999999999,"usage":{"input_tokens":6282,"cache_creation_input_tokens":29307,"cache_read_input_tokens":62847,"output_tokens":420,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":29307,"ephemeral_5m_input_tokens":0},"inference_geo":"not_available","iterations":[{"input_tokens":2,"output_tokens":43,"cache_read_input_tokens":33080,"cache_creation_input_tokens":413,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"type":"message"}],"speed":"standard"},"modelUsage":{"claude-haiku-4-5-20251001":{"inputTokens":510,"outputTokens":16,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.00059,"contextWindow":200000,"maxOutputTokens":32000},"xxx":{"inputTokens":6282,"outputTokens":420,"cacheReadInputTokens":62847,"cacheCreationInputTokens":29307,"webSearchRequests":0,"costUSD":0.732807,"contextWindow":1000000,"maxOutputTokens":64000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"xxx"}
+{"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"text","text":"Done - created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\"."}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":413,"cache_read_input_tokens":33080,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"output_tokens":2,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":null,"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"}
+{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11504,"duration_api_ms":12519,"ttft_ms":4359,"ttft_stream_ms":3880,"time_to_request_ms":40,"num_turns":5,"result":"Done - created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\".","stop_reason":"end_turn","session_id":"xxx","total_cost_usd":0.7333969999999999,"usage":{"input_tokens":6282,"cache_creation_input_tokens":29307,"cache_read_input_tokens":62847,"output_tokens":420,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":29307,"ephemeral_5m_input_tokens":0},"inference_geo":"not_available","iterations":[{"input_tokens":2,"output_tokens":43,"cache_read_input_tokens":33080,"cache_creation_input_tokens":413,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"type":"message"}],"speed":"standard"},"modelUsage":{"claude-haiku-4-5-20251001":{"inputTokens":510,"outputTokens":16,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.00059,"contextWindow":200000,"maxOutputTokens":32000},"xxx":{"inputTokens":6282,"outputTokens":420,"cacheReadInputTokens":62847,"cacheCreationInputTokens":29307,"webSearchRequests":0,"costUSD":0.732807,"contextWindow":1000000,"maxOutputTokens":64000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"xxx"}