From de6c0e736492495b1864345c261e414c58016f06 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sun, 5 Jul 2026 00:47:45 +0200 Subject: [PATCH 1/3] fix(claude-code): make subscription mode a true pass-through - No infer system prompt, context blocks, or system reminders are injected in claude_code mode; claude uses its own system prompt and native tools - Stop double-executing claude's already-executed tool calls: their results are captured from the stream (domain.ToolCallResultProvider) and replayed verbatim instead of re-running them through infer's registry/approval gate - Move the TaskCreate/TaskUpdate -> TodoWrite mapping to the output layer only: headless stdout shows one synthesized TodoWrite with the full accumulated todo list; the stored/replayed conversation stays verbatim - Add prompts.agent.system_prompt_claude_code (empty default) passed via --append-system-prompt, settable with INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE - Add claude_code.extra_args (config, --claude-code-extra-args flag, INFER_CLAUDE_CODE_EXTRA_ARGS env) appended before the trailing -p - Accept tool_result content as string or content-block array (fixes unmarshal errors in logs) - Quote claude CLI args in the debug log and keep -p as the last argument Fixes the todo-mirroring and denied-writes behavior seen in inference-gateway/inference-gateway#412 --- cmd/agent.go | 190 ++++++++++++- cmd/agent_test.go | 155 +++++++++++ cmd/config.go | 1 + cmd/root.go | 15 +- config/config.go | 11 +- config/prompts.go | 13 +- internal/agent/agent.go | 10 +- internal/agent/agent_test.go | 27 ++ internal/agent/agent_utils.go | 22 +- internal/container/container.go | 2 +- internal/domain/interfaces.go | 15 + internal/infra/adapters/claude_code_client.go | 188 ++++++------- .../infra/adapters/claude_code_client_test.go | 260 +++++++++++------- 13 files changed, 681 insertions(+), 228 deletions(-) diff --git a/cmd/agent.go b/cmd/agent.go index 8c1257d4..75eb6d1d 100644 --- a/cmd/agent.go +++ 
b/cmd/agent.go @@ -8,6 +8,7 @@ import ( "os" "regexp" "slices" + "strconv" "strings" "sync" "time" @@ -109,6 +110,15 @@ type AgentSession struct { totalCompletionTokens int totalTokens int requestCount int + claudeTasks []claudeTask +} + +// claudeTask mirrors one entry of Claude Code's native task list (TaskCreate/ +// TaskUpdate). Claude assigns sequential ids, so index+1 == task id. +type claudeTask struct { + Content string + Status string + Deleted bool } // inheritedSubagentMode returns the coding mode a subagent should start in, read @@ -702,6 +712,9 @@ func (s *AgentSession) processSyncResponse(response *domain.ChatSyncResponse, re if len(response.ToolCalls) > 0 { assistantMsg.ToolCalls = &response.ToolCalls + if s.config.IsClaudeCodeMode() { + s.feedTaskAccumulator(response.ToolCalls) + } } s.addMessage(assistantMsg) @@ -724,7 +737,12 @@ func (s *AgentSession) processSyncResponse(response *domain.ChatSyncResponse, re return nil } - toolResults := s.executeToolCalls(response.ToolCalls) + var toolResults []ConversationMessage + if s.config.IsClaudeCodeMode() { + toolResults = s.claudeToolResultMessages(response.ToolCalls, response.ToolResults) + } else { + toolResults = s.executeToolCalls(response.ToolCalls) + } s.lastToolFailed = anyToolResultFailed(toolResults) for _, result := range toolResults { @@ -838,6 +856,163 @@ func (s *AgentSession) readApprovalResponses() { } } +// claudeToolResultMessages builds tool messages from the results claude +// reported for tool calls it executed itself (Claude Code mode). A call with +// no reported result gets a neutral placeholder so the tool_call is never +// left unanswered in the replayed conversation. 
+func (s *AgentSession) claudeToolResultMessages( + toolCalls []sdk.ChatCompletionMessageToolCall, + results map[string]domain.ToolCallResult, +) []ConversationMessage { + messages := make([]ConversationMessage, 0, len(toolCalls)) + for _, tc := range toolCalls { + result, ok := results[tc.ID] + content := result.Content + if !ok || content == "" { + content = "Executed by Claude Code (no result reported)." + } + execution := &domain.ToolExecutionResult{ + ToolName: tc.Function.Name, + Success: !result.IsError, + } + if result.IsError { + execution.Error = result.Content + } + messages = append(messages, ConversationMessage{ + Role: "tool", + Content: content, + ToolCallID: tc.ID, + ToolExecution: execution, + Timestamp: time.Now(), + }) + } + return messages +} + +// feedTaskAccumulator tracks Claude Code's native task list from TaskCreate/ +// TaskUpdate tool calls so the headless output can mirror it as a TodoWrite +// view. Claude assigns sequential task ids, so creation order == id. +func (s *AgentSession) feedTaskAccumulator(toolCalls []sdk.ChatCompletionMessageToolCall) { + for _, tc := range toolCalls { + switch tc.Function.Name { + case "TaskCreate": + var input struct { + Subject string `json:"subject"` + } + if err := json.Unmarshal([]byte(tc.Function.Arguments), &input); err != nil || input.Subject == "" { + logger.Debug("skipping unparsable TaskCreate input", "error", err) + continue + } + s.claudeTasks = append(s.claudeTasks, claudeTask{Content: input.Subject, Status: "pending"}) + case "TaskUpdate": + s.applyTaskUpdate(tc.Function.Arguments) + } + } +} + +// applyTaskUpdate applies a single TaskUpdate call to the accumulated task +// list. The schema is parsed defensively (taskId may arrive as "1", "#1" or a +// number; status/subject are optional) so drift in claude's tool schema +// degrades to a no-op rather than corrupting the mirror. 
+func (s *AgentSession) applyTaskUpdate(arguments string) { + var input struct { + TaskID any `json:"taskId"` + Status string `json:"status"` + Subject string `json:"subject"` + } + if err := json.Unmarshal([]byte(arguments), &input); err != nil { + logger.Debug("skipping unparsable TaskUpdate input", "error", err) + return + } + + var id int + switch v := input.TaskID.(type) { + case string: + id, _ = strconv.Atoi(strings.TrimPrefix(v, "#")) + case float64: + id = int(v) + } + if id < 1 || id > len(s.claudeTasks) { + logger.Debug("TaskUpdate references unknown task", "task_id", input.TaskID) + return + } + + task := &s.claudeTasks[id-1] + if input.Subject != "" { + task.Content = input.Subject + } + switch input.Status { + case "pending", "in_progress", "completed": + task.Status = input.Status + case "deleted": + task.Deleted = true + } +} + +// isClaudeTaskTool reports whether the tool name is one of Claude Code's +// native task tools mirrored into the TodoWrite output view. +func isClaudeTaskTool(name string) bool { + return name == "TaskCreate" || name == "TaskUpdate" +} + +// renderTodoWriteView returns a copy of the message where Claude Code's +// TaskCreate/TaskUpdate tool calls are replaced by a single synthesized +// TodoWrite call carrying the full accumulated todo list, so downstream +// consumers (infer-action) can mirror progress. Non-task tool calls are +// preserved; messages without task calls are returned unchanged. 
+func (s *AgentSession) renderTodoWriteView(msg ConversationMessage) ConversationMessage { + if msg.ToolCalls == nil { + return msg + } + + hasTaskCall := false + for _, tc := range *msg.ToolCalls { + if isClaudeTaskTool(tc.Function.Name) { + hasTaskCall = true + break + } + } + if !hasTaskCall { + return msg + } + + todos := make([]map[string]any, 0, len(s.claudeTasks)) + for _, task := range s.claudeTasks { + if task.Deleted { + continue + } + todos = append(todos, map[string]any{"content": task.Content, "status": task.Status}) + } + arguments, err := json.Marshal(map[string]any{"todos": todos}) + if err != nil { + return msg + } + + rendered := make([]sdk.ChatCompletionMessageToolCall, 0, len(*msg.ToolCalls)) + todoWriteAdded := false + for _, tc := range *msg.ToolCalls { + if !isClaudeTaskTool(tc.Function.Name) { + rendered = append(rendered, tc) + continue + } + if todoWriteAdded { + continue + } + todoWriteAdded = true + rendered = append(rendered, sdk.ChatCompletionMessageToolCall{ + ID: tc.ID, + Type: tc.Type, + Function: sdk.ChatCompletionMessageToolCallFunction{ + Name: "TodoWrite", + Arguments: string(arguments), + }, + }) + } + + msg.ToolCalls = &rendered + return msg +} + // toolResultMessage builds the conversation message for a finished tool call, // formatting either the successful result or the execution error. func (s *AgentSession) toolResultMessage(tc sdk.ChatCompletionMessageToolCall, result *domain.ToolExecutionResult, err error) ConversationMessage { @@ -1143,6 +1318,10 @@ func (s *AgentSession) dispatchHooks(hook domain.HookPoint, turn int) { // pending tool_calls) - that guard is reminder-specific and must not block // command hooks, which is why it lives here rather than in dispatchHooks. 
func (s *AgentSession) injectDueReminders(hook domain.HookPoint, turn int) { + if s.config != nil && s.config.IsClaudeCodeMode() { + return + } + provider := s.reminderProvider if provider == nil && s.config != nil { provider = s.config.Reminders @@ -1248,10 +1427,13 @@ func (s *AgentSession) outputMessage(msg ConversationMessage) { } logMsg := msg + if s.config.IsClaudeCodeMode() { + logMsg = s.renderTodoWriteView(logMsg) + } - if !s.config.Agent.VerboseTools && msg.ToolCalls != nil && len(*msg.ToolCalls) > 0 { - summaries := make([]string, len(*msg.ToolCalls)) - for i, toolCall := range *msg.ToolCalls { + if !s.config.Agent.VerboseTools && logMsg.ToolCalls != nil && len(*logMsg.ToolCalls) > 0 { + summaries := make([]string, len(*logMsg.ToolCalls)) + for i, toolCall := range *logMsg.ToolCalls { summaries[i] = formatToolCallSummary(toolCall.Function.Name, toolCall.Function.Arguments) } logMsg.Tools = summaries diff --git a/cmd/agent_test.go b/cmd/agent_test.go index 191ed494..c94df359 100644 --- a/cmd/agent_test.go +++ b/cmd/agent_test.go @@ -1562,3 +1562,158 @@ func TestAgentSession_DispatchHooks_SkipsOffListCommandHook(t *testing.T) { t.Fatal("off-list command hook must not run headless (secure-by-default)") } } + +func TestInjectDueReminders_SkippedInClaudeCodeMode(t *testing.T) { + s := &AgentSession{ + config: &config.Config{ + ClaudeCode: config.ClaudeCodeConfig{Enabled: true}, + Reminders: config.RemindersConfig{ + Enabled: true, + Reminders: []config.ReminderConfig{ + {Name: "todo", Text: "remember to push", Hook: domain.HookPreStream, Trigger: config.ReminderTriggerInterval, Interval: 1}, + }, + }, + }, + conversation: []ConversationMessage{}, + firedReminders: map[string]bool{}, + maxTurns: 10, + } + + s.injectDueReminders(domain.HookPreStream, 1) + + if len(s.conversation) != 0 { + t.Fatalf("expected no reminder injected in claude_code mode, got %d messages", len(s.conversation)) + } +} + +func TestFeedTaskAccumulator(t *testing.T) { + s := 
&AgentSession{config: &config.Config{}} + + s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{ + {ID: "t1", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task one"}`}}, + {ID: "t2", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task two","description":"details"}`}}, + {ID: "b1", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{"command":"ls"}`}}, + }) + + if len(s.claudeTasks) != 2 { + t.Fatalf("expected 2 tasks, got %d", len(s.claudeTasks)) + } + if s.claudeTasks[0].Content != "Task one" || s.claudeTasks[0].Status != "pending" { + t.Errorf("unexpected first task: %+v", s.claudeTasks[0]) + } + + for _, args := range []string{ + `{"taskId":"1","status":"in_progress"}`, + `{"taskId":"#2","status":"completed"}`, + } { + s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{ + {Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: args}}, + }) + } + if s.claudeTasks[0].Status != "in_progress" { + t.Errorf("task 1 status = %q, want in_progress", s.claudeTasks[0].Status) + } + if s.claudeTasks[1].Status != "completed" { + t.Errorf("task 2 status = %q, want completed", s.claudeTasks[1].Status) + } + + s.feedTaskAccumulator([]sdk.ChatCompletionMessageToolCall{ + {Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":2,"status":"deleted"}`}}, + {Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":"99","status":"completed"}`}}, + {Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `not json`}}, + }) + if !s.claudeTasks[1].Deleted { + t.Error("task 2 should be marked deleted (numeric taskId)") + } +} + +func TestRenderTodoWriteView(t *testing.T) { + s := &AgentSession{ + config: &config.Config{ClaudeCode: config.ClaudeCodeConfig{Enabled: true}}, + claudeTasks: []claudeTask{ + {Content: "Task 
one", Status: "completed"}, + {Content: "Task two", Status: "in_progress"}, + {Content: "gone", Status: "pending", Deleted: true}, + }, + } + + toolCalls := []sdk.ChatCompletionMessageToolCall{ + {ID: "t1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskCreate", Arguments: `{"subject":"Task one"}`}}, + {ID: "u1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "TaskUpdate", Arguments: `{"taskId":"1","status":"completed"}`}}, + {ID: "b1", Type: "function", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{"command":"ls"}`}}, + } + msg := ConversationMessage{Role: "assistant", ToolCalls: &toolCalls} + + rendered := s.renderTodoWriteView(msg) + + if len(*rendered.ToolCalls) != 2 { + t.Fatalf("expected 2 rendered tool calls (TodoWrite + Bash), got %d", len(*rendered.ToolCalls)) + } + todoWrite := (*rendered.ToolCalls)[0] + if todoWrite.Function.Name != "TodoWrite" || todoWrite.ID != "t1" { + t.Errorf("unexpected first rendered call: %+v", todoWrite) + } + var args struct { + Todos []struct { + Content string `json:"content"` + Status string `json:"status"` + } `json:"todos"` + } + if err := json.Unmarshal([]byte(todoWrite.Function.Arguments), &args); err != nil { + t.Fatalf("TodoWrite arguments invalid: %v", err) + } + if len(args.Todos) != 2 { + t.Fatalf("expected 2 todos (deleted excluded), got %d", len(args.Todos)) + } + if args.Todos[1].Content != "Task two" || args.Todos[1].Status != "in_progress" { + t.Errorf("unexpected second todo: %+v", args.Todos[1]) + } + if (*rendered.ToolCalls)[1].Function.Name != "Bash" { + t.Errorf("non-task tool call not preserved: %+v", (*rendered.ToolCalls)[1]) + } + + // original message must be untouched (stored conversation keeps TaskCreate) + if (*msg.ToolCalls)[0].Function.Name != "TaskCreate" { + t.Error("original tool calls were mutated") + } + + // messages without task calls pass through unchanged + plain := ConversationMessage{Role: 
"assistant", ToolCalls: &[]sdk.ChatCompletionMessageToolCall{ + {ID: "b2", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash", Arguments: `{}`}}, + }} + if got := s.renderTodoWriteView(plain); (*got.ToolCalls)[0].Function.Name != "Bash" { + t.Error("message without task calls should be unchanged") + } +} + +func TestClaudeToolResultMessages(t *testing.T) { + s := &AgentSession{config: &config.Config{ClaudeCode: config.ClaudeCodeConfig{Enabled: true}}} + + toolCalls := []sdk.ChatCompletionMessageToolCall{ + {ID: "ok", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Bash"}}, + {ID: "err", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Edit"}}, + {ID: "missing", Function: sdk.ChatCompletionMessageToolCallFunction{Name: "Read"}}, + } + results := map[string]domain.ToolCallResult{ + "ok": {Content: "done"}, + "err": {Content: "boom", IsError: true}, + } + + messages := s.claudeToolResultMessages(toolCalls, results) + + if len(messages) != 3 { + t.Fatalf("expected 3 tool messages, got %d", len(messages)) + } + if messages[0].Content != "done" || !messages[0].ToolExecution.Success || messages[0].ToolCallID != "ok" { + t.Errorf("unexpected ok message: %+v", messages[0]) + } + if messages[1].ToolExecution.Success || messages[1].ToolExecution.Error != "boom" { + t.Errorf("unexpected err message: %+v", messages[1]) + } + if messages[2].Content == "" || messages[2].ToolCallID != "missing" { + t.Errorf("missing-result call must get a placeholder: %+v", messages[2]) + } + if !anyToolResultFailed(messages) { + t.Error("anyToolResultFailed should report the failed claude tool result") + } +} diff --git a/cmd/config.go b/cmd/config.go index 9d190e15..1b7d724b 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -506,6 +506,7 @@ func applyPromptsEnvOverrides(cfg *config.Config) { "INFER_PROMPTS_AGENT_SYSTEM_PROMPT_PLAN": &cfg.Prompts.Agent.SystemPromptPlan, "INFER_PROMPTS_AGENT_SYSTEM_PROMPT_REMOTE": &cfg.Prompts.Agent.SystemPromptRemote, 
"INFER_PROMPTS_AGENT_SYSTEM_PROMPT_HEARTBEAT": &cfg.Prompts.Agent.SystemPromptHeartbeat, + "INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE": &cfg.Prompts.Agent.SystemPromptClaudeCode, "INFER_PROMPTS_AGENT_CUSTOM_INSTRUCTIONS": &cfg.Prompts.Agent.CustomInstructions, "INFER_PROMPTS_GIT_COMMIT_MESSAGE_SYSTEM_PROMPT": &cfg.Prompts.Git.CommitMessage.SystemPrompt, "INFER_PROMPTS_CONVERSATION_TITLE_GENERATION_SYSTEM_PROMPT": &cfg.Prompts.Conversation.TitleGeneration.SystemPrompt, diff --git a/cmd/root.go b/cmd/root.go index d9bd9a75..42fb02a6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -49,6 +49,9 @@ func init() { rootCmd.PersistentFlags().String("tools-bash-allow-append", "", "comma/newline-separated commands added to the bash allow-list in every mode "+ "(standard, plan, auto); INFER_TOOLS_BASH_ALLOW_APPEND takes precedence") + rootCmd.PersistentFlags().String("claude-code-extra-args", "", + "comma/newline-separated extra arguments appended to the claude CLI invocation "+ + "in Claude Code mode; INFER_CLAUDE_CODE_EXTRA_ARGS takes precedence") rootCmd.PersistentFlags().String("reminders-file", "", "path to a reminders YAML file, overriding project .infer/ and ~/.infer reminders.yaml "+ "(INFER_REMINDERS_CONFIG inline YAML takes precedence)") @@ -72,10 +75,10 @@ func parseDelimitedList(value string) []string { return out } -// resolveBashAllowOverride returns the override value for a bash allow-list, +// resolveFlagEnvOverride returns the override value for a flag/env pair, // preferring the env var over the matching persistent flag (per the documented // flags < env layering). Empty means neither was provided. 
-func resolveBashAllowOverride(flagName, envName string) string { +func resolveFlagEnvOverride(flagName, envName string) string { if env := os.Getenv(envName); env != "" { return env } @@ -103,7 +106,7 @@ func applyBashAllowAppends(v *viper.Viper) { } for _, a := range appends { - if override := resolveBashAllowOverride(a.appendFlag, a.appendEnv); override != "" { + if override := resolveFlagEnvOverride(a.appendFlag, a.appendEnv); override != "" { v.Set(a.key, append(v.GetStringSlice(a.key), parseDelimitedList(override)...)) } } @@ -184,6 +187,12 @@ func initConfig() { applyBashAllowAppends(v) + // claude_code.extra_args is a slice, which viper can't parse from a single + // env var generically - same special-casing as INFER_A2A_AGENTS above. + if extra := resolveFlagEnvOverride("claude-code-extra-args", "INFER_CLAUDE_CODE_EXTRA_ARGS"); extra != "" { + v.Set("claude_code.extra_args", parseDelimitedList(extra)) + } + cfg, err := loadConfigFromViper() if err != nil { fmt.Fprintf(os.Stderr, "Error loading config: %v\n", err) diff --git a/config/config.go b/config/config.go index 922ee5a2..42957129 100644 --- a/config/config.go +++ b/config/config.go @@ -75,11 +75,12 @@ type GatewayConfig struct { // ClaudeCodeConfig contains Claude Code CLI integration settings type ClaudeCodeConfig struct { - Enabled bool `yaml:"enabled" mapstructure:"enabled"` - CLIPath string `yaml:"cli_path" mapstructure:"cli_path"` - Timeout int `yaml:"timeout" mapstructure:"timeout"` - MaxOutputTokens int `yaml:"max_output_tokens" mapstructure:"max_output_tokens"` - ThinkingBudget int `yaml:"thinking_budget" mapstructure:"thinking_budget"` + Enabled bool `yaml:"enabled" mapstructure:"enabled"` + CLIPath string `yaml:"cli_path" mapstructure:"cli_path"` + Timeout int `yaml:"timeout" mapstructure:"timeout"` + MaxOutputTokens int `yaml:"max_output_tokens" mapstructure:"max_output_tokens"` + ThinkingBudget int `yaml:"thinking_budget" mapstructure:"thinking_budget"` + ExtraArgs []string 
`yaml:"extra_args,omitempty" mapstructure:"extra_args"` } // SpeechToTextConfig contains speech-to-text (Whisper) integration settings. diff --git a/config/prompts.go b/config/prompts.go index 19fe6c6a..40316d26 100644 --- a/config/prompts.go +++ b/config/prompts.go @@ -121,12 +121,13 @@ type PromptsConfig struct { } type PromptsAgentConfig struct { - SystemPrompt string `yaml:"system_prompt" mapstructure:"system_prompt"` - SystemPromptPlan string `yaml:"system_prompt_plan" mapstructure:"system_prompt_plan"` - SystemPromptAuto string `yaml:"system_prompt_auto" mapstructure:"system_prompt_auto"` - SystemPromptRemote string `yaml:"system_prompt_remote" mapstructure:"system_prompt_remote"` - SystemPromptHeartbeat string `yaml:"system_prompt_heartbeat" mapstructure:"system_prompt_heartbeat"` - CustomInstructions string `yaml:"custom_instructions" mapstructure:"custom_instructions"` + SystemPrompt string `yaml:"system_prompt" mapstructure:"system_prompt"` + SystemPromptPlan string `yaml:"system_prompt_plan" mapstructure:"system_prompt_plan"` + SystemPromptAuto string `yaml:"system_prompt_auto" mapstructure:"system_prompt_auto"` + SystemPromptRemote string `yaml:"system_prompt_remote" mapstructure:"system_prompt_remote"` + SystemPromptHeartbeat string `yaml:"system_prompt_heartbeat" mapstructure:"system_prompt_heartbeat"` + SystemPromptClaudeCode string `yaml:"system_prompt_claude_code" mapstructure:"system_prompt_claude_code"` + CustomInstructions string `yaml:"custom_instructions" mapstructure:"custom_instructions"` } type PromptsGitConfig struct { diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 5ce8c3be..ebb74dd0 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -440,14 +440,20 @@ func (s *AgentServiceImpl) Run(ctx context.Context, req *domain.AgentRequest) (* content, reasoningContent, toolCalls := extractFirstChoice(response) - return &domain.ChatSyncResponse{ + syncResponse := &domain.ChatSyncResponse{ RequestID: 
req.RequestID, Content: content, ReasoningContent: reasoningContent, ToolCalls: toolCalls, Usage: response.Usage, Duration: duration, - }, nil + } + + if provider, ok := s.client.(domain.ToolCallResultProvider); ok { + syncResponse.ToolResults = provider.TakeToolCallResults() + } + + return syncResponse, nil } // extractFirstChoice pulls content, reasoning, and tool calls from the first diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 4e9c592a..e9c46048 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -1282,6 +1282,33 @@ func TestAgentServiceImpl_AddSystemPrompt(t *testing.T) { assert.Equal(t, sdk.User, result[1].Role) } +func TestAgentServiceImpl_AddSystemPrompt_ClaudeCodeModePassthrough(t *testing.T) { + cfg := &config.Config{ + ClaudeCode: config.ClaudeCodeConfig{Enabled: true}, + Agent: config.AgentConfig{ + SystemPromptWithDefaults: true, + }, + Prompts: config.PromptsConfig{ + Agent: config.PromptsAgentConfig{ + SystemPrompt: "You are a helpful assistant.", + }, + }, + } + + agentService := &AgentServiceImpl{ + config: cfg, + } + + inputMessages := []sdk.Message{ + {Role: sdk.User, Content: sdk.NewMessageContent("Hello")}, + } + + result := agentService.addSystemPrompt(inputMessages) + + assert.Len(t, result, 1) + assert.Equal(t, sdk.User, result[0].Role) +} + func TestAgentServiceImpl_BuildSystemPrompt(t *testing.T) { cfg := &config.Config{ Agent: config.AgentConfig{ diff --git a/internal/agent/agent_utils.go b/internal/agent/agent_utils.go index 4ee87add..c51b714e 100644 --- a/internal/agent/agent_utils.go +++ b/internal/agent/agent_utils.go @@ -162,13 +162,29 @@ func (s *AgentServiceImpl) buildSystemPromptText(messages []sdk.Message) string // BuildSystemPrompt returns the system prompt a fresh session (turn 0) would // send to the LLM. Exposed for the `infer debug agent system_prompt` command. 
+// In Claude Code mode no prompt is sent at all - claude uses its own; only the +// optional append (prompts.agent.system_prompt_claude_code) is reported. func (s *AgentServiceImpl) BuildSystemPrompt() string { + if s.config != nil && s.config.IsClaudeCodeMode() { + if appendPrompt := s.config.Prompts.Agent.SystemPromptClaudeCode; appendPrompt != "" { + return fmt.Sprintf("(claude_code mode: pass-through - appended to Claude Code's own system prompt via --append-system-prompt)\n\n%s", appendPrompt) + } + return "(claude_code mode: pass-through - no system prompt is sent; Claude Code uses its own)" + } return s.buildSystemPromptText(nil) } // addSystemPrompt prepends the assembled system prompt (with dynamic sandbox -// info) to messages. +// info) to messages. In Claude Code mode the conversation is passed through +// untouched: claude uses its own system prompt (an optional append lives in +// prompts.agent.system_prompt_claude_code, applied via --append-system-prompt +// by the adapter). BuildSystemPrompt likewise skips the gateway-mode prompt in +// this mode and reports only the pass-through notice plus any append. func (s *AgentServiceImpl) addSystemPrompt(messages []sdk.Message) []sdk.Message { + if s.config != nil && s.config.IsClaudeCodeMode() { + return messages + } + prompt := s.buildSystemPromptText(messages) if prompt == "" { return messages @@ -833,6 +849,10 @@ func conversationAwaitsToolResults(conv []sdk.Message) bool { // guards the fired-set because the streaming goroutine (pre_session/pre_stream) // and the event-loop goroutine (the other points) can both reach here. 
func (s *AgentServiceImpl) injectDueReminders(agentCtx *domain.AgentContext, hook domain.HookPoint) { + if s.config != nil && s.config.IsClaudeCodeMode() { + return + } + provider := s.reminderProvider if provider == nil && s.config != nil { provider = s.config.Reminders diff --git a/internal/container/container.go b/internal/container/container.go index 8600dc13..5f9b7933 100644 --- a/internal/container/container.go +++ b/internal/container/container.go @@ -772,7 +772,7 @@ func (c *ServiceContainer) createAgentSDKClient() domain.SDKClient { if c.config.IsClaudeCodeMode() { logger.Info("using Claude Code CLI mode (subscription-based)") - return adapters.NewClaudeCodeClient(&c.config.ClaudeCode, c.stateManager) + return adapters.NewClaudeCodeClient(&c.config.ClaudeCode, c.stateManager, c.config.Prompts.Agent.SystemPromptClaudeCode) } logger.Debug("using gateway mode (API-based)") diff --git a/internal/domain/interfaces.go b/internal/domain/interfaces.go index daaed8b8..10827737 100644 --- a/internal/domain/interfaces.go +++ b/internal/domain/interfaces.go @@ -266,10 +266,25 @@ type ChatSyncResponse struct { Content string `json:"content"` ReasoningContent string `json:"reasoning_content,omitempty"` ToolCalls []sdk.ChatCompletionMessageToolCall `json:"tool_calls,omitempty"` + ToolResults map[string]ToolCallResult `json:"tool_results,omitempty"` Usage *sdk.CompletionUsage `json:"usage,omitempty"` Duration time.Duration `json:"duration"` } +// ToolCallResult is the outcome of a tool call executed by the backend itself +// (e.g. inside the Claude Code CLI) rather than by the local tool registry. +type ToolCallResult struct { + Content string `json:"content"` + IsError bool `json:"is_error"` +} + +// ToolCallResultProvider is implemented by SDK clients that execute tools +// themselves and can report per-call results after a GenerateContent call. +// Take semantics: the returned map is drained from the client. 
+type ToolCallResultProvider interface { + TakeToolCallResults() map[string]ToolCallResult +} + // ChatService handles chat completion operations type ChatService interface { CancelRequest(requestID string) error diff --git a/internal/infra/adapters/claude_code_client.go b/internal/infra/adapters/claude_code_client.go index 99087479..e4380e75 100644 --- a/internal/infra/adapters/claude_code_client.go +++ b/internal/infra/adapters/claude_code_client.go @@ -7,6 +7,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "os" "os/exec" "strings" @@ -23,25 +24,39 @@ import ( // ClaudeCodeClient is a wrapper around the official Claude Code CLI // It implements the SDKClient interface by spawning the claude process type ClaudeCodeClient struct { - config *config.ClaudeCodeConfig - stateManager domain.StateManager - tools *[]sdk.ChatCompletionTool - options *sdk.CreateChatCompletionRequest - middlewareOpts *sdk.MiddlewareOptions - wg *sync.WaitGroup - taskCreateIDs map[string]string + config *config.ClaudeCodeConfig + stateManager domain.StateManager + tools *[]sdk.ChatCompletionTool + options *sdk.CreateChatCompletionRequest + middlewareOpts *sdk.MiddlewareOptions + wg *sync.WaitGroup + appendSystemPrompt string + toolResults map[string]domain.ToolCallResult } // NewClaudeCodeClient creates a new Claude Code CLI client -func NewClaudeCodeClient(cfg *config.ClaudeCodeConfig, stateManager domain.StateManager) domain.SDKClient { +func NewClaudeCodeClient(cfg *config.ClaudeCodeConfig, stateManager domain.StateManager, appendSystemPrompt string) domain.SDKClient { return &ClaudeCodeClient{ - config: cfg, - stateManager: stateManager, - wg: &sync.WaitGroup{}, - taskCreateIDs: make(map[string]string), + config: cfg, + stateManager: stateManager, + wg: &sync.WaitGroup{}, + appendSystemPrompt: appendSystemPrompt, + toolResults: make(map[string]domain.ToolCallResult), } } +// TakeToolCallResults returns and clears the tool results claude reported +// during the last GenerateContent call. 
Implements domain.ToolCallResultProvider. +func (c *ClaudeCodeClient) TakeToolCallResults() map[string]domain.ToolCallResult { + if len(c.toolResults) == 0 { + return nil + } + out := make(map[string]domain.ToolCallResult, len(c.toolResults)) + maps.Copy(out, c.toolResults) + clear(c.toolResults) + return out +} + // WithOptions sets the chat completion request options func (c *ClaudeCodeClient) WithOptions(opts *sdk.CreateChatCompletionRequest) domain.SDKClient { clone := *c @@ -70,6 +85,8 @@ func (c *ClaudeCodeClient) GenerateContent( model string, messages []sdk.Message, ) (*sdk.CreateChatCompletionResponse, error) { + clear(c.toolResults) + eventChan, err := c.GenerateContentStream(ctx, provider, model, messages) if err != nil { return nil, err @@ -94,6 +111,12 @@ func (c *ClaudeCodeClient) GenerateContentStream( ) (<-chan sdk.SSEvent, error) { args := c.buildArgs(model) + logger.Debug("executing claude code cli", + "path", c.config.CLIPath, + "args", fmt.Sprintf("%q", args), + "messages", len(messages), + ) + cmd := exec.CommandContext(ctx, c.config.CLIPath, args...) cmd.Env = c.buildEnv() @@ -122,6 +145,8 @@ func (c *ClaudeCodeClient) GenerateContentStream( return nil, fmt.Errorf("failed to marshal messages: %w", err) } + logger.Debug("writing conversation to claude code stdin", "bytes", len(messagesJSON)) + if _, err := stdin.Write(messagesJSON); err != nil { return nil, fmt.Errorf("failed to write to stdin: %w", err) } @@ -152,13 +177,16 @@ func (c *ClaudeCodeClient) buildArgs(model string) []string { "--include-hook-events", "--model", model, "--permission-mode", permissionMode, - "-p", } - if c.tools != nil && len(*c.tools) > 0 { - args = append(args, "--disallowedTools", "all") + if c.appendSystemPrompt != "" { + args = append(args, "--append-system-prompt", c.appendSystemPrompt) } + args = append(args, c.config.ExtraArgs...) 
+ + args = append(args, "-p") + return args } @@ -386,7 +414,7 @@ func (c *ClaudeCodeClient) transformAssistantMessage(msg ClaudeCodeMessage, mode }, }, model)) case "tool_use": - name, args := c.maybeMapTaskCreateToTodoWrite(block) + name, args := block.Name, string(block.Input) events = append(events, c.createDeltaEvent(map[string]any{ "choices": []map[string]any{ { @@ -415,7 +443,7 @@ func (c *ClaudeCodeClient) transformAssistantMessage(msg ClaudeCodeMessage, mode // transformUserMessage converts tool_result blocks into tool-call delta chunks, // plus a typed tool_failure event when the result carries is_error=true. -// TaskCreate results are mapped to TodoWrite results. +// Results are forwarded verbatim - claude executed the tool itself. func (c *ClaudeCodeClient) transformUserMessage(msg ClaudeCodeMessage, model string) []sdk.SSEvent { var events []sdk.SSEvent @@ -430,7 +458,7 @@ func (c *ClaudeCodeClient) transformUserMessage(msg ClaudeCodeMessage, model str continue } - result, isError := c.maybeMapTaskCreateResult(content.ToolUseID, content.Content, content.IsError) + result, isError := string(content.Content), content.IsError events = append(events, c.createDeltaEvent(map[string]any{ "choices": []map[string]any{ @@ -549,85 +577,6 @@ func (c *ClaudeCodeClient) createToolFailureEvent(toolUseID, errorMsg string) sd } } -// maybeMapTaskCreateToTodoWrite checks if a tool_use block is a TaskCreate -// call and if so, maps it to a TodoWrite call. Returns the tool name and -// serialized arguments to use. For non-TaskCreate tools, returns the original -// values unchanged. 
-func (c *ClaudeCodeClient) maybeMapTaskCreateToTodoWrite(block ContentBlock) (string, string) { - if block.Name != "TaskCreate" { - return block.Name, string(block.Input) - } - - var taskInput struct { - Subject string `json:"subject"` - Description string `json:"description,omitempty"` - } - if err := json.Unmarshal(block.Input, &taskInput); err != nil { - logger.Error(fmt.Sprintf("Failed to parse TaskCreate input: %v", err)) - return block.Name, string(block.Input) - } - - c.taskCreateIDs[block.ID] = taskInput.Subject - - todoInput := map[string]any{ - "todos": []map[string]any{ - { - "content": taskInput.Subject, - "status": "in_progress", - }, - }, - } - todoInputBytes, err := json.Marshal(todoInput) - if err != nil { - logger.Error(fmt.Sprintf("Failed to marshal TodoWrite input: %v", err)) - return block.Name, string(block.Input) - } - - return "TodoWrite", string(todoInputBytes) -} - -// maybeMapTaskCreateResult checks if a tool result corresponds to a previously -// tracked TaskCreate call and if so, maps the result to a TodoWrite result. -// Returns the result content and is_error flag to use. For non-TaskCreate -// results, returns the original values unchanged. 
-func (c *ClaudeCodeClient) maybeMapTaskCreateResult(toolUseID, result string, isError bool) (string, bool) { - subject, ok := c.taskCreateIDs[toolUseID] - if !ok { - return result, isError - } - - delete(c.taskCreateIDs, toolUseID) - - status := "completed" - if isError { - status = "pending" - } - - todoResult := map[string]any{ - "todos": []map[string]any{ - { - "content": subject, - "status": status, - }, - }, - "total_tasks": 1, - "completed_tasks": 0, - "in_progress_task": "", - "validation_ok": true, - } - if !isError { - todoResult["completed_tasks"] = 1 - } - - resultBytes, err := json.Marshal(todoResult) - if err != nil { - logger.Error(fmt.Sprintf("Failed to marshal TodoWrite result: %v", err)) - return result, isError - } - - return string(resultBytes), isError -} - type AssistantMessage struct { Content []ContentBlock `json:"content"` Role string `json:"role"` @@ -639,10 +588,42 @@ type ToolResultMessage struct { } type ToolResultContent struct { - Type string `json:"type"` - ToolUseID string `json:"tool_use_id"` - IsError bool `json:"is_error"` - Content string `json:"content"` + Type string `json:"type"` + ToolUseID string `json:"tool_use_id"` + IsError bool `json:"is_error"` + Content toolResultPayload `json:"content"` +} + +// toolResultPayload accepts the two shapes claude uses for tool_result +// content on the wire: a plain string, or an array of content blocks +// ([{"type":"text","text":"..."}]). Block arrays are flattened to their +// concatenated text; non-text blocks are skipped. 
+type toolResultPayload string + +func (p *toolResultPayload) UnmarshalJSON(data []byte) error { + var s string + if err := json.Unmarshal(data, &s); err == nil { + *p = toolResultPayload(s) + return nil + } + + var blocks []struct { + Type string `json:"type"` + Text string `json:"text"` + } + if err := json.Unmarshal(data, &blocks); err != nil { + *p = toolResultPayload(data) + return nil + } + + var sb strings.Builder + for _, b := range blocks { + if b.Type == "text" { + sb.WriteString(b.Text) + } + } + *p = toolResultPayload(sb.String()) + return nil } // ContentBlock represents a content block in the assistant message @@ -858,6 +839,11 @@ func (c *ClaudeCodeClient) processToolCalls(toolCallsRaw []any, toolCallsMap map } c.processToolCallFunction(tc, toolCall) + + if result, ok := tc["result"].(string); ok { + isError, _ := tc["is_error"].(bool) + c.toolResults[id] = domain.ToolCallResult{Content: result, IsError: isError} + } } } diff --git a/internal/infra/adapters/claude_code_client_test.go b/internal/infra/adapters/claude_code_client_test.go index e9ccf214..0bd96103 100644 --- a/internal/infra/adapters/claude_code_client_test.go +++ b/internal/infra/adapters/claude_code_client_test.go @@ -5,11 +5,13 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" sdk "github.com/inference-gateway/sdk" config "github.com/inference-gateway/cli/config" + domain "github.com/inference-gateway/cli/internal/domain" ) func transform(t *testing.T, c *ClaudeCodeClient, msg ClaudeCodeMessage) []sdk.SSEvent { @@ -705,12 +707,6 @@ func TestToolResultContentWithIsError(t *testing.T) { } } -// todoWriteCounts tracks the counts collected during processing. -type todoWriteCounts struct { - ToolUse int - Result int -} - // extractToolCallsFromEvent extracts tool call maps from an SSE event's delta. // Returns nil if the event has no tool calls. 
func extractToolCallsFromEvent(ev sdk.SSEvent) []map[string]any { @@ -746,104 +742,20 @@ func extractToolCallsFromEvent(ev sdk.SSEvent) []map[string]any { return toolCalls } -// todoWriteArgs holds the expected shape of TodoWrite arguments. -type todoWriteArgs struct { - Todos []struct { - Content string `json:"content"` - Status string `json:"status"` - } `json:"todos"` -} - -// todoWriteResult holds the expected shape of a TodoWrite result. -type todoWriteResult struct { - Todos []any `json:"todos"` - TotalTasks int `json:"total_tasks"` - CompletedTasks int `json:"completed_tasks"` - ValidationOK bool `json:"validation_ok"` -} - -// validateTodoWriteArgs checks the shape of a TodoWrite arguments JSON string. -func validateTodoWriteArgs(t *testing.T, args string) { - t.Helper() - - var ta todoWriteArgs - if err := json.Unmarshal([]byte(args), &ta); err != nil { - t.Errorf("TodoWrite arguments not valid JSON: %v", err) - return - } - if len(ta.Todos) != 1 { - t.Errorf("TodoWrite arguments: got %d todos, want 1", len(ta.Todos)) - return - } - if ta.Todos[0].Status != "in_progress" { - t.Errorf("TodoWrite todo status = %q, want in_progress", ta.Todos[0].Status) - return - } - if ta.Todos[0].Content == "" { - t.Error("TodoWrite todo content is empty") - } -} - -// processToolCall inspects a single tool call map and updates counts. -// It returns true if the tool call was a TodoWrite (for result matching). -func processToolCall(t *testing.T, tc map[string]any, c *ClaudeCodeClient, counts *todoWriteCounts) bool { - t.Helper() - - funcRaw, ok := tc["function"].(map[string]any) - if !ok { - return false - } - name, _ := funcRaw["name"].(string) - if name != "TodoWrite" { - return false - } - counts.ToolUse++ - args, _ := funcRaw["arguments"].(string) - validateTodoWriteArgs(t, args) - return true -} - -// processToolResult inspects a tool result and updates counts. 
-func processToolResult(t *testing.T, tc map[string]any, c *ClaudeCodeClient, counts *todoWriteCounts) { - t.Helper() - - resultStr, ok := tc["result"].(string) - if !ok { - return - } - if _, isTaskCreate := c.taskCreateIDs[tc["id"].(string)]; isTaskCreate { - // This was a mapped TaskCreate result; it should have been remapped. - // If we see it here, the mapping didn't work. - return - } - var tr todoWriteResult - if err := json.Unmarshal([]byte(resultStr), &tr); err != nil || !tr.ValidationOK { - return - } - counts.Result++ - if tr.TotalTasks != 1 { - t.Errorf("TodoWrite result: total_tasks = %d, want 1", tr.TotalTasks) - } - if len(tr.Todos) != 1 { - t.Errorf("TodoWrite result: got %d todos, want 1", len(tr.Todos)) - } -} - -// TestTaskCreateToTodoWriteMapping uses the real claude-run.jsonl fixture to -// verify that TaskCreate tool_use and tool_result events are mapped to TodoWrite -// equivalents through the full transform pipeline. -func TestTaskCreateToTodoWriteMapping(t *testing.T) { +// TestTaskCreatePassthrough uses the real claude-run fixture to verify that +// TaskCreate tool_use blocks and their results flow through the transform +// pipeline verbatim - the stream is no longer rewritten to TodoWrite (the +// rename now happens at the headless output layer in cmd/agent.go). 
+func TestTaskCreatePassthrough(t *testing.T) { f, err := os.Open(filepath.Join("testdata", "todos_write.jsonl")) if err != nil { t.Fatalf("open fixture: %v", err) } defer func() { _ = f.Close() }() - c := &ClaudeCodeClient{ - taskCreateIDs: make(map[string]string), - } + c := &ClaudeCodeClient{} var model string - var counts todoWriteCounts + var taskCreateCalls, taskCreateResults, todoWriteCalls int scanner := bufio.NewScanner(f) scanner.Buffer(make([]byte, 0, 256*1024), 10*1024*1024) @@ -858,8 +770,25 @@ func TestTaskCreateToTodoWriteMapping(t *testing.T) { } for _, ev := range c.transformMessage(msg, line, model) { for _, tc := range extractToolCallsFromEvent(ev) { - processToolCall(t, tc, c, &counts) - processToolResult(t, tc, c, &counts) + if funcRaw, ok := tc["function"].(map[string]any); ok { + name, _ := funcRaw["name"].(string) + switch name { + case "TaskCreate": + taskCreateCalls++ + args, _ := funcRaw["arguments"].(string) + var input struct { + Subject string `json:"subject"` + } + if err := json.Unmarshal([]byte(args), &input); err != nil || input.Subject == "" { + t.Errorf("TaskCreate arguments not passed through verbatim: %q", args) + } + case "TodoWrite": + todoWriteCalls++ + } + } + if result, ok := tc["result"].(string); ok && strings.Contains(result, "created successfully") { + taskCreateResults++ + } } } } @@ -867,13 +796,134 @@ func TestTaskCreateToTodoWriteMapping(t *testing.T) { t.Fatalf("scanner: %v", err) } - if counts.ToolUse != 3 { - t.Errorf("got %d TodoWrite tool_use events, want 3", counts.ToolUse) + if taskCreateCalls != 3 { + t.Errorf("got %d TaskCreate tool_use events, want 3", taskCreateCalls) + } + if taskCreateResults != 3 { + t.Errorf("got %d verbatim TaskCreate results, want 3", taskCreateResults) + } + if todoWriteCalls != 0 { + t.Errorf("got %d TodoWrite tool_use events, want 0 (stream must not be rewritten)", todoWriteCalls) + } +} + +func TestBuildArgs_AppendSystemPrompt(t *testing.T) { + base := &ClaudeCodeClient{config: 
&config.ClaudeCodeConfig{}} + args := base.buildArgs("anthropic/claude-sonnet-4-6") + for _, a := range args { + if a == "--append-system-prompt" { + t.Fatal("--append-system-prompt must be omitted when no prompt is configured") + } + } + + withPrompt := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{}, appendSystemPrompt: "extra context"} + args = withPrompt.buildArgs("claude-sonnet-4-6") + found := false + for i, a := range args { + if a == "--append-system-prompt" { + found = true + if i+1 >= len(args) || args[i+1] != "extra context" { + t.Fatalf("--append-system-prompt value missing, args: %v", args) + } + } + } + if !found { + t.Fatalf("--append-system-prompt not present, args: %v", args) + } +} + +func TestBuildArgs_ExtraArgsAndTrailingP(t *testing.T) { + c := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{ + ExtraArgs: []string{"--max-turns", "5"}, + }, appendSystemPrompt: "extra context"} + args := c.buildArgs("claude-sonnet-4-6") + + if args[len(args)-1] != "-p" { + t.Fatalf("-p must be the last argument, args: %v", args) + } + joined := strings.Join(args, "\x00") + if !strings.Contains(joined, "--max-turns\x005") { + t.Fatalf("extra args not appended in order, args: %v", args) + } + + noExtra := &ClaudeCodeClient{config: &config.ClaudeCodeConfig{}} + args = noExtra.buildArgs("claude-sonnet-4-6") + if args[len(args)-1] != "-p" { + t.Fatalf("-p must be the last argument without extra args, args: %v", args) + } +} + +func TestProcessToolCalls_CapturesClaudeResults(t *testing.T) { + c := &ClaudeCodeClient{toolResults: map[string]domain.ToolCallResult{}} + toolCallsMap := map[string]*sdk.ChatCompletionMessageToolCall{} + + c.processToolCalls([]any{ + map[string]any{ + "id": "call_1", + "function": map[string]any{ + "name": "Bash", + "arguments": `{"command":"ls"}`, + }, + }, + }, toolCallsMap) + c.processToolCalls([]any{ + map[string]any{"id": "call_1", "result": "file.txt", "is_error": false}, + map[string]any{"id": "call_2", "result": "boom", 
"is_error": true}, + }, toolCallsMap) + + results := c.TakeToolCallResults() + if len(results) != 2 { + t.Fatalf("expected 2 captured results, got %d", len(results)) + } + if r := results["call_1"]; r.Content != "file.txt" || r.IsError { + t.Errorf("unexpected call_1 result: %+v", r) + } + if r := results["call_2"]; r.Content != "boom" || !r.IsError { + t.Errorf("unexpected call_2 result: %+v", r) + } + if again := c.TakeToolCallResults(); again != nil { + t.Errorf("TakeToolCallResults must drain, got %v", again) + } +} + +// tool_result content arrives either as a plain string or as an array of +// content blocks; the previous string typing made the whole user message fail +// to unmarshal, silently dropping the tool results. +func TestTransformUserMessage_BlockArrayContent(t *testing.T) { + rawJSON := `{ + "type": "user", + "message": { + "role": "user", + "content": [ + {"type":"tool_result","tool_use_id":"toolu_arr","is_error":false,"content":[{"type":"text","text":"line one"},{"type":"text","text":" line two"}]}, + {"type":"tool_result","tool_use_id":"toolu_str","is_error":false,"content":"plain"} + ] + } + }` + + var msg ClaudeCodeMessage + if err := json.Unmarshal([]byte(rawJSON), &msg); err != nil { + t.Fatalf("unmarshal message: %v", err) + } + + c := &ClaudeCodeClient{} + events := c.transformUserMessage(msg, "claude-haiku-4-5") + if len(events) != 2 { + t.Fatalf("got %d events, want 2", len(events)) + } + + results := map[string]string{} + for _, ev := range events { + for _, tc := range extractToolCallsFromEvent(ev) { + id, _ := tc["id"].(string) + result, _ := tc["result"].(string) + results[id] = result + } } - if counts.Result != 3 { - t.Errorf("got %d TodoWrite result events, want 3", counts.Result) + if results["toolu_arr"] != "line one line two" { + t.Errorf("block-array content = %q, want flattened text", results["toolu_arr"]) } - if len(c.taskCreateIDs) != 0 { - t.Errorf("taskCreateIDs map not empty after processing, got %d entries", 
len(c.taskCreateIDs)) + if results["toolu_str"] != "plain" { + t.Errorf("string content = %q, want plain", results["toolu_str"]) } } From 7d6bc5ac8538455f14c8aacaf01715ac8b3952a9 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sun, 5 Jul 2026 00:54:43 +0200 Subject: [PATCH 2/3] docs(readme): document Claude Code pass-through behavior, system_prompt_claude_code and extra_args --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 8a318557..88e6e7e5 100644 --- a/README.md +++ b/README.md @@ -328,6 +328,9 @@ claude_code: timeout: 600 # Command timeout in seconds max_output_tokens: 32000 # Maximum output tokens per request thinking_budget: 10000 # Token budget for extended thinking + extra_args: # Extra arguments appended verbatim to the claude CLI invocation + - --max-turns + - "5" ``` **Environment Variables:** @@ -336,6 +339,27 @@ claude_code: export INFER_CLAUDE_CODE_ENABLED=true export INFER_CLAUDE_CODE_CLI_PATH=/usr/local/bin/claude export INFER_CLAUDE_CODE_TIMEOUT=600 +export INFER_CLAUDE_CODE_EXTRA_ARGS="--max-turns,5" # comma/newline-separated; wins over --claude-code-extra-args +``` + +**Pass-through behavior:** + +Claude Code mode is a pure pass-through: infer does not inject its system prompt, context blocks, or +system reminders, and does not re-execute claude's tool calls locally — claude runs with its own +defaults and native tools. Infer's `prompts.yaml` and `reminders.yaml` do not apply in this mode. + +To add instructions on top of claude's built-in system prompt (passed via `--append-system-prompt`), +set the dedicated prompt in `.infer/prompts.yaml` (empty by default): + +```yaml +agent: + system_prompt_claude_code: "Always answer in English." +``` + +Or via environment variable: + +```bash +export INFER_PROMPTS_AGENT_SYSTEM_PROMPT_CLAUDE_CODE="Always answer in English." 
``` ### Features and Limitations From 094acba2d4a54841ed786b1547d4d64620be9bb4 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sun, 5 Jul 2026 01:01:30 +0200 Subject: [PATCH 3/3] refactor: replace em dashes with regular dashes --- README.md | 2 +- internal/infra/adapters/testdata/todos_write.jsonl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 88e6e7e5..64d88869 100644 --- a/README.md +++ b/README.md @@ -345,7 +345,7 @@ export INFER_CLAUDE_CODE_EXTRA_ARGS="--max-turns,5" # comma/newline-separated; **Pass-through behavior:** Claude Code mode is a pure pass-through: infer does not inject its system prompt, context blocks, or -system reminders, and does not re-execute claude's tool calls locally — claude runs with its own +system reminders, and does not re-execute claude's tool calls locally - claude runs with its own defaults and native tools. Infer's `prompts.yaml` and `reminders.yaml` do not apply in this mode. To add instructions on top of claude's built-in system prompt (passed via `--append-system-prompt`), diff --git a/internal/infra/adapters/testdata/todos_write.jsonl b/internal/infra/adapters/testdata/todos_write.jsonl index c2b9818d..2bf3b58a 100644 --- a/internal/infra/adapters/testdata/todos_write.jsonl +++ b/internal/infra/adapters/testdata/todos_write.jsonl @@ -10,5 +10,5 @@ {"type":"user","message":{"role":"user","content":[{"tool_use_id":"xxx","type":"tool_result","content":"Task #2 created successfully: Review the release notes draft"}]},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","timestamp":"xxx","tool_use_result":{"task":{"id":"2","subject":"Review the release notes draft"}}} {"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"tool_use","id":"xxx","name":"TaskCreate","input":{"subject":"Organize the downloads folder","description":"Sort and clean up files in the downloads 
folder."},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":18150,"cache_read_input_tokens":14930,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":18150},"output_tokens":60,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":{"cache_miss_reason":{"type":"tools_changed","cache_missed_input_tokens":23524}},"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"} {"type":"user","message":{"role":"user","content":[{"tool_use_id":"xxx","type":"tool_result","content":"Task #3 created successfully: Organize the downloads folder"}]},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","timestamp":"xxx","tool_use_result":{"task":{"id":"3","subject":"Organize the downloads folder"}}} -{"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"text","text":"Done — created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\"."}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":413,"cache_read_input_tokens":33080,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"output_tokens":2,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":null,"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"} -{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11504,"duration_api_ms":12519,"ttft_ms":4359,"ttft_stream_ms":3880,"time_to_request_ms":40,"num_turns":5,"result":"Done — created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads 
folder\".","stop_reason":"end_turn","session_id":"xxx","total_cost_usd":0.7333969999999999,"usage":{"input_tokens":6282,"cache_creation_input_tokens":29307,"cache_read_input_tokens":62847,"output_tokens":420,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":29307,"ephemeral_5m_input_tokens":0},"inference_geo":"not_available","iterations":[{"input_tokens":2,"output_tokens":43,"cache_read_input_tokens":33080,"cache_creation_input_tokens":413,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"type":"message"}],"speed":"standard"},"modelUsage":{"claude-haiku-4-5-20251001":{"inputTokens":510,"outputTokens":16,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.00059,"contextWindow":200000,"maxOutputTokens":32000},"xxx":{"inputTokens":6282,"outputTokens":420,"cacheReadInputTokens":62847,"cacheCreationInputTokens":29307,"webSearchRequests":0,"costUSD":0.732807,"contextWindow":1000000,"maxOutputTokens":64000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"xxx"} +{"type":"assistant","message":{"model":"xxx","id":"xxx","type":"message","role":"assistant","content":[{"type":"text","text":"Done - created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\"."}],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":413,"cache_read_input_tokens":33080,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"output_tokens":2,"service_tier":"standard","inference_geo":"not_available"},"diagnostics":null,"context_management":null},"parent_tool_use_id":null,"session_id":"xxx","uuid":"xxx","request_id":"xxx"} 
+{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11504,"duration_api_ms":12519,"ttft_ms":4359,"ttft_stream_ms":3880,"time_to_request_ms":40,"num_turns":5,"result":"Done - created 3 random todos: \"Water the office plants\", \"Review the release notes draft\", and \"Organize the downloads folder\".","stop_reason":"end_turn","session_id":"xxx","total_cost_usd":0.7333969999999999,"usage":{"input_tokens":6282,"cache_creation_input_tokens":29307,"cache_read_input_tokens":62847,"output_tokens":420,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":29307,"ephemeral_5m_input_tokens":0},"inference_geo":"not_available","iterations":[{"input_tokens":2,"output_tokens":43,"cache_read_input_tokens":33080,"cache_creation_input_tokens":413,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":413},"type":"message"}],"speed":"standard"},"modelUsage":{"claude-haiku-4-5-20251001":{"inputTokens":510,"outputTokens":16,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.00059,"contextWindow":200000,"maxOutputTokens":32000},"xxx":{"inputTokens":6282,"outputTokens":420,"cacheReadInputTokens":62847,"cacheCreationInputTokens":29307,"webSearchRequests":0,"costUSD":0.732807,"contextWindow":1000000,"maxOutputTokens":64000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"xxx"}