From ebab2f88e02f828bf20c80af08449df92d212bc0 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Wed, 13 May 2026 16:00:36 +0200 Subject: [PATCH 1/4] feat: add slash commands for agent switching Add support for commands that switch the active agent via a new 'agent:' field in the commands section of agent.yaml. Users can now use /plan to switch to a planner sub-agent, /review to switch to a reviewer, etc. Trailing arguments after the command are forwarded to the target agent as the first prompt. --- agent-schema.json | 6 ++- docs/configuration/agents/index.md | 35 ++++++++++++-- examples/agent_switching_commands.yaml | 63 ++++++++++++++++++++++++++ pkg/app/app.go | 9 ++++ pkg/cli/runner.go | 9 ++++ pkg/config/types/commands.go | 36 ++++++++++++--- pkg/runtime/commands.go | 42 ++++++++++++++--- pkg/runtime/commands_test.go | 61 +++++++++++++++++++++++++ pkg/tui/handlers.go | 25 +++++++++- 9 files changed, 266 insertions(+), 20 deletions(-) create mode 100644 examples/agent_switching_commands.yaml diff --git a/agent-schema.json b/agent-schema.json index 6176efc02..93456736c 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -595,7 +595,7 @@ }, "CommandConfig": { "type": "object", - "description": "Advanced command configuration with description and instruction", + "description": "Advanced command configuration. Set 'instruction' to send a prompt to the current agent. Set 'agent' to switch the active agent to a sub-agent (e.g. /plan -> the 'planner' sub-agent). Both fields can be combined: the agent is switched first, then the instruction is sent to the new agent.", "properties": { "description": { "type": "string", @@ -604,6 +604,10 @@ "instruction": { "type": "string", "description": "The prompt sent to the agent. Supports bang commands (!`command`) and positional arguments ($1, $2, etc.)" + }, + "agent": { + "type": "string", + "description": "Name of a sub-agent to switch to when this command is invoked. The agent must be reachable from the current agent's sub-agent graph. When set without 'instruction', any text typed after the slash command is forwarded as a prompt to the target agent." } }, "additionalProperties": false diff --git a/docs/configuration/agents/index.md b/docs/configuration/agents/index.md index af62f19fa..eb3b9a2a8 100644 --- a/docs/configuration/agents/index.md +++ b/docs/configuration/agents/index.md @@ -35,7 +35,7 @@ agents: num_history_items: int # Optional: limit conversation history skills: boolean | [list] # Optional: enable skill discovery (true/false or list of names and/or sources) commands: # Optional: named prompts - name: "prompt text" + name: "prompt text" # or {instruction: "prompt", agent: "sub_agent_name"} welcome_message: string # Optional: message shown at session start handoffs: [list] # Optional: agent names this agent can hand off to hooks: # Optional: lifecycle hooks @@ -85,7 +85,7 @@ agents: | `max_old_tool_call_tokens` | int | ✗ | Maximum number of tokens to keep from old tool call arguments and results. Older tool calls beyond this budget have their content replaced with a placeholder, saving context space. Tokens are approximated as `len/4`. Set to `-1` to disable truncation (unlimited). Default: `40000`. | | `num_history_items` | int | ✗ | Limit the number of conversation history messages sent to the model. Useful for managing context window size with long conversations. Default: unlimited (all messages sent). | | `skills` | bool/array | ✗ | Enable automatic skill discovery. `true` loads all discovered local skills, `false` disables them. A list can mix skill sources (`local` or `https://…` URLs) and skill names to include — see [Skills]({{ '/features/skills/' | relative_url }}). | -| `commands` | object | ✗ | Named prompts that can be run with `docker agent run config.yaml /command_name`. | +| `commands` | object | ✗ | Named prompts that can be run with `docker agent run config.yaml /command_name`. Can be simple strings or objects with `instruction` and/or `agent` fields for agent switching. See [Named Commands](#named-commands) below. | | `welcome_message` | string | ✗ | Message displayed to the user when a session starts. Useful for providing context or instructions. | | `handoffs` | array | ✗ | List of agent names this agent can hand off the conversation to. Enables the `handoff` tool. See [Handoffs Routing]({{ '/concepts/multi-agent/#handoffs-routing' | relative_url }}). | | `hooks` | object | ✗ | Lifecycle hooks for running commands at various points. See [Hooks]({{ '/configuration/hooks/' | relative_url }}). | @@ -251,7 +251,7 @@ agents: ## Named Commands -Define reusable prompt shortcuts: +Define reusable prompt shortcuts that can send prompts to the current agent or switch to a different sub-agent: ```yaml agents: @@ -263,8 +263,37 @@ agents: logs: "Show me the last 50 lines of system logs" greet: "Say hello to ${env.USER}" deploy: "Deploy ${env.PROJECT_NAME || 'app'} to ${env.ENV || 'staging'}" + + # Advanced format with agent switching + plan: + agent: planner # Switch to the 'planner' sub-agent + instruction: "Create a detailed plan for: $1" # Optional: send this prompt after switching + + # Agent switching without instruction - forwards remaining text as prompt + review: + agent: reviewer # Any text after /review is sent to the reviewer agent ``` + +### Command Formats + +Commands support two formats: + +1. **Simple string format**: The string becomes the instruction sent to the current agent + ```yaml + df: "Check disk space" + ``` + +2. **Advanced object format**: Supports agent switching and optional instructions + ```yaml + plan: + agent: planner # Required: name of sub-agent to switch to + instruction: "Plan: $1" # Optional: prompt to send after switching + description: "Switch to planning mode" # Optional: shown in help text + ``` + +When `agent` is set without `instruction`, any text typed after the slash command (e.g., `/plan build a web app`) is forwarded as a prompt to the target agent. The target agent must be listed in the current agent's `sub_agents` array. + ```bash # Run commands from the CLI $ docker agent run agent.yaml /df diff --git a/examples/agent_switching_commands.yaml b/examples/agent_switching_commands.yaml new file mode 100644 index 000000000..f9b0b023a --- /dev/null +++ b/examples/agent_switching_commands.yaml @@ -0,0 +1,63 @@ +#!/usr/bin/env docker agent run + +# This example demonstrates slash commands that switch the active agent. +# Type "/plan" in the TUI to hand the conversation off to the planner sub-agent, +# "/review" to switch to the reviewer, and "/back" to return to the root agent. +# +# Anything typed after the slash command (e.g. "/plan add a logout button") is +# forwarded as the first user prompt to the target agent. + +models: + claude: + provider: anthropic + model: claude-sonnet-4-5 + +agents: + root: + model: claude + description: The default agent. Implements the work once a plan exists. + instruction: | + You are the implementation agent. You write and edit code. + If the user asks for a plan or design discussion, use the /plan command + to switch to the planner sub-agent. + sub_agents: + - planner + - reviewer + toolsets: + - type: filesystem + - type: shell + commands: + plan: + description: "Hand off to the planner sub-agent" + agent: planner + review: + description: "Hand off to the reviewer sub-agent" + agent: reviewer + + planner: + model: claude + description: Plans the work before implementation. + instruction: | + You are the planning agent. Ask clarifying questions, then produce a + step-by-step plan in markdown. Do not write code. + sub_agents: + - root + commands: + back: + description: "Return to the root agent" + agent: root + + reviewer: + model: claude + description: Reviews local changes for quality and security. + instruction: | + You review the local Git changes and provide concise, actionable feedback + on code quality, security, and maintainability. + sub_agents: + - root + toolsets: + - type: shell + commands: + back: + description: "Return to the root agent" + agent: root diff --git a/pkg/app/app.go b/pkg/app/app.go index ec8aba3ae..7328ebf24 100644 --- a/pkg/app/app.go +++ b/pkg/app/app.go @@ -301,6 +301,15 @@ func (a *App) ResolveCommand(ctx context.Context, userInput string) string { return runtime.ResolveCommand(ctx, a.runtime, userInput) } +// LookupCommand parses userInput as a /command invocation and returns the +// matching command, the trailing arguments, and whether a match was found. +// Callers that want to act on command metadata (for example switching to a +// sub-agent declared via the `agent:` field) should call this before +// ResolveCommand to inspect the raw command. +func (a *App) LookupCommand(ctx context.Context, userInput string) (types.Command, string, bool) { + return runtime.LookupCommand(ctx, a.runtime, userInput) +} + // EmitStartupInfo emits initial agent, team, and toolset information to the provided channel func (a *App) EmitStartupInfo(ctx context.Context, events chan runtime.Event) { a.runtime.EmitStartupInfo(ctx, a.session, runtime.NewChannelSink(events)) diff --git a/pkg/cli/runner.go b/pkg/cli/runner.go index cf298f8c0..f4d41bd7a 100644 --- a/pkg/cli/runner.go +++ b/pkg/cli/runner.go @@ -326,6 +326,15 @@ func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess // attachment was used). Callers should pass that path to // session.Session.AddAttachedFile so sub-agents inherit the file context. func PrepareUserMessage(ctx context.Context, rt runtime.Runtime, userInput, globalAttachPath string) (*session.Message, string) { + // Switch the active agent first if the /command targets a sub-agent. + // This must happen before the message is added to the session so the + // next runtime turn runs on the right agent. + if cmd, _, ok := runtime.LookupCommand(ctx, rt, userInput); ok && cmd.Agent != "" { + if err := rt.SetCurrentAgent(cmd.Agent); err != nil { + slog.WarnContext(ctx, "Failed to switch agent for /command", "agent", cmd.Agent, "error", err) + } + } + // Resolve any /command to its prompt text resolvedContent := runtime.ResolveCommand(ctx, rt, userInput) diff --git a/pkg/config/types/commands.go b/pkg/config/types/commands.go index e15d185d9..7c0dbcb62 100644 --- a/pkg/config/types/commands.go +++ b/pkg/config/types/commands.go @@ -19,6 +19,12 @@ import ( // instruction: | // Fix the lint issues reported by: !`golangci-lint run` // Focus on files: $1 +// +// Agent-switching format (object value): +// +// plan: +// description: "Hand off to the planner sub-agent" +// agent: planner type Command struct { // Description is shown in completion dialogs and help text. // For simple format commands, this is empty and the instruction is used for display. @@ -29,15 +35,30 @@ type Command struct { // - Bang commands: !`command` (executed and output inserted) // - Positional arguments: $1, $2, etc. Instruction string `json:"instruction,omitempty"` + + // Agent, when set, makes the command switch the active agent to the named + // sub-agent before any (optional) instruction is sent. This lets users + // hand off to a different agent with a single slash command, e.g. /plan + // to switch to the "planner" sub-agent. + Agent string `json:"agent,omitempty"` } // DisplayText returns the text to show in completion dialogs. -// Returns Description if available, otherwise truncates the Instruction. +// It returns Description when available, otherwise the Instruction. For +// agent-only commands (no instruction or description), it falls back to a +// short "switch to " hint so the completion dialog still has +// something meaningful to show. func (c Command) DisplayText() string { if c.Description != "" { return c.Description } - return c.Instruction + if c.Instruction != "" { + return c.Instruction + } + if c.Agent != "" { + return "Switch to " + c.Agent + } + return "" } // Commands represents a set of named prompts for quick-starting conversations. @@ -113,7 +134,7 @@ func (c *Commands) UnmarshalYAML(unmarshal func(any) error) error { // parseCommandValue parses a command value which can be either: // - a simple string (becomes the instruction) -// - a map with description/instruction fields. +// - a map with description/instruction/agent fields. func parseCommandValue(v any) (Command, error) { switch val := v.(type) { case string: @@ -121,15 +142,16 @@ func parseCommandValue(v any) (Command, error) { case map[string]any: desc, _ := val["description"].(string) inst, _ := val["instruction"].(string) + agent, _ := val["agent"].(string) - if inst == "" && desc == "" { - return Command{}, errors.New("command must have at least 'instruction' or 'description'") + if inst == "" && desc == "" && agent == "" { + return Command{}, errors.New("command must have at least 'instruction', 'description' or 'agent'") } - if inst == "" { + if inst == "" && agent == "" { inst = desc } - return Command{Description: desc, Instruction: inst}, nil + return Command{Description: desc, Instruction: inst, Agent: agent}, nil default: return Command{}, fmt.Errorf("invalid command value type: %T", v) } diff --git a/pkg/runtime/commands.go b/pkg/runtime/commands.go index ca92d678c..4b13f3642 100644 --- a/pkg/runtime/commands.go +++ b/pkg/runtime/commands.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/docker/docker-agent/pkg/config/types" "github.com/docker/docker-agent/pkg/js" "github.com/docker/docker-agent/pkg/tools" ) @@ -18,6 +19,27 @@ import ( // This includes ${args}, ${args[N]}, ${args.join(...)}, ${args.length}, etc. var argsPlaceholderRegex = regexp.MustCompile(`\$\{args[^}]*\}`) +// LookupCommand parses userInput as a /command invocation and returns the +// matching command along with its trailing arguments. The boolean is false +// when userInput doesn't start with '/' or doesn't match a configured +// command. Callers that need both the resolved instruction and the original +// command metadata (e.g. its target agent) typically call LookupCommand to +// inspect the command before calling ResolveCommand. +func LookupCommand(ctx context.Context, rt Runtime, userInput string) (cmd types.Command, rest string, ok bool) { + if !strings.HasPrefix(userInput, "/") { + return types.Command{}, "", false + } + + head, tail, _ := strings.Cut(userInput, " ") + commandName := head[1:] + + command, found := rt.CurrentAgentInfo(ctx).Commands[commandName] + if !found { + return types.Command{}, "", false + } + return command, tail, true +} + // ResolveCommand transforms a /command into its expanded instruction text. // It processes: // 1. Command lookup from agent commands @@ -26,20 +48,26 @@ var argsPlaceholderRegex = regexp.MustCompile(`\$\{args[^}]*\}`) // - ${args[0]}, ${args[1]}, etc. for positional arguments // - ${args} or ${args.join(" ")} for all arguments // - ${tool({...})} for tool calls +// +// For agent-switching commands (those declaring `agent: ` and no +// instruction), ResolveCommand returns the trailing arguments verbatim so the +// caller can forward them to the target sub-agent after switching. When the +// command has no instruction and no arguments, the result is the empty +// string, signalling "no message to send". func ResolveCommand(ctx context.Context, rt Runtime, userInput string) string { - if !strings.HasPrefix(userInput, "/") { + command, rest, ok := LookupCommand(ctx, rt, userInput) + if !ok { return userInput } - cmd, rest, _ := strings.Cut(userInput, " ") - commandName := cmd[1:] // Remove leading "/" + instruction := command.Instruction - command, found := rt.CurrentAgentInfo(ctx).Commands[commandName] - if !found { - return userInput + // Agent-only commands (no instruction): forward the trailing args verbatim + // so the target sub-agent receives the user's original prompt. + if instruction == "" { + return rest } - instruction := command.Instruction args := tokenize(rest) // Execute JavaScript expressions (${...} syntax) with args array diff --git a/pkg/runtime/commands_test.go b/pkg/runtime/commands_test.go index 2a42409dd..ac8e51629 100644 --- a/pkg/runtime/commands_test.go +++ b/pkg/runtime/commands_test.go @@ -623,3 +623,64 @@ func TestResolveCommand_ArgsSlice(t *testing.T) { result := ResolveCommand(t.Context(), rt, "/test first second third") assert.Equal(t, "Rest: second third", result) } + +func TestLookupCommand_AgentTarget(t *testing.T) { + t.Parallel() + + rt := &mockRuntime{ + commands: types.Commands{ + "plan": types.Command{ + Description: "Hand off to the planner", + Agent: "planner", + }, + }, + } + + cmd, rest, ok := LookupCommand(t.Context(), rt, "/plan add a logout button") + assert.True(t, ok) + assert.Equal(t, "planner", cmd.Agent) + assert.Empty(t, cmd.Instruction) + assert.Equal(t, "add a logout button", rest) +} + +func TestLookupCommand_NotACommand(t *testing.T) { + t.Parallel() + + rt := &mockRuntime{commands: types.Commands{}} + + _, _, ok := LookupCommand(t.Context(), rt, "hello there") + assert.False(t, ok) +} + +func TestResolveCommand_AgentOnlyForwardsArgs(t *testing.T) { + t.Parallel() + + rt := &mockRuntime{ + commands: types.Commands{ + "plan": types.Command{Agent: "planner"}, + }, + } + + // With trailing args: forward verbatim to the target agent. + assert.Equal(t, "design a login flow", ResolveCommand(t.Context(), rt, "/plan design a login flow")) + // Without trailing args: empty so no message is sent after the switch. + assert.Empty(t, ResolveCommand(t.Context(), rt, "/plan")) +} + +func TestResolveCommand_AgentWithInstruction(t *testing.T) { + t.Parallel() + + rt := &mockRuntime{ + commands: types.Commands{ + "plan": types.Command{ + Instruction: "Plan the work for: ${args.join(\" \")}", + Agent: "planner", + }, + }, + } + + // When both instruction and agent are set, the instruction wins (the + // caller is responsible for switching the agent before sending it). + result := ResolveCommand(t.Context(), rt, "/plan add login") + assert.Equal(t, "Plan the work for: add login", result) +} diff --git a/pkg/tui/handlers.go b/pkg/tui/handlers.go index 1f5047ddc..9932b3531 100644 --- a/pkg/tui/handlers.go +++ b/pkg/tui/handlers.go @@ -624,8 +624,29 @@ func (m *appModel) handleOpenURL(url string) (tea.Model, tea.Cmd) { } func (m *appModel) handleAgentCommand(command string) (tea.Model, tea.Cmd) { - resolvedCommand := m.application.ResolveCommand(context.Background(), command) - return m, core.CmdHandler(messages.SendMsg{Content: resolvedCommand}) + ctx := context.Background() + + // Inspect the command before resolving so we can detect /commands that + // switch to a sub-agent. For those, we switch first and only then send + // the resolved message — otherwise the message would be processed by + // the previous agent. + cmd, _, ok := m.application.LookupCommand(ctx, command) + resolved := m.application.ResolveCommand(ctx, command) + + var cmds []tea.Cmd + if ok && cmd.Agent != "" && cmd.Agent != m.sessionState.CurrentAgentName() { + switched, switchCmd := m.handleSwitchAgent(cmd.Agent) + m = switched.(*appModel) + if switchCmd != nil { + cmds = append(cmds, switchCmd) + } + } + + if resolved != "" { + cmds = append(cmds, core.CmdHandler(messages.SendMsg{Content: resolved})) + } + + return m, tea.Batch(cmds...) } func (m *appModel) handleAttachFile(filePath string) (tea.Model, tea.Cmd) { From 8e82e94116ba62a71157cb2e747d5a1635ff4f5d Mon Sep 17 00:00:00 2001 From: David Gageot Date: Wed, 13 May 2026 16:10:30 +0200 Subject: [PATCH 2/4] feat(coder): add /plan command for agent switching Wires the new agent-switching slash command feature into the built-in coder agent: - Adds a planner sub-agent with read-only tools (filesystem, fetch, todo, user_prompt) - Adds /plan command on root agent to switch to plan mode - Adds symmetric /back command on planner agent to hand work back - Updates root instruction to mention the /plan command --- pkg/config/builtin-agents/coder.yaml | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/pkg/config/builtin-agents/coder.yaml b/pkg/config/builtin-agents/coder.yaml index 675a29487..d2a09af07 100644 --- a/pkg/config/builtin-agents/coder.yaml +++ b/pkg/config/builtin-agents/coder.yaml @@ -64,6 +64,13 @@ agents: - Information about error messages or issues you can't resolve from context alone Don't delegate for things you already know or can find in the local codebase. + + + For non-trivial tasks where the user wants to design or plan before + implementation, suggest the /plan command. It switches the conversation + to the planner sub-agent, which gathers requirements and produces a + step-by-step plan without writing code. + skills: true add_date: true add_environment_info: true @@ -71,12 +78,16 @@ agents: - AGENTS.md sub_agents: - librarian + - planner toolsets: - type: filesystem - type: shell - type: todo - type: fetch commands: + plan: + description: "Switch to plan mode (planner sub-agent)" + agent: planner commit: description: "Commit local changes with an appropriate message" instruction: | @@ -127,6 +138,52 @@ agents: 3. If any fail, analyze the failures, fix the code, and re-run 4. Continue until all tests pass + planner: + model: default + description: Planning Agent + instruction: | + You are a planning agent. You gather requirements and produce a + step-by-step plan before any code is written. + + + 1. **Clarify**: Ask the user focused questions to remove ambiguity. + Prefer multiple-choice questions when possible. Don't guess. + 2. **Investigate**: Explore the codebase with read-only tools to + understand the relevant code, conventions, and constraints. + 3. **Plan**: Produce a numbered, step-by-step plan in markdown. + Identify the files to touch, the dependencies between steps, and + any risks. Save the plan to a markdown file in the working + directory when the user agrees with it. + 4. **Hand off**: Once the plan is approved, use /back to return to + the root agent for execution. Mention the plan filename so the + root agent can pick it up. + + + + - Do not write or edit code. You plan, the root agent executes. + - Read the existing code before proposing changes — don't invent file + paths or APIs. + - Keep the plan concrete and actionable: each step should be small + enough to verify on its own. + - When you don't know something, delegate to the librarian sub-agent. + + add_date: true + add_environment_info: true + add_prompt_files: + - AGENTS.md + sub_agents: + - root + - librarian + toolsets: + - type: filesystem + - type: fetch + - type: todo + - type: user_prompt + commands: + back: + description: "Hand off back to the root coding agent" + agent: root + librarian: model: haiku description: Web Researcher From 7e3d57d14eca48a5c1d84043d542b8fa4a33f248 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Wed, 13 May 2026 18:22:47 +0200 Subject: [PATCH 3/4] fix: resolve command before switching agent to prevent lookup failures Addresses review feedback from docker-agent bot: 1. In pkg/cli/runner.go (PrepareUserMessage): - Moved ResolveCommand call BEFORE SetCurrentAgent - This ensures the command is looked up in the original agent's command table - Prevents raw slash-command strings from being sent to target agents when the target agent doesn't have the command defined 2. In pkg/tui/handlers.go (handleAgentCommand): - Added switchSucceeded flag to track whether agent switch was successful - Only send resolved message if the agent switch succeeded - Prevents messages from being sent to the wrong agent when switching fails --- pkg/cli/runner.go | 10 +++++----- pkg/tui/handlers.go | 5 ++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg/cli/runner.go b/pkg/cli/runner.go index f4d41bd7a..4889c187d 100644 --- a/pkg/cli/runner.go +++ b/pkg/cli/runner.go @@ -326,7 +326,11 @@ func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess // attachment was used). Callers should pass that path to // session.Session.AddAttachedFile so sub-agents inherit the file context. func PrepareUserMessage(ctx context.Context, rt runtime.Runtime, userInput, globalAttachPath string) (*session.Message, string) { - // Switch the active agent first if the /command targets a sub-agent. + // Resolve any /command to its prompt text BEFORE switching agents. + // This ensures the command is looked up in the original agent's command table. + resolvedContent := runtime.ResolveCommand(ctx, rt, userInput) + + // Switch the active agent if the /command targets a sub-agent. // This must happen before the message is added to the session so the // next runtime turn runs on the right agent. if cmd, _, ok := runtime.LookupCommand(ctx, rt, userInput); ok && cmd.Agent != "" { @@ -334,10 +338,6 @@ func PrepareUserMessage(ctx context.Context, rt runtime.Runtime, userInput, glob slog.WarnContext(ctx, "Failed to switch agent for /command", "agent", cmd.Agent, "error", err) } } - - // Resolve any /command to its prompt text - resolvedContent := runtime.ResolveCommand(ctx, rt, userInput) - // Parse for /attach commands in the message messageText, attachPath := ParseAttachCommand(resolvedContent) diff --git a/pkg/tui/handlers.go b/pkg/tui/handlers.go index 9932b3531..81a56e919 100644 --- a/pkg/tui/handlers.go +++ b/pkg/tui/handlers.go @@ -634,15 +634,18 @@ func (m *appModel) handleAgentCommand(command string) (tea.Model, tea.Cmd) { resolved := m.application.ResolveCommand(ctx, command) var cmds []tea.Cmd + switchSucceeded := true if ok && cmd.Agent != "" && cmd.Agent != m.sessionState.CurrentAgentName() { + prevAgent := m.sessionState.CurrentAgentName() switched, switchCmd := m.handleSwitchAgent(cmd.Agent) m = switched.(*appModel) if switchCmd != nil { cmds = append(cmds, switchCmd) } + switchSucceeded = m.sessionState.CurrentAgentName() != prevAgent } - if resolved != "" { + if resolved != "" && switchSucceeded { cmds = append(cmds, core.CmdHandler(messages.SendMsg{Content: resolved})) } From a8dfd3aa4c7cb3df66bd4792049026d9ddef6833 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 18 May 2026 10:58:53 +0200 Subject: [PATCH 4/4] Address PR review feedback for agent-switching commands This commit addresses all review feedback from PR #2790: **Critical fixes (blocking issues):** 1. **Fix SetCurrentAgent error handling in CLI and TUI** (runner.go, handlers.go) - If SetCurrentAgent fails, we now return an empty message instead of proceeding to send the message to the wrong agent - Added checked type assertion in TUI to prevent potential panics - Both CLI and TUI now properly handle agent switch failures 2. **Add unit tests for PrepareUserMessage and agent switching** (runner_test.go) - Added comprehensive tests for agent-switching commands - Tests cover success cases, failure cases, and edge cases - Tests verify that empty messages are returned on switch failures - Tests verify that agent-only commands (no instruction) work correctly **Non-blocking improvements:** 3. **Update agent-schema.json** (agent-schema.json) - Changed description from 'must be reachable from sub-agent graph' to 'must be defined in team configuration' to match runtime behavior - The runtime validates agent existence but doesn't enforce graph reachability 4. **Add explanatory comment for fallback logic** (commands.go) - Added comment explaining why description falls back to instruction - Clarifies the shorthand command definition behavior 5. **Improve code quality** - Fixed unchecked type assertion in TUI handlers - Used slog.WarnContext instead of slog.Warn for proper context propagation - All linters pass, all tests pass Addresses feedback from @aheritier and docker-agent bot. --- agent-schema.json | 323 +++++++++++++++++++++++++++++------ pkg/cli/runner.go | 3 + pkg/cli/runner_test.go | 174 +++++++++++++++++++ pkg/config/types/commands.go | 4 + pkg/tui/handlers.go | 15 +- 5 files changed, 462 insertions(+), 57 deletions(-) diff --git a/agent-schema.json b/agent-schema.json index 93456736c..f680b0411 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -84,12 +84,16 @@ "properties": { "profile": { "type": "string", - "description": "Shorthand that picks defaults for all the other fields. 'resilient' (default): auto-restart on failure with exponential backoff. 'strict': fail fast — the agent refuses to start when the toolset is unavailable. 'best-effort': single attempt, no auto-restart.", - "enum": ["resilient", "strict", "best-effort"] + "description": "Shorthand that picks defaults for all the other fields. 'resilient' (default): auto-restart on failure with exponential backoff. 'strict': fail fast \u2014 the agent refuses to start when the toolset is unavailable. 'best-effort': single attempt, no auto-restart.", + "enum": [ + "resilient", + "strict", + "best-effort" + ] }, "required": { "type": "boolean", - "description": "Marks the toolset as critical to the agent. NOTE: not yet enforced — a planned eager-startup phase will refuse to start the agent when a required toolset cannot reach Ready within startup_timeout. Defaults to true under the 'strict' profile, false otherwise." + "description": "Marks the toolset as critical to the agent. NOTE: not yet enforced \u2014 a planned eager-startup phase will refuse to start the agent when a required toolset cannot reach Ready within startup_timeout. Defaults to true under the 'strict' profile, false otherwise." }, "startup_timeout": { "type": "string", @@ -102,7 +106,11 @@ "restart": { "type": "string", "description": "How the supervisor reacts to an unexpected disconnect.", - "enum": ["never", "on_failure", "always"] + "enum": [ + "never", + "on_failure", + "always" + ] }, "max_restarts": { "type": "integer", @@ -244,7 +252,7 @@ ] }, "task_budget": { - "description": "Default total-token budget for an agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Configurable on any Claude model — docker-agent does not gate by model name — but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.", + "description": "Default total-token budget for an agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Configurable on any Claude model \u2014 docker-agent does not gate by model name \u2014 but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.", "oneOf": [ { "type": "integer", @@ -256,7 +264,9 @@ "properties": { "type": { "type": "string", - "enum": ["tokens"], + "enum": [ + "tokens" + ], "description": "Budget kind. Only \"tokens\" is supported today." }, "total": { @@ -265,7 +275,9 @@ "description": "Total budget value." } }, - "required": ["total"], + "required": [ + "total" + ], "additionalProperties": false } ] @@ -283,7 +295,9 @@ "properties": { "type": { "type": "string", - "enum": ["workload_identity_federation"], + "enum": [ + "workload_identity_federation" + ], "description": "Authentication scheme discriminator." }, "workload_identity_federation": { @@ -291,12 +305,24 @@ "description": "Parameters for the Anthropic OIDC federation flow. Required when type is workload_identity_federation." } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false, "allOf": [ { - "if": {"properties": {"type": {"const": "workload_identity_federation"}}}, - "then": {"required": ["workload_identity_federation"]} + "if": { + "properties": { + "type": { + "const": "workload_identity_federation" + } + } + }, + "then": { + "required": [ + "workload_identity_federation" + ] + } } ] }, @@ -323,7 +349,11 @@ "description": "How to obtain a fresh JWT identity token for each token exchange." } }, - "required": ["federation_rule_id", "organization_id", "identity_token"], + "required": [ + "federation_rule_id", + "organization_id", + "identity_token" + ], "additionalProperties": false }, "IdentityTokenSourceConfig": { @@ -340,7 +370,9 @@ }, "command": { "type": "array", - "items": {"type": "string"}, + "items": { + "type": "string" + }, "minItems": 1, "description": "Subprocess to execute; stdout is used as the JWT. Re-run on every token exchange." }, @@ -350,7 +382,9 @@ }, "headers": { "type": "object", - "additionalProperties": {"type": "string"}, + "additionalProperties": { + "type": "string" + }, "description": "HTTP headers sent with the URL request. Values support ${VAR} expansion. Only valid with 'url'." }, "response_field": { @@ -359,10 +393,102 @@ } }, "oneOf": [ - {"required": ["file"], "not": {"anyOf": [{"required": ["env"]}, {"required": ["command"]}, {"required": ["url"]}]}}, - {"required": ["env"], "not": {"anyOf": [{"required": ["file"]}, {"required": ["command"]}, {"required": ["url"]}]}}, - {"required": ["command"], "not": {"anyOf": [{"required": ["file"]}, {"required": ["env"]}, {"required": ["url"]}]}}, - {"required": ["url"], "not": {"anyOf": [{"required": ["file"]}, {"required": ["env"]}, {"required": ["command"]}]}} + { + "required": [ + "file" + ], + "not": { + "anyOf": [ + { + "required": [ + "env" + ] + }, + { + "required": [ + "command" + ] + }, + { + "required": [ + "url" + ] + } + ] + } + }, + { + "required": [ + "env" + ], + "not": { + "anyOf": [ + { + "required": [ + "file" + ] + }, + { + "required": [ + "command" + ] + }, + { + "required": [ + "url" + ] + } + ] + } + }, + { + "required": [ + "command" + ], + "not": { + "anyOf": [ + { + "required": [ + "file" + ] + }, + { + "required": [ + "env" + ] + }, + { + "required": [ + "url" + ] + } + ] + } + }, + { + "required": [ + "url" + ], + "not": { + "anyOf": [ + { + "required": [ + "file" + ] + }, + { + "required": [ + "env" + ] + }, + { + "required": [ + "command" + ] + } + ] + } + } ], "additionalProperties": false }, @@ -432,7 +558,7 @@ }, "redact_secrets": { "type": "boolean", - "description": "When true, the runtime auto-installs the redact_secrets builtin on all three of pre_tool_use (scrubs detected secrets from tool arguments), before_llm_call (scrubs the messages sent to the LLM), and tool_response_transform (scrubs tool output before it reaches event consumers, the persisted session, the post_tool_use hook input, or the next LLM call). The same hook entries can be authored directly in YAML for finer-grained control — see the hooks.tool_response_transform / hooks.before_llm_call / hooks.pre_tool_use sections. Detection uses the portcullis ruleset (GitHub PATs, AWS keys, Stripe / Slack / GitLab tokens, JWTs, private keys, etc.). Each detected span is replaced with the literal '[REDACTED]'." + "description": "When true, the runtime auto-installs the redact_secrets builtin on all three of pre_tool_use (scrubs detected secrets from tool arguments), before_llm_call (scrubs the messages sent to the LLM), and tool_response_transform (scrubs tool output before it reaches event consumers, the persisted session, the post_tool_use hook input, or the next LLM call). The same hook entries can be authored directly in YAML for finer-grained control \u2014 see the hooks.tool_response_transform / hooks.before_llm_call / hooks.pre_tool_use sections. Detection uses the portcullis ruleset (GitHub PATs, AWS keys, Stripe / Slack / GitLab tokens, JWTs, private keys, etc.). Each detected span is replaced with the literal '[REDACTED]'." }, "max_iterations": { "type": "integer", @@ -582,10 +708,21 @@ "type": "string" }, "examples": [ - ["local"], - ["local", "https://skills.example.com"], - ["git", "docker"], - ["local", "docker-build"] + [ + "local" + ], + [ + "local", + "https://skills.example.com" + ], + [ + "git", + "docker" + ], + [ + "local", + "docker-build" + ] ] } ] @@ -607,7 +744,7 @@ }, "agent": { "type": "string", - "description": "Name of a sub-agent to switch to when this command is invoked. The agent must be reachable from the current agent's sub-agent graph. When set without 'instruction', any text typed after the slash command is forwarded as a prompt to the target agent." + "description": "Name of a sub-agent to switch to when this command is invoked. The agent must be defined in the team configuration. When set without 'instruction', any text typed after the slash command is forwarded as a prompt to the target agent." } }, "additionalProperties": false @@ -693,7 +830,7 @@ }, "post_tool_use": { "type": "array", - "description": "Hooks that run after a tool completes (both success and failure). The result is delivered in tool_response (failed calls carry an is_error flag and any error text). Returning decision=block or exit code 2 stops the run loop after the current tool batch — useful for circuit-breaker patterns.", + "description": "Hooks that run after a tool completes (both success and failure). The result is delivered in tool_response (failed calls carry an is_error flag and any error text). Returning decision=block or exit code 2 stops the run loop after the current tool batch \u2014 useful for circuit-breaker patterns.", "items": { "$ref": "#/definitions/HookMatcherConfig" } @@ -721,21 +858,21 @@ }, "turn_start": { "type": "array", - "description": "Hooks that run at the start of every agent turn (each model call). Their AdditionalContext is appended as transient system messages for that turn only — it is NOT persisted to the session, so per-turn signals (date, prompt files) are recomputed every turn instead of bloating message history on every resume.", + "description": "Hooks that run at the start of every agent turn (each model call). Their AdditionalContext is appended as transient system messages for that turn only \u2014 it is NOT persisted to the session, so per-turn signals (date, prompt files) are recomputed every turn instead of bloating message history on every resume.", "items": { "$ref": "#/definitions/HookDefinition" } }, "turn_end": { "type": "array", - "description": "Hooks that run once per agent turn when the turn finishes — the symmetric counterpart of turn_start. Fires no matter why the turn ended: a normal stop, an error, a hook-driven shutdown, the loop detector, or context cancellation. The reason is reported via the hook input's reason field ('normal', 'continue', 'steered', 'error', 'canceled', 'hook_blocked', 'loop_detected'). Observational; output is ignored.", + "description": "Hooks that run once per agent turn when the turn finishes \u2014 the symmetric counterpart of turn_start. Fires no matter why the turn ended: a normal stop, an error, a hook-driven shutdown, the loop detector, or context cancellation. The reason is reported via the hook input's reason field ('normal', 'continue', 'steered', 'error', 'canceled', 'hook_blocked', 'loop_detected'). Observational; output is ignored.", "items": { "$ref": "#/definitions/HookDefinition" } }, "before_llm_call": { "type": "array", - "description": "Hooks that run just before each model call (after turn_start has assembled the messages). Use for observability, cost guardrails, or auditing without contributing system messages — turn_start is the right event for the latter.", + "description": "Hooks that run just before each model call (after turn_start has assembled the messages). Use for observability, cost guardrails, or auditing without contributing system messages \u2014 turn_start is the right event for the latter.", "items": { "$ref": "#/definitions/HookDefinition" } @@ -756,7 +893,7 @@ }, "pre_compact": { "type": "array", - "description": "Hooks that run just before the runtime compacts the session transcript into a summary. The trigger is reported in source: 'manual' (user-initiated /compact), 'auto' (proactive threshold), 'overflow' (context-overflow recovery), or 'tool_overflow' (proactive after tool results pushed past the threshold). Hooks may block compaction (decision=block / continue=false / exit code 2) or contribute additional_context that is appended to the compaction prompt — useful for steering the summary without modifying the agent's instruction.", + "description": "Hooks that run just before the runtime compacts the session transcript into a summary. The trigger is reported in source: 'manual' (user-initiated /compact), 'auto' (proactive threshold), 'overflow' (context-overflow recovery), or 'tool_overflow' (proactive after tool results pushed past the threshold). Hooks may block compaction (decision=block / continue=false / exit code 2) or contribute additional_context that is appended to the compaction prompt \u2014 useful for steering the summary without modifying the agent's instruction.", "items": { "$ref": "#/definitions/HookDefinition" } @@ -833,14 +970,14 @@ }, "after_compaction": { "type": "array", - "description": "Hooks that run after a successful compaction (a summary was applied to the session). Receives the final summary text in Input.summary alongside input_tokens, output_tokens, context_limit and compaction_reason. Purely observational — hook output is ignored.", + "description": "Hooks that run after a successful compaction (a summary was applied to the session). Receives the final summary text in Input.summary alongside input_tokens, output_tokens, context_limit and compaction_reason. Purely observational \u2014 hook output is ignored.", "items": { "$ref": "#/definitions/HookDefinition" } }, "tool_response_transform": { "type": "array", - "description": "Hooks that fire between a tool's execution and the runtime's emission/record of the response, with the rewrite reaching event consumers, the persisted session, the post_tool_use hook input, and the next LLM call. A hook may rewrite the tool's textual output by setting hookSpecificOutput.updated_tool_response — the symmetric counterpart of pre_tool_use's updated_input, applied to tool RESULTS instead of tool ARGUMENTS. The redact_secrets builtin uses this event for the third leg of the redact_secrets feature; custom rewriters can also truncate excessive output, scrub PII, or normalise tool dialects. Tool-matched, like pre_tool_use / post_tool_use.", + "description": "Hooks that fire between a tool's execution and the runtime's emission/record of the response, with the rewrite reaching event consumers, the persisted session, the post_tool_use hook input, and the next LLM call. A hook may rewrite the tool's textual output by setting hookSpecificOutput.updated_tool_response \u2014 the symmetric counterpart of pre_tool_use's updated_input, applied to tool RESULTS instead of tool ARGUMENTS. The redact_secrets builtin uses this event for the third leg of the redact_secrets feature; custom rewriters can also truncate excessive output, scrub PII, or normalise tool dialects. Tool-matched, like pre_tool_use / post_tool_use.", "items": { "$ref": "#/definitions/HookMatcherConfig" } @@ -885,7 +1022,7 @@ }, "type": { "type": "string", - "description": "Type of hook. 'command' executes a shell command; 'builtin' invokes a named in-process Go function registered by the runtime; 'model' asks an LLM and translates its reply into the hook's native output (used for LLM-as-a-judge pre_tool_use, summarizers, etc., with no Go code). The docker-agent runtime ships these builtins: 'add_date' (turn_start: today's date), 'add_environment_info' (session_start: cwd, git, OS, arch), 'add_prompt_files' (turn_start: contents of named files looked up in the workdir hierarchy and the home directory), 'add_git_status' (turn_start: `git status --short --branch`), 'add_git_diff' (turn_start: `git diff --stat`, or full diff with args=['full']), 'add_directory_listing' (session_start: top-level entries of cwd), 'add_user_info' (session_start: current OS user and hostname), 'add_recent_commits' (session_start: `git log --oneline -n N`, default N=10, override via args=['']), 'max_iterations' (before_llm_call: hard stop after N model calls; args=[''] required), 'redact_secrets' (pre_tool_use / before_llm_call / tool_response_transform: scrubs detected secrets from tool arguments, outgoing chat content, and tool output — the same builtin handles all three legs and dispatches on the event; the matching agent-level 'redact_secrets: true' flag auto-injects the entries for all three), 'unload' (on_agent_switch: POSTs `{\"model\": \"\"}` to the previous agent's DMR model endpoints — e.g. asks Docker Model Runner to release the GPU/RAM held by the just-departing model so the next agent's model can claim it. Pure HTTP, no provider-specific runtime coupling; non-DMR providers are silently skipped. Opt in by adding the entry to the agent's hooks.on_agent_switch list).", + "description": "Type of hook. 'command' executes a shell command; 'builtin' invokes a named in-process Go function registered by the runtime; 'model' asks an LLM and translates its reply into the hook's native output (used for LLM-as-a-judge pre_tool_use, summarizers, etc., with no Go code). The docker-agent runtime ships these builtins: 'add_date' (turn_start: today's date), 'add_environment_info' (session_start: cwd, git, OS, arch), 'add_prompt_files' (turn_start: contents of named files looked up in the workdir hierarchy and the home directory), 'add_git_status' (turn_start: `git status --short --branch`), 'add_git_diff' (turn_start: `git diff --stat`, or full diff with args=['full']), 'add_directory_listing' (session_start: top-level entries of cwd), 'add_user_info' (session_start: current OS user and hostname), 'add_recent_commits' (session_start: `git log --oneline -n N`, default N=10, override via args=['']), 'max_iterations' (before_llm_call: hard stop after N model calls; args=[''] required), 'redact_secrets' (pre_tool_use / before_llm_call / tool_response_transform: scrubs detected secrets from tool arguments, outgoing chat content, and tool output \u2014 the same builtin handles all three legs and dispatches on the event; the matching agent-level 'redact_secrets: true' flag auto-injects the entries for all three), 'unload' (on_agent_switch: POSTs `{\"model\": \"\"}` to the previous agent's DMR model endpoints \u2014 e.g. asks Docker Model Runner to release the GPU/RAM held by the just-departing model so the next agent's model can claim it. Pure HTTP, no provider-specific runtime coupling; non-DMR providers are silently skipped. Opt in by adding the entry to the agent's hooks.on_agent_switch list).", "enum": [ "command", "builtin", @@ -949,16 +1086,56 @@ "additionalProperties": false, "allOf": [ { - "if": {"properties": {"type": {"const": "command"}}, "required": ["type"]}, - "then": {"required": ["command"]} + "if": { + "properties": { + "type": { + "const": "command" + } + }, + "required": [ + "type" + ] + }, + "then": { + "required": [ + "command" + ] + } }, { - "if": {"properties": {"type": {"const": "builtin"}}, "required": ["type"]}, - "then": {"required": ["command"]} + "if": { + "properties": { + "type": { + "const": "builtin" + } + }, + "required": [ + "type" + ] + }, + "then": { + "required": [ + "command" + ] + } }, { - "if": {"properties": {"type": {"const": "model"}}, "required": ["type"]}, - "then": {"required": ["model", "prompt"]} + "if": { + "properties": { + "type": { + "const": "model" + } + }, + "required": [ + "type" + ] + }, + "then": { + "required": [ + "model", + "prompt" + ] + } } ] }, @@ -1069,7 +1246,7 @@ ] }, "task_budget": { - "description": "Total-token budget for a full agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Limits the combined tokens spent on thinking, tool calls, and output across the whole task. Configurable on any Claude model — docker-agent does not gate by model name — but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.", + "description": "Total-token budget for a full agentic task (forwarded to Anthropic as `output_config.task_budget`, with the required `task-budgets-2026-03-13` beta header attached automatically). Limits the combined tokens spent on thinking, tool calls, and output across the whole task. Configurable on any Claude model \u2014 docker-agent does not gate by model name \u2014 but at the time of writing only Claude Opus 4.7 honors it. Accepts an integer token count or an object {type: tokens, total: N}.", "oneOf": [ { "type": "integer", @@ -1081,7 +1258,9 @@ "properties": { "type": { "type": "string", - "enum": ["tokens"], + "enum": [ + "tokens" + ], "description": "Budget kind. Only \"tokens\" is supported today." }, "total": { @@ -1090,14 +1269,19 @@ "description": "Total budget value." } }, - "required": ["total"], + "required": [ + "total" + ], "additionalProperties": false } ], "examples": [ 64000, 128000, - { "type": "tokens", "total": 128000 } + { + "type": "tokens", + "total": 128000 + } ] }, "routing": { @@ -1489,9 +1673,17 @@ "type": "string" }, "examples": [ - ["."], - [".", "~/projects"], - ["/srv/data", "~/scratch"] + [ + "." + ], + [ + ".", + "~/projects" + ], + [ + "/srv/data", + "~/scratch" + ] ] }, "deny_list": { @@ -1501,8 +1693,14 @@ "type": "string" }, "examples": [ - ["~/.ssh", "~/.aws"], - ["/etc", "/var/lib"] + [ + "~/.ssh", + "~/.aws" + ], + [ + "/etc", + "/var/lib" + ] ] }, "defer": { @@ -1540,9 +1738,17 @@ "type": "string" }, "examples": [ - ["docker.com", "docs.docker.com"], - ["github.com", "raw.githubusercontent.com"], - ["*.example.com"] + [ + "docker.com", + "docs.docker.com" + ], + [ + "github.com", + "raw.githubusercontent.com" + ], + [ + "*.example.com" + ] ] }, "blocked_domains": { @@ -1552,14 +1758,21 @@ "type": "string" }, "examples": [ - ["internal.example.com"], - ["169.254.169.254"], - ["169.254.0.0/16", "10.0.0.0/8"] + [ + "internal.example.com" + ], + [ + "169.254.169.254" + ], + [ + "169.254.0.0/16", + "10.0.0.0/8" + ] ] }, "allow_private_ips": { "type": "boolean", - "description": "Opt in to dialling non-public IP addresses (only valid for type 'fetch'). By default the fetch tool refuses connections — after DNS resolution, so DNS rebinding is also blocked — to loopback, RFC1918 private ranges, link-local (including the cloud metadata endpoint at 169.254.169.254), multicast and the unspecified address. Set this to true when an agent legitimately needs to call internal services. 'allowed_domains' / 'blocked_domains' are evaluated independently and still apply." + "description": "Opt in to dialling non-public IP addresses (only valid for type 'fetch'). By default the fetch tool refuses connections \u2014 after DNS resolution, so DNS rebinding is also blocked \u2014 to loopback, RFC1918 private ranges, link-local (including the cloud metadata endpoint at 169.254.169.254), multicast and the unspecified address. Set this to true when an agent legitimately needs to call internal services. 'allowed_domains' / 'blocked_domains' are evaluated independently and still apply." }, "url": { "type": "string", @@ -1568,7 +1781,7 @@ }, "headers": { "type": "object", - "description": "HTTP headers (supports ${env.VAR} interpolation). Valid for type 'openapi', 'a2a', and 'fetch'. For 'fetch', the headers are sent on every request the toolset issues — typical use is supplying API tokens (e.g. Authorization: Bearer ${env.MY_TOKEN}). Caller-supplied values override the format-driven Accept header and the default User-Agent.", + "description": "HTTP headers (supports ${env.VAR} interpolation). Valid for type 'openapi', 'a2a', and 'fetch'. For 'fetch', the headers are sent on every request the toolset issues \u2014 typical use is supplying API tokens (e.g. Authorization: Bearer ${env.MY_TOKEN}). Caller-supplied values override the format-driven Accept header and the default User-Agent.", "additionalProperties": { "type": "string" } diff --git a/pkg/cli/runner.go b/pkg/cli/runner.go index 4889c187d..a68d9153c 100644 --- a/pkg/cli/runner.go +++ b/pkg/cli/runner.go @@ -334,8 +334,11 @@ func PrepareUserMessage(ctx context.Context, rt runtime.Runtime, userInput, glob // This must happen before the message is added to the session so the // next runtime turn runs on the right agent. if cmd, _, ok := runtime.LookupCommand(ctx, rt, userInput); ok && cmd.Agent != "" { + // If the agent switch fails, we must not proceed with sending the message + // to the wrong agent. Return an empty message to signal the error. if err := rt.SetCurrentAgent(cmd.Agent); err != nil { slog.WarnContext(ctx, "Failed to switch agent for /command", "agent", cmd.Agent, "error", err) + return session.UserMessage(""), "" } } // Parse for /attach commands in the message diff --git a/pkg/cli/runner_test.go b/pkg/cli/runner_test.go index 4c14f7b63..d34e8aa7b 100644 --- a/pkg/cli/runner_test.go +++ b/pkg/cli/runner_test.go @@ -3,11 +3,13 @@ package cli import ( "bytes" "context" + "errors" "sync" "testing" "gotest.tools/v3/assert" + "github.com/docker/docker-agent/pkg/config/types" "github.com/docker/docker-agent/pkg/runtime" "github.com/docker/docker-agent/pkg/session" "github.com/docker/docker-agent/pkg/sessiontitle" @@ -27,6 +29,28 @@ type mockRuntime struct { elicitationLastAction tools.ElicitationAction } +// mockRuntimeWithOverrides extends mockRuntime to allow method overriding for testing +type mockRuntimeWithOverrides struct { + *mockRuntime + + setCurrentAgentFn func(string) error + currentAgentInfoFn func(context.Context) runtime.CurrentAgentInfo +} + +func (m *mockRuntimeWithOverrides) SetCurrentAgent(name string) error { + if m.setCurrentAgentFn != nil { + return m.setCurrentAgentFn(name) + } + return m.mockRuntime.SetCurrentAgent(name) +} + +func (m *mockRuntimeWithOverrides) CurrentAgentInfo(ctx context.Context) runtime.CurrentAgentInfo { + if m.currentAgentInfoFn != nil { + return m.currentAgentInfoFn(ctx) + } + return m.mockRuntime.CurrentAgentInfo(ctx) +} + func (m *mockRuntime) CurrentAgentName() string { return "test" } func (m *mockRuntime) CurrentAgentInfo(context.Context) runtime.CurrentAgentInfo { return runtime.CurrentAgentInfo{Name: "test"} @@ -247,3 +271,153 @@ func TestMaxIterationsSafetyCapJSONMode(t *testing.T) { } assert.Equal(t, resumes[maxAutoExtensions].Type, runtime.ResumeTypeReject) } + +// TestPrepareUserMessage_AgentSwitching tests that PrepareUserMessage correctly +// handles agent-switching commands and returns empty messages on switch failures. +func TestPrepareUserMessage_AgentSwitching(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + userInput string + commandAgent string + setAgentErr error + expectedContent string + expectedAttach string + expectAgentSwitch bool + }{ + { + name: "agent switch succeeds with trailing args", + userInput: "/plan design a login flow", + commandAgent: "planner", + setAgentErr: nil, + expectedContent: "design a login flow", + expectedAttach: "", + expectAgentSwitch: true, + }, + { + name: "agent switch succeeds without trailing args", + userInput: "/plan", + commandAgent: "planner", + setAgentErr: nil, + expectedContent: "", + expectedAttach: "", + expectAgentSwitch: true, + }, + { + name: "agent switch fails - returns empty message", + userInput: "/plan design a login flow", + commandAgent: "planner", + setAgentErr: errors.New("agent not found"), + expectedContent: "", + expectedAttach: "", + expectAgentSwitch: true, + }, + { + name: "non-agent command - no switch", + userInput: "/test regular command", + commandAgent: "", + setAgentErr: nil, + expectedContent: "This is the test instruction regular command", + expectedAttach: "", + expectAgentSwitch: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + // Create a mock runtime that tracks SetCurrentAgent calls + var setAgentCalled bool + var setAgentName string + rt := &mockRuntimeWithOverrides{ + mockRuntime: &mockRuntime{events: []runtime.Event{}}, + } + + // Override SetCurrentAgent to track calls and return the test error + rt.setCurrentAgentFn = func(name string) error { + setAgentCalled = true + setAgentName = name + return tt.setAgentErr + } + + // Override CurrentAgentInfo to return test commands + rt.currentAgentInfoFn = func(context.Context) runtime.CurrentAgentInfo { + commands := make(map[string]types.Command) + if tt.commandAgent != "" { + commands["plan"] = types.Command{ + Description: "Hand off to the planner", + Agent: tt.commandAgent, + } + } else { + commands["test"] = types.Command{ + Instruction: "This is the test instruction", + } + } + return runtime.CurrentAgentInfo{ + Name: "test", + Commands: commands, + } + } + + msg, attachPath := PrepareUserMessage(t.Context(), rt, tt.userInput, "") + + // Verify agent switch was called (or not) + assert.Equal(t, tt.expectAgentSwitch, setAgentCalled, "SetCurrentAgent call mismatch") + if tt.expectAgentSwitch && setAgentCalled { + assert.Equal(t, tt.commandAgent, setAgentName, "Wrong agent name passed to SetCurrentAgent") + } + + // Verify message content + assert.Equal(t, tt.expectedContent, msg.Message.Content, "Message content mismatch") + assert.Equal(t, tt.expectedAttach, attachPath, "Attachment path mismatch") + }) + } +} + +func TestPrepareUserMessage_EmptyMessageForAgentOnlyCommand(t *testing.T) { + t.Parallel() + + rt := &mockRuntimeWithOverrides{ + mockRuntime: &mockRuntime{}, + } + rt.currentAgentInfoFn = func(context.Context) runtime.CurrentAgentInfo { + return runtime.CurrentAgentInfo{ + Name: "test", + Commands: map[string]types.Command{ + "plan": {Agent: "planner"}, // agent-only, no instruction + }, + } + } + + msg, attachPath := PrepareUserMessage(t.Context(), rt, "/plan", "") + + // Agent-only command with no args should produce empty message + assert.Equal(t, "", msg.Message.Content, "Expected empty message for agent-only command with no args") + assert.Equal(t, "", attachPath, "Expected no attachment") +} + +// TestPrepareUserMessage_CommandResolution tests that commands are resolved +// correctly before agent switching. +func TestPrepareUserMessage_CommandResolution(t *testing.T) { + t.Parallel() + + rt := &mockRuntimeWithOverrides{ + mockRuntime: &mockRuntime{}, + } + rt.currentAgentInfoFn = func(context.Context) runtime.CurrentAgentInfo { + return runtime.CurrentAgentInfo{ + Name: "test", + Commands: map[string]types.Command{ + "fix": { + Instruction: "Fix the file ${args[0]}", + }, + }, + } + } + + msg, _ := PrepareUserMessage(t.Context(), rt, "/fix main.go", "") + + assert.Equal(t, "Fix the file main.go", msg.Message.Content, "Command should be resolved with args") +} diff --git a/pkg/config/types/commands.go b/pkg/config/types/commands.go index 7c0dbcb62..431dbfbd0 100644 --- a/pkg/config/types/commands.go +++ b/pkg/config/types/commands.go @@ -147,6 +147,10 @@ func parseCommandValue(v any) (Command, error) { if inst == "" && desc == "" && agent == "" { return Command{}, errors.New("command must have at least 'instruction', 'description' or 'agent'") } + // Fallback: if no instruction is provided but we have a description (and no + // agent target), use the description as the instruction. This allows shorthand + // command definitions like `{"cmd": "Do something"}` where the description + // doubles as the instruction text. if inst == "" && agent == "" { inst = desc } diff --git a/pkg/tui/handlers.go b/pkg/tui/handlers.go index 81a56e919..b3905f5bb 100644 --- a/pkg/tui/handlers.go +++ b/pkg/tui/handlers.go @@ -636,13 +636,24 @@ func (m *appModel) handleAgentCommand(command string) (tea.Model, tea.Cmd) { var cmds []tea.Cmd switchSucceeded := true if ok && cmd.Agent != "" && cmd.Agent != m.sessionState.CurrentAgentName() { + // Attempt to switch agents. If the switch fails, handleSwitchAgent + // returns an error notification command. We check if the agent actually + // changed to determine success, rather than relying on the command type. prevAgent := m.sessionState.CurrentAgentName() switched, switchCmd := m.handleSwitchAgent(cmd.Agent) - m = switched.(*appModel) + var ok bool + if m, ok = switched.(*appModel); !ok { + // This should never happen, but if it does, log and continue with the original model + slog.WarnContext(ctx, "handleSwitchAgent returned unexpected type", "type", fmt.Sprintf("%T", switched)) + switchSucceeded = false + } else { + // Check if the agent actually changed to determine if the switch succeeded. + // If it failed, we must not send the message to the wrong agent. + switchSucceeded = m.sessionState.CurrentAgentName() != prevAgent + } if switchCmd != nil { cmds = append(cmds, switchCmd) } - switchSucceeded = m.sessionState.CurrentAgentName() != prevAgent } if resolved != "" && switchSucceeded {