Merge pull request #667 from Yumiue/codex/plan-build-todo-semantics-659

phantom5099 · web-flow · commit b2690415d2c1 · 2026-05-20T19:00:46.000+08:00
调整 Plan/Build Todo 语义
diff --git a/docs/session-todo-design.md b/docs/session-todo-design.md
@@ -58,3 +58,10 @@
 - `Todo` 是更细粒度的结构化执行状态
 - `Todo` 不直接拼入模型消息历史
 - 如需让 `TaskState` 汇总 Todo，应在 runtime/context 层显式投影，而不是复用同一个字段
+
+## 与 Plan Mode 的关系
+
+- `CurrentPlan` 是计划上下文，表示 plan 模式产出的草案或已批准计划
+- `Session.Todos` 是 build 模式的执行进度状态，不由 plan 模式自动创建或维护
+- plan 模式只能研究、澄清和产出计划；即使计划正文包含旧版 `plan_spec.todos`，runtime 也不会把它自动灌入 `Session.Todos`
+- build 模式开始复杂执行且没有当前 Todo State（或其中只剩已完成、失败、已取消等终态 todo）时，应通过 `todo_write action="plan"` 或 `todo_write action="add"` 显式创建本轮执行 todo
diff --git a/internal/context/builder_test.go b/internal/context/builder_test.go
@@ -187,6 +187,36 @@ func TestDefaultBuilderBuildIncludesPlanSections(t *testing.T) {
 	}
 }
 
+func TestDefaultBuilderBuildPlanModeDoesNotRequireTodoWrite(t *testing.T) {
+	t.Parallel()
+
+	builder := NewBuilder()
+	got, err := builder.Build(stdcontext.Background(), BuildInput{
+		AgentMode: agentsession.AgentModePlan,
+		PlanStage: "plan",
+		Metadata:  testMetadata(t.TempDir()),
+	})
+	if err != nil {
+		t.Fatalf("Build() error = %v", err)
+	}
+	if !strings.Contains(got.SystemPrompt, "Do not create execution todos in plan mode") {
+		t.Fatalf("expected plan mode to forbid execution todo creation, got %q", got.SystemPrompt)
+	}
+	if !strings.Contains(got.SystemPrompt, "the current mode permits execution todo updates") {
+		t.Fatalf("expected core todo guidance to be mode-gated, got %q", got.SystemPrompt)
+	}
+	for _, forbidden := range []string{
+		"maintain explicit todos with `todo_write`.",
+		"Maintain explicit task state and todos via `todo_write`.",
+		"keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) instead of relying on implicit memory",
+		"keep critical information in the task state using `todo_write` updates",
+	} {
+		if strings.Contains(got.SystemPrompt, forbidden) {
+			t.Fatalf("plan mode prompt should not contain hard todo_write guidance %q in %q", forbidden, got.SystemPrompt)
+		}
+	}
+}
+
 func TestDefaultBuilderBuildIncludesTodosBeforeSystemState(t *testing.T) {
 	t.Parallel()
 
diff --git a/internal/promptasset/assets_test.go b/internal/promptasset/assets_test.go
@@ -47,6 +47,7 @@ func TestCorePromptContainsOperationalGuidance(t *testing.T) {
 		"A subagent is a helper, not the source of final truth",
 		"Preserve existing user or repository changes",
 		"Use UTF-8-safe reads and edits",
+		"the current mode permits execution todo updates",
 	}
 	for _, want := range wantSubstrings {
 		if !strings.Contains(prompt, want) {
@@ -89,8 +90,12 @@ func TestPlanModePromptTemplates(t *testing.T) {
 		})
 	}
 
-	if !strings.Contains(PlanModePrompt("plan"), "summary_candidate.active_todo_ids") {
-		t.Fatalf("expected plan prompt to require active todo ownership")
+	if strings.Contains(PlanModePrompt("plan"), "summary_candidate.active_todo_ids") ||
+		strings.Contains(PlanModePrompt("plan"), "must not be empty") {
+		t.Fatalf("expected plan prompt not to require execution todo ownership")
+	}
+	if !strings.Contains(PlanModePrompt("plan"), "Do not create execution todos in plan mode") {
+		t.Fatalf("expected plan prompt to keep todos in build execution")
 	}
 	if !strings.Contains(PlanModePrompt("build_execute"), "create current-run required todos") {
 		t.Fatalf("expected build prompt to require direct-build todo bootstrap")
diff --git a/internal/promptasset/templates/context/plan_mode_build_execute.md b/internal/promptasset/templates/context/plan_mode_build_execute.md
@@ -4,7 +4,7 @@ You are currently in build execution.
 - If a current plan summary is attached, use it as guidance by default.
 - If the summary is insufficient for the current task, consult the attached full plan view when available.
 - If no current plan is attached, continue using task state, todos, and the conversation context.
-- If no current plan and no Todo State are attached, create current-run required todos with `todo_write` before the first substantive tool call for project analysis, documentation writing, code changes, multi-step debugging, or verification work.
+- If no Todo State is attached, create current-run required todos with `todo_write` before the first substantive tool call for project analysis, documentation writing, code changes, multi-step debugging, or verification work.
 - Do not update or complete todo IDs that are not present in the current Todo State; create new current-run todos instead.
 - Small necessary deviations are allowed, but explain why they are needed.
 - Do not create or rewrite the current full plan in this stage.
diff --git a/internal/promptasset/templates/context/plan_mode_plan.md b/internal/promptasset/templates/context/plan_mode_plan.md
@@ -6,9 +6,7 @@ You are currently in the planning stage.
 - **If no Current Plan section is attached, your first priority is to produce a plan.** The user has entered planning mode expecting a structured plan. Research the codebase as needed, then output a complete `plan_spec` + `summary_candidate` JSON. Do not end the turn with only a conversational answer when there is no existing plan.
 - If a Current Plan is already present, you may refine, replace, or discuss it. When the user asks a clarifying question or wants to explore options without committing to a new plan revision, you may answer conversationally without outputting planning JSON.
 - Only output a JSON object containing `plan_spec` and `summary_candidate` when you are explicitly creating or rewriting the current full plan.
-- `plan_spec` must include `goal`, `steps`, `constraints`, `todos`, and `open_questions`.
-- `plan_spec.todos` **must not be empty**. Populate it with the major actionable items that the plan requires. Each todo must have a unique `id`, a descriptive `content`, and `status: "pending"`. Without todos the plan has no executable work items and the build stage cannot proceed.
-- `summary_candidate` must include `goal`, `key_steps`, `constraints`, and `active_todo_ids`.
-- If a Todo State section is attached, decide which non-terminal todos still belong to the current plan.
-- Todos that still belong to the current plan must appear in `plan_spec.todos` and their IDs must appear in `summary_candidate.active_todo_ids`.
-- Todos that do not belong to the current plan must not be copied into the new plan; create replacement plan-owned todos when ongoing work is still needed.
+- `plan_spec` must include `goal`, `steps`, `constraints`, and `open_questions`.
+- `plan_spec.todos` is optional legacy data. Do not create execution todos in plan mode; build mode will create and maintain runtime todos when implementation starts.
+- `summary_candidate` must include `goal`, `key_steps`, and `constraints`.
+- If a Todo State section is attached, treat it as build execution progress only. Do not copy, rewrite, or complete those todos while planning.
diff --git a/internal/promptasset/templates/core/agent_identity.md b/internal/promptasset/templates/core/agent_identity.md
@@ -20,7 +20,7 @@ If instructions conflict, follow the higher-priority instruction and briefly sta
 
 Core workflow:
 1. Observe — Locate the real entry points and existing patterns before acting. Prefer targeted search and file reads over assumptions.
-2. Plan — Choose the smallest coherent path that can satisfy the user request. For multi-step work, maintain explicit todos with `todo_write`.
+2. Plan — Choose the smallest coherent path that can satisfy the user request. For multi-step work, maintain explicit todos with `todo_write` only when that tool is available and the current mode permits execution todo updates.
 3. Act — Call the minimum set of exposed tools needed to make progress. Prefer filesystem tools over bash.
 4. Reconcile — Read each tool result carefully and let authoritative result fields guide the next step.
 5. Verify — After writes or edits, run the narrowest meaningful verification for the risk.
diff --git a/internal/promptasset/templates/core/capabilities_plan.md b/internal/promptasset/templates/core/capabilities_plan.md
@@ -3,8 +3,8 @@ You are currently in plan mode. Write and edit tools are disabled. Only read and
 
 - Read and search files within the current workspace.
 - Run non-interactive shell commands for read-only inspection only.
-- Maintain explicit task state and todos via `todo_write`.
 - Ask clarifying questions when requirements are ambiguous or conflicting.
+- Produce or refine a plan, but do not create or update execution todos.
 - **Do not perform any write, edit, delete, or file mutation operations.** Use this stage only for research, analysis, and planning.
 
 ## Limitations
diff --git a/internal/promptasset/templates/core/context_management.md b/internal/promptasset/templates/core/context_management.md
@@ -1,6 +1,6 @@
 - The conversation context has a finite window. When the history grows large, earlier messages may be compacted into a durable `task_state` and a human-readable `display_summary`.
-- To cooperate with compaction, keep critical information in the task state using `todo_write` updates and explicit reasoning, rather than relying solely on conversational memory.
+- To cooperate with compaction, keep critical information in task state using `todo_write` updates only when that tool is available and the current mode permits execution todo updates; otherwise preserve the information in explicit reasoning and permitted outputs.
 - After a compact occurs, the durable `task_state` and `display_summary` become your source of truth for what has been accomplished and what remains. Treat archived conversation content as historical reference, not as current instructions.
 - When continuing after a compact, verify the current workspace state against the `task_state` before assuming files or changes from prior rounds still exist.
 - Do not treat archived `[compact_summary]` text as durable truth. Durable truth comes from `current_task_state` plus new source material.
-- Keep long-running task facts, decisions, blockers, and acceptance-relevant todos in durable task state instead of relying only on conversation history.
+- Keep long-running task facts, decisions, blockers, and acceptance-relevant todos in durable task state when the current mode permits task-state updates, instead of relying only on conversation history.
diff --git a/internal/promptasset/templates/core/tool_usage.md b/internal/promptasset/templates/core/tool_usage.md
@@ -38,7 +38,7 @@ For general file operations outside of codebase exploration, use `filesystem_*`
   - create directory: `filesystem_create_dir` (not `bash mkdir`)
   - remove directory: `filesystem_remove_dir` (not `bash rmdir` / `rm -rf`)
   These tools record their changes for checkpoint/rollback; equivalent `bash` commands produce reduced rollback coverage.
-- For multi-step implementation, debugging, refactoring, or long-running work, keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) instead of relying on implicit memory.
+- For multi-step implementation, debugging, refactoring, or long-running work, keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) when that tool is available and the current mode permits execution todo updates.
 - Create todos that map to real acceptance work, not vague activity.
 - Required todos are acceptance-relevant and must converge before finalization.
 - If the user clearly switches to a different task, do not carry unfinished todos forward blindly: mark each old todo `completed` only when the work is actually done, otherwise mark it `canceled` before planning or executing the new task.
diff --git a/internal/runtime/planning.go b/internal/runtime/planning.go
@@ -258,8 +258,6 @@ func buildPlanArtifact(current *agentsession.PlanArtifact, output planTurnOutput
 	return plan, nil
 }
 
-// applyCurrentPlanRevision 用新 revision 替换当前计划，并清理旧 revision 遗留的对齐状态。
-// resolvePlanDisplayText 优先保留模型对计划的额外说明文本，缺失时回退为规范计划正文。
 // resolvePlanDisplayText 优先保留模型对计划的额外说明文本，缺失时回退为规范计划正文。
 func resolvePlanDisplayText(output planTurnOutput, spec agentsession.PlanSpec) string {
 	display := strings.TrimSpace(output.DisplayText)
@@ -269,28 +267,11 @@ func resolvePlanDisplayText(output planTurnOutput, spec agentsession.PlanSpec) s
 	return strings.TrimSpace(agentsession.RenderPlanContent(spec))
 }
 
+// applyCurrentPlanRevision 用新 revision 替换当前计划，并清理计划对齐状态。
 func applyCurrentPlanRevision(session *agentsession.Session, plan *agentsession.PlanArtifact) bool {
 	if session == nil || plan == nil {
 		return false
 	}
-	// 新 revision 覆盖时，仅取消旧 plan 明确引用的非终态 todo
-	if oldPlan := session.CurrentPlan; oldPlan != nil && oldPlan.Revision < plan.Revision {
-		agentsession.CancelTodosByIDs(session.Todos, oldPlan.Summary.ActiveTodoIDs)
-	}
-	// 将 PlanSpec.Todos 中尚不存在于 session.Todos 的条目补入，
-	// 避免 plan 模式下模型后续通过 todo_write 引用这些 ID 时找不到。
-	for _, planTodo := range plan.Spec.Todos {
-		id := strings.TrimSpace(planTodo.ID)
-		if id == "" {
-			continue
-		}
-		if _, exists := session.FindTodo(id); exists {
-			continue
-		}
-		if err := session.AddTodo(planTodo); err != nil {
-			return false
-		}
-	}
 	session.CurrentPlan = plan
 	session.PlanApprovalPendingFullAlign = false
 	session.PlanCompletionPendingFullReview = false
diff --git a/internal/runtime/planning_test.go b/internal/runtime/planning_test.go
@@ -459,6 +459,55 @@ func TestApplyCurrentPlanRevisionNilGuards(t *testing.T) {
 	}
 }
 
+func TestApplyCurrentPlanRevisionDoesNotMutateExecutionTodos(t *testing.T) {
+	t.Parallel()
+
+	session := agentsession.New("plan revision keeps execution todos")
+	session.Todos = []agentsession.TodoItem{
+		{ID: "todo-exec", Content: "current build work", Status: agentsession.TodoStatusInProgress, Revision: 1},
+	}
+	session.CurrentPlan = &agentsession.PlanArtifact{
+		ID:       "plan-1",
+		Revision: 1,
+		Status:   agentsession.PlanStatusDraft,
+		Spec: agentsession.PlanSpec{
+			Goal:  "old plan",
+			Steps: []string{"old step"},
+		},
+		Summary: agentsession.SummaryView{
+			Goal:          "old plan",
+			KeySteps:      []string{"old step"},
+			ActiveTodoIDs: []string{"todo-old-plan"},
+		},
+	}
+
+	next := &agentsession.PlanArtifact{
+		ID:       "plan-1",
+		Revision: 2,
+		Status:   agentsession.PlanStatusDraft,
+		Spec: agentsession.PlanSpec{
+			Goal:  "new plan",
+			Steps: []string{"new step"},
+			Todos: []agentsession.TodoItem{
+				{ID: "todo-plan-only", Content: "legacy plan todo", Status: agentsession.TodoStatusPending},
+			},
+		},
+		Summary: agentsession.SummaryView{
+			Goal:          "new plan",
+			KeySteps:      []string{"new step"},
+			ActiveTodoIDs: []string{"todo-plan-only"},
+		},
+	}
+
+	if !applyCurrentPlanRevision(&session, next) {
+		t.Fatal("expected plan revision to apply")
+	}
+	if len(session.Todos) != 1 || session.Todos[0].ID != "todo-exec" ||
+		session.Todos[0].Status != agentsession.TodoStatusInProgress {
+		t.Fatalf("expected execution todos to remain untouched, got %+v", session.Todos)
+	}
+}
+
 func TestApproveCurrentPlanValidationErrors(t *testing.T) {
 	t.Parallel()
 
diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go
@@ -3954,6 +3954,9 @@ func TestServiceRunPlanModePersistsDraftPlan(t *testing.T) {
 	if saved.CurrentPlan.Status != agentsession.PlanStatusDraft {
 		t.Fatalf("Status = %q, want %q", saved.CurrentPlan.Status, agentsession.PlanStatusDraft)
 	}
+	if len(saved.Todos) != 0 {
+		t.Fatalf("expected plan mode not to create execution todos, got %+v", saved.Todos)
+	}
 	if saved.CurrentPlan.Spec.Goal != "为 runtime 引入 plan/build 模式" {
 		t.Fatalf("Goal = %q", saved.CurrentPlan.Spec.Goal)
 	}
@@ -4019,6 +4022,9 @@ func TestServiceRunPlanModeShowsExplanationTextOutsidePlanningJSON(t *testing.T)
 	if saved.CurrentPlan == nil || saved.CurrentPlan.Spec.Goal != "Preserve prose around planning JSON" {
 		t.Fatalf("expected current plan to be updated, got %+v", saved.CurrentPlan)
 	}
+	if len(saved.Todos) != 0 {
+		t.Fatalf("expected plan prose turn not to create execution todos, got %+v", saved.Todos)
+	}
 	if len(saved.Messages) != 3 {
 		t.Fatalf("message count = %d, want 3", len(saved.Messages))
 	}
diff --git a/internal/runtime/todo_bootstrap.go b/internal/runtime/todo_bootstrap.go
@@ -10,13 +10,15 @@ const todoBootstrapRequiredReason = "todo_bootstrap_required"
 
 const todoBootstrapRequiredReminder = `[Runtime Control]
 
-todo_bootstrap_required: This build run has no current plan and no active todos.
+todo_bootstrap_required: This build run has no active todos.
 
 Before project analysis, documentation writing, code changes, multi-step debugging, or verification work, call todo_write with action=plan or action=add to create required todos for this run.
 
+If a Current Plan is attached, use it only as planning context. Create current-run execution todos explicitly instead of assuming plan steps already exist as todos.
+
 Do not update or complete old todo IDs that are not present in the current Todo State.`
 
-// maybeAppendTodoBootstrapReminder 在 direct build 缺少 plan/todo 时注入一次结构化提醒。
+// maybeAppendTodoBootstrapReminder 在 build 缺少执行态 todo 时注入一次结构化提醒。
 func (s *Service) maybeAppendTodoBootstrapReminder(ctx context.Context, state *runState) error {
 	if !shouldInjectTodoBootstrapReminder(state) {
 		return nil
@@ -36,23 +38,20 @@ func shouldInjectTodoBootstrapReminder(state *runState) bool {
 	if agentsession.NormalizeAgentMode(session.AgentMode) != agentsession.AgentModeBuild {
 		return false
 	}
-	if hasActivePlanForTodoBootstrap(session.CurrentPlan) || len(session.Todos) > 0 {
+	if hasActiveTodoForBootstrap(session.Todos) {
 		return false
 	}
 	return true
 }
 
-// hasActivePlanForTodoBootstrap 判断当前 plan 是否仍可为 build 继承 todo 所有权。
-func hasActivePlanForTodoBootstrap(plan *agentsession.PlanArtifact) bool {
-	if plan == nil {
-		return false
-	}
-	switch agentsession.NormalizePlanStatus(plan.Status) {
-	case agentsession.PlanStatusDraft, agentsession.PlanStatusApproved:
-		return true
-	default:
-		return false
+// hasActiveTodoForBootstrap 判断会话中是否已有可继续推进的非终态 todo。
+func hasActiveTodoForBootstrap(todos []agentsession.TodoItem) bool {
+	for _, todo := range todos {
+		if !todo.Status.IsTerminal() {
+			return true
+		}
 	}
+	return false
 }
 
 const planBootstrapRequiredReason = "plan_bootstrap_required"
@@ -65,7 +64,7 @@ Before research, analysis, or conversational response, you MUST complete the fol
 
 1. Research the codebase as needed using read-only tools.
 2. Output a JSON object containing "plan_spec" and "summary_candidate" that defines the current plan.
-3. plan_spec.todos must be non-empty — include major actionable items with unique IDs and status "pending".
+3. Focus plan_spec on goal, steps, constraints, and open_questions. Do not create execution todos in plan mode.
 
 Do not end this turn without producing a plan.`
 
diff --git a/internal/runtime/todo_bootstrap_test.go b/internal/runtime/todo_bootstrap_test.go
@@ -38,7 +38,7 @@ func TestShouldInjectTodoBootstrapReminder(t *testing.T) {
 			want: true,
 		},
 		{
-			name: "active plan skips",
+			name: "active plan without execution todos injects",
 			state: runState{
 				session: agentsession.Session{
 					AgentMode: agentsession.AgentModeBuild,
@@ -49,10 +49,10 @@ func TestShouldInjectTodoBootstrapReminder(t *testing.T) {
 				userGoal:        "请分析项目并写文档",
 				planningEnabled: true,
 			},
-			want: false,
+			want: true,
 		},
 		{
-			name: "existing todo skips",
+			name: "existing active todo skips",
 			state: runState{
 				session: agentsession.Session{
 					AgentMode: agentsession.AgentModeBuild,
@@ -68,6 +68,37 @@ func TestShouldInjectTodoBootstrapReminder(t *testing.T) {
 			},
 			want: false,
 		},
+		{
+			name: "terminal todos only still injects",
+			state: runState{
+				session: agentsession.Session{
+					AgentMode: agentsession.AgentModeBuild,
+					Todos: []agentsession.TodoItem{
+						{
+							ID:       "todo-completed",
+							Content:  "done",
+							Status:   agentsession.TodoStatusCompleted,
+							Required: &required,
+						},
+						{
+							ID:       "todo-failed",
+							Content:  "failed",
+							Status:   agentsession.TodoStatusFailed,
+							Required: &required,
+						},
+						{
+							ID:       "todo-canceled",
+							Content:  "canceled",
+							Status:   agentsession.TodoStatusCanceled,
+							Required: &required,
+						},
+					},
+				},
+				userGoal:        "继续实现剩余工作",
+				planningEnabled: true,
+			},
+			want: true,
+		},
 		{
 			name: "plan mode skips",
 			state: runState{
diff --git a/internal/session/plan.go b/internal/session/plan.go
diff --git a/internal/tools/ask_user_tool_test.go b/internal/tools/ask_user_tool_test.go
diff --git a/internal/tools/manager_test.go b/internal/tools/manager_test.go
diff --git a/internal/tools/mode_filter.go b/internal/tools/mode_filter.go