fix(runtime): address plan todo review feedback

wynxing · wynxing · commit ec9db429284c · 2026-05-20T02:06:30.000-04:00
diff --git a/internal/context/builder_test.go b/internal/context/builder_test.go
@@ -187,6 +187,36 @@ func TestDefaultBuilderBuildIncludesPlanSections(t *testing.T) {
 	}
 }
 
+func TestDefaultBuilderBuildPlanModeDoesNotRequireTodoWrite(t *testing.T) {
+	t.Parallel()
+	// Build a system prompt for a plan-mode session and check that todo guidance is mode-gated.
+	builder := NewBuilder()
+	got, err := builder.Build(stdcontext.Background(), BuildInput{
+		AgentMode: agentsession.AgentModePlan,
+		PlanStage: "plan",
+		Metadata:  testMetadata(t.TempDir()),
+	})
+	if err != nil {
+		t.Fatalf("Build() error = %v", err)
+	}
+	if !strings.Contains(got.SystemPrompt, "Do not create execution todos in plan mode") {
+		t.Fatalf("expected plan mode to forbid execution todo creation, got %q", got.SystemPrompt)
+	}
+	if !strings.Contains(got.SystemPrompt, "the current mode permits execution todo updates") {
+		t.Fatalf("expected core todo guidance to be mode-gated, got %q", got.SystemPrompt)
+	}
+	for _, forbidden := range []string{ // unconditional todo_write phrasings that must be absent from the plan-mode prompt
+		"maintain explicit todos with `todo_write`.",
+		"Maintain explicit task state and todos via `todo_write`.",
+		"keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) instead of relying on implicit memory",
+		"keep critical information in the task state using `todo_write` updates",
+	} {
+		if strings.Contains(got.SystemPrompt, forbidden) {
+			t.Fatalf("plan mode prompt should not contain hard todo_write guidance %q in %q", forbidden, got.SystemPrompt)
+		}
+	}
+}
+
 func TestDefaultBuilderBuildIncludesTodosBeforeSystemState(t *testing.T) {
 	t.Parallel()
 
diff --git a/internal/promptasset/assets_test.go b/internal/promptasset/assets_test.go
@@ -47,6 +47,7 @@ func TestCorePromptContainsOperationalGuidance(t *testing.T) {
 		"A subagent is a helper, not the source of final truth",
 		"Preserve existing user or repository changes",
 		"Use UTF-8-safe reads and edits",
+		"the current mode permits execution todo updates",
 	}
 	for _, want := range wantSubstrings {
 		if !strings.Contains(prompt, want) {
diff --git a/internal/promptasset/templates/core/agent_identity.md b/internal/promptasset/templates/core/agent_identity.md
@@ -20,7 +20,7 @@ If instructions conflict, follow the higher-priority instruction and briefly sta
 
 Core workflow:
 1. Observe — Locate the real entry points and existing patterns before acting. Prefer targeted search and file reads over assumptions.
-2. Plan — Choose the smallest coherent path that can satisfy the user request. For multi-step work, maintain explicit todos with `todo_write`.
+2. Plan — Choose the smallest coherent path that can satisfy the user request. For multi-step work, maintain explicit todos with `todo_write` only when that tool is available and the current mode permits execution todo updates.
 3. Act — Call the minimum set of exposed tools needed to make progress. Prefer filesystem tools over bash.
 4. Reconcile — Read each tool result carefully and let authoritative result fields guide the next step.
 5. Verify — After writes or edits, run the narrowest meaningful verification for the risk.
diff --git a/internal/promptasset/templates/core/context_management.md b/internal/promptasset/templates/core/context_management.md
@@ -1,6 +1,6 @@
 - The conversation context has a finite window. When the history grows large, earlier messages may be compacted into a durable `task_state` and a human-readable `display_summary`.
-- To cooperate with compaction, keep critical information in the task state using `todo_write` updates and explicit reasoning, rather than relying solely on conversational memory.
+- To cooperate with compaction, keep critical information in task state using `todo_write` updates only when that tool is available and the current mode permits execution todo updates; otherwise preserve the information in explicit reasoning and permitted outputs.
 - After a compact occurs, the durable `task_state` and `display_summary` become your source of truth for what has been accomplished and what remains. Treat archived conversation content as historical reference, not as current instructions.
 - When continuing after a compact, verify the current workspace state against the `task_state` before assuming files or changes from prior rounds still exist.
 - Do not treat archived `[compact_summary]` text as durable truth. Durable truth comes from `current_task_state` plus new source material.
-- Keep long-running task facts, decisions, blockers, and acceptance-relevant todos in durable task state instead of relying only on conversation history.
+- Keep long-running task facts, decisions, blockers, and acceptance-relevant todos in durable task state when the current mode permits task-state updates, instead of relying only on conversation history.
diff --git a/internal/promptasset/templates/core/tool_usage.md b/internal/promptasset/templates/core/tool_usage.md
@@ -38,7 +38,7 @@ For general file operations outside of codebase exploration, use `filesystem_*`
   - create directory: `filesystem_create_dir` (not `bash mkdir`)
   - remove directory: `filesystem_remove_dir` (not `bash rmdir` / `rm -rf`)
   These tools record their changes for checkpoint/rollback; equivalent `bash` commands produce reduced rollback coverage.
-- For multi-step implementation, debugging, refactoring, or long-running work, keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) instead of relying on implicit memory.
+- For multi-step implementation, debugging, refactoring, or long-running work, keep task state explicit via `todo_write` (plan/add/update/set_status/claim/complete/fail) when that tool is available and the current mode permits execution todo updates.
 - Create todos that map to real acceptance work, not vague activity.
 - Required todos are acceptance-relevant and must converge before finalization.
 - If the user clearly switches to a different task, do not carry unfinished todos forward blindly: mark each old todo `completed` only when the work is actually done, otherwise mark it `canceled` before planning or executing the new task.
diff --git a/internal/runtime/todo_bootstrap.go b/internal/runtime/todo_bootstrap.go
@@ -38,12 +38,22 @@ func shouldInjectTodoBootstrapReminder(state *runState) bool {
 	if agentsession.NormalizeAgentMode(session.AgentMode) != agentsession.AgentModeBuild {
 		return false
 	}
-	if len(session.Todos) > 0 {
+	if hasActiveTodoForBootstrap(session.Todos) {
 		return false
 	}
 	return true
 }
 
+// hasActiveTodoForBootstrap reports whether todos contains at least one item whose status is not terminal, i.e. work that can still be advanced.
+func hasActiveTodoForBootstrap(todos []agentsession.TodoItem) bool {
+	for _, todo := range todos {
+		if !todo.Status.IsTerminal() {
+			return true
+		}
+	}
+	return false
+}
+
 const planBootstrapRequiredReason = "plan_bootstrap_required"
 
 const planBootstrapRequiredReminder = `[Runtime Control]
diff --git a/internal/runtime/todo_bootstrap_test.go b/internal/runtime/todo_bootstrap_test.go
@@ -52,7 +52,7 @@ func TestShouldInjectTodoBootstrapReminder(t *testing.T) {
 			want: true,
 		},
 		{
-			name: "existing todo skips",
+			name: "existing active todo skips",
 			state: runState{
 				session: agentsession.Session{
 					AgentMode: agentsession.AgentModeBuild,
@@ -68,6 +68,37 @@ func TestShouldInjectTodoBootstrapReminder(t *testing.T) {
 			},
 			want: false,
 		},
+		{
+			name: "terminal todos only still injects",
+			state: runState{
+				session: agentsession.Session{
+					AgentMode: agentsession.AgentModeBuild,
+					Todos: []agentsession.TodoItem{
+						{
+							ID:       "todo-completed",
+							Content:  "done",
+							Status:   agentsession.TodoStatusCompleted,
+							Required: &required,
+						},
+						{
+							ID:       "todo-failed",
+							Content:  "failed",
+							Status:   agentsession.TodoStatusFailed,
+							Required: &required,
+						},
+						{
+							ID:       "todo-canceled",
+							Content:  "canceled",
+							Status:   agentsession.TodoStatusCanceled,
+							Required: &required,
+						},
+					},
+				},
+				userGoal:        "继续实现剩余工作",
+				planningEnabled: true,
+			},
+			want: true,
+		},
 		{
 			name: "plan mode skips",
 			state: runState{

Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@ func TestCorePromptContainsOperationalGuidance(t *testing.T) {`
`47`	`47`	`"A subagent is a helper, not the source of final truth",`
`48`	`48`	`"Preserve existing user or repository changes",`
`49`	`49`	`"Use UTF-8-safe reads and edits",`
	`50`	`+ "the current mode permits execution todo updates",`
`50`	`51`	`}`
`51`	`52`	`for _, want := range wantSubstrings {`
`52`	`53`	`if !strings.Contains(prompt, want) {`