Skip to content

Commit 56088ee

Browse files
committed
perf: improve task management behavior
1 parent 59c7d19 commit 56088ee

File tree

15 files changed

+733
-87
lines changed

15 files changed

+733
-87
lines changed

internal/agent/events.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const (
1212
ReminderRepeatToolCall RuntimeReminderKind = "repeat_tool_call"
1313
ReminderPostStopValidation RuntimeReminderKind = "post_stop_validation"
1414
ReminderSkillPaths RuntimeReminderKind = "skill_paths"
15+
ReminderTaskManagement RuntimeReminderKind = "task_management"
1516
)
1617

1718
type CompactionKind string

internal/agent/runtime_policy.go

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,14 @@ func newSessionRuntimePolicy(session *Session) *sessionRuntimePolicy {
4646
return &sessionRuntimePolicy{session: session}
4747
}
4848

49-
func (p *sessionRuntimePolicy) beforePrompt() {
49+
func (p *sessionRuntimePolicy) beforeUserPrompt(blocks []agentcore.ContentBlock) {
50+
if reminder, ok := taskManagementReminderForPrompt(blocks, p.session.TaskSnapshot()); ok {
51+
p.session.queueRuntimeReminder(
52+
"task_management:pre_prompt",
53+
ReminderTaskManagement,
54+
reminder,
55+
)
56+
}
5057
}
5158

5259
func (p *sessionRuntimePolicy) handleEvent(ev agentcore.Event) {
@@ -107,6 +114,7 @@ func (p *sessionRuntimePolicy) trackToolEnd(ev agentcore.Event) {
107114
p.session.mu.Unlock()
108115

109116
p.detectRepeatedCalls(record, recent)
117+
p.detectTaskManagementGap()
110118
}
111119

112120
func (p *sessionRuntimePolicy) detectRepeatedCalls(current toolCallFingerprint, recent []toolCallFingerprint) {
@@ -127,7 +135,27 @@ func (p *sessionRuntimePolicy) detectRepeatedCalls(current toolCallFingerprint,
127135
p.session.deliverRuntimeReminder(
128136
"repeat_tool_call:"+current.Tool+":"+current.ArgsHash,
129137
ReminderRepeatToolCall,
130-
"<system-reminder>\n检测到你在重复调用同一个工具且参数基本相同。先总结当前已知信息、差距和下一步假设;避免在没有新信息时继续相同调用。\n</system-reminder>",
138+
"<system-reminder>\nYou are repeatedly calling the same tool with effectively the same arguments. Summarize what you already know, what is still missing, and your next hypothesis before making the same call again.\n</system-reminder>",
139+
)
140+
}
141+
}
142+
143+
func (p *sessionRuntimePolicy) detectTaskManagementGap() {
144+
s := p.session
145+
if s.taskStore == nil {
146+
return
147+
}
148+
149+
s.mu.Lock()
150+
turn := s.currentTurn
151+
s.mu.Unlock()
152+
153+
snap := s.taskStore.Snapshot()
154+
if key, reminder, ok := taskManagementReminderForTurn(turn, snap); ok {
155+
p.session.deliverRuntimeReminder(
156+
key,
157+
ReminderTaskManagement,
158+
reminder,
131159
)
132160
}
133161
}
@@ -317,7 +345,7 @@ func (p *sessionRuntimePolicy) runPostStopValidation() {
317345
"post_stop_validation",
318346
ReminderPostStopValidation,
319347
fmt.Sprintf(
320-
"<system-reminder>\nPostStopValidation hook 检查失败,请根据以下输出修复问题:\n%s\n</system-reminder>",
348+
"<system-reminder>\nThe PostStopValidation hook failed. Fix the problem based on the following output:\n%s\n</system-reminder>",
321349
failOutput,
322350
),
323351
)

internal/agent/session.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,13 @@ func (s *Session) TaskSnapshot() localtools.TaskSnapshot {
215215
return s.taskStore.Snapshot()
216216
}
217217

218+
func (s *Session) ResetTaskList() error {
219+
if s.taskStore == nil {
220+
return nil
221+
}
222+
return s.taskStore.Reset()
223+
}
224+
218225
func (s *Session) resetHarnessStateLocked() {
219226
s.generation++
220227
s.runtimeReminders = nil

internal/agent/session_runtime.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ func (s *Session) Prompt(text string) error {
2222
s.beforePrompt()
2323
}
2424
if s.runtime != nil {
25-
s.runtime.beforePrompt()
25+
s.runtime.beforeUserPrompt([]agentcore.ContentBlock{agentcore.TextBlock(text)})
2626
}
2727

2828
var msgs []agentcore.AgentMessage
@@ -40,7 +40,7 @@ func (s *Session) PromptWithBlocks(blocks []agentcore.ContentBlock) error {
4040
s.beforePrompt()
4141
}
4242
if s.runtime != nil {
43-
s.runtime.beforePrompt()
43+
s.runtime.beforeUserPrompt(blocks)
4444
}
4545

4646
var msgs []agentcore.AgentMessage

internal/agent/session_test.go

Lines changed: 130 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"errors"
7+
"fmt"
78
"strings"
89
"sync"
910
"testing"
@@ -14,6 +15,7 @@ import (
1415
"github.com/voocel/codebot/internal/config"
1516
"github.com/voocel/codebot/internal/skill"
1617
"github.com/voocel/codebot/internal/storage"
18+
localtools "github.com/voocel/codebot/internal/tools"
1719
)
1820

1921
type stubChatModel struct{}
@@ -93,7 +95,7 @@ func (m *scriptedReminderModel) Generate(
9395
if msg.Role == agentcore.RoleUser && strings.Contains(msg.TextContent(), "<system-reminder>") {
9496
sawInjectedReminder = true
9597
}
96-
if msg.Role == agentcore.RoleUser && strings.Contains(msg.TextContent(), "重复调用同一个工具") {
98+
if msg.Role == agentcore.RoleUser && strings.Contains(msg.TextContent(), "repeatedly calling the same tool") {
9799
m.secondCallSawReminder = true
98100
}
99101
}
@@ -597,16 +599,16 @@ func TestBuildUserMessagePrependsRuntimeRemindersBeforeStaticReminders(t *testin
597599
Agent: ag,
598600
Settings: config.Resolved{MaxTurns: 30},
599601
Cwd: t.TempDir(),
600-
Reminders: []string{"<system-reminder>\n静态提醒\n</system-reminder>"},
602+
Reminders: []string{"<system-reminder>\nstatic reminder\n</system-reminder>"},
601603
})
602604
t.Cleanup(s.Close)
603605

604-
s.queueRuntimeReminder("loop", ReminderRepeatToolCall, "<system-reminder>\n动态提醒\n</system-reminder>")
605-
msg := s.buildUserMessage(agentcore.TextBlock("用户输入"))
606+
s.queueRuntimeReminder("loop", ReminderRepeatToolCall, "<system-reminder>\nruntime reminder\n</system-reminder>")
607+
msg := s.buildUserMessage(agentcore.TextBlock("user input"))
606608
if len(msg.Content) != 3 {
607609
t.Fatalf("expected 3 content blocks, got %d", len(msg.Content))
608610
}
609-
if !strings.Contains(msg.Content[0].Text, "动态提醒") || !strings.Contains(msg.Content[1].Text, "静态提醒") {
611+
if !strings.Contains(msg.Content[0].Text, "runtime reminder") || !strings.Contains(msg.Content[1].Text, "static reminder") {
610612
t.Fatalf("unexpected content ordering: %#v", msg.Content)
611613
}
612614
}
@@ -651,8 +653,8 @@ func TestRepeatedToolCallQueuesRuntimeReminder(t *testing.T) {
651653
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecEnd, ToolID: toolID, Tool: "read"})
652654
}
653655

654-
msg := s.buildUserMessage(agentcore.TextBlock("继续"))
655-
if len(msg.Content) == 0 || !strings.Contains(msg.Content[0].Text, "重复调用同一个工具") {
656+
msg := s.buildUserMessage(agentcore.TextBlock("continue"))
657+
if len(msg.Content) == 0 || !strings.Contains(msg.Content[0].Text, "repeatedly calling the same tool") {
656658
t.Fatalf("expected repeated-call reminder, got %#v", msg.Content)
657659
}
658660
}
@@ -671,7 +673,7 @@ func TestDeliverRuntimeReminderSteersCurrentRun(t *testing.T) {
671673
})
672674
t.Cleanup(s.Close)
673675

674-
if err := s.Prompt("开始"); err != nil {
676+
if err := s.Prompt("start"); err != nil {
675677
t.Fatalf("prompt: %v", err)
676678
}
677679
waitFor(t, time.Second, func() bool {
@@ -685,8 +687,8 @@ func TestContinueWithRuntimeReminderAutoContinuesWhenIdle(t *testing.T) {
685687
model := &scriptedReminderModel{}
686688
ag := agentcore.NewAgent(agentcore.WithModel(model), agentcore.WithMaxTurns(10))
687689
if err := ag.SetMessages([]agentcore.AgentMessage{
688-
textMessage(agentcore.RoleUser, "初始任务"),
689-
textMessage(agentcore.RoleAssistant, "任务已完成。"),
690+
textMessage(agentcore.RoleUser, "initial task"),
691+
textMessage(agentcore.RoleAssistant, "task completed."),
690692
}); err != nil {
691693
t.Fatalf("set messages: %v", err)
692694
}
@@ -697,12 +699,129 @@ func TestContinueWithRuntimeReminderAutoContinuesWhenIdle(t *testing.T) {
697699
})
698700
t.Cleanup(s.Close)
699701

700-
s.continueWithRuntimeReminder("test_reminder:1:0", ReminderRepeatToolCall, "<system-reminder>\n测试运行时提醒。\n</system-reminder>")
702+
s.continueWithRuntimeReminder("test_reminder:1:0", ReminderRepeatToolCall, "<system-reminder>\ntest runtime reminder.\n</system-reminder>")
701703
waitFor(t, time.Second, func() bool {
702704
return s.LastAssistantText() == "steered"
703705
})
704706
}
705707

708+
func TestComplexPromptQueuesTaskManagementReminder(t *testing.T) {
709+
t.Parallel()
710+
711+
ag := agentcore.NewAgent(agentcore.WithModel(&stubChatModel{}))
712+
s := NewSession(SessionConfig{
713+
Agent: ag,
714+
Settings: config.Resolved{MaxTurns: 30},
715+
Cwd: t.TempDir(),
716+
TaskStore: localtools.NewTaskStore(),
717+
})
718+
t.Cleanup(s.Close)
719+
720+
s.beginTurn()
721+
s.runtime.beforeUserPrompt([]agentcore.ContentBlock{
722+
agentcore.TextBlock("Build a complete project: a Go CLI app that lets AI agents autonomously write novels."),
723+
})
724+
725+
msg := s.buildUserMessage(agentcore.TextBlock("start"))
726+
if len(msg.Content) != 2 {
727+
t.Fatalf("expected one injected reminder plus user block, got %#v", msg.Content)
728+
}
729+
if !strings.Contains(msg.Content[0].Text, "<system-reminder>") {
730+
t.Fatalf("expected injected system reminder, got %#v", msg.Content)
731+
}
732+
if msg.Content[1].Text != "start" {
733+
t.Fatalf("expected task management reminder, got %#v", msg.Content)
734+
}
735+
}
736+
737+
func TestSimplePromptDoesNotQueueTaskManagementReminder(t *testing.T) {
738+
t.Parallel()
739+
740+
ag := agentcore.NewAgent(agentcore.WithModel(&stubChatModel{}))
741+
s := NewSession(SessionConfig{
742+
Agent: ag,
743+
Settings: config.Resolved{MaxTurns: 30},
744+
Cwd: t.TempDir(),
745+
TaskStore: localtools.NewTaskStore(),
746+
})
747+
t.Cleanup(s.Close)
748+
749+
s.beginTurn()
750+
s.runtime.beforeUserPrompt([]agentcore.ContentBlock{
751+
agentcore.TextBlock("How do I print hello world in Go?"),
752+
})
753+
754+
msg := s.buildUserMessage(agentcore.TextBlock("start"))
755+
if len(msg.Content) != 1 {
756+
t.Fatalf("expected no task reminder blocks, got %#v", msg.Content)
757+
}
758+
if strings.Contains(msg.Content[0].Text, "task_create") {
759+
t.Fatalf("unexpected task reminder in %#v", msg.Content)
760+
}
761+
}
762+
763+
func TestTaskManagementReminderQueuedForUntrackedOrBroadWork(t *testing.T) {
764+
t.Parallel()
765+
766+
cases := []struct {
767+
name string
768+
store *localtools.TaskStore
769+
run func(s *Session)
770+
}{
771+
{
772+
name: "missing task list",
773+
store: localtools.NewTaskStore(),
774+
run: func(s *Session) {
775+
args := json.RawMessage(`{"path":"main.go"}`)
776+
for i := 0; i < 3; i++ {
777+
toolID := fmt.Sprintf("read-%d", i)
778+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecStart, ToolID: toolID, Tool: "read", Args: args})
779+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecEnd, ToolID: toolID, Tool: "read"})
780+
}
781+
},
782+
},
783+
{
784+
name: "single broad task",
785+
store: func() *localtools.TaskStore {
786+
store := localtools.NewTaskStore()
787+
store.Create("Implement the entire project", "An overly broad task", "Implementing the entire project", nil)
788+
return store
789+
}(),
790+
run: func(s *Session) {
791+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecStart, ToolID: "task-1", Tool: "task_create"})
792+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecEnd, ToolID: "task-1", Tool: "task_create"})
793+
editArgs := json.RawMessage(`{"file":"main.go"}`)
794+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecStart, ToolID: "edit-1", Tool: "edit", Args: editArgs})
795+
s.handleAgentEvent(agentcore.Event{Type: agentcore.EventToolExecEnd, ToolID: "edit-1", Tool: "edit"})
796+
},
797+
},
798+
}
799+
800+
for _, tc := range cases {
801+
t.Run(tc.name, func(t *testing.T) {
802+
ag := agentcore.NewAgent(agentcore.WithModel(&stubChatModel{}))
803+
s := NewSession(SessionConfig{
804+
Agent: ag,
805+
Settings: config.Resolved{MaxTurns: 30},
806+
Cwd: t.TempDir(),
807+
TaskStore: tc.store,
808+
})
809+
t.Cleanup(s.Close)
810+
811+
s.beginTurn()
812+
tc.run(s)
813+
814+
msg := s.buildUserMessage(agentcore.TextBlock("continue"))
815+
if len(msg.Content) != 2 {
816+
t.Fatalf("expected one injected reminder plus user block, got %#v", msg.Content)
817+
}
818+
if !strings.Contains(msg.Content[0].Text, "<system-reminder>") {
819+
t.Fatalf("expected injected system reminder, got %#v", msg.Content)
820+
}
821+
})
822+
}
823+
}
824+
706825
func TestRuntimeMetricsTrackCompactionSavings(t *testing.T) {
707826
t.Parallel()
708827

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package agent
2+
3+
import (
4+
"strings"
5+
6+
"github.com/voocel/agentcore"
7+
localtools "github.com/voocel/codebot/internal/tools"
8+
)
9+
10+
// Reminder texts injected into the conversation to steer the model towards
// maintaining a task list. Each is wrapped in <system-reminder> tags.
const (
	// taskManagementPromptReminder is returned by
	// taskManagementReminderForPrompt for prompts that look like multi-step work.
	taskManagementPromptReminder = "<system-reminder>\nThis is a multi-step implementation task. Create and maintain a task list before going deeper. Break the work into concrete tasks, mark a task in_progress before starting it, mark it completed immediately after finishing it, and keep moving to the next unblocked task.\n</system-reminder>"

	// taskManagementMissingReminder is returned by taskManagementReminderForTurn
	// when an active turn has recorded no task mutations.
	taskManagementMissingReminder = "<system-reminder>\nYou are doing multi-step work without maintaining a task list. Create concrete tasks now instead of continuing without structure.\n</system-reminder>"

	// taskManagementExpandSingleReminder is returned by
	// taskManagementReminderForTurn when the task list holds exactly one task.
	taskManagementExpandSingleReminder = "<system-reminder>\nYour task list is too broad for the current scope. Split the single broad task into multiple more specific tasks and keep their statuses up to date.\n</system-reminder>"
)
15+
16+
func taskManagementReminderForPrompt(blocks []agentcore.ContentBlock, snap localtools.TaskSnapshot) (string, bool) {
17+
if snap.Total > 0 && !allTasksCompleted(snap) {
18+
return "", false
19+
}
20+
21+
text := strings.ToLower(strings.TrimSpace(textContentFromBlocks(blocks)))
22+
if text == "" || !looksLikeComplexTaskRequest(text) {
23+
return "", false
24+
}
25+
return taskManagementPromptReminder, true
26+
}
27+
28+
func taskManagementReminderForTurn(turn TurnOutcomeSnapshot, snap localtools.TaskSnapshot) (key, reminder string, ok bool) {
29+
if turn.ReadOnlyToolCalls < 3 && turn.CodeEditToolCalls == 0 {
30+
return "", "", false
31+
}
32+
33+
switch {
34+
case turn.TaskMutations == 0:
35+
return "task_management:missing", taskManagementMissingReminder, true
36+
case snap.Total == 1 && (turn.ReadOnlyToolCalls >= 3 || turn.CodeEditToolCalls > 0):
37+
return "task_management:expand_single", taskManagementExpandSingleReminder, true
38+
default:
39+
return "", "", false
40+
}
41+
}
42+
43+
func textContentFromBlocks(blocks []agentcore.ContentBlock) string {
44+
var parts []string
45+
for _, block := range blocks {
46+
if strings.TrimSpace(block.Text) != "" {
47+
parts = append(parts, block.Text)
48+
}
49+
}
50+
return strings.Join(parts, "\n")
51+
}
52+
53+
// looksLikeComplexTaskRequest is a heuristic that reports whether text reads
// like a request for multi-step work. It returns true when text contains any
// of the trigger keywords, contains at least two different kinds of list
// separators, or is at least 60 runes long. Empty text is never complex.
//
// Matching is by plain substring and the ASCII keywords are lowercase;
// taskManagementReminderForPrompt lower-cases the text before calling.
func looksLikeComplexTaskRequest(text string) bool {
	const (
		minSeparatorKinds = 2
		minRuneLength     = 60
	)

	if len(text) == 0 {
		return false
	}

	triggers := [...]string{
		// "complete project/app/system/product" (Chinese).
		"\u5b8c\u6574\u9879\u76ee", "\u5b8c\u6574\u7684\u9879\u76ee", "\u5b8c\u6574\u5e94\u7528", "\u5b8c\u6574\u7cfb\u7edf", "\u5b8c\u6574\u4ea7\u54c1",
		// "CLI app", "web app", "server side", "front and back end", "scaffold" (Chinese).
		"cli\u5e94\u7528", "web\u5e94\u7528", "\u670d\u52a1\u7aef", "\u524d\u540e\u7aef", "\u811a\u624b\u67b6",
		"implement", "build", "create", "scaffold", "full project", "full app",
		// "refactor", "set up", "develop", "implement", "design and implement", "from scratch" (Chinese).
		"\u91cd\u6784", "\u642d\u5efa", "\u5f00\u53d1", "\u5b9e\u73b0", "\u8bbe\u8ba1\u5e76\u5b9e\u73b0", "\u4ece\u96f6\u5f00\u59cb",
	}
	for _, trigger := range triggers {
		if strings.Contains(text, trigger) {
			return true
		}
	}

	// Count how many distinct separator kinds appear (not how often each one
	// occurs): newline, full-width comma, ASCII comma, ideographic comma,
	// the Chinese "as well as", " and ", " then ".
	kinds := 0
	for _, sep := range [...]string{"\n", "\uff0c", ",", "\u3001", "\u4ee5\u53ca", " and ", " then "} {
		if !strings.Contains(text, sep) {
			continue
		}
		kinds++
		if kinds >= minSeparatorKinds {
			return true
		}
	}

	// Length is measured in runes so multi-byte text is not over-counted.
	return len([]rune(text)) >= minRuneLength
}
82+
83+
func allTasksCompleted(snap localtools.TaskSnapshot) bool {
84+
return snap.Total > 0 && snap.Pending == 0 && snap.InProgress == 0
85+
}

0 commit comments

Comments
 (0)