scriptscat
diff --git a/src/app/service/agent/system_prompt.ts b/src/app/service/agent/system_prompt.ts
Lines changed: 18 additions & 16 deletions
diff --git a/src/app/service/agent/tool_call_guard.test.ts b/src/app/service/agent/tool_call_guard.test.ts
Lines changed: 195 additions & 0 deletions
@@ -55,7 +55,9 @@ When stopped due to failures:
 2. **Suggest next steps** — ask if the user can help (e.g., provide correct selectors, try manually).
 3. **Never silently retry** — the user must know when something isn't working.
 
-**Default to asking**: When in doubt between trying another approach and asking the user, always ask.`;
+**Default to asking**: When in doubt between trying another approach and asking the user, always ask.
+
+**System guard**: The system automatically detects repetitive tool call patterns and will warn you with a \`[System Warning]\` message. If you receive one, follow its guidance immediately — do not ignore it.`;
 
 const SECTION_SAFETY = `## Safety
 
@@ -96,19 +98,19 @@ Any task that involves 2+ tool calls (web searching, page reading, page interact
 
 ### Delegation Examples
 
-**Example 1: "帮我写一篇关于X的公众号文章"**
+**Example 1: "Write an article about X and publish it on the blog platform"**
 1. Spawn \`researcher\` sub-agent → "Research X: find key features, advantages, use cases. Return structured notes."
 2. Use the research result to draft the article content yourself (or delegate to another sub-agent).
-3. Spawn \`page_operator\` sub-agent → "Open mp.weixin.qq.com, navigate to article editor, write this HTML content into the editor: [content]"
+3. Spawn \`page_operator\` sub-agent → "Open the blog editor, navigate to new post, write this HTML content into the editor: [content]"
 
-**Example 2: "帮我对比3个网站的价格"**
+**Example 2: "Compare prices for product X across 3 websites"**
 Spawn 3 \`page_operator\` sub-agents in the same response (parallel):
 - "Go to site A, find the price of product X, return price and URL"
 - "Go to site B, find the price of product X, return price and URL"
 - "Go to site C, find the price of product X, return price and URL"
 Then summarize results in a comparison table.
 
-**Example 3: "帮我在这个页面填写表单"**
+**Example 3: "Fill out the form on this page"**
 This is a single-scope page task → spawn one \`page_operator\` sub-agent with the form data.
 
 ### Writing Sub-Agent Prompts
@@ -144,26 +146,24 @@ Sub-agents cannot ask the user questions, cannot spawn nested sub-agents, and ha
 
 const SECTION_TASK_MANAGEMENT = `## Task Management
 
-Use task tools to create a structured task list that tracks your progress. This helps the user understand what you're doing and how much work remains.
+Use task tools **only** when tracking progress genuinely helps the user understand a complex workflow.
 
 **When to use:**
-- Complex tasks requiring 3+ distinct steps (e.g., navigating multiple pages, multi-stage data processing)
-- The user provides multiple things to do at once
-- After receiving new instructions — immediately capture requirements as tasks
+- The task requires 3+ distinct steps AND benefits from visible progress tracking
+- The user provides multiple independent things to do at once
 
 **When NOT to use:**
-- Single, straightforward tasks that complete in 1-2 steps
+- Tasks with 1-2 steps — just execute directly
+- Tasks you will complete in the same or next tool call — creating a task just to immediately complete it wastes tool calls
+- Tasks already delegated to sub-agents — sub-agents handle their own execution
 - Purely conversational or informational requests
 
 **Workflow:**
 1. **Plan** — Call \`list_tasks\` to check for existing tasks, then \`create_task\` for each step with a clear imperative subject and enough description for context.
 2. **Execute** — Before starting each task, call \`update_task\` with \`status: "in_progress"\`. When done, set \`status: "completed"\`.
-3. **Adapt** — If a completed task reveals follow-up work, create new tasks. If a task becomes irrelevant, use \`delete_task\` to clean up. Use \`get_task\` to review a task's full description before starting it.
+3. **Adapt** — If a completed task reveals follow-up work, create new tasks. If a task becomes irrelevant, use \`delete_task\` to clean up.
 
-**Tips:**
-- Write subjects as brief imperatives: "Extract product prices", not "I will extract prices".
-- Include acceptance criteria in the description so progress is unambiguous.
-- Do not create tasks you intend to complete in the same tool call — tasks are for tracking multi-step progress, not logging what you already did.`;
+**Important:** Do not create tasks just to log what you already did or are about to do in the same response.`;
 
 const SECTION_OPFS = `## OPFS Workspace
 
@@ -238,7 +238,9 @@ Read each tool's description before calling — it defines behavior, parameters,
 When stopped, describe clearly in your final response:
 1. What you tried and what happened.
 2. Your best guess at the root cause.
-Never silently keep trying — fail fast and report.`;
+Never silently keep trying — fail fast and report.
+
+**System guard**: The system automatically detects repetitive tool call patterns and will warn you with a \`[System Warning]\` message. If you receive one, follow its guidance immediately.`;
 
 // Page interaction workflow guide (included only when tab tools are available)
 const SUB_AGENT_SECTION_PAGE_INTERACTION = `### Page Interaction Workflow
 
@@ -0,0 +1,195 @@
+import { describe, it, expect } from "vitest";
+import { detectToolCallIssues, type ToolCallRecord } from "./tool_call_guard";
+
+describe("detectToolCallIssues", () => {
+  it("历史记录不足时不生成警告", () => { // no warning when the history is too short
+    expect(detectToolCallIssues([])).toBeNull();
+    expect(
+      detectToolCallIssues([{ name: "web_search", args: '{"query":"test"}', result: "...", iteration: 1 }])
+    ).toBeNull();
+  });
+
+  describe("完全相同的 tool + args 检测", () => { // detection of identical tool + args
+    it("相同工具和参数调用2次时生成警告", () => { // warns when the same tool and args are called twice
+      const history: ToolCallRecord[] = [
+        { name: "web_fetch", args: '{"url":"https://example.com"}', result: "...", iteration: 1 },
+        { name: "web_fetch", args: '{"url":"https://example.com"}', result: "...", iteration: 2 },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      expect(warning).toContain("web_fetch");
+    });
+
+    it("JSON 格式不同但内容相同时也触发", () => { // also triggers when JSON formatting differs but the content is the same
+      const history: ToolCallRecord[] = [
+        { name: "web_fetch", args: '{"url": "https://example.com"}', result: "...", iteration: 1 },
+        { name: "web_fetch", args: '{"url":"https://example.com"}', result: "...", iteration: 2 },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+    });
+
+    it("不同参数不触发警告", () => { // different args do not trigger a warning
+      const history: ToolCallRecord[] = [
+        { name: "web_fetch", args: '{"url":"https://a.com"}', result: "...", iteration: 1 },
+        { name: "web_fetch", args: '{"url":"https://b.com"}', result: "...", iteration: 2 },
+      ];
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+
+    it("超过最近10条的重复不触发", () => { // a repeat older than the most recent 10 records does not trigger
+      const history: ToolCallRecord[] = [
+        { name: "web_fetch", args: '{"url":"https://old.com"}', result: "...", iteration: 1 },
+      ];
+      // Insert 11 distinct calls (rotating through different tools so the generic repetition detection is not triggered)
+      const tools = ["web_search", "web_fetch", "execute_script"];
+      for (let i = 0; i < 11; i++) {
+        history.push({
+          name: tools[i % 3],
+          args: `{"q":"pad${i}"}`,
+          result: '{"result":"ok"}',
+          iteration: i + 2,
+        });
+      }
+      // Append one more call identical to the first one, which by now is outside the most-recent-10 window
+      history.push({ name: "web_fetch", args: '{"url":"https://old.com"}', result: "...", iteration: 13 });
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+  });
+
+  describe("execute_script 返回 null 检测", () => { // detection of execute_script returning null
+    it("连续3次返回 null 时生成警告", () => { // warns on 3 consecutive null results
+      const history: ToolCallRecord[] = [
+        {
+          name: "execute_script",
+          args: '{"code":"a.click()","target":"page"}',
+          result: '{"result":null,"target":"page","tab_id":123}',
+          iteration: 1,
+        },
+        {
+          name: "execute_script",
+          args: '{"code":"b.click()","target":"page"}',
+          result: '{"result":null,"target":"page","tab_id":123}',
+          iteration: 2,
+        },
+        {
+          name: "execute_script",
+          args: '{"code":"c.click()","target":"page"}',
+          result: '{"result":null,"target":"page","tab_id":123}',
+          iteration: 3,
+        },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      expect(warning).toContain("execute_script");
+      expect(warning).toContain("return");
+    });
+
+    it("中间穿插其他工具但 execute_script 仍然连续 null 时触发", () => { // triggers when other tools are interleaved but execute_script results are still consecutively null
+      const history: ToolCallRecord[] = [
+        { name: "execute_script", args: '{"code":"a()"}', result: '{"result":null}', iteration: 1 },
+        { name: "get_tab_content", args: '{"tab_id":1,"prompt":"find buttons"}', result: "page content...", iteration: 2 },
+        { name: "execute_script", args: '{"code":"b()"}', result: '{"result":null}', iteration: 3 },
+        { name: "get_tab_content", args: '{"tab_id":1,"prompt":"check state"}', result: "page content...", iteration: 4 },
+        { name: "execute_script", args: '{"code":"c()"}', result: '{"result":null}', iteration: 5 },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      expect(warning).toContain("execute_script");
+    });
+
+    it("2次返回 null 不触发", () => { // 2 null results do not trigger
+      const history: ToolCallRecord[] = [
+        { name: "execute_script", args: '{"code":"a()"}', result: '{"result":null}', iteration: 1 },
+        { name: "execute_script", args: '{"code":"b()"}', result: '{"result":null}', iteration: 2 },
+      ];
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+
+    it("中间有非 null 结果打断连续计数", () => { // a non-null result in between breaks the consecutive count
+      const history: ToolCallRecord[] = [
+        { name: "execute_script", args: '{"code":"a()"}', result: '{"result":null}', iteration: 1 },
+        { name: "execute_script", args: '{"code":"b()"}', result: '{"result":"ok"}', iteration: 2 },
+        { name: "execute_script", args: '{"code":"c()"}', result: '{"result":null}', iteration: 3 },
+        { name: "execute_script", args: '{"code":"d()"}', result: '{"result":null}', iteration: 4 },
+      ];
+      // Counting back from the newest record there are only 2 consecutive nulls, fewer than 3
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+  });
+
+  describe("get_tab_content 重复调用检测", () => { // detection of repeated get_tab_content calls
+    it("同一 tab 调用3次时生成警告", () => { // warns when the same tab is read 3 times
+      const history: ToolCallRecord[] = [
+        { name: "get_tab_content", args: '{"tab_id":123,"prompt":"find buttons"}', result: "...", iteration: 1 },
+        { name: "execute_script", args: '{"code":"click()"}', result: '{"result":"ok"}', iteration: 2 },
+        { name: "get_tab_content", args: '{"tab_id":123,"prompt":"find the button"}', result: "...", iteration: 3 },
+        { name: "execute_script", args: '{"code":"click2()"}', result: '{"result":"ok"}', iteration: 4 },
+        { name: "get_tab_content", args: '{"tab_id":123,"prompt":"detailed info"}', result: "...", iteration: 5 },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      expect(warning).toContain("get_tab_content");
+    });
+
+    it("不同 tab 不触发", () => { // different tabs do not trigger
+      const history: ToolCallRecord[] = [
+        { name: "get_tab_content", args: '{"tab_id":123}', result: "...", iteration: 1 },
+        { name: "get_tab_content", args: '{"tab_id":456}', result: "...", iteration: 2 },
+        { name: "get_tab_content", args: '{"tab_id":789}', result: "...", iteration: 3 },
+      ];
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+  });
+
+  describe("通用重复调用检测", () => { // generic repeated-call detection
+    it("最近8条中同一工具出现5次时生成警告", () => { // warns when the same tool appears 5 times within the most recent 8 records
+      const history: ToolCallRecord[] = [];
+      for (let i = 1; i <= 5; i++) {
+        history.push({
+          name: "web_search",
+          args: `{"query":"search ${i}"}`,
+          result: "...",
+          iteration: i,
+        });
+      }
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      expect(warning).toContain("web_search");
+    });
+
+    it("查询类工具不参与通用计数", () => { // query-type tools are excluded from the generic count
+      const history: ToolCallRecord[] = [];
+      for (let i = 1; i <= 6; i++) {
+        history.push({ name: "list_tasks", args: "{}", result: "[]", iteration: i });
+      }
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+
+    it("不同工具不合并计数", () => { // calls to different tools are not counted together
+      const history: ToolCallRecord[] = [
+        { name: "web_search", args: '{"query":"a"}', result: "...", iteration: 1 },
+        { name: "web_fetch", args: '{"url":"b"}', result: "...", iteration: 2 },
+        { name: "web_search", args: '{"query":"c"}', result: "...", iteration: 3 },
+        { name: "web_fetch", args: '{"url":"d"}', result: "...", iteration: 4 },
+        { name: "web_search", args: '{"query":"e"}', result: "...", iteration: 5 },
+        { name: "web_fetch", args: '{"url":"f"}', result: "...", iteration: 6 },
+      ];
+      expect(detectToolCallIssues(history)).toBeNull();
+    });
+  });
+
+  describe("优先级", () => { // priority between rules
+    it("完全相同参数的 execute_script 优先触发重复检测而非 null 检测", () => { // execute_script with identical args triggers the repetition detection rather than the null detection
+      const history: ToolCallRecord[] = [
+        { name: "execute_script", args: '{"code":"a()"}', result: '{"result":null}', iteration: 1 },
+        { name: "execute_script", args: '{"code":"b()"}', result: '{"result":null}', iteration: 2 },
+        { name: "execute_script", args: '{"code":"a()"}', result: '{"result":null}', iteration: 3 },
+      ];
+      const warning = detectToolCallIssues(history);
+      expect(warning).not.toBeNull();
+      // Should trigger the repetition detection (rule 1), not the null detection (rule 2)
+      expect(warning).toContain("identical arguments");
+    });
+  });
+});