scriptscat
diff --git a/‎src/app/service/agent/compact_prompt.test.ts‎
Lines changed: 18 additions & 7 deletions b/‎src/app/service/agent/compact_prompt.test.ts‎
Lines changed: 18 additions & 7 deletions
diff --git a/‎src/app/service/agent/compact_prompt.ts‎
Lines changed: 41 additions & 20 deletions b/‎src/app/service/agent/compact_prompt.ts‎
Lines changed: 41 additions & 20 deletions
diff --git a/‎src/app/service/agent/sub_agent_types.test.ts‎
Lines changed: 122 additions & 0 deletions b/‎src/app/service/agent/sub_agent_types.test.ts‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎src/app/service/agent/sub_agent_types.ts‎
Lines changed: 122 additions & 0 deletions b/‎src/app/service/agent/sub_agent_types.ts‎
Lines changed: 122 additions & 0 deletions
@@ -3,14 +3,12 @@ import { extractSummary, buildCompactUserPrompt, COMPACT_SYSTEM_PROMPT } from ".
 
 describe("extractSummary", () => {
   it("extracts content from <summary> tags", () => {
-    const response = `<analysis>Some analysis here</analysis>
-
-<summary>
-1. **Primary Request**: Build a feature
-2. **Key Decisions**: Used React
+    const response = `<summary>
+1. **Task Overview**: Build a feature
+2. **Current State**: Used React
 </summary>`;
     const result = extractSummary(response);
-    expect(result).toBe("1. **Primary Request**: Build a feature\n2. **Key Decisions**: Used React");
+    expect(result).toBe("1. **Task Overview**: Build a feature\n2. **Current State**: Used React");
   });
 
   it("returns full content when no <summary> tag found", () => {
@@ -35,11 +33,24 @@ Line 3
 describe("buildCompactUserPrompt", () => {
   it("builds prompt without custom instruction", () => {
     const prompt = buildCompactUserPrompt();
-    expect(prompt).toContain("Create a detailed summary");
+    expect(prompt).toContain("continuation summary");
     expect(prompt).toContain("<summary>");
+    expect(prompt).toContain("<analysis>");
     expect(prompt).not.toContain("Additional summarization instructions");
   });
 
+  // The default prompt must name each of the eight numbered summary sections.
+  it("包含所有 8 个摘要段落", () => {
+    const prompt = buildCompactUserPrompt();
+    expect(prompt).toContain("**Task Overview**");
+    expect(prompt).toContain("**Current State**");
+    expect(prompt).toContain("**User Messages**");
+    expect(prompt).toContain("**Errors and Fixes**");
+    expect(prompt).toContain("**Important Discoveries**");
+    expect(prompt).toContain("**Current Work**");
+    expect(prompt).toContain("**Next Steps**");
+    expect(prompt).toContain("**Context to Preserve**");
+  });
+
   it("appends custom instruction when provided", () => {
     const prompt = buildCompactUserPrompt("只保留代码相关内容");
     expect(prompt).toContain("Additional summarization instructions from the user: 只保留代码相关内容");
 
@@ -1,30 +1,51 @@
-export const COMPACT_SYSTEM_PROMPT = `You are a conversation summarizer. Your task is to create a detailed summary of the conversation, preserving all critical information needed to continue effectively.`;
+export const COMPACT_SYSTEM_PROMPT = `You are a conversation summarizer. Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. This summary will replace the conversation history, enabling efficient task resumption in a new context window.`;
 
 export function buildCompactUserPrompt(customInstruction?: string): string {
-  let prompt = `Create a detailed summary of the conversation so far.
+  let prompt = `Write a structured, concise, and actionable continuation summary of the conversation so far. First analyze the conversation in <analysis> tags, then write the summary in <summary> tags.
 
-Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts:
+Include the following sections in your <summary>:
 
-1. Chronologically analyze each message. For each section identify:
-   - The user's explicit requests and intents
-   - Key decisions and outcomes
-   - Specific details: file names, code snippets, function signatures
-   - Errors encountered and how they were fixed
-   - Important user feedback or corrections
+1. **Task Overview**
+   - The user's core request and success criteria
+   - Any clarifications or constraints they specified
 
-2. Double-check for completeness.
+2. **Current State**
+   - What has been completed so far
+   - Pages visited, data extracted, or actions performed (with URLs/selectors if relevant)
+   - Key outputs or artifacts produced
 
-Your summary should include the following sections in <summary> tags:
+3. **User Messages**
+   - List ALL user messages that are not tool results
+   - These are critical for understanding the user's feedback and changing intent
+   - Include any mid-conversation corrections or preference changes
 
-1. **Primary Request and Intent**: The user's core requests and success criteria
-2. **Key Decisions**: Important decisions made and their rationale
-3. **Current State**: What has been completed, files modified, artifacts produced
-4. **Errors and Fixes**: Problems encountered and their solutions
-5. **Pending Tasks**: Outstanding work items
-6. **Current Work**: What was being worked on immediately before this summary
-7. **Next Steps**: Specific actions needed to continue
+4. **Errors and Fixes**
+   - All errors encountered and how they were resolved
+   - User feedback on errors (especially "do it differently" instructions)
+   - What approaches were tried that didn't work (and why)
 
-Be concise but complete — preserve all information that would prevent duplicate work or repeated mistakes.`;
+5. **Important Discoveries**
+   - Technical constraints or site-specific quirks uncovered
+   - Decisions made and their rationale
+   - Selectors, page structures, or API endpoints discovered that may be needed again
+
+6. **Current Work**
+   - Precisely what was being worked on immediately before this summary
+   - Include specific details: which page, which step, what was the last action
+   - If a sub-agent was running, what was its task and status
+
+7. **Next Steps**
+   - Specific actions needed to complete the task
+   - Any blockers or open questions to resolve
+   - Priority order if multiple steps remain
+   - If there is a next step, describe exactly where you left off to prevent task drift
+
+8. **Context to Preserve**
+   - User preferences or style requirements
+   - Domain-specific details that aren't obvious
+   - Any promises or commitments made to the user
+
+Be concise but complete — err on the side of including information that would prevent duplicate work or repeated mistakes.`;
 
   if (customInstruction) {
     prompt += `\n\nAdditional summarization instructions from the user: ${customInstruction}`;
@@ -33,7 +54,7 @@ Be concise but complete — preserve all information that would prevent duplicat
   return prompt;
 }
 
-/** 从 LLM 响应中提取 <summary> 标签内容 */
+/** 从 LLM 响应中提取 <summary> 标签内容，跳过 <analysis> 部分 */
 export function extractSummary(content: string): string {
   const match = content.match(/<summary>([\s\S]*?)<\/summary>/);
   return match ? match[1].trim() : content.trim();
 
@@ -0,0 +1,122 @@
+import { describe, it, expect } from "vitest";
+import { resolveSubAgentType, getExcludeToolsForType, SUB_AGENT_TYPES } from "./sub_agent_types";
+
+describe("Sub-Agent 类型系统", () => {
+  describe("resolveSubAgentType", () => {
+    // Every fallback case must return the very same registry object as `general`.
+    const fallback = SUB_AGENT_TYPES.general;
+
+    it.concurrent("返回指定的内置类型", () => {
+      // Each built-in name resolves to the identical registry entry (reference equality).
+      for (const builtIn of ["researcher", "page_operator", "general"]) {
+        expect(resolveSubAgentType(builtIn)).toBe(SUB_AGENT_TYPES[builtIn]);
+      }
+    });
+
+    it.concurrent("未知类型 fallback 到 general", () => {
+      for (const unknownName of ["unknown_type", ""]) {
+        expect(resolveSubAgentType(unknownName)).toBe(fallback);
+      }
+    });
+
+    it.concurrent("undefined/不传参数返回 general", () => {
+      expect(resolveSubAgentType()).toBe(fallback);
+      expect(resolveSubAgentType(undefined)).toBe(fallback);
+    });
+  });
+
+  // Checks the exclusion list computed for each built-in type and for hand-built configs.
+  describe("getExcludeToolsForType", () => {
+    // Tool names passed as `allToolNames` in the cases below (the last case passes its own shorter list).
+    const allTools = [
+      "web_fetch",
+      "web_search",
+      "opfs_read",
+      "opfs_write",
+      "opfs_list",
+      "opfs_delete",
+      "execute_script",
+      "get_tab_content",
+      "list_tabs",
+      "open_tab",
+      "close_tab",
+      "activate_tab",
+      "ask_user",
+      "agent",
+      "create_task",
+      "update_task",
+      "get_task",
+      "list_tasks",
+      "delete_task",
+    ];
+
+    it.concurrent("researcher 类型排除 tab 工具和其他不在白名单中的工具", () => {
+      const config = SUB_AGENT_TYPES.researcher;
+      const excluded = getExcludeToolsForType(config, allTools);
+
+      // researcher does not include the tab tools, ask_user, or agent
+      expect(excluded).toContain("get_tab_content");
+      expect(excluded).toContain("list_tabs");
+      expect(excluded).toContain("open_tab");
+      expect(excluded).toContain("close_tab");
+      expect(excluded).toContain("activate_tab");
+      expect(excluded).toContain("ask_user");
+      expect(excluded).toContain("agent");
+
+      // task tools are always available (ALWAYS_ALLOWED_TOOLS)
+      expect(excluded).not.toContain("create_task");
+      expect(excluded).not.toContain("update_task");
+      expect(excluded).not.toContain("list_tasks");
+
+      // tools that should be kept must not appear in the exclusion list
+      expect(excluded).not.toContain("web_fetch");
+      expect(excluded).not.toContain("web_search");
+      expect(excluded).not.toContain("execute_script");
+      expect(excluded).not.toContain("opfs_read");
+    });
+
+    it.concurrent("page_operator 类型排除 web_search 和其他不在白名单中的工具", () => {
+      const config = SUB_AGENT_TYPES.page_operator;
+      const excluded = getExcludeToolsForType(config, allTools);
+
+      // page_operator does not include web_search, ask_user, or agent
+      expect(excluded).toContain("web_search");
+      expect(excluded).toContain("ask_user");
+      expect(excluded).toContain("agent");
+
+      // tab tools should be kept
+      expect(excluded).not.toContain("get_tab_content");
+      expect(excluded).not.toContain("list_tabs");
+      expect(excluded).not.toContain("open_tab");
+      expect(excluded).not.toContain("execute_script");
+      expect(excluded).not.toContain("web_fetch");
+
+      // task tools are always available
+      expect(excluded).not.toContain("create_task");
+      expect(excluded).not.toContain("update_task");
+    });
+
+    it.concurrent("general 类型使用黑名单模式，只排除 ask_user 和 agent", () => {
+      const config = SUB_AGENT_TYPES.general;
+      const excluded = getExcludeToolsForType(config, allTools);
+
+      // Denylist mode: the result is exactly the configured excludeTools, in order.
+      expect(excluded).toEqual(["ask_user", "agent"]);
+    });
+
+    it.concurrent("allowedTools 和 excludeTools 都未指定时返回空数组", () => {
+      // Typed as `any` because this literal omits the `description` field that SubAgentTypeConfig requires.
+      const config: any = { name: "empty", maxIterations: 10, timeoutMs: 60000, systemPromptAddition: "" };
+      const excluded = getExcludeToolsForType(config, allTools);
+      expect(excluded).toEqual([]);
+    });
+
+    it.concurrent("allowedTools 优先于 excludeTools", () => {
+      // Typed as `any` because this literal omits the `description` field that SubAgentTypeConfig requires.
+      const config: any = {
+        name: "test",
+        allowedTools: ["web_fetch"],
+        excludeTools: ["web_search"],
+        maxIterations: 10,
+        timeoutMs: 60000,
+        systemPromptAddition: "",
+      };
+      const excluded = getExcludeToolsForType(config, ["web_fetch", "web_search", "execute_script"]);
+
+      // allowlist mode is used: everything not in allowedTools is excluded
+      expect(excluded).toContain("web_search");
+      expect(excluded).toContain("execute_script");
+      expect(excluded).not.toContain("web_fetch");
+    });
+  });
+});
@@ -0,0 +1,122 @@
+// Sub-agent type definitions and registry
+
+/** Configuration for one sub-agent type. */
+export interface SubAgentTypeConfig {
+  name: string;
+  description: string; // English text, written into the agent tool description so the LLM can choose a type
+  allowedTools?: string[]; // allowlist mode (takes precedence over excludeTools)
+  excludeTools?: string[]; // denylist mode
+  maxIterations: number;
+  timeoutMs: number;
+  systemPromptAddition: string; // role description injected into the sub-agent system prompt
+}
+
+// Tools available by default to every sub-agent type (the task tools are used to share task
+// progress with the main agent). In allowlist mode these are merged into the allowed set.
+const ALWAYS_ALLOWED_TOOLS = [
+  "create_task",
+  "update_task",
+  "get_task",
+  "list_tasks",
+  "delete_task",
+];
+
+// Built-in sub-agent types, keyed by type name.
+// `researcher` and `page_operator` use an allowlist (allowedTools); `general` uses a denylist (excludeTools).
+export const SUB_AGENT_TYPES: Record<string, SubAgentTypeConfig> = {
+  researcher: {
+    name: "researcher",
+    description: "Web search/fetch, data analysis, no tab interaction",
+    allowedTools: [
+      "web_fetch",
+      "web_search",
+      "opfs_read",
+      "opfs_write",
+      "opfs_list",
+      "opfs_delete",
+      "execute_script",
+    ],
+    maxIterations: 20,
+    timeoutMs: 600_000,
+    systemPromptAddition: `## Role: Researcher
+
+You are a research-focused sub-agent. Your job is to search, fetch, read, and summarize information.
+
+**Capabilities:** Web search, URL fetching, data analysis via execute_script (sandbox mode only).
+**Limitations:** You cannot interact with browser tabs (no navigation, clicking, or form filling). You cannot ask the user questions.
+
+**Guidelines:**
+- Use web_search to find relevant sources, then web_fetch to read them.
+- Synthesize information from multiple sources when possible.
+- Return structured, concise results that the parent agent can act on.
+- If you cannot find the information, say so clearly rather than guessing.`,
+  },
+
+  page_operator: {
+    name: "page_operator",
+    description: "Browser tab interaction, page automation",
+    allowedTools: [
+      "get_tab_content",
+      "list_tabs",
+      "open_tab",
+      "close_tab",
+      "activate_tab",
+      "execute_script",
+      "web_fetch",
+      "opfs_read",
+      "opfs_write",
+      "opfs_list",
+      "opfs_delete",
+    ],
+    maxIterations: 30,
+    timeoutMs: 600_000,
+    systemPromptAddition: `## Role: Page Operator
+
+You are a page interaction sub-agent. Your job is to navigate web pages, interact with elements, and extract data.
+
+**Capabilities:** Tab navigation, page reading, DOM interaction via execute_script, URL fetching.
+**Limitations:** You cannot search the web (use a researcher sub-agent for that). You cannot ask the user questions.
+
+**Guidelines:**
+- Always read the page content (get_tab_content) before interacting to understand the current state.
+- Verify page state after each interaction — never assume an action succeeded.
+- For form filling, check that inputs exist and are visible before attempting to fill them.
+- Return extracted data in a structured format.`,
+  },
+
+  general: {
+    name: "general",
+    description: "All tools, general-purpose",
+    excludeTools: ["ask_user", "agent"],
+    maxIterations: 30,
+    timeoutMs: 600_000,
+    systemPromptAddition: `## Role: General Sub-Agent
+
+You are a general-purpose sub-agent with access to all tools except user interaction and nested sub-agents.
+
+**Limitations:** You cannot ask the user questions and cannot spawn nested sub-agents. If you encounter a situation that requires user input, describe the situation clearly in your response so the parent agent can handle it.`,
+  },
+};
+
+/**
+ * Resolve a sub-agent type name to its configuration.
+ *
+ * Only own keys of SUB_AGENT_TYPES count as known types. A plain bracket lookup would also
+ * find inherited Object.prototype members (e.g. "constructor", "toString"), which are truthy
+ * but are not configs, so those names must fall back like any other unknown name.
+ *
+ * @param typeName Requested type name; may be omitted, undefined, or empty.
+ * @returns The matching registry entry, or the `general` entry when the name is missing or unknown.
+ */
+export function resolveSubAgentType(typeName?: string): SubAgentTypeConfig {
+  if (!typeName || !Object.prototype.hasOwnProperty.call(SUB_AGENT_TYPES, typeName)) {
+    return SUB_AGENT_TYPES.general;
+  }
+  return SUB_AGENT_TYPES[typeName];
+}
+
+/**
+ * Compute the final list of excluded tool names for a type config, given every available tool name.
+ * - Allowlist mode (non-empty allowedTools): exclude each tool that is in neither allowedTools
+ *   nor ALWAYS_ALLOWED_TOOLS.
+ * - Denylist mode (non-empty excludeTools): return a copy of excludeTools as-is.
+ * - Neither specified (or both empty): return an empty array, i.e. exclude nothing.
+ */
+export function getExcludeToolsForType(config: SubAgentTypeConfig, allToolNames: string[]): string[] {
+  const { allowedTools, excludeTools } = config;
+
+  // A non-empty allowlist wins even when excludeTools is also set.
+  if (allowedTools?.length) {
+    const permitted = new Set<string>(ALWAYS_ALLOWED_TOOLS);
+    for (const toolName of allowedTools) {
+      permitted.add(toolName);
+    }
+    return allToolNames.filter((toolName) => !permitted.has(toolName));
+  }
+
+  // Denylist mode hands back a copy so callers cannot mutate the config's array.
+  return excludeTools?.length ? excludeTools.slice() : [];
+}