Skip to content

Commit 367a1e7

Browse files
committed
✨ navigate_tab 工具、researcher 页面只读能力、精简 task 工具
- 新增 navigate_tab 工具:导航已有标签页到新 URL,支持等待加载完成
- researcher 子代理增加页面只读能力(get_tab_content/open_tab/list_tabs/close_tab/navigate_tab),但不允许 DOM 交互
- 移除 get_task 和 delete_task 工具,list_tasks 改为返回完整任务信息
- 页面交互指南仅在同时拥有 get_tab_content 和 execute_script 时显示
1 parent 53da6ed commit 367a1e7

10 files changed

Lines changed: 199 additions & 162 deletions

src/app/service/agent/core/sub_agent_types.test.ts

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,29 +37,31 @@ describe("Sub-Agent 类型系统", () => {
3737
"open_tab",
3838
"close_tab",
3939
"activate_tab",
40+
"navigate_tab",
4041
"ask_user",
4142
"agent",
4243
"create_task",
4344
"update_task",
44-
"get_task",
4545
"list_tasks",
46-
"delete_task",
4746
];
4847

49-
it.concurrent("researcher 类型排除 tab 工具和其他不在白名单中的工具", () => {
48+
it.concurrent("researcher 类型排除 DOM 交互工具和其他不在白名单中的工具", () => {
5049
const config = SUB_AGENT_TYPES.researcher;
5150
const excluded = getExcludeToolsForType(config, allTools);
5251

53-
// researcher 不包含 tab 工具、execute_script、ask_user、agent
54-
expect(excluded).toContain("get_tab_content");
55-
expect(excluded).toContain("list_tabs");
56-
expect(excluded).toContain("open_tab");
57-
expect(excluded).toContain("close_tab");
52+
// researcher 不包含 DOM 交互工具(execute_script、activate_tab)、ask_user、agent
5853
expect(excluded).toContain("activate_tab");
5954
expect(excluded).toContain("execute_script");
6055
expect(excluded).toContain("ask_user");
6156
expect(excluded).toContain("agent");
6257

58+
// researcher 可以读取页面(get_tab_content + tab 管理)
59+
expect(excluded).not.toContain("get_tab_content");
60+
expect(excluded).not.toContain("open_tab");
61+
expect(excluded).not.toContain("list_tabs");
62+
expect(excluded).not.toContain("close_tab");
63+
expect(excluded).not.toContain("navigate_tab");
64+
6365
// task 工具始终可用(ALWAYS_ALLOWED_TOOLS)
6466
expect(excluded).not.toContain("create_task");
6567
expect(excluded).not.toContain("update_task");
@@ -80,10 +82,11 @@ describe("Sub-Agent 类型系统", () => {
8082
expect(excluded).toContain("ask_user");
8183
expect(excluded).toContain("agent");
8284

83-
// 应该保留 tab 工具
85+
// 应该保留 tab 工具(含 navigate_tab)
8486
expect(excluded).not.toContain("get_tab_content");
8587
expect(excluded).not.toContain("list_tabs");
8688
expect(excluded).not.toContain("open_tab");
89+
expect(excluded).not.toContain("navigate_tab");
8790
expect(excluded).not.toContain("execute_script");
8891
expect(excluded).not.toContain("web_fetch");
8992

src/app/service/agent/core/sub_agent_types.ts

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,40 @@ export interface SubAgentTypeConfig {
1111
}
1212

1313
// 所有子代理类型都默认可用的工具(task 工具用于与主 agent 共享任务进度)
14-
const ALWAYS_ALLOWED_TOOLS = ["create_task", "update_task", "get_task", "list_tasks", "delete_task"];
14+
const ALWAYS_ALLOWED_TOOLS = ["create_task", "update_task", "list_tasks"];
1515

1616
// 内置子代理类型
1717
export const SUB_AGENT_TYPES: Record<string, SubAgentTypeConfig> = {
1818
researcher: {
1919
name: "researcher",
20-
description: "Web search/fetch, data analysis, no tab interaction",
21-
allowedTools: ["web_fetch", "web_search", "opfs_read", "opfs_write", "opfs_list", "opfs_delete"],
20+
description: "Web search/fetch, page reading (read-only, no DOM interaction)",
21+
allowedTools: [
22+
"web_fetch",
23+
"web_search",
24+
"get_tab_content",
25+
"open_tab",
26+
"list_tabs",
27+
"close_tab",
28+
"navigate_tab",
29+
"opfs_read",
30+
"opfs_write",
31+
"opfs_list",
32+
"opfs_delete",
33+
],
2234
maxIterations: 20,
2335
timeoutMs: 600_000,
2436
systemPromptAddition: `## Role: Researcher
2537
2638
You are a research-focused sub-agent. Your job is to search, fetch, read, and summarize information.
2739
28-
**Capabilities:** Web search, URL fetching, OPFS file storage.
29-
**Limitations:** You cannot interact with browser tabs (no navigation, clicking, or form filling). You cannot ask the user questions.
40+
**Capabilities:** Web search, URL fetching, page reading (open tabs and read rendered content with get_tab_content), OPFS file storage.
41+
**Limitations:** You cannot interact with page DOM (no clicking, form filling, or script execution). You cannot ask the user questions.
3042
3143
**Guidelines:**
32-
- Use web_search to find relevant sources, then web_fetch to read them.
44+
- Use web_search to find relevant sources, then web_fetch or get_tab_content to read them.
45+
- For JavaScript-rendered pages (SPAs), prefer get_tab_content over web_fetch — it reads the rendered DOM.
3346
- Synthesize information from multiple sources when possible.
47+
- Close tabs you no longer need to avoid clutter.
3448
- Return structured, concise results that the parent agent can act on.
3549
- If you cannot find the information, say so clearly rather than guessing.`,
3650
},
@@ -44,6 +58,7 @@ You are a research-focused sub-agent. Your job is to search, fetch, read, and su
4458
"open_tab",
4559
"close_tab",
4660
"activate_tab",
61+
"navigate_tab",
4762
"execute_script",
4863
"web_fetch",
4964
"opfs_read",

src/app/service/agent/core/system_prompt.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,16 @@ describe("buildSubAgentSystemPrompt", () => {
109109
expect(result).not.toContain("## Sub-Agent");
110110
});
111111

112-
it.concurrent("researcher 类型不包含 tab 工具的描述", () => {
112+
it.concurrent("researcher 类型包含页面读取工具但不包含页面交互工作流", () => {
113113
const config = SUB_AGENT_TYPES.researcher;
114114
const tools = config.allowedTools || [];
115115
const result = buildSubAgentSystemPrompt(config, tools);
116116

117-
expect(result).not.toContain("get_tab_content");
117+
expect(result).toContain("get_tab_content");
118118
expect(result).toContain("web_fetch");
119119
expect(result).toContain("web_search");
120+
// researcher 没有 execute_script,不应包含页面交互工作流段
121+
expect(result).not.toContain("### Page Interaction Workflow");
120122
});
121123

122124
it.concurrent("researcher 类型包含角色说明", () => {

src/app/service/agent/core/system_prompt.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ Any task that involves 2+ tool calls (web searching, page reading, page interact
9595
9696
### Sub-Agent Types
9797
98-
- **researcher** — Web search/fetch, data analysis. No tab interaction. Use for: information gathering, comparison research, content summarization, finding URLs/data.
98+
- **researcher** — Web search/fetch, page reading (read-only, no DOM interaction). Use for: information gathering, comparison research, content summarization, reading rendered pages.
9999
- **page_operator** — Browser tab interaction, page automation. Use for: navigating pages, filling forms, extracting page data, clicking buttons, writing content into editors.
100100
- **general** (default) — All tools. Use when the task spans both research and page interaction.
101101
@@ -164,7 +164,7 @@ Use task tools **only** when tracking progress genuinely helps the user understa
164164
**Workflow:**
165165
1. **Plan** — Call \`list_tasks\` to check for existing tasks, then \`create_task\` for each step with a clear imperative subject and enough description for context.
166166
2. **Execute** — Before starting each task, call \`update_task\` with \`status: "in_progress"\`. When done, set \`status: "completed"\`.
167-
3. **Adapt** — If a completed task reveals follow-up work, create new tasks. If a task becomes irrelevant, use \`delete_task\` to clean up.
167+
3. **Adapt** — If a completed task reveals follow-up work, create new tasks.
168168
169169
**Important:** Do not create tasks just to log what you already did or are about to do in the same response.`;
170170

@@ -364,7 +364,8 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions): string {
364364
export function buildSubAgentSystemPrompt(typeConfig: SubAgentTypeConfig, availableToolNames: string[]): string {
365365
const nameSet = new Set(availableToolNames);
366366
const hasOpfs = nameSet.has("opfs_read") || nameSet.has("opfs_write");
367-
const hasTabTools = nameSet.has("get_tab_content");
367+
// 页面交互指南需要同时具备 get_tab_content 和 execute_script
368+
const hasPageInteraction = nameSet.has("get_tab_content") && nameSet.has("execute_script");
368369

369370
const sections: string[] = [
370371
SUB_AGENT_SECTION_INTRO,
@@ -374,8 +375,8 @@ export function buildSubAgentSystemPrompt(typeConfig: SubAgentTypeConfig, availa
374375
SUB_AGENT_SECTION_TOOL_USAGE,
375376
];
376377

377-
// 有 tab 工具时才包含页面交互验证指南
378-
if (hasTabTools) {
378+
// 同时拥有页面读取和 DOM 交互工具时才包含页面交互工作流指南
379+
if (hasPageInteraction) {
379380
sections.push(SUB_AGENT_SECTION_PAGE_INTERACTION);
380381
}
381382

src/app/service/agent/core/tool_call_guard.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ function isNullResult(result: string): boolean {
3131
}
3232

3333
// 不参与通用重复计数的查询类工具
34-
const QUERY_TOOLS = new Set(["list_tasks", "get_task", "list_tabs"]);
34+
const QUERY_TOOLS = new Set(["list_tasks", "list_tabs"]);
3535

3636
/**
3737
* 检测:完全相同的 tool + args 被调用2次

src/app/service/agent/core/tools/sub_agent.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export const SUB_AGENT_DEFINITION: ToolDefinition = {
4141
type: "string",
4242
enum: SUB_AGENT_TYPE_NAMES,
4343
description:
44-
"Sub-agent type. 'researcher' (web search/fetch, data analysis, no tab interaction), 'page_operator' (browser tab interaction, page automation), 'general' (all tools, default). Choose the most specific type for better results.",
44+
"Sub-agent type. 'researcher' (web search/fetch, page reading — read-only, no DOM interaction), 'page_operator' (browser tab interaction, DOM manipulation, page automation), 'general' (all tools, default). Choose the most specific type for better results.",
4545
},
4646
to: {
4747
type: "string",

src/app/service/agent/core/tools/tab_tools.test.ts

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ const mockTabsUpdate = vi.fn();
1111
const mockTabsGet = vi.fn();
1212
// mock chrome.windows
1313
const mockWindowsUpdate = vi.fn();
14+
// mock chrome.tabs.onUpdated
15+
const mockOnUpdatedAddListener = vi.fn();
16+
const mockOnUpdatedRemoveListener = vi.fn();
1417

1518
// mock offscreen extractHtmlWithSelectors 的返回值
1619
let mockExtractReturn: string | null = "Extracted content with selectors for testing";
@@ -42,6 +45,10 @@ beforeEach(() => {
4245
remove: mockTabsRemove,
4346
update: mockTabsUpdate,
4447
get: mockTabsGet,
48+
onUpdated: {
49+
addListener: mockOnUpdatedAddListener,
50+
removeListener: mockOnUpdatedRemoveListener,
51+
},
4552
};
4653
(chrome as any).windows = { update: mockWindowsUpdate };
4754
});
@@ -58,15 +65,16 @@ function getExecutor(name: string) {
5865
}
5966

6067
describe("createTabTools", () => {
61-
it("should create 5 tools", () => {
68+
it("should create 6 tools", () => {
6269
const { tools } = makeTools();
63-
expect(tools).toHaveLength(5);
70+
expect(tools).toHaveLength(6);
6471
const names = tools.map((t) => t.definition.name);
6572
expect(names).toContain("get_tab_content");
6673
expect(names).toContain("list_tabs");
6774
expect(names).toContain("open_tab");
6875
expect(names).toContain("close_tab");
6976
expect(names).toContain("activate_tab");
77+
expect(names).toContain("navigate_tab");
7078
});
7179
});
7280

@@ -436,3 +444,82 @@ describe("activate_tab", () => {
436444
expect(mockWindowsUpdate).not.toHaveBeenCalled();
437445
});
438446
});
447+
448+
describe("navigate_tab", () => {
449+
it("should throw when tab_id is missing", async () => {
450+
const executor = getExecutor("navigate_tab");
451+
await expect(executor.execute({ url: "https://example.com" })).rejects.toThrow("tab_id is required");
452+
});
453+
454+
it("should throw when url is missing", async () => {
455+
const executor = getExecutor("navigate_tab");
456+
await expect(executor.execute({ tab_id: 42 })).rejects.toThrow("url is required");
457+
});
458+
459+
it("should navigate and wait for load by default", async () => {
460+
mockTabsUpdate.mockResolvedValue({ id: 42 });
461+
mockOnUpdatedAddListener.mockImplementation((listener: Function) => {
462+
listener(42, { status: "complete" });
463+
});
464+
mockTabsGet.mockResolvedValue({
465+
id: 42,
466+
url: "https://new-page.com",
467+
title: "New Page",
468+
status: "complete",
469+
});
470+
471+
const executor = getExecutor("navigate_tab");
472+
const raw = (await executor.execute({ tab_id: 42, url: "https://new-page.com" })) as string;
473+
const result = JSON.parse(raw);
474+
475+
expect(result.id).toBe(42);
476+
expect(result.url).toBe("https://new-page.com");
477+
expect(result.title).toBe("New Page");
478+
expect(result.status).toBe("complete");
479+
expect(mockTabsUpdate).toHaveBeenCalledWith(42, { url: "https://new-page.com" });
480+
expect(mockOnUpdatedAddListener).toHaveBeenCalled();
481+
expect(mockOnUpdatedRemoveListener).toHaveBeenCalled();
482+
});
483+
484+
it("should skip waiting when wait_until_loaded is false", async () => {
485+
mockTabsUpdate.mockResolvedValue({ id: 42 });
486+
mockTabsGet.mockResolvedValue({
487+
id: 42,
488+
url: "https://new-page.com",
489+
title: "",
490+
status: "loading",
491+
});
492+
493+
const executor = getExecutor("navigate_tab");
494+
const raw = (await executor.execute({
495+
tab_id: 42,
496+
url: "https://new-page.com",
497+
wait_until_loaded: false,
498+
})) as string;
499+
const result = JSON.parse(raw);
500+
501+
expect(result.status).toBe("loading");
502+
expect(mockOnUpdatedAddListener).not.toHaveBeenCalled();
503+
});
504+
505+
it("should ignore updates from other tabs", async () => {
506+
mockTabsUpdate.mockResolvedValue({ id: 42 });
507+
mockOnUpdatedAddListener.mockImplementation((listener: Function) => {
508+
listener(99, { status: "complete" }); // 其他 tab
509+
listener(42, { status: "loading" }); // 目标 tab 还在加载
510+
listener(42, { status: "complete" }); // 目标 tab 加载完成
511+
});
512+
mockTabsGet.mockResolvedValue({
513+
id: 42,
514+
url: "https://new-page.com",
515+
title: "Done",
516+
status: "complete",
517+
});
518+
519+
const executor = getExecutor("navigate_tab");
520+
const raw = (await executor.execute({ tab_id: 42, url: "https://new-page.com" })) as string;
521+
const result = JSON.parse(raw);
522+
523+
expect(result.title).toBe("Done");
524+
});
525+
});

src/app/service/agent/core/tools/tab_tools.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,23 @@ const ACTIVATE_TAB_DEFINITION: ToolDefinition = {
8585
},
8686
};
8787

88+
const NAVIGATE_TAB_DEFINITION: ToolDefinition = {
89+
name: "navigate_tab",
90+
description: "Navigate an existing tab to a new URL. Waits for page load completion by default.",
91+
parameters: {
92+
type: "object",
93+
properties: {
94+
tab_id: { type: "number", description: "Target tab ID" },
95+
url: { type: "string", description: "URL to navigate to" },
96+
wait_until_loaded: {
97+
type: "boolean",
98+
description: "Wait for page to finish loading (default: true). Set false to return immediately.",
99+
},
100+
},
101+
required: ["tab_id", "url"],
102+
},
103+
};
104+
88105
// ---- Factory ----
89106

90107
export function createTabTools(deps: {
@@ -288,13 +305,53 @@ export function createTabTools(deps: {
288305
},
289306
};
290307

308+
const navigateTabExecutor: ToolExecutor = {
309+
execute: async (args: Record<string, unknown>) => {
310+
const tabId = args.tab_id as number;
311+
const url = args.url as string;
312+
const waitUntilLoaded = (args.wait_until_loaded as boolean | undefined) ?? true;
313+
314+
if (tabId == null) throw new Error("tab_id is required");
315+
if (!url) throw new Error("url is required");
316+
317+
await chrome.tabs.update(tabId, { url });
318+
319+
if (waitUntilLoaded) {
320+
await new Promise<void>((resolve) => {
321+
let timeoutId: ReturnType<typeof setTimeout>;
322+
const listener = (updatedTabId: number, changeInfo: { status?: string }) => {
323+
if (updatedTabId === tabId && changeInfo.status === "complete") {
324+
chrome.tabs.onUpdated.removeListener(listener);
325+
clearTimeout(timeoutId);
326+
resolve();
327+
}
328+
};
329+
chrome.tabs.onUpdated.addListener(listener);
330+
timeoutId = setTimeout(() => {
331+
chrome.tabs.onUpdated.removeListener(listener);
332+
resolve();
333+
}, 30_000);
334+
});
335+
}
336+
337+
const tab = await chrome.tabs.get(tabId);
338+
return JSON.stringify({
339+
id: tab.id,
340+
url: tab.url || tab.pendingUrl || url,
341+
title: tab.title || "",
342+
status: tab.status || "unknown",
343+
});
344+
},
345+
};
346+
291347
return {
292348
tools: [
293349
{ definition: GET_TAB_CONTENT_DEFINITION, executor: getTabContentExecutor },
294350
{ definition: LIST_TABS_DEFINITION, executor: listTabsExecutor },
295351
{ definition: OPEN_TAB_DEFINITION, executor: openTabExecutor },
296352
{ definition: CLOSE_TAB_DEFINITION, executor: closeTabExecutor },
297353
{ definition: ACTIVATE_TAB_DEFINITION, executor: activateTabExecutor },
354+
{ definition: NAVIGATE_TAB_DEFINITION, executor: navigateTabExecutor },
298355
],
299356
};
300357
}

0 commit comments

Comments
 (0)