|
| 1 | +import { describe, it, expect } from "vitest"; |
| 2 | +import { detectToolCallIssues, type ToolCallRecord } from "./tool_call_guard"; |
| 3 | + |
// Suite for detectToolCallIssues: every case assembles a tool-call history and checks
// whether the guard answers with a warning string or with null.
describe("detectToolCallIssues", () => {
  /** Assembles a single history entry from its four fields. */
  const call = (name: string, args: string, result: string, iteration: number): ToolCallRecord => ({
    name,
    args,
    result,
    iteration,
  });

  /** Builds an execute_script entry whose code argument is `<fn>()`. */
  const script = (fn: string, result: string, iteration: number): ToolCallRecord =>
    call("execute_script", `{"code":"${fn}()"}`, result, iteration);

  const NULL_RESULT = '{"result":null}';
  const OK_RESULT = '{"result":"ok"}';

  it("历史记录不足时不生成警告", () => {
    // Empty history, then a history holding exactly one call.
    expect(detectToolCallIssues([])).toBeNull();

    const lone = call("web_search", '{"query":"test"}', "...", 1);
    expect(detectToolCallIssues([lone])).toBeNull();
  });

  describe("完全相同的 tool + args 检测", () => {
    it("相同工具和参数调用2次时生成警告", () => {
      const sameArgs = '{"url":"https://example.com"}';
      const warning = detectToolCallIssues([
        call("web_fetch", sameArgs, "...", 1),
        call("web_fetch", sameArgs, "...", 2),
      ]);

      expect(warning).not.toBeNull();
      expect(warning).toContain("web_fetch");
    });

    it("JSON 格式不同但内容相同时也触发", () => {
      // The two argument strings differ only by the space after the colon.
      const spaced = call("web_fetch", '{"url": "https://example.com"}', "...", 1);
      const compact = call("web_fetch", '{"url":"https://example.com"}', "...", 2);

      expect(detectToolCallIssues([spaced, compact])).not.toBeNull();
    });

    it("不同参数不触发警告", () => {
      const history = ["https://a.com", "https://b.com"].map((url, idx) =>
        call("web_fetch", `{"url":"${url}"}`, "...", idx + 1)
      );

      expect(detectToolCallIssues(history)).toBeNull();
    });

    it("超过最近10条的重复不触发", () => {
      const staleArgs = '{"url":"https://old.com"}';
      const rotation = ["web_search", "web_fetch", "execute_script"];

      // Eleven distinct filler calls; the tool name rotates so that the generic
      // repeated-tool check is not what this case ends up exercising.
      const filler: ToolCallRecord[] = Array.from({ length: 11 }, (_, idx) => ({
        name: rotation[idx % 3],
        args: `{"q":"pad${idx}"}`,
        result: OK_RESULT,
        iteration: idx + 2,
      }));

      const history: ToolCallRecord[] = [
        call("web_fetch", staleArgs, "...", 1),
        ...filler,
        // Same call as the first entry, which by now sits outside the most recent 10 records.
        call("web_fetch", staleArgs, "...", 13),
      ];

      expect(detectToolCallIssues(history)).toBeNull();
    });
  });

  describe("execute_script 返回 null 检测", () => {
    it("连续3次返回 null 时生成警告", () => {
      const history = ["a", "b", "c"].map((el, idx) =>
        call(
          "execute_script",
          `{"code":"${el}.click()","target":"page"}`,
          '{"result":null,"target":"page","tab_id":123}',
          idx + 1
        )
      );

      const warning = detectToolCallIssues(history);
      expect(warning).not.toBeNull();
      expect(warning).toContain("execute_script");
      expect(warning).toContain("return");
    });

    it("中间穿插其他工具但 execute_script 仍然连续 null 时触发", () => {
      const warning = detectToolCallIssues([
        script("a", NULL_RESULT, 1),
        call("get_tab_content", '{"tab_id":1,"prompt":"find buttons"}', "page content...", 2),
        script("b", NULL_RESULT, 3),
        call("get_tab_content", '{"tab_id":1,"prompt":"check state"}', "page content...", 4),
        script("c", NULL_RESULT, 5),
      ]);

      expect(warning).not.toBeNull();
      expect(warning).toContain("execute_script");
    });

    it("2次返回 null 不触发", () => {
      const history = ["a", "b"].map((fn, idx) => script(fn, NULL_RESULT, idx + 1));

      expect(detectToolCallIssues(history)).toBeNull();
    });

    it("中间有非 null 结果打断连续计数", () => {
      const outcomes = [NULL_RESULT, OK_RESULT, NULL_RESULT, NULL_RESULT];
      const history = ["a", "b", "c", "d"].map((fn, idx) => script(fn, outcomes[idx], idx + 1));

      // Counting back from the newest call there are only two consecutive nulls, short of three.
      expect(detectToolCallIssues(history)).toBeNull();
    });
  });

  describe("get_tab_content 重复调用检测", () => {
    it("同一 tab 调用3次时生成警告", () => {
      /** get_tab_content entry for tab 123 with the given prompt. */
      const readTab = (prompt: string, iteration: number): ToolCallRecord =>
        call("get_tab_content", `{"tab_id":123,"prompt":"${prompt}"}`, "...", iteration);

      const warning = detectToolCallIssues([
        readTab("find buttons", 1),
        script("click", OK_RESULT, 2),
        readTab("find the button", 3),
        script("click2", OK_RESULT, 4),
        readTab("detailed info", 5),
      ]);

      expect(warning).not.toBeNull();
      expect(warning).toContain("get_tab_content");
    });

    it("不同 tab 不触发", () => {
      const history = [123, 456, 789].map((tabId, idx) =>
        call("get_tab_content", `{"tab_id":${tabId}}`, "...", idx + 1)
      );

      expect(detectToolCallIssues(history)).toBeNull();
    });
  });

  describe("通用重复调用检测", () => {
    it("最近8条中同一工具出现5次时生成警告", () => {
      const history = Array.from({ length: 5 }, (_, idx) =>
        call("web_search", `{"query":"search ${idx + 1}"}`, "...", idx + 1)
      );

      const warning = detectToolCallIssues(history);
      expect(warning).not.toBeNull();
      expect(warning).toContain("web_search");
    });

    it("查询类工具不参与通用计数", () => {
      const history = Array.from({ length: 6 }, (_, idx) => call("list_tasks", "{}", "[]", idx + 1));

      expect(detectToolCallIssues(history)).toBeNull();
    });

    it("不同工具不合并计数", () => {
      // Alternates web_search / web_fetch so each tool appears three times.
      const history = ["a", "b", "c", "d", "e", "f"].map((value, idx) =>
        idx % 2 === 0
          ? call("web_search", `{"query":"${value}"}`, "...", idx + 1)
          : call("web_fetch", `{"url":"${value}"}`, "...", idx + 1)
      );

      expect(detectToolCallIssues(history)).toBeNull();
    });
  });

  describe("优先级", () => {
    it("完全相同参数的 execute_script 优先触发重复检测而非 null 检测", () => {
      const history = ["a", "b", "a"].map((fn, idx) => script(fn, NULL_RESULT, idx + 1));

      const warning = detectToolCallIssues(history);
      expect(warning).not.toBeNull();
      // The duplicate-call rule (rule 1) is expected to fire here, not the null-result rule (rule 2).
      expect(warning).toContain("identical arguments");
    });
  });
});
0 commit comments