Skip to content

Commit 9666d0e

Browse files
committed
✨ opfs_read 支持文本内容读取、搜索提取失败提示、正则校验
- opfs_read 自动检测文本/二进制:文本文件直接返回内容(支持 offset/limit 分页),二进制返回 blob URL
- web_search 区分"无结果"和"提取失败",返回 warning 引导 agent 切换引擎
- tab_tools 对无效正则参数抛出明确错误
1 parent ae295c6 commit 9666d0e

7 files changed

Lines changed: 358 additions & 50 deletions

File tree

src/app/service/agent/system_prompt.ts

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -167,18 +167,21 @@ Use task tools **only** when tracking progress genuinely helps the user understa
167167

168168
const SECTION_OPFS = `## OPFS Workspace
169169
170-
OPFS stores files persistently (survives conversation restarts). Designed primarily for **binary data** (images, downloads, attachments).
170+
OPFS stores files persistently (survives conversation restarts). Supports both **text** and **binary** data.
171171
172172
**When to use OPFS**:
173-
- Binary files that need to be passed to the page: images, PDFs, downloads → \`opfs_write\` to save, \`opfs_read\` to get blob URL for page use
174-
- Data that needs to persist across conversations (e.g., user config, style profiles managed by skills)
175-
- SkillScript intermediate binary output (e.g., generated images saved via \`CAT.agent.opfs.write(blob)\`)
173+
- Text data that needs to persist across conversations (config, notes, structured data) → \`opfs_write\` to save, \`opfs_read\` to retrieve text content
174+
- Binary files that need to be passed to the page: images, PDFs, downloads → \`opfs_write\` to save, \`opfs_read\` to get blob URL
175+
- SkillScript intermediate output (e.g., generated images saved via \`CAT.agent.opfs.write(blob)\`)
176176
177177
**When NOT to use OPFS**:
178-
- Text content already in conversation context (tool results, extracted data, generated articles) — use it directly, do not write to OPFS for later retrieval
178+
- Text content already in conversation context (tool results, extracted data) — use it directly
179179
- Temporary data only needed within the current conversation — keep in context
180180
181-
**Critical rule**: \`opfs_read\` returns a **blob URL only** — never text content. The opfs_write → opfs_read pattern does NOT work for text retrieval. If you need text data later, keep it in conversation context.
181+
**Text file reading**: \`opfs_read\` detects MIME type automatically.
182+
- Text files (txt, json, js, html, css, xml, etc.) → returns text content directly with line info
183+
- If text exceeds 200 lines, you **MUST** use \`offset\` and \`limit\` to read in segments
184+
- Binary files (images, PDFs, etc.) → returns blob URL
182185
183186
**Binary file workflow**:
184187
**Save**: screenshot with \`saveTo\` / SkillScript \`fetch()\` → \`CAT.agent.opfs.write(blob)\` → returns path

src/app/service/agent/tools/opfs_tools.test.ts

Lines changed: 220 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
import { describe, it, expect, beforeEach, vi } from "vitest";
2-
import { createOPFSTools, sanitizePath, setCreateBlobUrlFn } from "./opfs_tools";
2+
import { createOPFSTools, sanitizePath, setCreateBlobUrlFn, guessMimeType } from "./opfs_tools";
3+
import { isText } from "@App/pkg/utils/istextorbinary";
34

45
// ---- In-memory OPFS mock ----
56

6-
type FSNode = { kind: "file"; content: string } | { kind: "directory"; children: Map<string, FSNode> };
7+
type FSNode = { kind: "file"; content: string | Uint8Array } | { kind: "directory"; children: Map<string, FSNode> };
78

89
function createMockFS() {
910
const root: FSNode = { kind: "directory", children: new Map() };
@@ -73,16 +74,22 @@ function createMockFS() {
7374
kind: "file",
7475
name,
7576
async getFile() {
76-
return new Blob([node.content], { type: "text/plain" });
77+
return new Blob([node.content as BlobPart]);
7778
},
7879
async createWritable() {
79-
let buffer = "";
80+
const chunks: (string | Uint8Array)[] = [];
8081
return {
81-
async write(data: string) {
82-
buffer += data;
82+
async write(data: string | Uint8Array) {
83+
chunks.push(data);
8384
},
8485
async close() {
85-
node.content = buffer;
86+
// 合并所有 chunk,如果全是 string 就存 string,否则存 Uint8Array
87+
if (chunks.every((c) => typeof c === "string")) {
88+
node.content = chunks.join("");
89+
} else {
90+
const blob = new Blob(chunks as BlobPart[]);
91+
node.content = new Uint8Array(await blob.arrayBuffer());
92+
}
8693
},
8794
};
8895
},
@@ -129,6 +136,60 @@ describe("sanitizePath", () => {
129136
});
130137
});
131138

139+
describe("guessMimeType", () => {
140+
it("常见文本扩展名返回正确 MIME", () => {
141+
expect(guessMimeType("readme.md")).toBe("text/markdown");
142+
expect(guessMimeType("data.csv")).toBe("text/csv");
143+
expect(guessMimeType("config.yaml")).toBe("text/yaml");
144+
expect(guessMimeType("config.yml")).toBe("text/yaml");
145+
expect(guessMimeType("index.html")).toBe("text/html");
146+
expect(guessMimeType("index.htm")).toBe("text/html");
147+
expect(guessMimeType("style.css")).toBe("text/css");
148+
expect(guessMimeType("data.xml")).toBe("text/xml");
149+
expect(guessMimeType("data.json")).toBe("application/json");
150+
expect(guessMimeType("app.js")).toBe("application/javascript");
151+
expect(guessMimeType("lib.mjs")).toBe("application/javascript");
152+
});
153+
154+
it("常见二进制扩展名返回正确 MIME", () => {
155+
expect(guessMimeType("photo.png")).toBe("image/png");
156+
expect(guessMimeType("photo.jpg")).toBe("image/jpeg");
157+
expect(guessMimeType("song.mp3")).toBe("audio/mpeg");
158+
expect(guessMimeType("video.mp4")).toBe("video/mp4");
159+
expect(guessMimeType("doc.pdf")).toBe("application/pdf");
160+
expect(guessMimeType("archive.zip")).toBe("application/zip");
161+
});
162+
163+
it("未知扩展名返回 octet-stream", () => {
164+
expect(guessMimeType("data.xyz")).toBe("application/octet-stream");
165+
expect(guessMimeType("Makefile")).toBe("application/octet-stream");
166+
expect(guessMimeType("file.rar")).toBe("application/octet-stream");
167+
});
168+
});
169+
170+
describe("isText(内容检测)", () => {
171+
it("UTF-8 文本内容被识别为文本", () => {
172+
const textContent = new TextEncoder().encode("Hello, world!\nThis is a text file.");
173+
expect(isText(textContent)).toBe(true);
174+
});
175+
176+
it("中文 UTF-8 文本被识别为文本", () => {
177+
const textContent = new TextEncoder().encode("你好,世界!这是一个文本文件。");
178+
expect(isText(textContent)).toBe(true);
179+
});
180+
181+
it("含 null 字节的内容被识别为二进制", () => {
182+
// isText 检测 charCode <= 8 为二进制(null byte = 0x00)
183+
const binaryContent = new Uint8Array([0x00, 0x01, 0x50, 0x4e, 0x47, 0xff, 0xfe, 0x00]);
184+
expect(isText(binaryContent)).toBe(false);
185+
});
186+
187+
it("空内容返回 false", () => {
188+
expect(isText(null)).toBe(false);
189+
expect(isText(undefined)).toBe(false);
190+
});
191+
});
192+
132193
describe("opfs_tools", () => {
133194
let mockFS: ReturnType<typeof createMockFS>;
134195

@@ -139,7 +200,7 @@ describe("opfs_tools", () => {
139200
getDirectory: vi.fn().mockResolvedValue(mockFS.rootHandle),
140201
},
141202
});
142-
// opfs_read 总是返回 blobUrl,需要初始化 createBlobUrlFn
203+
// opfs_read 读取二进制文件时需要 createBlobUrlFn 生成 blob URL
143204
setCreateBlobUrlFn(async () => "blob:mock-url");
144205
});
145206

@@ -155,7 +216,7 @@ describe("opfs_tools", () => {
155216
});
156217

157218
describe("opfs_write + opfs_read", () => {
158-
it("should write and read a file", async () => {
219+
it("should write and read a text file", async () => {
159220
const write = getTool("opfs_write");
160221
const read = getTool("opfs_read");
161222

@@ -167,8 +228,8 @@ describe("opfs_tools", () => {
167228

168229
const readResult = JSON.parse((await read.executor.execute({ path: "hello.txt" })) as string);
169230
expect(readResult.path).toBe("hello.txt");
170-
expect(readResult.blobUrl).toBe("blob:mock-url");
171-
expect(readResult.size).toBe(6);
231+
expect(readResult.type).toBe("text");
232+
expect(readResult.content).toBe("Hello!");
172233
});
173234

174235
it("should create nested directories automatically", async () => {
@@ -177,7 +238,8 @@ describe("opfs_tools", () => {
177238

178239
await write.executor.execute({ path: "a/b/c.txt", content: "deep" });
179240
const result = JSON.parse((await read.executor.execute({ path: "a/b/c.txt" })) as string);
180-
expect(result.blobUrl).toBe("blob:mock-url");
241+
expect(result.type).toBe("text");
242+
expect(result.content).toBe("deep");
181243
});
182244

183245
it("should overwrite existing file", async () => {
@@ -187,7 +249,8 @@ describe("opfs_tools", () => {
187249
await write.executor.execute({ path: "f.txt", content: "v1" });
188250
await write.executor.execute({ path: "f.txt", content: "v2" });
189251
const result = JSON.parse((await read.executor.execute({ path: "f.txt" })) as string);
190-
expect(result.blobUrl).toBe("blob:mock-url");
252+
expect(result.type).toBe("text");
253+
expect(result.content).toBe("v2");
191254
});
192255

193256
it("should strip leading slashes from path", async () => {
@@ -196,7 +259,8 @@ describe("opfs_tools", () => {
196259

197260
await write.executor.execute({ path: "/leading.txt", content: "ok" });
198261
const result = JSON.parse((await read.executor.execute({ path: "leading.txt" })) as string);
199-
expect(result.blobUrl).toBe("blob:mock-url");
262+
expect(result.type).toBe("text");
263+
expect(result.content).toBe("ok");
200264
});
201265

202266
it("should reject .. in path", async () => {
@@ -207,6 +271,148 @@ describe("opfs_tools", () => {
207271
});
208272
});
209273

274+
describe("opfs_read 文本读取", () => {
275+
it("should return text content for text files", async () => {
276+
const write = getTool("opfs_write");
277+
const read = getTool("opfs_read");
278+
279+
await write.executor.execute({ path: "hello.txt", content: "line1\nline2\nline3" });
280+
const result = JSON.parse((await read.executor.execute({ path: "hello.txt" })) as string);
281+
expect(result.type).toBe("text");
282+
expect(result.content).toBe("line1\nline2\nline3");
283+
expect(result.totalLines).toBe(3);
284+
expect(result.startLine).toBe(1);
285+
expect(result.endLine).toBe(3);
286+
expect(result.blobUrl).toBeUndefined();
287+
});
288+
289+
it("should return text content for json files", async () => {
290+
const write = getTool("opfs_write");
291+
const read = getTool("opfs_read");
292+
293+
await write.executor.execute({ path: "data.json", content: '{"key":"value"}' });
294+
const result = JSON.parse((await read.executor.execute({ path: "data.json" })) as string);
295+
expect(result.type).toBe("text");
296+
expect(result.content).toBe('{"key":"value"}');
297+
});
298+
299+
it("should return blob URL for binary files (png)", async () => {
300+
const write = getTool("opfs_write");
301+
const read = getTool("opfs_read");
302+
303+
// 先通过 write 创建文件(建立 workspace 目录结构),再替换为二进制内容
304+
await write.executor.execute({ path: "image.png", content: "placeholder" });
305+
const wsDir = mockFS.root.children.get("agents") as FSNode & { kind: "directory" };
306+
const workspace = wsDir.children.get("workspace") as FSNode & { kind: "directory" };
307+
workspace.children.set("image.png", {
308+
kind: "file",
309+
content: new Uint8Array([0x89, 0x50, 0x00, 0x47, 0x00, 0x0a, 0x00, 0x0a]),
310+
});
311+
312+
const result = JSON.parse((await read.executor.execute({ path: "image.png" })) as string);
313+
expect(result.type).toBe("binary");
314+
expect(result.blobUrl).toBe("blob:mock-url");
315+
expect(result.content).toBeUndefined();
316+
});
317+
318+
it("mode=blob 时文本文件也返回 blob URL", async () => {
319+
const write = getTool("opfs_write");
320+
const read = getTool("opfs_read");
321+
322+
await write.executor.execute({ path: "readme.txt", content: "hello" });
323+
const result = JSON.parse(
324+
(await read.executor.execute({ path: "readme.txt", mode: "blob" })) as string
325+
);
326+
expect(result.type).toBe("binary");
327+
expect(result.blobUrl).toBe("blob:mock-url");
328+
expect(result.content).toBeUndefined();
329+
});
330+
331+
it("mode=text 时二进制内容也强制返回文本", async () => {
332+
const write = getTool("opfs_write");
333+
const read = getTool("opfs_read");
334+
335+
// 先创建文件,再替换为二进制内容
336+
await write.executor.execute({ path: "data.bin", content: "placeholder" });
337+
const wsDir = mockFS.root.children.get("agents") as FSNode & { kind: "directory" };
338+
const workspace = wsDir.children.get("workspace") as FSNode & { kind: "directory" };
339+
workspace.children.set("data.bin", {
340+
kind: "file",
341+
content: new Uint8Array([0x48, 0x00, 0x65, 0x00, 0x6c, 0x00]),
342+
});
343+
344+
// auto 模式下内容检测为二进制,返回 blob
345+
const blobResult = JSON.parse(
346+
(await read.executor.execute({ path: "data.bin" })) as string
347+
);
348+
expect(blobResult.type).toBe("binary");
349+
350+
// mode=text 强制文本读取
351+
const textResult = JSON.parse(
352+
(await read.executor.execute({ path: "data.bin", mode: "text" })) as string
353+
);
354+
expect(textResult.type).toBe("text");
355+
});
356+
357+
it("should support offset and limit for line-based reading", async () => {
358+
const write = getTool("opfs_write");
359+
const read = getTool("opfs_read");
360+
361+
const lines = Array.from({ length: 10 }, (_, i) => `line${i + 1}`).join("\n");
362+
await write.executor.execute({ path: "multi.txt", content: lines });
363+
364+
const result = JSON.parse(
365+
(await read.executor.execute({ path: "multi.txt", offset: 3, limit: 4 })) as string
366+
);
367+
expect(result.content).toBe("line3\nline4\nline5\nline6");
368+
expect(result.startLine).toBe(3);
369+
expect(result.endLine).toBe(6);
370+
expect(result.totalLines).toBe(10);
371+
});
372+
373+
it("should error when text file exceeds max lines without offset/limit", async () => {
374+
const write = getTool("opfs_write");
375+
const read = getTool("opfs_read");
376+
377+
// 生成 201 行文本
378+
const lines = Array.from({ length: 201 }, (_, i) => `line${i + 1}`).join("\n");
379+
await write.executor.execute({ path: "big.txt", content: lines });
380+
381+
await expect(read.executor.execute({ path: "big.txt" })).rejects.toThrow(/201/);
382+
await expect(read.executor.execute({ path: "big.txt" })).rejects.toThrow(/offset/);
383+
});
384+
385+
it("should allow reading large file with offset/limit", async () => {
386+
const write = getTool("opfs_write");
387+
const read = getTool("opfs_read");
388+
389+
const lines = Array.from({ length: 300 }, (_, i) => `line${i + 1}`).join("\n");
390+
await write.executor.execute({ path: "big.txt", content: lines });
391+
392+
const result = JSON.parse(
393+
(await read.executor.execute({ path: "big.txt", offset: 290, limit: 11 })) as string
394+
);
395+
expect(result.startLine).toBe(290);
396+
expect(result.endLine).toBe(300);
397+
expect(result.totalLines).toBe(300);
398+
});
399+
400+
it("should clamp offset to valid range", async () => {
401+
const write = getTool("opfs_write");
402+
const read = getTool("opfs_read");
403+
404+
await write.executor.execute({ path: "small.txt", content: "a\nb\nc" });
405+
406+
// offset 超出范围
407+
const result = JSON.parse(
408+
(await read.executor.execute({ path: "small.txt", offset: 100, limit: 5 })) as string
409+
);
410+
expect(result.content).toBe("");
411+
expect(result.startLine).toBe(100);
412+
expect(result.endLine).toBe(3);
413+
});
414+
});
415+
210416
describe("opfs_read errors", () => {
211417
it("should throw for non-existent file", async () => {
212418
const read = getTool("opfs_read");

0 commit comments

Comments
 (0)