Skip to content

Commit 3564676

Browse files
committed
✨ 附件扩展名、readAttachment API、缓存用量追踪与 UI 优化
- 附件 ID 统一追加文件扩展名,便于类型识别 - 新增 CAT.agent.opfs.readAttachment API 读取内部附件存储 - usage 增加 cacheCreationInputTokens/cacheReadInputTokens 追踪 - OPFS 浏览器支持图片预览、文件下载、图标区分 - AskUserBlock 交互卡片 UI 重构(渐变条、pill 选项、内联输入) - ConversationInstance 收集模型生成的图片/文件 content blocks - 模型配置自动检测 supportsVision/supportsImageOutput
1 parent 8178141 commit 3564676

16 files changed

Lines changed: 380 additions & 93 deletions

File tree

src/app/service/agent/agent.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ describe("callLLMWithToolLoop", () => {
692692

693693
const doneEvent = events.find((e) => e.type === "done");
694694
expect(doneEvent).toBeDefined();
695-
expect(doneEvent!.type === "done" && doneEvent!.usage).toEqual({ inputTokens: 10, outputTokens: 5 });
695+
expect(doneEvent!.type === "done" && doneEvent!.usage).toEqual({ inputTokens: 10, outputTokens: 5, cacheCreationInputTokens: 0, cacheReadInputTokens: 0 });
696696
});
697697

698698
it("单轮 tool calling", async () => {
@@ -747,7 +747,7 @@ describe("callLLMWithToolLoop", () => {
747747
const doneEvent = events.find((e) => e.type === "done");
748748
expect(doneEvent).toBeDefined();
749749
if (doneEvent?.type === "done") {
750-
expect(doneEvent.usage).toEqual({ inputTokens: 50, outputTokens: 18 });
750+
expect(doneEvent.usage).toEqual({ inputTokens: 50, outputTokens: 18, cacheCreationInputTokens: 0, cacheReadInputTokens: 0 });
751751
}
752752
});
753753

src/app/service/agent/content_utils.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
11
import type { MessageContent, ContentBlock } from "./types";
22

3+
// MIME type → file extension map for the types handled explicitly.
// Keys are lowercase MIME essences (type/subtype, no parameters).
const MIME_EXT_MAP: Record<string, string> = {
  "image/png": "png",
  "image/jpeg": "jpg",
  "image/gif": "gif",
  "image/webp": "webp",
  "image/svg+xml": "svg",
  "image/bmp": "bmp",
  "audio/wav": "wav",
  "audio/mpeg": "mp3",
  "audio/mp3": "mp3",
  "audio/ogg": "ogg",
  "audio/webm": "webm",
  "application/pdf": "pdf",
  "application/zip": "zip",
  "application/json": "json",
  "text/plain": "txt",
  "text/html": "html",
  "text/csv": "csv",
};

/**
 * Derive a file extension (without the leading dot) from a MIME type.
 *
 * The input is normalised first: parameters after `;` are dropped, surrounding
 * whitespace is trimmed and the value is lowercased, so
 * `"Text/Plain; charset=utf-8"` resolves to `"txt"`.
 *
 * @param mimeType MIME type string, optionally carrying parameters.
 * @returns The mapped extension when the type is listed in `MIME_EXT_MAP`;
 *   otherwise the subtype with every non-alphanumeric character removed;
 *   `"bin"` when no usable subtype remains.
 */
export function getExtFromMime(mimeType: string): string {
  // Keep only the "type/subtype" essence; parameters must not leak into the extension.
  const [head = ""] = mimeType.split(";");
  const essence = head.trim().toLowerCase();

  // Own-property check so inherited keys such as "constructor" are never
  // returned as if they were extensions.
  const known = Object.prototype.hasOwnProperty.call(MIME_EXT_MAP, essence) ? MIME_EXT_MAP[essence] : undefined;
  if (known) return known;

  // Fall back to the subtype stripped of non-alphanumeric characters.
  const subtype = essence.split("/")[1];
  const cleaned = subtype ? subtype.replace(/[^a-z0-9]/g, "") : "";
  // An empty result (e.g. "application/+") would produce an id ending in a bare dot.
  return cleaned || "bin";
}
33+
34+
/**
 * Report whether a file name ends with a recognised image extension.
 * The match is case-insensitive and anchored to the end of the name.
 *
 * @param name File name (or path) to inspect.
 * @returns `true` when the name ends in one of the listed image extensions.
 */
export function isImageFileName(name: string): boolean {
  const imageSuffix = /\.(png|jpe?g|gif|webp|svg|bmp|ico|avif)$/i;
  const hasImageSuffix = imageSuffix.test(name);
  return hasImageSuffix;
}
40+
341
/**
442
* 从 MessageContent 提取纯文本(用于 copy、搜索、标题生成等)
543
* - string: 直接返回

src/app/service/agent/providers/anthropic.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,8 @@ export function parseAnthropicStream(
270270
if (imageBlockData) {
271271
const fullBase64 = imageBlockData.base64Chunks.join("");
272272
const dataUrl = `data:${imageBlockData.mediaType};base64,${fullBase64}`;
273-
const attachmentId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
273+
const ext = imageBlockData.mediaType.split("/")[1] || "png";
274+
const attachmentId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}.${ext}`;
274275
onEvent({
275276
type: "content_block_complete",
276277
block: {

src/app/service/agent/providers/openai.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,8 @@ export function parseOpenAIStream(
180180
const dataUrl: string = part.image_url.url;
181181
const mimeMatch = dataUrl.match(/^data:([^;]+);/);
182182
const mimeType = mimeMatch ? mimeMatch[1] : "image/png";
183-
const blockId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
183+
const ext = mimeType.split("/")[1] || "png";
184+
const blockId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}.${ext}`;
184185
onEvent({
185186
type: "content_block_complete",
186187
block: { type: "image", attachmentId: blockId, mimeType, name: "generated-image" },

src/app/service/agent/tool_registry.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { Attachment, ToolCall, ToolDefinition, ToolResultWithAttachments } from "./types";
22
import type { AgentChatRepo } from "@App/app/repo/agent_chat";
33
import { uuidv4 } from "@App/pkg/utils/uuid";
4+
import { getExtFromMime } from "./content_utils";
45

56
// 工具执行器接口
67
export interface ToolExecutor {
@@ -131,7 +132,8 @@ export class ToolRegistry {
131132

132133
const attachments: Attachment[] = [];
133134
for (const ad of attachmentDataList) {
134-
const id = uuidv4();
135+
const ext = getExtFromMime(ad.mimeType);
136+
const id = `${uuidv4()}.${ext}`;
135137
const size = await this.chatRepo.saveAttachment(id, ad.data);
136138
attachments.push({
137139
id,

src/app/service/agent/tools/tab_tools.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import { extractHtmlWithSelectors } from "@App/app/service/offscreen/client";
88
const GET_TAB_CONTENT_DEFINITION: ToolDefinition = {
99
name: "get_tab_content",
1010
description:
11-
"Read the rendered content of a browser tab. Returns cleaned markdown with CSS selector annotations for key elements. " +
12-
"Use selector to extract specific sections. Use prompt to have the LLM summarize/extract specific information from the content.",
11+
"Read the text content of a browser tab as cleaned markdown. Best for reading articles, extracting text, or summarizing page content. " +
12+
"Use selector to extract specific sections. Use prompt to have the LLM summarize/extract specific information. " +
13+
"NOTE: If you need to locate interactive elements (buttons, inputs, links) for clicking or form-filling, use the browser_action tool from the browser-automation skill instead — it returns element selectors optimized for DOM operations.",
1314
parameters: {
1415
type: "object",
1516
properties: {

src/app/service/agent/types.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ export type ChatReply = {
176176
content: MessageContent;
177177
thinking?: string;
178178
toolCalls?: ToolCall[];
179-
usage?: { inputTokens: number; outputTokens: number };
179+
usage?: { inputTokens: number; outputTokens: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number };
180180
command?: boolean; // 标识该回复来自命令处理
181181
};
182182

@@ -186,7 +186,7 @@ export type StreamChunk = {
186186
content?: string;
187187
block?: ContentBlock;
188188
toolCall?: ToolCall;
189-
usage?: { inputTokens: number; outputTokens: number };
189+
usage?: { inputTokens: number; outputTokens: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number };
190190
error?: string;
191191
/** 错误分类码:"rate_limit" | "auth" | "tool_timeout" | "max_iterations" | "api_error" */
192192
errorCode?: string;
@@ -253,6 +253,7 @@ export type SkillApiRequest =
253253
export type OPFSApiRequest =
254254
| { action: "write"; path: string; content: string | Blob; scriptUuid: string }
255255
| { action: "read"; path: string; format?: "text" | "bloburl"; scriptUuid: string }
256+
| { action: "readAttachment"; id: string; format?: "bloburl" | "dataurl"; scriptUuid: string }
256257
| { action: "list"; path?: string; scriptUuid: string }
257258
| { action: "delete"; path: string; scriptUuid: string };
258259

src/app/service/content/gm_api/cat_agent.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type {
55
ChatReply,
66
ChatStreamEvent,
77
CommandHandler,
8+
ContentBlock,
89
Conversation,
910
ConversationApiRequest,
1011
ConversationCreateOptions,
@@ -294,6 +295,7 @@ export class ConversationInstance {
294295
let content = "";
295296
let thinking = "";
296297
const toolCalls: ToolCall[] = [];
298+
const contentBlocks: ContentBlock[] = [];
297299
let currentToolCall: ToolCall | null = null;
298300
let usage: { inputTokens: number; outputTokens: number } | undefined;
299301

@@ -316,26 +318,39 @@ export class ConversationInstance {
316318
case "thinking_delta":
317319
thinking += event.delta;
318320
break;
321+
case "content_block_complete":
322+
// 收集模型生成的图片/文件/音频 blocks(data 已由 finalize 保存到 attachment 存储)
323+
contentBlocks.push(event.block);
324+
break;
319325
case "tool_call_start":
320326
if (currentToolCall) toolCalls.push(currentToolCall);
321327
currentToolCall = { ...event.toolCall, arguments: event.toolCall.arguments || "" };
322328
break;
323329
case "tool_call_delta":
324330
if (currentToolCall) currentToolCall.arguments += event.delta;
325331
break;
326-
case "done":
332+
case "done": {
327333
if (currentToolCall) {
328334
toolCalls.push(currentToolCall);
329335
currentToolCall = null;
330336
}
331337
if (event.usage) usage = event.usage;
338+
// 合并文本和 content blocks 到 MessageContent
339+
let finalContent: MessageContent = content;
340+
if (contentBlocks.length > 0) {
341+
const blocks: ContentBlock[] = [];
342+
if (content) blocks.push({ type: "text", text: content });
343+
blocks.push(...contentBlocks);
344+
finalContent = blocks;
345+
}
332346
resolve({
333-
content,
347+
content: finalContent,
334348
thinking: thinking || undefined,
335349
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
336350
usage,
337351
});
338352
break;
353+
}
339354
case "error":
340355
reject(Object.assign(new Error(event.message), { errorCode: event.errorCode }));
341356
break;
@@ -377,6 +392,9 @@ export class ConversationInstance {
377392
case "thinking_delta":
378393
chunk = { type: "thinking_delta", content: event.delta };
379394
break;
395+
case "content_block_complete":
396+
chunk = { type: "content_block", block: event.block };
397+
break;
380398
case "tool_call_start":
381399
chunk = { type: "tool_call", toolCall: { ...event.toolCall, arguments: "" } };
382400
break;

src/app/service/content/gm_api/cat_agent_opfs.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ export default class CATAgentOPFSApi {
5151
]) as Promise<Array<{ name: string; type: string; size?: number }>>;
5252
}
5353

54+
@GMContext.API({ follow: "CAT.agent.opfs" })
/**
 * Read an attachment by id by sending a "readAttachment" request over the
 * "CAT_agentOPFS" message channel.
 *
 * @param id Attachment identifier.
 * @param format Optional result format, "bloburl" or "dataurl"; forwarded
 *   unchanged to the handler.
 * @returns The handler's reply, typed as id/size plus optional blobUrl,
 *   content and mimeType.
 */
public "CAT.agent.opfs.readAttachment"(
  id: string,
  format?: "bloburl" | "dataurl"
): Promise<{ id: string; blobUrl?: string; content?: string; size: number; mimeType?: string }> {
  const ctx = this as unknown as GMBaseContext;
  // Falls back to an empty uuid when the context carries no script resource.
  const scriptUuid = ctx.scriptRes?.uuid || "";
  const request = { action: "readAttachment", id, format, scriptUuid } as OPFSApiRequest;
  const reply = ctx.sendMessage("CAT_agentOPFS", [request]);
  return reply as Promise<{ id: string; blobUrl?: string; content?: string; size: number; mimeType?: string }>;
}
64+
5465
@GMContext.API({ follow: "CAT.agent.opfs" })
5566
public "CAT.agent.opfs.delete"(path: string): Promise<{ success: true }> {
5667
const ctx = this as unknown as GMBaseContext;

src/app/service/service_worker/agent.ts

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import type {
2727
ContentBlock,
2828
} from "@App/app/service/agent/types";
2929
import { getTextContent, isContentBlocks } from "@App/app/service/agent/content_utils";
30+
import { supportsVision, supportsImageOutput } from "@App/pages/options/routes/AgentChat/model_utils";
3031
import { buildOpenAIRequest, parseOpenAIStream } from "@App/app/service/agent/providers/openai";
3132
import { buildAnthropicRequest, parseAnthropicStream } from "@App/app/service/agent/providers/anthropic";
3233
import { AgentChatRepo } from "@App/app/repo/agent_chat";
@@ -447,6 +448,22 @@ export class AgentService {
447448
const executor = toolMap.get("opfs_read")!;
448449
return JSON.parse((await executor.execute({ path: request.path, format: request.format })) as string);
449450
}
451+
case "readAttachment": {
  // Load the attachment from the repo by id; a missing attachment is an error.
  const blob = await this.repo.getAttachment(request.id);
  if (!blob) {
    throw new Error(`Attachment not found: ${request.id}`);
  }
  if (request.format === "dataurl") {
    // Build the data URL. Bytes are converted in fixed-size chunks: spreading
    // the whole array into String.fromCharCode passes one argument per byte
    // and throws RangeError once the attachment is large enough.
    const bytes = new Uint8Array(await blob.arrayBuffer());
    const CHUNK_SIZE = 0x8000;
    let binary = "";
    for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
      binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
    }
    const base64 = btoa(binary);
    const dataUrl = `data:${blob.type || "application/octet-stream"};base64,${base64}`;
    return { id: request.id, content: dataUrl, size: blob.size, mimeType: blob.type };
  }
  // Default: return a blob URL.
  // NOTE(review): URL.createObjectURL is not exposed inside service workers —
  // confirm this handler runs in a context where it is available.
  const blobUrl = URL.createObjectURL(blob);
  return { id: request.id, blobUrl, size: blob.size, mimeType: blob.type };
}
450467
case "list": {
451468
const executor = toolMap.get("opfs_list")!;
452469
return JSON.parse((await executor.execute({ path: request.path || "" })) as string);
@@ -909,8 +926,12 @@ export class AgentService {
909926
}
910927

911928
// Handles CAT.agent.model API requests (read-only, apiKey hidden); called from GMApi.
// Also fills in supportsVision / supportsImageOutput from the auto-detection helpers,
// so scripts do not receive undefined when the user never ticked these options manually.
private stripApiKey(model: AgentModelConfig): AgentModelSafeConfig {
  // Split off the secret; everything else is copied as-is.
  const { apiKey: _, ...rest } = model;
  return {
    ...rest,
    supportsVision: supportsVision(model),
    supportsImageOutput: supportsImageOutput(model),
  };
}
916937

@@ -1018,7 +1039,7 @@ export class AgentService {
10181039

10191040
const startTime = Date.now();
10201041
let iterations = 0;
1021-
const totalUsage = { inputTokens: 0, outputTokens: 0 };
1042+
const totalUsage = { inputTokens: 0, outputTokens: 0, cacheCreationInputTokens: 0, cacheReadInputTokens: 0 };
10221043

10231044
while (iterations < maxIterations) {
10241045
iterations++;
@@ -1050,6 +1071,8 @@ export class AgentService {
10501071
if (result.usage) {
10511072
totalUsage.inputTokens += result.usage.inputTokens;
10521073
totalUsage.outputTokens += result.usage.outputTokens;
1074+
totalUsage.cacheCreationInputTokens += result.usage.cacheCreationInputTokens || 0;
1075+
totalUsage.cacheReadInputTokens += result.usage.cacheReadInputTokens || 0;
10531076
}
10541077

10551078
// 自动 compact:当上下文占用超过 80% 时触发
@@ -1159,19 +1182,22 @@ export class AgentService {
11591182
}
11601183

11611184
// 没有 tool calls,对话结束
1185+
const durationMs = Date.now() - startTime;
11621186
if (conversationId) {
11631187
await this.repo.appendMessage({
11641188
id: uuidv4(),
11651189
conversationId,
11661190
role: "assistant",
11671191
content: buildMessageContent(),
11681192
thinking: result.thinking ? { content: result.thinking } : undefined,
1193+
usage: totalUsage,
1194+
durationMs,
11691195
createtime: Date.now(),
11701196
});
11711197
}
11721198

11731199
// 发送 done 事件
1174-
sendEvent({ type: "done", usage: totalUsage, durationMs: Date.now() - startTime });
1200+
sendEvent({ type: "done", usage: totalUsage, durationMs });
11751201
return;
11761202
}
11771203

@@ -1748,7 +1774,7 @@ export class AgentService {
17481774
content: string;
17491775
thinking?: string;
17501776
toolCalls?: ToolCall[];
1751-
usage?: { inputTokens: number; outputTokens: number };
1777+
usage?: { inputTokens: number; outputTokens: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number };
17521778
contentBlocks?: ContentBlock[];
17531779
}> {
17541780
const chatRequest: ChatRequest = {
@@ -1793,7 +1819,7 @@ export class AgentService {
17931819
let thinking = "";
17941820
const toolCalls: ToolCall[] = [];
17951821
let currentToolCall: ToolCall | null = null;
1796-
let usage: { inputTokens: number; outputTokens: number } | undefined;
1822+
let usage: { inputTokens: number; outputTokens: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number } | undefined;
17971823
// 收集带 data 的图片 block(模型生成的图片),stream 结束后统一保存到 OPFS
17981824
const pendingImageSaves: Array<{ block: ContentBlock & { type: "image" }; data: string }> = [];
17991825

@@ -1860,8 +1886,9 @@ export class AgentService {
18601886
let cleanedContent = content;
18611887
while ((match = imgRegex.exec(content)) !== null) {
18621888
const [fullMatch, alt, dataUrl, subtype] = match;
1863-
const blockId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
18641889
const mimeType = `image/${subtype}`;
1890+
const ext = subtype || "png";
1891+
const blockId = `img_${Date.now()}_${Math.random().toString(36).slice(2, 8)}.${ext}`;
18651892
try {
18661893
await this.repo.saveAttachment(blockId, dataUrl);
18671894
const block: ContentBlock = {

0 commit comments

Comments
 (0)