diff --git a/apps/site/docs/en/api.mdx b/apps/site/docs/en/api.mdx index 3a5391b2de..3832badd9f 100644 --- a/apps/site/docs/en/api.mdx +++ b/apps/site/docs/en/api.mdx @@ -18,7 +18,12 @@ All agents share these base options: - `generateReport: boolean`: If true, a report file will be generated. (Default: true) - `reportFileName: string`: The name of the report file. (Default: generated by midscene) - `autoPrintReportMsg: boolean`: If true, report messages will be printed. (Default: true) -- `cacheId: string | undefined`: If provided, this cacheId will be used to save or match the cache. (Default: undefined, means cache feature is disabled) +- `cache?: false | { id: string; strategy?: 'read-only' | 'read-write' | 'write-only'; dir?: string }`: + - `false`: disable cache completely. + - `id`: required cache ID. + - `strategy`: optional cache strategy. Default is `'read-write'`. + - `dir`: optional cache directory path. When set, cache files are written to this directory instead of the `cache` subdirectory of the Midscene run directory. This lets you split cache and logs/report directories. +- `cacheId: string | undefined` (deprecated): legacy cache ID for backward compatibility. Prefer `cache.id`. - `aiActContext: string`: Some background knowledge that should be sent to the AI model when calling `agent.aiAct()`, like 'close the cookie consent dialog first if it exists' (Default: undefined). Previously exposed as `aiActionContext`; the legacy name is still accepted for backward compatibility. - `replanningCycleLimit: number`: The maximum number of `aiAct` replanning cycles. Default is 20 (40 for UI-TARS models). Prefer setting this via the agent option; reading `MIDSCENE_REPLANNING_CYCLE_LIMIT` is only for backward compatibility. - `waitAfterAction: number`: Wait time in milliseconds after each action execution. This allows the UI to settle and stabilize before the next action. Default is 300ms. 
diff --git a/apps/site/docs/zh/api.mdx b/apps/site/docs/zh/api.mdx index 45c88734af..c2c463ec1f 100644 --- a/apps/site/docs/zh/api.mdx +++ b/apps/site/docs/zh/api.mdx @@ -20,7 +20,12 @@ Midscene 针对每个不同环境都有对应的 Agent。每个 Agent 的构造 - `generateReport: boolean`: 如果为 true,则生成报告文件。默认值为 true。 - `reportFileName: string`: 报告文件的名称,默认值由 midscene 内部生成。 - `autoPrintReportMsg: boolean`: 如果为 true,则打印报告消息。默认值为 true。 -- `cacheId: string | undefined`: 如果配置,则使用此 cacheId 保存或匹配缓存。默认值为 undefined,也就是不启用缓存。 +- `cache?: false | { id: string; strategy?: 'read-only' | 'read-write' | 'write-only'; dir?: string }`: + - `false`:完全禁用缓存。 + - `id`:必填的缓存 ID。 + - `strategy`:可选缓存策略。默认值为 `'read-write'`。 + - `dir`:可选缓存目录路径。配置后,缓存文件会写入该目录,而不是 Midscene 运行目录下的 `cache` 子目录。这样可以把 cache 与 logs/report 目录拆开。 +- `cacheId: string | undefined`(已废弃):仅用于向后兼容。推荐使用 `cache.id`。 - `aiActContext: string`: 调用 `agent.aiAct()` 时,发送给 AI 模型的背景知识,比如 "有 cookie 对话框时先关闭它",默认值为空。此前名为 `aiActionContext`,旧名称仍然兼容。 - `replanningCycleLimit: number`: `aiAct` 的最大重规划次数。默认值为 20(UI-TARS 模型默认 40)。推荐通过 agent 入参设置;`MIDSCENE_REPLANNING_CYCLE_LIMIT` 环境变量仅作兼容读取。 - `waitAfterAction: number`: 每次动作执行后的等待时间(毫秒)。这让 UI 有时间稳定,然后再执行下一个动作。默认值为 300 毫秒。 diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 5e88252d75..b37ea7fc89 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -314,6 +314,7 @@ export class Agent< { readOnly: cacheConfigObj.readOnly, writeOnly: cacheConfigObj.writeOnly, + cacheDirectory: cacheConfigObj.directory, }, ); } @@ -1464,6 +1465,7 @@ export class Agent< enabled: boolean; readOnly: boolean; writeOnly: boolean; + directory?: string; } | null { // Validate original cache config before processing // Agent requires explicit IDs - don't allow auto-generation @@ -1487,6 +1489,19 @@ export class Agent< ); } + if ( + opts.cache && + typeof opts.cache === 'object' && + opts.cache !== null && + opts.cache.dir !== undefined && + (typeof opts.cache.dir !== 'string' || !opts.cache.dir.trim()) + ) 
{ + throw new Error( + 'cache.dir must be a non-empty string when provided.\n' + + 'Example: cache: { id: "my-cache-id", dir: "./my-cache-dir" }', + ); + } + // Use the unified utils function to process cache configuration const cacheConfig = processCacheConfig( opts.cache, @@ -1527,6 +1542,7 @@ export class Agent< enabled: !isWriteOnly, readOnly: isReadOnly, writeOnly: isWriteOnly, + directory: cacheConfig.dir, }; } diff --git a/packages/core/src/agent/task-cache.ts b/packages/core/src/agent/task-cache.ts index c0d8e48c44..6910710662 100644 --- a/packages/core/src/agent/task-cache.ts +++ b/packages/core/src/agent/task-cache.ts @@ -70,7 +70,11 @@ export class TaskCache { cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, - options: { readOnly?: boolean; writeOnly?: boolean } = {}, + options: { + readOnly?: boolean; + writeOnly?: boolean; + cacheDirectory?: string; + } = {}, ) { assert(cacheId, 'cacheId is required'); let safeCacheId = replaceIllegalPathCharsAndSpace(cacheId); @@ -89,7 +93,10 @@ export class TaskCache { ifInBrowser || ifInWorker ? undefined : cacheFilePath || - join(getMidsceneRunSubDir('cache'), `${this.cacheId}${cacheFileExt}`); + join( + options.cacheDirectory || getMidsceneRunSubDir('cache'), + `${this.cacheId}${cacheFileExt}`, + ); const readOnlyMode = Boolean(options?.readOnly); const writeOnlyMode = Boolean(options?.writeOnly); diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index be25cb646e..c087ef32b3 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -1065,6 +1065,12 @@ export interface WebElementInfo extends BaseElement { export type CacheConfig = { strategy?: 'read-only' | 'read-write' | 'write-only'; id: string; + /** + * Optional cache directory path. + * When set, cache files are written to this directory instead of + * the `cache` subdirectory of the Midscene run directory. 
+ */ + dir?: string; }; export type Cache = diff --git a/packages/web-integration/tests/unit-test/agent.test.ts b/packages/web-integration/tests/unit-test/agent.test.ts index 97f56c0990..3b920b2aac 100644 --- a/packages/web-integration/tests/unit-test/agent.test.ts +++ b/packages/web-integration/tests/unit-test/agent.test.ts @@ -460,6 +460,43 @@ describe('PageAgent cache configuration', () => { expect(agent.taskCache?.cacheId).toBe('custom-writeonly-cache'); }); + it('should support custom cache dir without changing log/report dirs', () => { + const cacheDir = path.join( + process.cwd(), + 'tmp-custom-cache-dir', + `${Date.now()}`, + ); + const agent = new PageAgent(mockPage, { + cache: { + id: 'custom-cache-dir-id', + dir: cacheDir, + }, + modelConfig: mockedModelConfig, + }); + + expect(agent.taskCache).toBeDefined(); + expect(agent.taskCache?.cacheFilePath).toContain(cacheDir); + expect(agent.taskCache?.cacheFilePath).toContain( + 'custom-cache-dir-id.cache.yaml', + ); + + if (fs.existsSync(cacheDir)) { + fs.rmSync(cacheDir, { recursive: true, force: true }); + } + }); + + it('should throw error for empty cache dir', () => { + expect(() => { + new PageAgent(mockPage, { + cache: { + id: 'custom-cache-id', + dir: ' ', + }, + modelConfig: mockedModelConfig, + }); + }).toThrow('cache.dir must be a non-empty string when provided'); + }); + it('should throw error for cache: true even with testId', () => { expect(() => { new PageAgent(mockPage, {