From d30260ada939662fa9da8a73c497e0f3dbb84e5a Mon Sep 17 00:00:00 2001 From: EAGzzyCSL Date: Thu, 26 Mar 2026 16:31:24 +0800 Subject: [PATCH 1/3] feat(web): add configurable scrollMethod for web scrolling --- apps/site/docs/en/web-api-reference.mdx | 9 +- apps/site/docs/zh/web-api-reference.mdx | 9 +- .../web-integration/src/bridge-mode/common.ts | 7 ++ .../src/bridge-mode/page-browser-side.ts | 10 +- .../src/chrome-extension/page.ts | 40 +++++-- packages/web-integration/src/index.ts | 1 + .../src/playwright/ai-fixture.ts | 5 +- .../web-integration/src/playwright/index.ts | 13 ++- .../src/puppeteer/base-page.ts | 54 +++++++++- .../web-integration/src/puppeteer/index.ts | 1 + packages/web-integration/src/web-element.ts | 6 ++ .../unit-test/chrome-extension-cache.test.ts | 55 ++++++++++ .../unit-test/constructor-validation.test.ts | 21 ++++ .../tests/unit-test/scroll-method.test.ts | 102 ++++++++++++++++++ 14 files changed, 317 insertions(+), 16 deletions(-) create mode 100644 packages/web-integration/tests/unit-test/scroll-method.test.ts diff --git a/apps/site/docs/en/web-api-reference.mdx b/apps/site/docs/en/web-api-reference.mdx index 7b8bda28e8..5636854e9b 100644 --- a/apps/site/docs/en/web-api-reference.mdx +++ b/apps/site/docs/en/web-api-reference.mdx @@ -47,6 +47,7 @@ In addition to the base agent options, Puppeteer exposes: - `forceSameTabNavigation: boolean` — Restrict navigation to the current tab. Default `true`. - `waitForNavigationTimeout: number` — Maximum wait when a step causes navigation. Default `5000` (set `0` to skip waiting). - `waitForNetworkIdleTimeout: number` — Wait for network idle between actions to reduce flakiness. Default `2000` (set `0` to skip waiting). +- `scrollMethod: 'wheel' | 'gesture'` — Choose how Midscene scrolls. Default `wheel`. Use `gesture` for Cocos, WebGL, Canvas, or custom-drawn scroll areas where synthetic wheel events do not trigger scrolling. - `enableTouchEventsInActionSpace: boolean` — Add touch gestures (like swipe) to the action space so the agent can handle touch-only interactions. Default `false`. - `forceChromeSelectRendering: boolean` — Force `select` elements to render with Chrome's base-select styling so they're visible in screenshots/element extraction; requires Puppeteer > `24.6.0`. - `customActions: DeviceAction[]` — Register bespoke actions defined via `defineAction` so planning can call domain-specific steps. @@ -133,6 +134,7 @@ const agent = new PlaywrightAgent(page, { - `forceSameTabNavigation: boolean` — Keep automation inside the active tab. Default `true`. - `waitForNavigationTimeout: number` — Wait time for navigation completion. Default `5000` (set `0` to disable). - `waitForNetworkIdleTimeout: number` — Wait between actions for network idle. Default `2000` (set `0` to disable). +- `scrollMethod: 'wheel' | 'gesture'` — Choose how Midscene scrolls. Default `wheel`. Use `gesture` for Cocos, WebGL, Canvas, or custom-drawn scroll areas where synthetic wheel events do not trigger scrolling. `gesture` requires a Chromium-based browser. - `enableTouchEventsInActionSpace: boolean` — Add touch gestures (like swipe) to the action space so the agent can handle touch-only interactions. Default `false`. - `forceChromeSelectRendering: boolean` — Force `select` elements to render with Chrome's base-select styling so they're visible in screenshots/element extraction; requires Playwright ≥ `1.52.0`. - `customActions: DeviceAction[]` — Extend planning with project-specific actions. @@ -241,10 +243,12 @@ Call `connectCurrentTab` or `connectNewTabWithUrl` before issuing other actions. ```ts function connectCurrentTab(options?: { forceSameTabNavigation?: boolean; + scrollMethod?: 'wheel' | 'gesture'; }): Promise; ``` - `options.forceSameTabNavigation` (default `true`) intercepts new tabs and opens them in the current tab to simplify debugging; set to `false` if you want normal new-tab behavior (create a separate agent per tab). +- `options.scrollMethod` (default `'wheel'`) chooses how scrolling is synthesized in the connected tab. Use `'gesture'` for Cocos, WebGL, Canvas, or custom-drawn scroll areas. - Resolves on a successful handshake with the active tab; rejects if the extension is not allowed to connect. #### `connectNewTabWithUrl()` @@ -252,7 +256,10 @@ function connectCurrentTab(options?: { ```ts function connectNewTabWithUrl( url: string, - options?: { forceSameTabNavigation?: boolean }, + options?: { + forceSameTabNavigation?: boolean; + scrollMethod?: 'wheel' | 'gesture'; + }, ): Promise; ``` diff --git a/apps/site/docs/zh/web-api-reference.mdx b/apps/site/docs/zh/web-api-reference.mdx index 13e9779718..cfc64256c4 100644 --- a/apps/site/docs/zh/web-api-reference.mdx +++ b/apps/site/docs/zh/web-api-reference.mdx @@ -47,6 +47,7 @@ const agent = new PuppeteerAgent(page, { - `forceSameTabNavigation: boolean` —— 限制始终在当前标签页内导航,默认 `true`。 - `waitForNavigationTimeout: number` —— 当操作触发页面跳转时的最长等待时间,默认 `5000`(设为 `0` 表示不等待)。 - `waitForNetworkIdleTimeout: number` —— 每次操作后等待网络空闲的时间,默认 `2000`(设为 `0` 关闭)。 +- `scrollMethod: 'wheel' | 'gesture'` —— 控制 Midscene 使用哪种滚动实现,默认 `wheel`。对于 Cocos、WebGL、Canvas 或自绘滚动区这类场景,如果模拟滚轮事件无法触发滚动,建议改用 `gesture`。 - `enableTouchEventsInActionSpace: boolean` —— 在动作空间里增加触摸手势(如滑动),用于需要触摸事件的页面,默认 `false`。 - `forceChromeSelectRendering: boolean` —— 强制 `select` 元素使用 Chrome 的 base-select 样式,避免系统原生样式导致截图/元素提取不可见;需要 Puppeteer > `24.6.0`。 - `customActions: DeviceAction[]` —— 借助 `defineAction` 注册自定义动作,让规划器可以调用领域特定步骤。 @@ -133,6 +134,7 @@ const agent = new PlaywrightAgent(page, { - `forceSameTabNavigation: boolean` —— 强制在当前标签页内执行,默认 `true`。 - `waitForNavigationTimeout: number` —— 等待导航完成的时间,默认 `5000`(设为 `0` 关闭)。 - `waitForNetworkIdleTimeout: number` —— 每次操作后等待网络空闲的时间,默认 `2000`(设为 `0` 关闭)。 +- `scrollMethod: 'wheel' | 'gesture'` —— 控制 Midscene 使用哪种滚动实现,默认 `wheel`。对于 Cocos、WebGL、Canvas 或自绘滚动区这类场景,如果模拟滚轮事件无法触发滚动,建议改用 `gesture`。`gesture` 仅支持 Chromium 内核浏览器。 - `enableTouchEventsInActionSpace: boolean` —— 在动作空间里增加触摸手势(如滑动),用于需要触摸事件的页面,默认 `false`。 - `forceChromeSelectRendering: boolean` —— 强制 `select` 元素使用 Chrome 的 base-select 样式,避免系统原生样式导致截图/元素提取不可见;需要 Playwright ≥ `1.52.0`。 - `customActions: DeviceAction[]` —— 追加项目特有的动作,供规划器调用。 @@ -241,10 +243,12 @@ const agent = new AgentOverChromeBridge({ ```ts function connectCurrentTab(options?: { forceSameTabNavigation?: boolean; + scrollMethod?: 'wheel' | 'gesture'; }): Promise; ``` - `options.forceSameTabNavigation`(默认 `true`)会拦截新标签并在当前页打开,方便调试;若想保留新标签行为可设为 `false`,但需要为每个新标签创建新的 Agent。 +- `options.scrollMethod`(默认 `'wheel'`)用于控制连接后标签页里的滚动实现。对于 Cocos、WebGL、Canvas 或自绘滚动区,建议使用 `'gesture'`。 - 连接当前激活标签页,成功后返回 `Promise`,如果扩展未允许连接会报错。 #### `connectNewTabWithUrl()` @@ -252,7 +256,10 @@ function connectCurrentTab(options?: { ```ts function connectNewTabWithUrl( url: string, - options?: { forceSameTabNavigation?: boolean }, + options?: { + forceSameTabNavigation?: boolean; + scrollMethod?: 'wheel' | 'gesture'; + }, ): Promise; ``` diff --git a/packages/web-integration/src/bridge-mode/common.ts b/packages/web-integration/src/bridge-mode/common.ts index 5a95535a72..f8eeea5121 100644 --- a/packages/web-integration/src/bridge-mode/common.ts +++ b/packages/web-integration/src/bridge-mode/common.ts @@ -1,3 +1,5 @@ +import type { ScrollMethod } from '../web-element'; + export const DefaultBridgeServerHost = '127.0.0.1'; export const DefaultBridgeServerPort = 3766; export const DefaultLocalEndpoint = `http://${DefaultBridgeServerHost}:${DefaultBridgeServerPort}`; @@ -42,6 +44,11 @@ export interface BridgeConnectTabOptions { * @default true */ forceSameTabNavigation?: boolean; + /** + * Choose how scroll is synthesized in the connected tab. + * @default 'wheel' + */ + scrollMethod?: ScrollMethod; /** * Custom timeout for connecting to the tab in milliseconds. * @default 30000 (30 seconds) diff --git a/packages/web-integration/src/bridge-mode/page-browser-side.ts b/packages/web-integration/src/bridge-mode/page-browser-side.ts index 9f699b199b..de7580c83e 100644 --- a/packages/web-integration/src/bridge-mode/page-browser-side.ts +++ b/packages/web-integration/src/bridge-mode/page-browser-side.ts @@ -1,5 +1,6 @@ import { assert } from '@midscene/shared/utils'; import ChromeExtensionProxyPage from '../chrome-extension/page'; +import { ScrollMethod } from '../web-element'; import type { ChromePageDestroyOptions, KeyboardAction, @@ -34,9 +35,10 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { type: 'log' | 'status', ) => void = () => {}, forceSameTabNavigation = true, + scrollMethod: ScrollMethod = ScrollMethod.Wheel, public onConnectionRequest?: () => Promise, ) { - super(forceSameTabNavigation); + super(forceSameTabNavigation, scrollMethod); } private async setupBridgeClient() { @@ -181,6 +183,9 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { if (options?.forceSameTabNavigation) { this.forceSameTabNavigation = true; } + if (options?.scrollMethod) { + this.scrollMethod = options.scrollMethod; + } await this.setActiveTabId(tabId); } @@ -199,6 +204,9 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { if (options?.forceSameTabNavigation) { this.forceSameTabNavigation = true; } + if (options?.scrollMethod) { + this.scrollMethod = options.scrollMethod; + } await this.setActiveTabId(tabId); } diff --git a/packages/web-integration/src/chrome-extension/page.ts b/packages/web-integration/src/chrome-extension/page.ts index 8c79905664..592a34f99b 100644 --- a/packages/web-integration/src/chrome-extension/page.ts +++ b/packages/web-integration/src/chrome-extension/page.ts @@ -5,7 +5,7 @@ The page must be active when interacting with it. */ -import { limitOpenNewTabScript } from '@/web-element'; +import { ScrollMethod, limitOpenNewTabScript } from '@/web-element'; import type { ElementCacheFeature, ElementTreeNode, @@ -50,6 +50,8 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { public forceSameTabNavigation: boolean; + public scrollMethod: ScrollMethod; + private viewportSize?: Size; private activeTabId: number | null = null; @@ -60,8 +62,12 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { public _continueWhenFailedToAttachDebugger = false; - constructor(forceSameTabNavigation: boolean) { + constructor( + forceSameTabNavigation: boolean, + scrollMethod: ScrollMethod = ScrollMethod.Wheel, + ) { this.forceSameTabNavigation = forceSameTabNavigation; + this.scrollMethod = scrollMethod; } actionSpace(): DeviceAction[] { @@ -682,13 +688,29 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { const finalX = startX || this.latestMouseX; const finalY = startY || this.latestMouseY; await this.showMousePointer(finalX, finalY); - await this.sendCommandToDebugger('Input.dispatchMouseEvent', { - type: 'mouseWheel', - x: finalX, - y: finalY, - deltaX, - deltaY, - }); + if (this.scrollMethod === ScrollMethod.Gesture) { + await this.sendCommandToDebugger('Input.synthesizeScrollGesture', { + x: finalX, + y: finalY, + // synthesizeScrollGesture uses gesture distances, whose directions are + // opposite to wheel deltas for the same visual scroll result. + xDistance: -deltaX, + yDistance: -deltaY, + // speed is measured in pixels per second, so it must stay very high; + // otherwise our "scroll to edge" calls would take a long time to finish. + speed: 9999999, + repeatCount: 0, + preventFling: true, + }); + } else { + await this.sendCommandToDebugger('Input.dispatchMouseEvent', { + type: 'mouseWheel', + x: finalX, + y: finalY, + deltaX, + deltaY, + }); + } this.latestMouseX = finalX; this.latestMouseY = finalY; }, diff --git a/packages/web-integration/src/index.ts b/packages/web-integration/src/index.ts index a915276efa..117f6b003e 100644 --- a/packages/web-integration/src/index.ts +++ b/packages/web-integration/src/index.ts @@ -4,6 +4,7 @@ export type { PlayWrightAiFixtureType } from './playwright'; export { Agent as PageAgent, type AgentOpt } from '@midscene/core/agent'; export { PuppeteerAgent } from './puppeteer'; export { PlaywrightAgent } from './playwright'; +export { ScrollMethod } from './web-element'; export { StaticPageAgent, StaticPage } from './static'; export { WebMidsceneTools } from './mcp-tools'; export { webPlaygroundPlatform } from './platform'; diff --git a/packages/web-integration/src/playwright/ai-fixture.ts b/packages/web-integration/src/playwright/ai-fixture.ts index 2db7e451d2..c9edf2714b 100644 --- a/packages/web-integration/src/playwright/ai-fixture.ts +++ b/packages/web-integration/src/playwright/ai-fixture.ts @@ -2,7 +2,7 @@ import { rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { PlaywrightAgent, type PlaywrightWebPage } from '@/playwright/index'; -import type { WebPageAgentOpt } from '@/web-element'; +import type { ScrollMethod, WebPageAgentOpt } from '@/web-element'; import type { Cache } from '@midscene/core'; import type { AgentOpt, Agent as PageAgent } from '@midscene/core/agent'; import { processCacheConfig } from '@midscene/core/utils'; @@ -60,12 +60,14 @@ export const PlaywrightAiFixture = (options?: { forceSameTabNavigation?: boolean; waitForNetworkIdleTimeout?: number; waitForNavigationTimeout?: number; + scrollMethod?: ScrollMethod; cache?: PlaywrightCache; }) => { const { forceSameTabNavigation = true, waitForNetworkIdleTimeout = DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT, waitForNavigationTimeout = DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT, + scrollMethod, cache, } = options ?? {}; @@ -95,6 +97,7 @@ export const PlaywrightAiFixture = (options?: { pageAgentMap[idForPage] = new PlaywrightAgent(page, { testId: `playwright-${testId}-${idForPage}`, forceSameTabNavigation, + scrollMethod, cache: cacheConfig, groupName: title, groupDescription: file, diff --git a/packages/web-integration/src/playwright/index.ts b/packages/web-integration/src/playwright/index.ts index 29642e3c59..889b7e0d04 100644 --- a/packages/web-integration/src/playwright/index.ts +++ b/packages/web-integration/src/playwright/index.ts @@ -7,7 +7,8 @@ export { PlaywrightAiFixture } from './ai-fixture'; export { overrideAIConfig } from '@midscene/shared/env'; export { WebPage as PlaywrightWebPage } from './page'; export type { WebPageAgentOpt } from '@/web-element'; -import type { WebPageAgentOpt } from '@/web-element'; +export { ScrollMethod } from '@/web-element'; +import { ScrollMethod, type WebPageAgentOpt } from '@/web-element'; import { getDebug } from '@midscene/shared/logger'; import semver from 'semver'; import { @@ -46,6 +47,16 @@ export class PlaywrightAgent extends PageAgent { '[midscene] PlaywrightAgent requires a valid Playwright page instance. Please make sure to pass a valid page object.', ); } + + if (opts?.scrollMethod === ScrollMethod.Gesture) { + const browserName = page.context().browser()?.browserType().name(); + if (browserName && browserName !== 'chromium') { + throw new Error( + `[midscene] scrollMethod "gesture" requires a Chromium-based Playwright browser, but current browser is "${browserName}". Use scrollMethod "wheel" instead.`, + ); + } + } + const webPage = new PlaywrightWebPage(page, opts); super(webPage, opts); diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index f5a920a248..d0866c16eb 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -1,4 +1,4 @@ -import type { WebPageAgentOpt } from '@/web-element'; +import { ScrollMethod, type WebPageAgentOpt } from '@/web-element'; import type { DeviceAction, ElementCacheFeature, @@ -56,11 +56,14 @@ export class Page< private onAfterInvokeAction?: AbstractInterface['afterInvokeAction']; private customActions?: DeviceAction[]; private enableTouchEventsInActionSpace: boolean; + private scrollMethod: ScrollMethod; private puppeteerFileChooserSession?: CDPSession; private puppeteerFileChooserHandler?: ( event: Protocol.Page.FileChooserOpenedEvent, ) => Promise; interfaceType: AgentType; + private latestMouseX = 0; + private latestMouseY = 0; actionSpace(): DeviceAction[] { const defaultActions = commonWebActionsForWebPage( @@ -108,6 +111,7 @@ export class Page< this.customActions = opts?.customActions; this.enableTouchEventsInActionSpace = opts?.enableTouchEventsInActionSpace ?? false; + this.scrollMethod = opts?.scrollMethod ?? ScrollMethod.Wheel; } async evaluateJavaScript(script: string): Promise { @@ -356,7 +360,9 @@ export class Page< }, wheel: async (deltaX: number, deltaY: number) => { debugPage(`mouse wheel ${deltaX}, ${deltaY}`); - if (this.interfaceType === 'puppeteer') { + if (this.scrollMethod === ScrollMethod.Gesture) { + await this.synthesizeScrollGesture(deltaX, deltaY); + } else if (this.interfaceType === 'puppeteer') { await (this.underlyingPage as PuppeteerPage).mouse.wheel({ deltaX, deltaY, @@ -370,6 +376,8 @@ export class Page< }, move: async (x: number, y: number) => { this.everMoved = true; + this.latestMouseX = x; + this.latestMouseY = y; debugPage(`mouse move to ${x}, ${y}`); return this.underlyingPage.mouse.move(x, y); }, @@ -465,6 +473,48 @@ export class Page< debugPage('clearInput end'); } + private async synthesizeScrollGesture( + deltaX: number, + deltaY: number, + ): Promise { + type ScrollGestureClient = { + send( + method: 'Input.synthesizeScrollGesture', + params?: Protocol.Input.SynthesizeScrollGestureRequest, + ): Promise; + detach(): Promise; + }; + + let client: ScrollGestureClient; + if (this.interfaceType === 'puppeteer') { + const page = this.underlyingPage as PuppeteerPage; + client = (await page.createCDPSession()) as ScrollGestureClient; + } else if (this.interfaceType === 'playwright') { + const page = this.underlyingPage as PlaywrightPage; + client = (await page + .context() + .newCDPSession(page)) as ScrollGestureClient; + } else { + return; + } + + try { + await client.send('Input.synthesizeScrollGesture', { + x: Math.round(this.latestMouseX), + y: Math.round(this.latestMouseY), + xDistance: -deltaX, + yDistance: -deltaY, + // speed is measured in pixels per second, so it must stay very high; + // otherwise our "scroll to edge" calls would take a long time to finish. + speed: 9999999, + repeatCount: 0, + preventFling: true, + }); + } finally { + await client.detach(); + } + } + private everMoved = false; private async moveToPointBeforeScroll(point?: Point): Promise { if (point) { diff --git a/packages/web-integration/src/puppeteer/index.ts b/packages/web-integration/src/puppeteer/index.ts index b9060478be..e13e2910e3 100644 --- a/packages/web-integration/src/puppeteer/index.ts +++ b/packages/web-integration/src/puppeteer/index.ts @@ -28,6 +28,7 @@ function getPuppeteerVersion(): string | null { export { PuppeteerWebPage } from './page'; export type { WebPageAgentOpt } from '@/web-element'; +export { ScrollMethod } from '@/web-element'; export class PuppeteerAgent extends PageAgent { protected isRetryableContextError(error: unknown): boolean { diff --git a/packages/web-integration/src/web-element.ts b/packages/web-integration/src/web-element.ts index bd2f58494f..a69d943572 100644 --- a/packages/web-integration/src/web-element.ts +++ b/packages/web-integration/src/web-element.ts @@ -9,11 +9,17 @@ import { _keyDefinitions } from '@midscene/shared/us-keyboard-layout'; import type { NodeType } from '@midscene/shared/constants'; export type { WebElementInfo }; +export enum ScrollMethod { + Wheel = 'wheel', + Gesture = 'gesture', +} + export type WebPageAgentOpt = AgentOpt & WebPageOpt; export type WebPageOpt = { waitForNavigationTimeout?: number; waitForNetworkIdleTimeout?: number; forceSameTabNavigation?: boolean /* if limit the new tab to the current page, default true */; + scrollMethod?: ScrollMethod; enableTouchEventsInActionSpace?: boolean; /** * Force Chrome to render select elements using base-select appearance instead of OS-native rendering. diff --git a/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts b/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts index ddd77479e9..d427c4b67a 100644 --- a/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts +++ b/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts @@ -4,6 +4,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; vi.stubGlobal('chrome', { tabs: { update: vi.fn(), + query: vi.fn().mockResolvedValue([{ id: 123 }]), }, debugger: { attach: vi.fn(), @@ -24,6 +25,7 @@ vi.mock('@midscene/shared/logger', () => ({ import { AiJudgeOrderSensitive } from '@midscene/core/ai-model'; import ChromeExtensionProxyPage from '../../src/chrome-extension/page'; +import { ScrollMethod } from '../../src/web-element'; describe('ChromeExtensionProxyPage cache methods', () => { let page: ChromeExtensionProxyPage; @@ -37,6 +39,59 @@ describe('ChromeExtensionProxyPage cache methods', () => { vi.restoreAllMocks(); }); + describe('mouse.wheel', () => { + it('should use mouseWheel by default', async () => { + vi.spyOn(page as any, 'showMousePointer').mockResolvedValue(undefined); + vi.spyOn(page as any, 'enableWaterFlowAnimation').mockResolvedValue( + undefined, + ); + (page as any).activeTabId = 123; + + await page.mouse.wheel(120, 240, 300, 400); + + expect(chrome.debugger.sendCommand).toHaveBeenCalledWith( + { tabId: 123 }, + 'Input.dispatchMouseEvent', + { + type: 'mouseWheel', + x: 300, + y: 400, + deltaX: 120, + deltaY: 240, + }, + ); + expect((page as any).latestMouseX).toBe(300); + expect((page as any).latestMouseY).toBe(400); + }); + + it('should use synthesizeScrollGesture when scrollMethod is gesture', async () => { + page = new ChromeExtensionProxyPage(false, ScrollMethod.Gesture); + vi.spyOn(page as any, 'showMousePointer').mockResolvedValue(undefined); + vi.spyOn(page as any, 'enableWaterFlowAnimation').mockResolvedValue( + undefined, + ); + (page as any).activeTabId = 123; + + await page.mouse.wheel(120, 240, 300, 400); + + expect(chrome.debugger.sendCommand).toHaveBeenCalledWith( + { tabId: 123 }, + 'Input.synthesizeScrollGesture', + { + x: 300, + y: 400, + xDistance: -120, + yDistance: -240, + speed: 9999999, + repeatCount: 0, + preventFling: true, + }, + ); + expect((page as any).latestMouseX).toBe(300); + expect((page as any).latestMouseY).toBe(400); + }); + }); + describe('cacheFeatureForPoint', () => { it('should return xpaths for a valid point', async () => { const mockXpaths = ['/html/body/div[1]', '/html/body/div[1]/button[1]']; diff --git a/packages/web-integration/tests/unit-test/constructor-validation.test.ts b/packages/web-integration/tests/unit-test/constructor-validation.test.ts index c5f4de30e8..027bea429f 100644 --- a/packages/web-integration/tests/unit-test/constructor-validation.test.ts +++ b/packages/web-integration/tests/unit-test/constructor-validation.test.ts @@ -14,6 +14,27 @@ describe('PlaywrightAgent constructor validation', () => { '[midscene] PlaywrightAgent requires a valid Playwright page instance', ); }); + + it('should throw when gesture scroll is used with non-chromium browser', async () => { + const { PlaywrightAgent, ScrollMethod } = await import('@/playwright'); + const page = { + context: () => ({ + browser: () => ({ + browserType: () => ({ + name: () => 'firefox', + }), + }), + }), + }; + + expect( + () => + new PlaywrightAgent(page as any, { + forceSameTabNavigation: false, + scrollMethod: ScrollMethod.Gesture, + }), + ).toThrow('scrollMethod "gesture" requires a Chromium-based Playwright'); + }); }); describe('PuppeteerAgent constructor validation', () => { diff --git a/packages/web-integration/tests/unit-test/scroll-method.test.ts b/packages/web-integration/tests/unit-test/scroll-method.test.ts new file mode 100644 index 0000000000..969dea3a57 --- /dev/null +++ b/packages/web-integration/tests/unit-test/scroll-method.test.ts @@ -0,0 +1,102 @@ +import { WebPage as PlaywrightWebPage } from '@/playwright/page'; +import { PuppeteerWebPage } from '@/puppeteer/page'; +import { ScrollMethod } from '@/web-element'; +import { describe, expect, it, vi } from 'vitest'; + +describe('web scroll methods', () => { + it('uses wheel events by default for Puppeteer', async () => { + const mouse = { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }; + const page = { + mouse, + createCDPSession: vi.fn(), + }; + const webPage = new PuppeteerWebPage(page as any); + + await webPage.mouse.wheel(12, -34); + + expect(mouse.wheel).toHaveBeenCalledWith({ + deltaX: 12, + deltaY: -34, + }); + expect(page.createCDPSession).not.toHaveBeenCalled(); + }); + + it('uses CDP scroll gestures for Puppeteer when configured', async () => { + const mouse = { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }; + const session = { + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }; + const page = { + mouse, + createCDPSession: vi.fn().mockResolvedValue(session), + }; + const webPage = new PuppeteerWebPage(page as any, { + scrollMethod: ScrollMethod.Gesture, + }); + + await webPage.mouse.move(300, 400); + await webPage.mouse.wheel(120, 240); + + expect(page.createCDPSession).toHaveBeenCalledTimes(1); + expect(session.send).toHaveBeenCalledWith('Input.synthesizeScrollGesture', { + x: 300, + y: 400, + xDistance: -120, + yDistance: -240, + speed: 9999999, + repeatCount: 0, + preventFling: true, + }); + expect(session.detach).toHaveBeenCalledTimes(1); + expect(mouse.wheel).not.toHaveBeenCalled(); + }); + + it('uses CDP scroll gestures for Playwright when configured', async () => { + const mouse = { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }; + const session = { + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = { + newCDPSession: vi.fn().mockResolvedValue(session), + browser: () => ({ + browserType: () => ({ + name: () => 'chromium', + }), + }), + }; + const page = { + mouse, + context: () => context, + }; + const webPage = new PlaywrightWebPage(page as any, { + scrollMethod: ScrollMethod.Gesture, + }); + + await webPage.mouse.move(500, 600); + await webPage.mouse.wheel(-80, 160); + + expect(context.newCDPSession).toHaveBeenCalledWith(page); + expect(session.send).toHaveBeenCalledWith('Input.synthesizeScrollGesture', { + x: 500, + y: 600, + xDistance: 80, + yDistance: -160, + speed: 9999999, + repeatCount: 0, + preventFling: true, + }); + expect(session.detach).toHaveBeenCalledTimes(1); + expect(mouse.wheel).not.toHaveBeenCalled(); + }); +}); From 4fc5f681f0ee6c8e40ae5142a401336080398c08 Mon Sep 17 00:00:00 2001 From: ottomao Date: Thu, 26 Mar 2026 19:42:24 +0800 Subject: [PATCH 2/3] fix(web): include pinch action inside touch spaces --- packages/web-integration/src/web-page.ts | 19 ++++++---- .../tests/ai/web/puppeteer/pinch.test.ts | 24 ++++++++++--- .../unit-test/base-page-invoke-action.test.ts | 4 +++ .../tests/unit-test/scroll-method.test.ts | 35 +++++++++++++++++++ 4 files changed, 71 insertions(+), 11 deletions(-) diff --git a/packages/web-integration/src/web-page.ts b/packages/web-integration/src/web-page.ts index a33f333473..a720053d30 100644 --- a/packages/web-integration/src/web-page.ts +++ b/packages/web-integration/src/web-page.ts @@ -579,15 +579,20 @@ export const commonWebActionsForWebPage = ( await page.longPress(element.center[0], element.center[1], duration); }), - defineActionPinch(async (param) => { - const { centerX, centerY, startDistance, endDistance, duration } = - normalizePinchParam(param, await page.size()); - - await page.pinch(centerX, centerY, startDistance, endDistance, duration); - }), - ...(includeTouchEvents ? [ + defineActionPinch(async (param) => { + const { centerX, centerY, startDistance, endDistance, duration } = + normalizePinchParam(param, await page.size()); + + await page.pinch( + centerX, + centerY, + startDistance, + endDistance, + duration, + ); + }), defineActionSwipe(async (param) => { const { width, height } = await page.size(); const { start, end } = param; diff --git a/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts b/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts index 258902cd58..1ac5a29094 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts @@ -13,15 +13,14 @@ describe( () => { const ctx = createTestContext(); - it('Pinch action is always available in action space', async () => { + it('Pinch action is NOT available without enableTouchEventsInActionSpace', async () => { const { originPage, reset } = await launchPage('https://www.example.com'); ctx.resetFn = reset; ctx.agent = new PuppeteerAgent(originPage); const actionSpace = await ctx.agent.getActionSpace(); const pinchAction = actionSpace.find((a) => a.name === 'Pinch'); - expect(pinchAction).toBeDefined(); - expect(pinchAction!.interfaceAlias).toBe('aiPinch'); + expect(pinchAction).toBeUndefined(); }); it('Swipe action is NOT available without enableTouchEventsInActionSpace', async () => { @@ -34,6 +33,21 @@ describe( expect(swipeAction).toBeUndefined(); }); + it('Pinch and Swipe are available when enableTouchEventsInActionSpace is true', async () => { + const { originPage, reset } = await launchPage('https://www.example.com'); + ctx.resetFn = reset; + ctx.agent = new PuppeteerAgent(originPage, { + enableTouchEventsInActionSpace: true, + }); + + const actionSpace = await ctx.agent.getActionSpace(); + const pinchAction = actionSpace.find((a) => a.name === 'Pinch'); + const swipeAction = actionSpace.find((a) => a.name === 'Swipe'); + expect(pinchAction).toBeDefined(); + expect(pinchAction!.interfaceAlias).toBe('aiPinch'); + expect(swipeAction).toBeDefined(); + }); + it('Pinch and Scroll do not conflict', async () => { const htmlPath = getFixturePath('pinch-scroll.html'); const { originPage, reset } = await launchPage(`file://${htmlPath}`, { @@ -46,7 +60,9 @@ describe( }, }); ctx.resetFn = reset; - ctx.agent = new PuppeteerAgent(originPage); + ctx.agent = new PuppeteerAgent(originPage, { + enableTouchEventsInActionSpace: true, + }); // Step 1: Verify initial state await ctx.agent.aiAssert( diff --git a/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts b/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts index c4e9aaecf1..4e23994290 100644 --- a/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts +++ b/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts @@ -21,6 +21,10 @@ vi.mock('@midscene/shared/node', () => ({ })); vi.mock('@/web-element', () => ({ + ScrollMethod: { + Wheel: 'wheel', + Gesture: 'gesture', + }, WebPageContextParser: vi.fn().mockResolvedValue({ tree: { node: null, children: [] }, shotSize: { width: 1024, height: 768 }, diff --git a/packages/web-integration/tests/unit-test/scroll-method.test.ts b/packages/web-integration/tests/unit-test/scroll-method.test.ts index 969dea3a57..e2735cb159 100644 --- a/packages/web-integration/tests/unit-test/scroll-method.test.ts +++ b/packages/web-integration/tests/unit-test/scroll-method.test.ts @@ -4,6 +4,41 @@ import { ScrollMethod } from '@/web-element'; import { describe, expect, it, vi } from 'vitest'; describe('web scroll methods', () => { + it('does not expose touch gestures in action space by default', async () => { + const page = { + mouse: { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }, + createCDPSession: vi.fn(), + }; + const webPage = new PuppeteerWebPage(page as any); + + const actionNames = webPage.actionSpace().map((action) => action.name); + + expect(actionNames).not.toContain('Swipe'); + expect(actionNames).not.toContain('Pinch'); + expect(actionNames).toContain('Scroll'); + }); + + it('exposes swipe and pinch when touch actions are enabled', async () => { + const page = { + mouse: { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }, + createCDPSession: vi.fn(), + }; + const webPage = new PuppeteerWebPage(page as any, { + enableTouchEventsInActionSpace: true, + }); + + const actionNames = webPage.actionSpace().map((action) => action.name); + + expect(actionNames).toContain('Swipe'); + expect(actionNames).toContain('Pinch'); + }); + it('uses wheel events by default for Puppeteer', async () => { const mouse = { move: vi.fn().mockResolvedValue(undefined), From 75f920e2bdf2685f8854e7d555c719f19395f134 Mon Sep 17 00:00:00 2001 From: ottomao Date: Thu, 26 Mar 2026 20:08:33 +0800 Subject: [PATCH 3/3] chore(core): fix naming issue --- apps/site/docs/en/web-api-reference.mdx | 18 +++--- apps/site/docs/zh/web-api-reference.mdx | 18 +++--- .../src/bridge-mode/agent-cli-side.ts | 14 ++++- .../web-integration/src/bridge-mode/common.ts | 9 +-- .../src/bridge-mode/page-browser-side.ts | 19 +++--- .../src/chrome-extension/page.ts | 19 ++++-- packages/web-integration/src/index.ts | 2 +- .../src/playwright/ai-fixture.ts | 8 +-- .../web-integration/src/playwright/index.ts | 13 ++-- .../src/puppeteer/base-page.ts | 18 +++--- .../web-integration/src/puppeteer/index.ts | 2 +- packages/web-integration/src/web-element.ts | 25 ++++++-- packages/web-integration/src/web-page.ts | 5 +- .../tests/ai/web/puppeteer/pinch.test.ts | 10 ++-- .../unit-test/base-page-invoke-action.test.ts | 13 +++- .../unit-test/chrome-extension-cache.test.ts | 18 +++++- .../unit-test/constructor-validation.test.ts | 16 +++-- .../tests/unit-test/scroll-method.test.ts | 60 +++++++++++++++++-- .../yaml/input-mode-typeonly.test.ts | 7 ++- 19 files changed, 209 insertions(+), 85 deletions(-) diff --git a/apps/site/docs/en/web-api-reference.mdx b/apps/site/docs/en/web-api-reference.mdx index 5636854e9b..36a2f83bcf 100644 --- a/apps/site/docs/en/web-api-reference.mdx +++ b/apps/site/docs/en/web-api-reference.mdx @@ -15,8 +15,8 @@ PuppeteerAgent, PlaywrightAgent, and Chrome Bridge share one action space; the M - `Scroll` — Scroll from an element or screen center; supports scroll-to-top/bottom/left/right helpers. - `DragAndDrop` — Drag from one element to another. - `LongPress` — Long-press a target element with optional duration. -- `Swipe` — Touch-style swipe gesture (available when `enableTouchEventsInActionSpace` is `true`). -- `Pinch` — Two-finger pinch gesture for zoom in/out (available when `enableTouchEventsInActionSpace` is `true`; Chromium-based browsers only for Playwright). +- `Swipe` — Touch-style swipe gesture (available when `interactionMode` is `touch`, or the legacy `enableTouchEventsInActionSpace` is `true`). +- `Pinch` — Two-finger pinch gesture for zoom in/out (available when `interactionMode` is `touch`, or the legacy `enableTouchEventsInActionSpace` is `true`; Chromium-based browsers only for Playwright). - `ClearInput` — Clear the contents of an input field. - `Navigate` — Open a URL in the current tab. - `Reload` — Reload the page. @@ -47,8 +47,8 @@ In addition to the base agent options, Puppeteer exposes: - `forceSameTabNavigation: boolean` — Restrict navigation to the current tab. Default `true`. - `waitForNavigationTimeout: number` — Maximum wait when a step causes navigation. Default `5000` (set `0` to skip waiting). - `waitForNetworkIdleTimeout: number` — Wait for network idle between actions to reduce flakiness. Default `2000` (set `0` to skip waiting). -- `scrollMethod: 'wheel' | 'gesture'` — Choose how Midscene scrolls. Default `wheel`. Use `gesture` for Cocos, WebGL, Canvas, or custom-drawn scroll areas where synthetic wheel events do not trigger scrolling. -- `enableTouchEventsInActionSpace: boolean` — Add touch gestures (like swipe) to the action space so the agent can handle touch-only interactions. Default `false`. +- `interactionMode: 'mouse' | 'touch'` — Choose the overall interaction mode. Default `mouse`. `touch` exposes touch gestures in the action space and uses gesture-based scrolling by default. +- `enableTouchEventsInActionSpace: boolean` — Legacy compatibility option. When `true`, it behaves like `interactionMode: 'touch'` for touch actions and default scrolling. - `forceChromeSelectRendering: boolean` — Force `select` elements to render with Chrome's base-select styling so they're visible in screenshots/element extraction; requires Puppeteer > `24.6.0`. - `customActions: DeviceAction[]` — Register bespoke actions defined via `defineAction` so planning can call domain-specific steps. @@ -134,8 +134,8 @@ const agent = new PlaywrightAgent(page, { - `forceSameTabNavigation: boolean` — Keep automation inside the active tab. Default `true`. - `waitForNavigationTimeout: number` — Wait time for navigation completion. Default `5000` (set `0` to disable). - `waitForNetworkIdleTimeout: number` — Wait between actions for network idle. Default `2000` (set `0` to disable). -- `scrollMethod: 'wheel' | 'gesture'` — Choose how Midscene scrolls. Default `wheel`. Use `gesture` for Cocos, WebGL, Canvas, or custom-drawn scroll areas where synthetic wheel events do not trigger scrolling. `gesture` requires a Chromium-based browser. -- `enableTouchEventsInActionSpace: boolean` — Add touch gestures (like swipe) to the action space so the agent can handle touch-only interactions. Default `false`. +- `interactionMode: 'mouse' | 'touch'` — Choose the overall interaction mode. Default `mouse`. `touch` exposes touch gestures in the action space and uses gesture-based scrolling by default. `touch` requires a Chromium-based browser for Playwright. +- `enableTouchEventsInActionSpace: boolean` — Legacy compatibility option. When `true`, it behaves like `interactionMode: 'touch'` for touch actions and default scrolling. - `forceChromeSelectRendering: boolean` — Force `select` elements to render with Chrome's base-select styling so they're visible in screenshots/element extraction; requires Playwright ≥ `1.52.0`. - `customActions: DeviceAction[]` — Extend planning with project-specific actions. @@ -243,12 +243,12 @@ Call `connectCurrentTab` or `connectNewTabWithUrl` before issuing other actions. ```ts function connectCurrentTab(options?: { forceSameTabNavigation?: boolean; - scrollMethod?: 'wheel' | 'gesture'; + interactionMode?: 'mouse' | 'touch'; }): Promise; ``` - `options.forceSameTabNavigation` (default `true`) intercepts new tabs and opens them in the current tab to simplify debugging; set to `false` if you want normal new-tab behavior (create a separate agent per tab). -- `options.scrollMethod` (default `'wheel'`) chooses how scrolling is synthesized in the connected tab. Use `'gesture'` for Cocos, WebGL, Canvas, or custom-drawn scroll areas. +- `options.interactionMode` (default `'mouse'`) controls the connected tab as mouse or touch. `touch` uses gesture-based scrolling by default. - Resolves on a successful handshake with the active tab; rejects if the extension is not allowed to connect. #### `connectNewTabWithUrl()` @@ -258,7 +258,7 @@ function connectNewTabWithUrl( url: string, options?: { forceSameTabNavigation?: boolean; - scrollMethod?: 'wheel' | 'gesture'; + interactionMode?: 'mouse' | 'touch'; }, ): Promise; ``` diff --git a/apps/site/docs/zh/web-api-reference.mdx b/apps/site/docs/zh/web-api-reference.mdx index cfc64256c4..c25c0ad996 100644 --- a/apps/site/docs/zh/web-api-reference.mdx +++ b/apps/site/docs/zh/web-api-reference.mdx @@ -15,8 +15,8 @@ PuppeteerAgent、PlaywrightAgent 和 Chrome Bridge 共用一套 Action Space,M - `Scroll` —— 以元素为起点或从屏幕中央滚动,支持滚动到顶/底/左/右。 - `DragAndDrop` —— 从一个元素拖拽到另一个元素。 - `LongPress` —— 长按目标元素,可选自定义时长。 -- `Swipe` —— 触摸式滑动(开启 `enableTouchEventsInActionSpace` 时可用)。 -- `Pinch` —— 双指缩放手势,用于放大/缩小(开启 `enableTouchEventsInActionSpace` 时可用;Playwright 仅支持 Chromium 内核浏览器)。 +- `Swipe` —— 触摸式滑动(`interactionMode` 为 `touch` 时可用,或兼容旧参数 `enableTouchEventsInActionSpace: true`)。 +- `Pinch` —— 双指缩放手势,用于放大/缩小(`interactionMode` 为 `touch` 时可用,或兼容旧参数 `enableTouchEventsInActionSpace: true`;Playwright 仅支持 Chromium 内核浏览器)。 - `ClearInput` —— 清空输入框内容。 - `Navigate` —— 在当前标签页打开指定 URL。 - `Reload` —— 刷新当前页面。 @@ -47,8 +47,8 @@ const agent = new PuppeteerAgent(page, { - `forceSameTabNavigation: boolean` —— 限制始终在当前标签页内导航,默认 `true`。 - `waitForNavigationTimeout: number` —— 当操作触发页面跳转时的最长等待时间,默认 `5000`(设为 `0` 表示不等待)。 - `waitForNetworkIdleTimeout: number` —— 每次操作后等待网络空闲的时间,默认 `2000`(设为 `0` 关闭)。 -- `scrollMethod: 'wheel' | 'gesture'` —— 控制 Midscene 使用哪种滚动实现,默认 `wheel`。对于 Cocos、WebGL、Canvas 或自绘滚动区这类场景,如果模拟滚轮事件无法触发滚动,建议改用 `gesture`。 -- `enableTouchEventsInActionSpace: boolean` —— 在动作空间里增加触摸手势(如滑动),用于需要触摸事件的页面,默认 `false`。 +- `interactionMode: 'mouse' | 'touch'` —— 控制整体交互模式,默认 `mouse`。设为 `touch` 时,会在动作空间中暴露触摸手势,并默认使用手势滚动。 +- `enableTouchEventsInActionSpace: boolean` —— 兼容旧参数。设为 `true` 时,会按 `interactionMode: 'touch'` 处理触摸动作和默认滚动方式。 - `forceChromeSelectRendering: boolean` —— 强制 `select` 元素使用 Chrome 的 base-select 样式,避免系统原生样式导致截图/元素提取不可见;需要 Puppeteer > `24.6.0`。 - `customActions: DeviceAction[]` —— 借助 `defineAction` 注册自定义动作,让规划器可以调用领域特定步骤。 @@ -134,8 +134,8 @@ const agent = new PlaywrightAgent(page, { - `forceSameTabNavigation: boolean` —— 强制在当前标签页内执行,默认 `true`。 - `waitForNavigationTimeout: number` —— 等待导航完成的时间,默认 `5000`(设为 `0` 关闭)。 - `waitForNetworkIdleTimeout: number` —— 每次操作后等待网络空闲的时间,默认 `2000`(设为 `0` 关闭)。 -- `scrollMethod: 'wheel' | 'gesture'` —— 控制 Midscene 使用哪种滚动实现,默认 `wheel`。对于 Cocos、WebGL、Canvas 或自绘滚动区这类场景,如果模拟滚轮事件无法触发滚动,建议改用 `gesture`。`gesture` 仅支持 Chromium 内核浏览器。 -- `enableTouchEventsInActionSpace: boolean` —— 在动作空间里增加触摸手势(如滑动),用于需要触摸事件的页面,默认 `false`。 +- `interactionMode: 'mouse' | 'touch'` —— 控制整体交互模式,默认 `mouse`。设为 `touch` 时,会在动作空间中暴露触摸手势,并默认使用手势滚动。Playwright 下 `touch` 仅支持 Chromium 内核浏览器。 +- `enableTouchEventsInActionSpace: boolean` —— 兼容旧参数。设为 `true` 时,会按 `interactionMode: 'touch'` 处理触摸动作和默认滚动方式。 - `forceChromeSelectRendering: boolean` —— 强制 `select` 元素使用 Chrome 的 base-select 样式,避免系统原生样式导致截图/元素提取不可见;需要 Playwright ≥ `1.52.0`。 - `customActions: DeviceAction[]` —— 追加项目特有的动作,供规划器调用。 @@ -243,12 +243,12 @@ const agent = new AgentOverChromeBridge({ ```ts function connectCurrentTab(options?: { forceSameTabNavigation?: boolean; - scrollMethod?: 'wheel' | 'gesture'; + interactionMode?: 'mouse' | 'touch'; }): Promise; ``` - `options.forceSameTabNavigation`(默认 `true`)会拦截新标签并在当前页打开,方便调试;若想保留新标签行为可设为 `false`,但需要为每个新标签创建新的 Agent。 -- `options.scrollMethod`(默认 `'wheel'`)用于控制连接后标签页里的滚动实现。对于 Cocos、WebGL、Canvas 或自绘滚动区,建议使用 `'gesture'`。 +- `options.interactionMode`(默认 `'mouse'`)用于控制连接后标签页采用鼠标还是触摸交互。设为 `touch` 时会默认使用手势滚动。 - 连接当前激活标签页,成功后返回 `Promise`,如果扩展未允许连接会报错。 #### `connectNewTabWithUrl()` @@ -258,7 +258,7 @@ function connectNewTabWithUrl( url: string, options?: { forceSameTabNavigation?: boolean; - scrollMethod?: 'wheel' | 'gesture'; + interactionMode?: 'mouse' | 'touch'; }, ): Promise; ``` diff --git a/packages/web-integration/src/bridge-mode/agent-cli-side.ts b/packages/web-integration/src/bridge-mode/agent-cli-side.ts index 8108568271..845f940fbf 100644 --- a/packages/web-integration/src/bridge-mode/agent-cli-side.ts +++ b/packages/web-integration/src/bridge-mode/agent-cli-side.ts @@ -1,5 +1,9 @@ import { Agent, type AgentOpt } from '@midscene/core/agent'; import { assert } from '@midscene/shared/utils'; +import { + InteractionMode, + resolveWebPageInteractionOptions, +} from '../web-element'; import { commonWebActionsForWebPage } from '../web-page'; import type { KeyboardAction, MouseAction } from '../web-page'; import { @@ -51,6 +55,9 @@ export const getBridgePageInCliSide = (options?: { await server.call(BridgeEvent.UpdateAgentStatus, [message]); }, }; + const state = { + interactionMode: InteractionMode.Mouse, + }; const proxyPage = new Proxy(page, { get(target, prop, receiver) { @@ -69,7 +76,8 @@ export const getBridgePageInCliSide = (options?: { } if (prop === 'actionSpace') { - return () => commonWebActionsForWebPage(proxyPage); + return () => + commonWebActionsForWebPage(proxyPage, state.interactionMode); } if (Object.keys(page).includes(prop)) { @@ -109,6 +117,8 @@ export const getBridgePageInCliSide = (options?: { // Special handling for methods that support timeout in options if (prop === 'connectNewTabWithUrl') { return async (url: string, options?: BridgeConnectTabOptions) => { + state.interactionMode = + resolveWebPageInteractionOptions(options).interactionMode; const timeout = options?.timeout; const caller = bridgeCaller(prop, timeout); return await caller(url, options); @@ -117,6 +127,8 @@ export const getBridgePageInCliSide = (options?: { if (prop === 'connectCurrentTab') { return async (options?: BridgeConnectTabOptions) => { + state.interactionMode = + resolveWebPageInteractionOptions(options).interactionMode; const timeout = options?.timeout; const caller = bridgeCaller(prop, timeout); return await caller(options); diff --git a/packages/web-integration/src/bridge-mode/common.ts b/packages/web-integration/src/bridge-mode/common.ts index f8eeea5121..59a44936ab 100644 --- a/packages/web-integration/src/bridge-mode/common.ts +++ b/packages/web-integration/src/bridge-mode/common.ts @@ -1,4 +1,4 @@ -import type { ScrollMethod } from '../web-element'; +import type { InteractionMode } from '../web-element'; export const DefaultBridgeServerHost = '127.0.0.1'; export const DefaultBridgeServerPort = 3766; @@ -45,10 +45,11 @@ export interface BridgeConnectTabOptions { */ forceSameTabNavigation?: boolean; /** - * Choose how scroll is synthesized in the connected tab. - * @default 'wheel' + * Choose how the connected tab should be controlled. + * `touch` enables gesture-based scrolling by default. + * @default 'mouse' */ - scrollMethod?: ScrollMethod; + interactionMode?: InteractionMode; /** * Custom timeout for connecting to the tab in milliseconds. * @default 30000 (30 seconds) diff --git a/packages/web-integration/src/bridge-mode/page-browser-side.ts b/packages/web-integration/src/bridge-mode/page-browser-side.ts index de7580c83e..4f679c537a 100644 --- a/packages/web-integration/src/bridge-mode/page-browser-side.ts +++ b/packages/web-integration/src/bridge-mode/page-browser-side.ts @@ -1,6 +1,9 @@ import { assert } from '@midscene/shared/utils'; import ChromeExtensionProxyPage from '../chrome-extension/page'; -import { ScrollMethod } from '../web-element'; +import { + type InteractionMode, + resolveWebPageInteractionOptions, +} from '../web-element'; import type { ChromePageDestroyOptions, KeyboardAction, @@ -35,10 +38,10 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { type: 'log' | 'status', ) => void = () => {}, forceSameTabNavigation = true, - scrollMethod: ScrollMethod = ScrollMethod.Wheel, + interactionMode?: InteractionMode, public onConnectionRequest?: () => Promise, ) { - super(forceSameTabNavigation, scrollMethod); + super(forceSameTabNavigation, interactionMode); } private async setupBridgeClient() { @@ -183,9 +186,8 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { if (options?.forceSameTabNavigation) { this.forceSameTabNavigation = true; } - if (options?.scrollMethod) { - this.scrollMethod = options.scrollMethod; - } + const interactionOptions = resolveWebPageInteractionOptions(options); + this.interactionMode = interactionOptions.interactionMode; await this.setActiveTabId(tabId); } @@ -204,9 +206,8 @@ export class ExtensionBridgePageBrowserSide extends ChromeExtensionProxyPage { if (options?.forceSameTabNavigation) { this.forceSameTabNavigation = true; } - if (options?.scrollMethod) { - this.scrollMethod = options.scrollMethod; - } + const interactionOptions = resolveWebPageInteractionOptions(options); + this.interactionMode = interactionOptions.interactionMode; await this.setActiveTabId(tabId); } diff --git a/packages/web-integration/src/chrome-extension/page.ts b/packages/web-integration/src/chrome-extension/page.ts index 592a34f99b..b74ca6f369 100644 --- a/packages/web-integration/src/chrome-extension/page.ts +++ b/packages/web-integration/src/chrome-extension/page.ts @@ -5,7 +5,11 @@ The page must be active when interacting with it. */ -import { ScrollMethod, limitOpenNewTabScript } from '@/web-element'; +import { + type InteractionMode, + limitOpenNewTabScript, + resolveWebPageInteractionOptions, +} from '@/web-element'; import type { ElementCacheFeature, ElementTreeNode, @@ -50,7 +54,7 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { public forceSameTabNavigation: boolean; - public scrollMethod: ScrollMethod; + public interactionMode: InteractionMode; private viewportSize?: Size; @@ -64,14 +68,17 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { constructor( forceSameTabNavigation: boolean, - scrollMethod: ScrollMethod = ScrollMethod.Wheel, + interactionMode?: InteractionMode, ) { this.forceSameTabNavigation = forceSameTabNavigation; - this.scrollMethod = scrollMethod; + const interactionOptions = resolveWebPageInteractionOptions({ + interactionMode, + }); + this.interactionMode = interactionOptions.interactionMode; } actionSpace(): DeviceAction[] { - return commonWebActionsForWebPage(this); + return commonWebActionsForWebPage(this, this.interactionMode); } public async setActiveTabId(tabId: number) { @@ -688,7 +695,7 @@ export default class ChromeExtensionProxyPage implements AbstractInterface { const finalX = startX || this.latestMouseX; const finalY = startY || this.latestMouseY; await this.showMousePointer(finalX, finalY); - if (this.scrollMethod === ScrollMethod.Gesture) { + if (this.interactionMode === 'touch') { await this.sendCommandToDebugger('Input.synthesizeScrollGesture', { x: finalX, y: finalY, diff --git a/packages/web-integration/src/index.ts b/packages/web-integration/src/index.ts index 117f6b003e..1041536b7f 100644 --- a/packages/web-integration/src/index.ts +++ b/packages/web-integration/src/index.ts @@ -4,7 +4,7 @@ export type { PlayWrightAiFixtureType } from './playwright'; export { Agent as PageAgent, type AgentOpt } from '@midscene/core/agent'; export { PuppeteerAgent } from './puppeteer'; export { PlaywrightAgent } from './playwright'; -export { ScrollMethod } from './web-element'; +export { InteractionMode } from './web-element'; export { StaticPageAgent, StaticPage } from './static'; export { WebMidsceneTools } from './mcp-tools'; export { webPlaygroundPlatform } from './platform'; diff --git a/packages/web-integration/src/playwright/ai-fixture.ts b/packages/web-integration/src/playwright/ai-fixture.ts index c9edf2714b..085998d36b 100644 --- a/packages/web-integration/src/playwright/ai-fixture.ts +++ b/packages/web-integration/src/playwright/ai-fixture.ts @@ -2,7 +2,7 @@ import { rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { PlaywrightAgent, type PlaywrightWebPage } from '@/playwright/index'; -import type { ScrollMethod, WebPageAgentOpt } from '@/web-element'; +import type { InteractionMode, WebPageAgentOpt } from '@/web-element'; import type { Cache } from '@midscene/core'; import type { AgentOpt, Agent as PageAgent } from '@midscene/core/agent'; import { processCacheConfig } from '@midscene/core/utils'; @@ -60,14 +60,14 @@ export const PlaywrightAiFixture = (options?: { forceSameTabNavigation?: boolean; waitForNetworkIdleTimeout?: number; waitForNavigationTimeout?: number; - scrollMethod?: ScrollMethod; + interactionMode?: InteractionMode; cache?: PlaywrightCache; }) => { const { forceSameTabNavigation = true, waitForNetworkIdleTimeout = DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT, waitForNavigationTimeout = DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT, - scrollMethod, + interactionMode, cache, } = options ?? {}; @@ -97,7 +97,7 @@ export const PlaywrightAiFixture = (options?: { pageAgentMap[idForPage] = new PlaywrightAgent(page, { testId: `playwright-${testId}-${idForPage}`, forceSameTabNavigation, - scrollMethod, + interactionMode, cache: cacheConfig, groupName: title, groupDescription: file, diff --git a/packages/web-integration/src/playwright/index.ts b/packages/web-integration/src/playwright/index.ts index 889b7e0d04..3b241f7bf5 100644 --- a/packages/web-integration/src/playwright/index.ts +++ b/packages/web-integration/src/playwright/index.ts @@ -7,8 +7,11 @@ export { PlaywrightAiFixture } from './ai-fixture'; export { overrideAIConfig } from '@midscene/shared/env'; export { WebPage as PlaywrightWebPage } from './page'; export type { WebPageAgentOpt } from '@/web-element'; -export { ScrollMethod } from '@/web-element'; -import { ScrollMethod, type WebPageAgentOpt } from '@/web-element'; +export { InteractionMode } from '@/web-element'; +import { + type WebPageAgentOpt, + resolveWebPageInteractionOptions, +} from '@/web-element'; import { getDebug } from '@midscene/shared/logger'; import semver from 'semver'; import { @@ -48,11 +51,13 @@ export class PlaywrightAgent extends PageAgent { ); } - if (opts?.scrollMethod === ScrollMethod.Gesture) { + const { interactionMode } = resolveWebPageInteractionOptions(opts); + + if (interactionMode === 'touch') { const browserName = page.context().browser()?.browserType().name(); if (browserName && browserName !== 'chromium') { throw new Error( - `[midscene] scrollMethod "gesture" requires a Chromium-based Playwright browser, but current browser is "${browserName}". Use scrollMethod "wheel" instead.`, + `[midscene] touch interaction requires a Chromium-based Playwright browser, but current browser is "${browserName}". Gesture scrolling is not supported in Firefox/WebKit.`, ); } } diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index d0866c16eb..c3e6581e89 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -1,4 +1,8 @@ -import { ScrollMethod, type WebPageAgentOpt } from '@/web-element'; +import { + type InteractionMode, + type WebPageAgentOpt, + resolveWebPageInteractionOptions, +} from '@/web-element'; import type { DeviceAction, ElementCacheFeature, @@ -55,8 +59,7 @@ export class Page< private onBeforeInvokeAction?: AbstractInterface['beforeInvokeAction']; private onAfterInvokeAction?: AbstractInterface['afterInvokeAction']; private customActions?: DeviceAction[]; - private enableTouchEventsInActionSpace: boolean; - private scrollMethod: ScrollMethod; + private interactionMode: InteractionMode; private puppeteerFileChooserSession?: CDPSession; private puppeteerFileChooserHandler?: ( event: Protocol.Page.FileChooserOpenedEvent, @@ -68,7 +71,7 @@ export class Page< actionSpace(): DeviceAction[] { const defaultActions = commonWebActionsForWebPage( this, - this.enableTouchEventsInActionSpace, + this.interactionMode, ); const customActions = this.customActions || []; return [...defaultActions, ...customActions]; @@ -109,9 +112,8 @@ export class Page< this.onBeforeInvokeAction = opts?.beforeInvokeAction; this.onAfterInvokeAction = opts?.afterInvokeAction; this.customActions = opts?.customActions; - this.enableTouchEventsInActionSpace = - opts?.enableTouchEventsInActionSpace ?? false; - this.scrollMethod = opts?.scrollMethod ?? ScrollMethod.Wheel; + const interactionOptions = resolveWebPageInteractionOptions(opts); + this.interactionMode = interactionOptions.interactionMode; } async evaluateJavaScript(script: string): Promise { @@ -360,7 +362,7 @@ export class Page< }, wheel: async (deltaX: number, deltaY: number) => { debugPage(`mouse wheel ${deltaX}, ${deltaY}`); - if (this.scrollMethod === ScrollMethod.Gesture) { + if (this.interactionMode === 'touch') { await this.synthesizeScrollGesture(deltaX, deltaY); } else if (this.interfaceType === 'puppeteer') { await (this.underlyingPage as PuppeteerPage).mouse.wheel({ diff --git a/packages/web-integration/src/puppeteer/index.ts b/packages/web-integration/src/puppeteer/index.ts index e13e2910e3..a6c94cf107 100644 --- a/packages/web-integration/src/puppeteer/index.ts +++ b/packages/web-integration/src/puppeteer/index.ts @@ -28,7 +28,7 @@ function getPuppeteerVersion(): string | null { export { PuppeteerWebPage } from './page'; export type { WebPageAgentOpt } from '@/web-element'; -export { ScrollMethod } from '@/web-element'; +export { InteractionMode } from '@/web-element'; export class PuppeteerAgent extends PageAgent { protected isRetryableContextError(error: unknown): boolean { diff --git a/packages/web-integration/src/web-element.ts b/packages/web-integration/src/web-element.ts index a69d943572..4519a7374c 100644 --- a/packages/web-integration/src/web-element.ts +++ b/packages/web-integration/src/web-element.ts @@ -9,9 +9,9 @@ import { _keyDefinitions } from '@midscene/shared/us-keyboard-layout'; import type { NodeType } from '@midscene/shared/constants'; export type { WebElementInfo }; -export enum ScrollMethod { - Wheel = 'wheel', - Gesture = 'gesture', +export enum InteractionMode { + Mouse = 'mouse', + Touch = 'touch', } export type WebPageAgentOpt = AgentOpt & WebPageOpt; @@ -19,7 +19,7 @@ export type WebPageOpt = { waitForNavigationTimeout?: number; waitForNetworkIdleTimeout?: number; forceSameTabNavigation?: boolean /* if limit the new tab to the current page, default true */; - scrollMethod?: ScrollMethod; + interactionMode?: InteractionMode; enableTouchEventsInActionSpace?: boolean; /** * Force Chrome to render select elements using base-select appearance instead of OS-native rendering. @@ -35,6 +35,23 @@ export type WebPageOpt = { customActions?: DeviceAction[]; }; +export function resolveWebPageInteractionOptions( + opts?: Pick, +) { + const interactionMode = + opts?.interactionMode ?? + (opts?.enableTouchEventsInActionSpace + ? InteractionMode.Touch + : InteractionMode.Mouse); + + return { + interactionMode, + enableTouchEventsInActionSpace: + opts?.enableTouchEventsInActionSpace ?? + interactionMode === InteractionMode.Touch, + }; +} + export class WebElementInfoImpl implements WebElementInfo { content: string; diff --git a/packages/web-integration/src/web-page.ts b/packages/web-integration/src/web-page.ts index a720053d30..43d8e44536 100644 --- a/packages/web-integration/src/web-page.ts +++ b/packages/web-integration/src/web-page.ts @@ -25,6 +25,7 @@ import { sleep } from '@midscene/core/utils'; import type { ElementInfo } from '@midscene/shared/extractor'; import { getDebug } from '@midscene/shared/logger'; import { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout'; +import { InteractionMode } from './web-element'; const debug = getDebug('web:page'); @@ -442,7 +443,7 @@ export abstract class AbstractWebPage extends AbstractInterface { export const commonWebActionsForWebPage = ( page: T, - includeTouchEvents = false, + interactionMode: InteractionMode = InteractionMode.Mouse, ): DeviceAction[] => [ defineActionTap(async (param) => { const element = param.locate; @@ -579,7 +580,7 @@ export const commonWebActionsForWebPage = ( await page.longPress(element.center[0], element.center[1], duration); }), - ...(includeTouchEvents + ...(interactionMode === InteractionMode.Touch ? [ defineActionPinch(async (param) => { const { centerX, centerY, startDistance, endDistance, duration } = diff --git a/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts b/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts index 1ac5a29094..f6ade27390 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/pinch.test.ts @@ -13,7 +13,7 @@ describe( () => { const ctx = createTestContext(); - it('Pinch action is NOT available without enableTouchEventsInActionSpace', async () => { + it('Pinch action is NOT available in mouse interaction mode', async () => { const { originPage, reset } = await launchPage('https://www.example.com'); ctx.resetFn = reset; ctx.agent = new PuppeteerAgent(originPage); @@ -23,7 +23,7 @@ describe( expect(pinchAction).toBeUndefined(); }); - it('Swipe action is NOT available without enableTouchEventsInActionSpace', async () => { + it('Swipe action is NOT available in mouse interaction mode', async () => { const { originPage, reset } = await launchPage('https://www.example.com'); ctx.resetFn = reset; ctx.agent = new PuppeteerAgent(originPage); @@ -33,11 +33,11 @@ describe( expect(swipeAction).toBeUndefined(); }); - it('Pinch and Swipe are available when enableTouchEventsInActionSpace is true', async () => { + it('Pinch and Swipe are available when interactionMode is touch', async () => { const { originPage, reset } = await launchPage('https://www.example.com'); ctx.resetFn = reset; ctx.agent = new PuppeteerAgent(originPage, { - enableTouchEventsInActionSpace: true, + interactionMode: 'touch', }); const actionSpace = await ctx.agent.getActionSpace(); @@ -61,7 +61,7 @@ describe( }); ctx.resetFn = reset; ctx.agent = new PuppeteerAgent(originPage, { - enableTouchEventsInActionSpace: true, + interactionMode: 'touch', }); // Step 1: Verify initial state diff --git a/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts b/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts index 4e23994290..818f44fb92 100644 --- a/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts +++ b/packages/web-integration/tests/unit-test/base-page-invoke-action.test.ts @@ -21,10 +21,17 @@ vi.mock('@midscene/shared/node', () => ({ })); vi.mock('@/web-element', () => ({ - ScrollMethod: { - Wheel: 'wheel', - Gesture: 'gesture', + InteractionMode: { + Mouse: 'mouse', + Touch: 'touch', }, + resolveWebPageInteractionOptions: vi.fn((opts?: any) => ({ + interactionMode: + opts?.interactionMode ?? + (opts?.enableTouchEventsInActionSpace ? 'touch' : 'mouse'), + enableTouchEventsInActionSpace: + opts?.enableTouchEventsInActionSpace ?? opts?.interactionMode === 'touch', + })), WebPageContextParser: vi.fn().mockResolvedValue({ tree: { node: null, children: [] }, shotSize: { width: 1024, height: 768 }, diff --git a/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts b/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts index d427c4b67a..a00131deec 100644 --- a/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts +++ b/packages/web-integration/tests/unit-test/chrome-extension-cache.test.ts @@ -25,7 +25,7 @@ vi.mock('@midscene/shared/logger', () => ({ import { AiJudgeOrderSensitive } from '@midscene/core/ai-model'; import ChromeExtensionProxyPage from '../../src/chrome-extension/page'; -import { ScrollMethod } from '../../src/web-element'; +import { InteractionMode } from '../../src/web-element'; describe('ChromeExtensionProxyPage cache methods', () => { let page: ChromeExtensionProxyPage; @@ -64,8 +64,8 @@ describe('ChromeExtensionProxyPage cache methods', () => { expect((page as any).latestMouseY).toBe(400); }); - it('should use synthesizeScrollGesture when scrollMethod is gesture', async () => { - page = new ChromeExtensionProxyPage(false, ScrollMethod.Gesture); + it('should use synthesizeScrollGesture in touch interaction mode', async () => { + page = new ChromeExtensionProxyPage(false, InteractionMode.Touch); vi.spyOn(page as any, 'showMousePointer').mockResolvedValue(undefined); vi.spyOn(page as any, 'enableWaterFlowAnimation').mockResolvedValue( undefined, @@ -92,6 +92,18 @@ describe('ChromeExtensionProxyPage cache methods', () => { }); }); + describe('actionSpace', () => { + it('should expose touch actions in touch interaction mode', () => { + page = new ChromeExtensionProxyPage(false, InteractionMode.Touch); + + const actionNames = page.actionSpace().map((action) => action.name); + + expect(actionNames).toContain('Swipe'); + expect(actionNames).toContain('Pinch'); + expect(actionNames).toContain('Scroll'); + }); + }); + describe('cacheFeatureForPoint', () => { it('should return xpaths for a valid point', async () => { const mockXpaths = ['/html/body/div[1]', '/html/body/div[1]/button[1]']; diff --git a/packages/web-integration/tests/unit-test/constructor-validation.test.ts b/packages/web-integration/tests/unit-test/constructor-validation.test.ts index 027bea429f..f6a4897132 100644 --- a/packages/web-integration/tests/unit-test/constructor-validation.test.ts +++ b/packages/web-integration/tests/unit-test/constructor-validation.test.ts @@ -15,8 +15,8 @@ describe('PlaywrightAgent constructor validation', () => { ); }); - it('should throw when gesture scroll is used with non-chromium browser', async () => { - const { PlaywrightAgent, ScrollMethod } = await import('@/playwright'); + it('should throw when touch interaction is used with non-chromium browser', async () => { + const { InteractionMode, PlaywrightAgent } = await import('@/playwright'); const page = { context: () => ({ browser: () => ({ @@ -31,9 +31,17 @@ describe('PlaywrightAgent constructor validation', () => { () => new PlaywrightAgent(page as any, { forceSameTabNavigation: false, - scrollMethod: ScrollMethod.Gesture, + interactionMode: InteractionMode.Touch, }), - ).toThrow('scrollMethod "gesture" requires a Chromium-based Playwright'); + ).toThrow('touch interaction requires a Chromium-based Playwright'); + + expect( + () => + new PlaywrightAgent(page as any, { + forceSameTabNavigation: false, + enableTouchEventsInActionSpace: true, + }), + ).toThrow('touch interaction requires a Chromium-based Playwright'); }); }); diff --git a/packages/web-integration/tests/unit-test/scroll-method.test.ts b/packages/web-integration/tests/unit-test/scroll-method.test.ts index e2735cb159..11b08c9b9c 100644 --- a/packages/web-integration/tests/unit-test/scroll-method.test.ts +++ b/packages/web-integration/tests/unit-test/scroll-method.test.ts @@ -1,6 +1,6 @@ import { WebPage as PlaywrightWebPage } from '@/playwright/page'; import { PuppeteerWebPage } from '@/puppeteer/page'; -import { ScrollMethod } from '@/web-element'; +import { InteractionMode } from '@/web-element'; import { describe, expect, it, vi } from 'vitest'; describe('web scroll methods', () => { @@ -39,6 +39,24 @@ describe('web scroll methods', () => { expect(actionNames).toContain('Pinch'); }); + it('uses touch interaction mode to expose touch actions', async () => { + const page = { + mouse: { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }, + createCDPSession: vi.fn(), + }; + const webPage = new PuppeteerWebPage(page as any, { + interactionMode: InteractionMode.Touch, + }); + + const actionNames = webPage.actionSpace().map((action) => action.name); + + expect(actionNames).toContain('Swipe'); + expect(actionNames).toContain('Pinch'); + }); + it('uses wheel events by default for Puppeteer', async () => { const mouse = { move: vi.fn().mockResolvedValue(undefined), @@ -59,7 +77,7 @@ describe('web scroll methods', () => { expect(page.createCDPSession).not.toHaveBeenCalled(); }); - it('uses CDP scroll gestures for Puppeteer when configured', async () => { + it('uses CDP scroll gestures in touch interaction mode for Puppeteer', async () => { const mouse = { move: vi.fn().mockResolvedValue(undefined), wheel: vi.fn().mockResolvedValue(undefined), @@ -73,7 +91,7 @@ describe('web scroll methods', () => { createCDPSession: vi.fn().mockResolvedValue(session), }; const webPage = new PuppeteerWebPage(page as any, { - scrollMethod: ScrollMethod.Gesture, + interactionMode: InteractionMode.Touch, }); await webPage.mouse.move(300, 400); @@ -93,7 +111,39 @@ describe('web scroll methods', () => { expect(mouse.wheel).not.toHaveBeenCalled(); }); - it('uses CDP scroll gestures for Playwright when configured', async () => { + it('uses CDP scroll gestures when legacy touch-actions flag is enabled', async () => { + const mouse = { + move: vi.fn().mockResolvedValue(undefined), + wheel: vi.fn().mockResolvedValue(undefined), + }; + const session = { + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }; + const page = { + mouse, + createCDPSession: vi.fn().mockResolvedValue(session), + }; + const webPage = new PuppeteerWebPage(page as any, { + enableTouchEventsInActionSpace: true, + }); + + await webPage.mouse.move(120, 220); + await webPage.mouse.wheel(10, 20); + + expect(session.send).toHaveBeenCalledWith('Input.synthesizeScrollGesture', { + x: 120, + y: 220, + xDistance: -10, + yDistance: -20, + speed: 9999999, + repeatCount: 0, + preventFling: true, + }); + expect(mouse.wheel).not.toHaveBeenCalled(); + }); + + it('uses CDP scroll gestures in touch interaction mode for Playwright', async () => { const mouse = { move: vi.fn().mockResolvedValue(undefined), wheel: vi.fn().mockResolvedValue(undefined), @@ -115,7 +165,7 @@ describe('web scroll methods', () => { context: () => context, }; const webPage = new PlaywrightWebPage(page as any, { - scrollMethod: ScrollMethod.Gesture, + interactionMode: InteractionMode.Touch, }); await webPage.mouse.move(500, 600); diff --git a/packages/web-integration/tests/unit-test/yaml/input-mode-typeonly.test.ts b/packages/web-integration/tests/unit-test/yaml/input-mode-typeonly.test.ts index 8e8b0a2e62..608fae6172 100644 --- a/packages/web-integration/tests/unit-test/yaml/input-mode-typeonly.test.ts +++ b/packages/web-integration/tests/unit-test/yaml/input-mode-typeonly.test.ts @@ -1,3 +1,4 @@ +import { InteractionMode } from '@/web-element'; import { commonWebActionsForWebPage } from '@/web-page'; import { describe, expect, test, vi } from 'vitest'; @@ -23,7 +24,7 @@ describe('Input action typeOnly mode', () => { } as any; // Get actions from commonWebActionsForWebPage - const actions = commonWebActionsForWebPage(mockPage, false); + const actions = commonWebActionsForWebPage(mockPage, InteractionMode.Mouse); // Find the Input action const inputAction = actions.find((a) => a.name === 'Input'); @@ -69,7 +70,7 @@ describe('Input action typeOnly mode', () => { }, } as any; - const actions = commonWebActionsForWebPage(mockPage, false); + const actions = commonWebActionsForWebPage(mockPage, InteractionMode.Mouse); const inputAction = actions.find((a) => a.name === 'Input'); // Test with mode = 'replace' (default) @@ -111,7 +112,7 @@ describe('Input action typeOnly mode', () => { }, } as any; - const actions = commonWebActionsForWebPage(mockPage, false); + const actions = commonWebActionsForWebPage(mockPage, InteractionMode.Mouse); const inputAction = actions.find((a) => a.name === 'Input'); // Test with mode = 'clear'