diff --git a/README.md b/README.md index ea65e697a..4e34732e1 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The project is in early development and considered experimental. Pull requests a ## Features - Platforms: iOS (simulator + limited device support) and Android (emulator + device). -- Core commands: `open`, `back`, `home`, `app-switcher`, `press`, `long-press`, `focus`, `type`, `fill`, `scroll`, `scrollintoview`, `wait`, `alert`, `screenshot`, `close`, `reinstall`. +- Core commands: `open`, `back`, `home`, `app-switcher`, `press`, `long-press`, `swipe`, `focus`, `type`, `fill`, `scroll`, `scrollintoview`, `pinch`, `wait`, `alert`, `screenshot`, `close`, `reinstall`. - Inspection commands: `snapshot` (accessibility tree). - Device tooling: `adb` (Android), `simctl`/`devicectl` (iOS via Xcode). - Minimal dependencies; TypeScript executed directly on Node 22+ (no build step). @@ -71,13 +71,21 @@ agent-device trace stop ./trace.log ``` Coordinates: -- All coordinate-based commands (`press`, `long-press`, `focus`, `fill`) use device coordinates with origin at top-left. +- All coordinate-based commands (`press`, `long-press`, `swipe`, `focus`, `fill`) use device coordinates with origin at top-left. - X increases to the right, Y increases downward. 
+Gesture series examples: + +```bash +agent-device press 300 500 --count 12 --interval-ms 45 +agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 +agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong +``` + ## Command Index - `boot`, `open`, `close`, `reinstall`, `home`, `back`, `app-switcher` - `snapshot`, `find`, `get` -- `click`, `focus`, `type`, `fill`, `press`, `long-press`, `scroll`, `scrollintoview`, `is` +- `click`, `focus`, `type`, `fill`, `press`, `long-press`, `swipe`, `scroll`, `scrollintoview`, `pinch`, `is` - `alert`, `wait`, `screenshot` - `trace start`, `trace stop` - `settings wifi|airplane|location on|off` @@ -103,10 +111,25 @@ Flags: - `--serial ` (Android) - `--activity ` (Android app launch only; package/Activity or package/.Activity; not for URL opens) - `--session ` +- `--count ` repeat count for `press`/`swipe` +- `--interval-ms ` delay between `press` iterations +- `--hold-ms ` hold duration per `press` iteration +- `--jitter-px ` deterministic coordinate jitter for `press` +- `--pause-ms ` delay between `swipe` iterations +- `--pattern one-way|ping-pong` repeat pattern for `swipe` - `--verbose` for daemon and runner logs - `--json` for structured output - `--backend ax|xctest` (snapshot only; defaults to `xctest` on iOS) +Pinch: +- `pinch` is supported on iOS simulators. +- On Android, `pinch` currently returns `UNSUPPORTED_OPERATION` in the adb backend. + +Swipe timing: +- `swipe` accepts an optional `durationMs` (default `250`, range `16..10000`). +- Android uses the requested swipe duration directly. +- iOS validates `durationMs` but normalizes it to a fixed safe value (currently 60 ms) to avoid long-press side effects. + ## Skills Install the automation skills listed in [SKILL.md](skills/agent-device/SKILL.md). 
diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift index 7f152c4a3..1228cd2f9 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift @@ -251,6 +251,13 @@ final class RunnerTests: XCTestCase { let duration = (command.durationMs ?? 800) / 1000.0 longPressAt(app: activeApp, x: x, y: y, duration: duration) return Response(ok: true, data: DataPayload(message: "long pressed")) + case .drag: + guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { + return Response(ok: false, error: ErrorPayload(message: "drag requires x, y, x2, and y2")) + } + let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) + dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + return Response(ok: true, data: DataPayload(message: "dragged")) case .type: guard let text = command.text else { return Response(ok: false, error: ErrorPayload(message: "type requires text")) @@ -436,6 +443,20 @@ final class RunnerTests: XCTestCase { coordinate.press(forDuration: duration) } + private func dragAt( + app: XCUIApplication, + x: Double, + y: Double, + x2: Double, + y2: Double, + holdDuration: TimeInterval + ) { + let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0)) + let start = origin.withOffset(CGVector(dx: x, dy: y)) + let end = origin.withOffset(CGVector(dx: x2, dy: y2)) + start.press(forDuration: holdDuration, thenDragTo: end) + } + private func swipe(app: XCUIApplication, direction: SwipeDirection) { let target = app.windows.firstMatch.exists ? 
app.windows.firstMatch : app let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.2)) @@ -956,6 +977,7 @@ private func resolveRunnerPort() -> UInt16 { enum CommandType: String, Codable { case tap case longPress + case drag case type case swipe case findText @@ -984,6 +1006,8 @@ struct Command: Codable { let action: String? let x: Double? let y: Double? + let x2: Double? + let y2: Double? let durationMs: Double? let direction: SwipeDirection? let scale: Double? diff --git a/skills/agent-device/SKILL.md b/skills/agent-device/SKILL.md index 41f96df09..1afe5727c 100644 --- a/skills/agent-device/SKILL.md +++ b/skills/agent-device/SKILL.md @@ -112,10 +112,14 @@ agent-device focus @e2 agent-device fill @e2 "text" # Clear then type (Android: verifies value and retries once on mismatch) agent-device type "text" # Type into focused field without clearing agent-device press 300 500 # Tap by coordinates +agent-device press 300 500 --count 12 --interval-ms 45 +agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 +agent-device swipe 540 1500 540 500 120 +agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong agent-device long-press 300 500 800 # Long press (where supported) agent-device scroll down 0.5 -agent-device pinch 2.0 # Zoom in 2x (iOS simulator + Android) -agent-device pinch 0.5 200 400 # Zoom out at coordinates +agent-device pinch 2.0 # Zoom in 2x (iOS simulator) +agent-device pinch 0.5 200 400 # Zoom out at coordinates (iOS simulator) agent-device back agent-device home agent-device app-switcher @@ -167,7 +171,10 @@ agent-device apps --platform android --user-installed ## Best practices -- Pinch (`pinch [x y]`) is supported on iOS simulators and Android; scale > 1 zooms in, < 1 zooms out. On Android, pinch uses multi-touch `sendevent` injection. +- `press` supports gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`. 
+- `swipe` supports coordinate + timing controls and repeat patterns: `swipe x1 y1 x2 y2 [durationMs] --count --pause-ms --pattern`. +- `swipe` timing is platform-safe: Android uses requested duration; iOS uses normalized safe timing to avoid long-press side effects. +- Pinch (`pinch [x y]`) is currently supported on iOS simulators only. - Snapshot refs are the core mechanism for interactive agent flows. - Use selectors for deterministic replay artifacts and assertions (e.g. in e2e test workflows). - Prefer `snapshot -i` to reduce output size. diff --git a/src/core/__tests__/capabilities.test.ts b/src/core/__tests__/capabilities.test.ts index ecb04ebf5..9aaefbefb 100644 --- a/src/core/__tests__/capabilities.test.ts +++ b/src/core/__tests__/capabilities.test.ts @@ -52,6 +52,7 @@ test('iOS simulator + Android commands reject iOS devices', () => { 'record', 'screenshot', 'scroll', + 'swipe', 'settings', 'snapshot', 'type', diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index cb8d38a1b..61b128db6 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -35,6 +35,7 @@ const COMMAND_CAPABILITY_MATRIX: Record = { record: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, screenshot: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, scroll: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, + swipe: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, settings: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, snapshot: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, type: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index c1d13adb4..cc4ef6972 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -38,6 +38,12 @@ 
export type CommandFlags = { noRecord?: boolean; appsFilter?: 'launchable' | 'user-installed' | 'all'; appsMetadata?: boolean; + count?: number; + intervalMs?: number; + holdMs?: number; + jitterPx?: number; + pauseMs?: number; + pattern?: 'one-way' | 'ping-pong'; replayUpdate?: boolean; }; @@ -91,6 +97,12 @@ export async function dispatchCommand( snapshotScope?: string; snapshotRaw?: boolean; snapshotBackend?: 'ax' | 'xctest'; + count?: number; + intervalMs?: number; + holdMs?: number; + jitterPx?: number; + pauseMs?: number; + pattern?: 'one-way' | 'ping-pong'; }, ): Promise | void> { const runnerCtx: RunnerContext = { @@ -121,8 +133,60 @@ export async function dispatchCommand( case 'press': { const [x, y] = positionals.map(Number); if (Number.isNaN(x) || Number.isNaN(y)) throw new AppError('INVALID_ARGS', 'press requires x y'); - await interactor.tap(x, y); - return { x, y }; + const count = requireIntInRange(context?.count ?? 1, 'count', 1, 200); + const intervalMs = requireIntInRange(context?.intervalMs ?? 0, 'interval-ms', 0, 10_000); + const holdMs = requireIntInRange(context?.holdMs ?? 0, 'hold-ms', 0, 10_000); + const jitterPx = requireIntInRange(context?.jitterPx ?? 0, 'jitter-px', 0, 100); + + for (let index = 0; index < count; index += 1) { + const [dx, dy] = computeDeterministicJitter(index, jitterPx); + const targetX = x + dx; + const targetY = y + dy; + if (holdMs > 0) await interactor.longPress(targetX, targetY, holdMs); + else await interactor.tap(targetX, targetY); + if (index < count - 1 && intervalMs > 0) await sleep(intervalMs); + } + + return { x, y, count, intervalMs, holdMs, jitterPx }; + } + case 'swipe': { + const x1 = Number(positionals[0]); + const y1 = Number(positionals[1]); + const x2 = Number(positionals[2]); + const y2 = Number(positionals[3]); + if ([x1, y1, x2, y2].some(Number.isNaN)) { + throw new AppError('INVALID_ARGS', 'swipe requires x1 y1 x2 y2 [durationMs]'); + } + + const requestedDurationMs = positionals[4] ? 
Number(positionals[4]) : 250; + const durationMs = requireIntInRange(requestedDurationMs, 'durationMs', 16, 10_000); + const effectiveDurationMs = device.platform === 'ios' ? 60 : durationMs; + const count = requireIntInRange(context?.count ?? 1, 'count', 1, 200); + const pauseMs = requireIntInRange(context?.pauseMs ?? 0, 'pause-ms', 0, 10_000); + const pattern = context?.pattern ?? 'one-way'; + if (pattern !== 'one-way' && pattern !== 'ping-pong') { + throw new AppError('INVALID_ARGS', `Invalid pattern: ${pattern}`); + } + + for (let index = 0; index < count; index += 1) { + const reverse = pattern === 'ping-pong' && index % 2 === 1; + if (reverse) await interactor.swipe(x2, y2, x1, y1, effectiveDurationMs); + else await interactor.swipe(x1, y1, x2, y2, effectiveDurationMs); + if (index < count - 1 && pauseMs > 0) await sleep(pauseMs); + } + + return { + x1, + y1, + x2, + y2, + durationMs, + effectiveDurationMs, + timingMode: device.platform === 'ios' ? 'safe-normalized' : 'direct', + count, + pauseMs, + pattern, + }; } case 'long-press': { const x = Number(positionals[0]); @@ -171,6 +235,12 @@ export async function dispatchCommand( return { text }; } case 'pinch': { + if (device.platform === 'android') { + throw new AppError( + 'UNSUPPORTED_OPERATION', + 'Android pinch is not supported in current adb backend; requires instrumentation-based backend.', + ); + } const scale = Number(positionals[0]); const x = positionals[1] ? Number(positionals[1]) : undefined; const y = positionals[2] ? 
Number(positionals[2]) : undefined; @@ -280,3 +350,32 @@ export async function dispatchCommand( throw new AppError('INVALID_ARGS', `Unknown command: ${command}`); } } + +const DETERMINISTIC_JITTER_PATTERN: ReadonlyArray = [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], + [1, 1], + [-1, 1], + [1, -1], + [-1, -1], +]; + +function requireIntInRange(value: number, name: string, min: number, max: number): number { + if (!Number.isFinite(value) || !Number.isInteger(value) || value < min || value > max) { + throw new AppError('INVALID_ARGS', `${name} must be an integer between ${min} and ${max}`); + } + return value; +} + +function computeDeterministicJitter(index: number, jitterPx: number): [number, number] { + if (jitterPx <= 0) return [0, 0]; + const [dx, dy] = DETERMINISTIC_JITTER_PATTERN[index % DETERMINISTIC_JITTER_PATTERN.length]; + return [dx * jitterPx, dy * jitterPx]; +} + +async function sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/src/daemon-client.ts b/src/daemon-client.ts index 713dd1501..77fcaccb7 100644 --- a/src/daemon-client.ts +++ b/src/daemon-client.ts @@ -35,8 +35,9 @@ export async function sendToDaemon(req: Omit): Promise { const existing = readDaemonInfo(); const localVersion = readVersion(); - if (existing && existing.version === localVersion && (await canConnect(existing))) return existing; - if (existing && (existing.version !== localVersion || !(await canConnect(existing)))) { + const existingReachable = existing ? 
await canConnect(existing) : false; + if (existing && existing.version === localVersion && existingReachable) return existing; + if (existing && (existing.version !== localVersion || !existingReachable)) { removeDaemonInfo(); } @@ -67,7 +68,11 @@ function readDaemonInfo(): DaemonInfo | null { } function removeDaemonInfo(): void { - if (fs.existsSync(infoPath)) fs.unlinkSync(infoPath); + try { + if (fs.existsSync(infoPath)) fs.unlinkSync(infoPath); + } catch { + // Best-effort cleanup only; daemon can still overwrite this file on startup. + } } async function canConnect(info: DaemonInfo): Promise { @@ -87,11 +92,14 @@ async function startDaemon(): Promise { const distPath = path.join(root, 'dist', 'src', 'daemon.js'); const srcPath = path.join(root, 'src', 'daemon.ts'); - const useDist = fs.existsSync(distPath); - if (!useDist && !fs.existsSync(srcPath)) { + const hasDist = fs.existsSync(distPath); + const hasSrc = fs.existsSync(srcPath); + if (!hasDist && !hasSrc) { throw new AppError('COMMAND_FAILED', 'Daemon entry not found', { distPath, srcPath }); } - const args = useDist ? [distPath] : ['--experimental-strip-types', srcPath]; + const runningFromSource = process.execArgv.includes('--experimental-strip-types'); + const useSrc = runningFromSource ? hasSrc : !hasDist && hasSrc; + const args = useSrc ? 
['--experimental-strip-types', srcPath] : [distPath]; runCmdDetached(process.execPath, args); } diff --git a/src/daemon/context.ts b/src/daemon/context.ts index 1bbe1460d..78a876769 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -12,6 +12,12 @@ export type DaemonCommandContext = { snapshotScope?: string; snapshotBackend?: 'ax' | 'xctest'; snapshotRaw?: boolean; + count?: number; + intervalMs?: number; + holdMs?: number; + jitterPx?: number; + pauseMs?: number; + pattern?: 'one-way' | 'ping-pong'; }; export function contextFromFlags( @@ -32,5 +38,11 @@ export function contextFromFlags( snapshotScope: flags?.snapshotScope, snapshotRaw: flags?.snapshotRaw, snapshotBackend: flags?.snapshotBackend, + count: flags?.count, + intervalMs: flags?.intervalMs, + holdMs: flags?.holdMs, + jitterPx: flags?.jitterPx, + pauseMs: flags?.pauseMs, + pattern: flags?.pattern, }; } diff --git a/src/platforms/android/__tests__/index.test.ts b/src/platforms/android/__tests__/index.test.ts index ea4bf9416..6877bc629 100644 --- a/src/platforms/android/__tests__/index.test.ts +++ b/src/platforms/android/__tests__/index.test.ts @@ -1,6 +1,9 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { openAndroidApp, parseAndroidLaunchComponent } from '../index.ts'; +import { promises as fs } from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { openAndroidApp, parseAndroidLaunchComponent, swipeAndroid } from '../index.ts'; import type { DeviceInfo } from '../../../utils/device.ts'; import { AppError } from '../../../utils/errors.ts'; import { findBounds, parseUiHierarchy } from '../ui-hierarchy.ts'; @@ -110,3 +113,45 @@ test('openAndroidApp rejects activity override for deep link URLs', async () => }, ); }); + +test('swipeAndroid invokes adb input swipe with duration', async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-device-swipe-test-')); + const adbPath = path.join(tmpDir, 'adb'); + const 
argsLogPath = path.join(tmpDir, 'args.log'); + await fs.writeFile( + adbPath, + '#!/bin/sh\nprintf "%s\\n" "$@" > "$AGENT_DEVICE_TEST_ARGS_FILE"\nexit 0\n', + 'utf8', + ); + await fs.chmod(adbPath, 0o755); + + const previousPath = process.env.PATH; + const previousArgsFile = process.env.AGENT_DEVICE_TEST_ARGS_FILE; + process.env.PATH = `${tmpDir}${path.delimiter}${previousPath ?? ''}`; + process.env.AGENT_DEVICE_TEST_ARGS_FILE = argsLogPath; + + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + + try { + await swipeAndroid(device, 10, 20, 30, 40, 250); + const args = (await fs.readFile(argsLogPath, 'utf8')) + .trim() + .split('\n') + .filter(Boolean); + assert.deepEqual(args, ['-s', 'emulator-5554', 'shell', 'input', 'swipe', '10', '20', '30', '40', '250']); + } finally { + process.env.PATH = previousPath; + if (previousArgsFile === undefined) { + delete process.env.AGENT_DEVICE_TEST_ARGS_FILE; + } else { + process.env.AGENT_DEVICE_TEST_ARGS_FILE = previousArgsFile; + } + await fs.rm(tmpDir, { recursive: true, force: true }); + } +}); diff --git a/src/platforms/android/index.ts b/src/platforms/android/index.ts index 33463167a..828554cff 100644 --- a/src/platforms/android/index.ts +++ b/src/platforms/android/index.ts @@ -333,6 +333,29 @@ export async function pressAndroid(device: DeviceInfo, x: number, y: number): Pr await runCmd('adb', adbArgs(device, ['shell', 'input', 'tap', String(x), String(y)])); } +export async function swipeAndroid( + device: DeviceInfo, + x1: number, + y1: number, + x2: number, + y2: number, + durationMs = 250, +): Promise { + await runCmd( + 'adb', + adbArgs(device, [ + 'shell', + 'input', + 'swipe', + String(x1), + String(y1), + String(x2), + String(y2), + String(durationMs), + ]), + ); +} + export async function backAndroid(device: DeviceInfo): Promise { await runCmd('adb', adbArgs(device, ['shell', 'input', 'keyevent', '4'])); } diff --git 
a/src/platforms/ios/runner-client.ts b/src/platforms/ios/runner-client.ts index 6f4e71f64..2c7a8c0e9 100644 --- a/src/platforms/ios/runner-client.ts +++ b/src/platforms/ios/runner-client.ts @@ -13,6 +13,7 @@ export type RunnerCommand = { command: | 'tap' | 'longPress' + | 'drag' | 'type' | 'swipe' | 'findText' @@ -29,6 +30,8 @@ export type RunnerCommand = { action?: 'get' | 'accept' | 'dismiss'; x?: number; y?: number; + x2?: number; + y2?: number; durationMs?: number; direction?: 'up' | 'down' | 'left' | 'right'; scale?: number; diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 829753179..6bc95bfb0 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -9,6 +9,63 @@ test('parseArgs recognizes --relaunch', () => { assert.equal(parsed.flags.relaunch, true); }); +test('parseArgs recognizes press series flags', () => { + const parsed = parseArgs([ + 'press', + '300', + '500', + '--count', + '12', + '--interval-ms=45', + '--hold-ms', + '120', + '--jitter-px', + '3', + ]); + assert.equal(parsed.command, 'press'); + assert.deepEqual(parsed.positionals, ['300', '500']); + assert.equal(parsed.flags.count, 12); + assert.equal(parsed.flags.intervalMs, 45); + assert.equal(parsed.flags.holdMs, 120); + assert.equal(parsed.flags.jitterPx, 3); +}); + +test('parseArgs recognizes swipe positional + pattern flags', () => { + const parsed = parseArgs([ + 'swipe', + '540', + '1500', + '540', + '500', + '120', + '--count', + '8', + '--pause-ms', + '30', + '--pattern', + 'ping-pong', + ]); + assert.equal(parsed.command, 'swipe'); + assert.deepEqual(parsed.positionals, ['540', '1500', '540', '500', '120']); + assert.equal(parsed.flags.count, 8); + assert.equal(parsed.flags.pauseMs, 30); + assert.equal(parsed.flags.pattern, 'ping-pong'); +}); + +test('parseArgs rejects invalid swipe pattern', () => { + assert.throws( + () => parseArgs(['swipe', '0', '0', '10', '10', '--pattern', 'diagonal']), + /Invalid pattern/, + ); 
+}); + test('usage includes --relaunch flag', () => { assert.match(usage(), /--relaunch/); }); + +test('usage includes swipe and press series options', () => { + const help = usage(); + assert.match(help, /swipe /); + assert.match(help, /--pattern one-way\|ping-pong/); + assert.match(help, /--interval-ms/); +}); diff --git a/src/utils/args.ts b/src/utils/args.ts index 7657facf0..6587357ec 100644 --- a/src/utils/args.ts +++ b/src/utils/args.ts @@ -20,6 +20,12 @@ export type ParsedArgs = { snapshotBackend?: 'ax' | 'xctest'; appsFilter?: 'launchable' | 'user-installed' | 'all'; appsMetadata?: boolean; + count?: number; + intervalMs?: number; + holdMs?: number; + jitterPx?: number; + pauseMs?: number; + pattern?: 'one-way' | 'ping-pong'; activity?: string; saveScript?: boolean; relaunch?: boolean; @@ -144,6 +150,27 @@ export function parseArgs(argv: string[]): ParsedArgs { case '--activity': flags.activity = value; break; + case '--count': + flags.count = parseNumericFlag(key, value); + break; + case '--interval-ms': + flags.intervalMs = parseNumericFlag(key, value); + break; + case '--hold-ms': + flags.holdMs = parseNumericFlag(key, value); + break; + case '--jitter-px': + flags.jitterPx = parseNumericFlag(key, value); + break; + case '--pause-ms': + flags.pauseMs = parseNumericFlag(key, value); + break; + case '--pattern': + if (value !== 'one-way' && value !== 'ping-pong') { + throw new AppError('INVALID_ARGS', `Invalid pattern: ${value}`); + } + flags.pattern = value; + break; default: throw new AppError('INVALID_ARGS', `Unknown flag: ${key}`); } @@ -172,6 +199,14 @@ export function parseArgs(argv: string[]): ParsedArgs { return { command, positionals, flags }; } +function parseNumericFlag(name: string, value: string): number { + const parsed = Number(value); + if (!Number.isFinite(parsed)) { + throw new AppError('INVALID_ARGS', `Invalid ${name}: ${value}`); + } + return parsed; +} + export function usage(): string { return `agent-device [args] [--json] @@ -204,13 
+239,17 @@ Commands: get text <@ref|selector> Return element text by ref or selector get attrs <@ref|selector> Return element attributes by ref or selector replay [--update|-u] Replay a recorded session - press Tap at coordinates + press [--count N] [--interval-ms I] [--hold-ms H] [--jitter-px J] + Tap/press at coordinates (supports repeated gesture series) long-press [durationMs] Long press (where supported) + swipe [durationMs] [--count N] [--pause-ms P] [--pattern one-way|ping-pong] + Swipe coordinates with optional repeat pattern focus Focus input at coordinates type Type text in focused field fill | fill <@ref|selector> Tap then type scroll [amount] Scroll in direction (0-1 amount) + pinch [x] [y] Pinch/zoom (iOS simulator only) scrollintoview Scroll until text appears (Android only) screenshot [path] Capture screenshot record start [path] Start screen recording @@ -234,6 +273,12 @@ Flags: --serial Android device serial --activity Android app launch activity (package/Activity); not for URL opens --session Named session + --count Repeat count for press/swipe series + --interval-ms Delay between press iterations + --hold-ms Press hold duration for each iteration + --jitter-px Deterministic coordinate jitter radius for press + --pause-ms Delay between swipe iterations + --pattern one-way|ping-pong Swipe repeat pattern --verbose Stream daemon/runner logs --json JSON output --save-script Save session script (.ad) on close diff --git a/src/utils/interactors.ts b/src/utils/interactors.ts index 30abf4460..6853b807c 100644 --- a/src/utils/interactors.ts +++ b/src/utils/interactors.ts @@ -8,6 +8,7 @@ import { openAndroidApp, openAndroidDevice, pressAndroid, + swipeAndroid, scrollAndroid, scrollIntoViewAndroid, screenshotAndroid, @@ -33,6 +34,7 @@ export type Interactor = { openDevice(): Promise; close(app: string): Promise; tap(x: number, y: number): Promise; + swipe(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise; longPress(x: number, y: 
number, durationMs?: number): Promise; focus(x: number, y: number): Promise; type(text: string): Promise; @@ -50,6 +52,7 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): openDevice: () => openAndroidDevice(device), close: (app) => closeAndroidApp(device, app), tap: (x, y) => pressAndroid(device, x, y), + swipe: (x1, y1, x2, y2, durationMs) => swipeAndroid(device, x1, y1, x2, y2, durationMs), longPress: (x, y, durationMs) => longPressAndroid(device, x, y, durationMs), focus: (x, y) => focusAndroid(device, x, y), type: (text) => typeAndroid(device, text), @@ -71,7 +74,7 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): } } -type IoRunnerOverrides = Pick; +type IoRunnerOverrides = Pick; function iosRunnerOverrides(device: DeviceInfo, ctx: RunnerContext): IoRunnerOverrides { const runnerOpts = { verbose: ctx.verbose, logPath: ctx.logPath, traceLogPath: ctx.traceLogPath }; @@ -84,6 +87,13 @@ function iosRunnerOverrides(device: DeviceInfo, ctx: RunnerContext): IoRunnerOve runnerOpts, ); }, + swipe: async (x1, y1, x2, y2, durationMs) => { + await runIosRunnerCommand( + device, + { command: 'drag', x: x1, y: y1, x2, y2, durationMs, appBundleId: ctx.appBundleId }, + runnerOpts, + ); + }, longPress: async (x, y, durationMs) => { await runIosRunnerCommand( device, diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index 0ada45449..525fb7028 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -40,6 +40,10 @@ agent-device focus @e2 agent-device fill @e2 "text" # Clear then type agent-device type "text" # Type into focused field without clearing agent-device press 300 500 +agent-device press 300 500 --count 12 --interval-ms 45 +agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 +agent-device swipe 540 1500 540 500 120 +agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong agent-device long-press 
300 500 800 agent-device scroll down 0.5 agent-device pinch 2.0 # zoom in 2x (iOS simulator) @@ -48,6 +52,9 @@ agent-device pinch 0.5 200 400 # zoom out at coordinates (iOS simulator) `fill` clears then types. `type` does not clear. On Android, `fill` also verifies text and performs one clear-and-retry pass on mismatch. +`swipe` accepts an optional `durationMs` argument (default `250ms`, range `16..10000`). +On iOS, swipe timing is normalized to a fixed safe duration (currently 60 ms) to avoid long-press side effects. +`pinch` is supported on iOS simulators only; the current adb-backed Android implementation returns `UNSUPPORTED_OPERATION`. ## Find (semantic)