Skip to content

Commit 992959c

Browse files
janodetzelJano Detzel
andauthored
feat(ios): add pinch gesture for zoom on iOS simulators (#11)
- Add 'pinch' CLI command: `pinch <scale> [x y]` (scale > 1 zooms in, scale < 1 zooms out)
- Implement in dispatch, runner-client (RunnerCommand + scale param), and the Swift runner
- Use a double-tap + drag workaround for map zoom (tap center, then drag up/down) instead of XCUITest `pinch(withScale:velocity:)`, which pans on map views
- Update commands.md and the agent-device skill with pinch usage and best practices
- Fix skill reference: recording.md -> video-recording.md

Co-authored-by: Jano Detzel <jano.detzel@ewe-go.de>
1 parent e63cf2a commit 992959c

5 files changed

Lines changed: 68 additions & 2 deletions

File tree

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,12 @@ final class RunnerTests: XCTestCase {
305305
}
306306
let buttonLabels = alert.buttons.allElementsBoundByIndex.map { $0.label }
307307
return Response(ok: true, data: DataPayload(message: alert.label, items: buttonLabels))
308+
case .pinch:
309+
guard let scale = command.scale, scale > 0 else {
310+
return Response(ok: false, error: ErrorPayload(message: "pinch requires scale > 0"))
311+
}
312+
pinch(app: activeApp, scale: scale, x: command.x, y: command.y)
313+
return Response(ok: true, data: DataPayload(message: "pinched"))
308314
}
309315
}
310316

@@ -372,6 +378,39 @@ final class RunnerTests: XCTestCase {
372378
}
373379
}
374380

381+
/// Simulates a zoom gesture using a "tap, then press-and-drag" sequence.
///
/// This is used instead of XCUITest's `pinch(withScale:velocity:)`, which pans
/// rather than zooms on map views. The gesture taps once at the anchor point and
/// then immediately presses and drags vertically from it:
/// upward for `scale > 1` (zoom in), downward for `scale < 1` (zoom out).
///
/// - Parameters:
///   - app: Application that receives the gesture. Its first window is used when
///     one exists; otherwise the application element itself is the target.
///   - scale: Requested zoom factor. It only controls the drag distance, so the
///     resulting zoom is approximate. A value of exactly `1` performs no gesture.
///   - x: Optional horizontal anchor. It is divided by the target's frame width to
///     obtain a normalized offset (presumably points in the target's coordinate
///     space — confirm against the caller). Defaults to the horizontal center.
///   - y: Optional vertical anchor, normalized by the target's frame height in the
///     same way. Defaults to the vertical center.
private func pinch(app: XCUIApplication, scale: Double, x: Double?, y: Double?) {
    // A scale of exactly 1 asks for no zoom. Skipping avoids sending a tap followed by
    // a zero-length press, which the app under test may interpret as a double-tap.
    guard scale != 1.0 else { return }

    let firstWindow = app.windows.firstMatch
    let target: XCUIElement = firstWindow.exists ? firstWindow : app

    // Read the frame once and reuse it for both axes.
    let frame = target.frame
    let width = Double(frame.width)
    let height = Double(frame.height)

    // Normalize the anchor. A zero-sized frame would turn the division into inf/NaN,
    // so fall back to the center (0.5) in that case, as when no coordinate is given.
    let centerX = x.flatMap { width > 0 ? $0 / width : nil } ?? 0.5
    let centerY = y.flatMap { height > 0 ? $0 / height : nil } ?? 0.5
    let center = target.coordinate(withNormalizedOffset: CGVector(dx: centerX, dy: centerY))

    // Drag distance in normalized units, capped at 0.4 of the target height.
    // Zooming in grows with (scale - 1); zooming out grows with (1 - scale).
    let zoomingIn = scale > 1.0
    let dragAmount = zoomingIn
        ? min(0.4, (scale - 1.0) * 0.2)
        : min(0.4, (1.0 - scale) * 0.4)

    // Drag up (smaller Y) to zoom in, down (larger Y) to zoom out, and keep the
    // end point between 0.1 and 0.9 of the target height.
    let rawEndY = zoomingIn ? centerY - dragAmount : centerY + dragAmount
    let endY = max(0.1, min(0.9, rawEndY))
    let endPoint = target.coordinate(withNormalizedOffset: CGVector(dx: centerX, dy: endY))

    // First tap of the double-tap.
    center.tap()

    // Second touch: a short press that continues into the vertical drag.
    center.press(forDuration: 0.05, thenDragTo: endPoint)
}
413+
375414
private func aggregatedLabel(for element: XCUIElement, depth: Int = 0) -> String? {
376415
if depth > 2 { return nil }
377416
let text = element.label.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -718,6 +757,7 @@ enum CommandType: String, Codable {
718757
case home
719758
case appSwitcher
720759
case alert
760+
case pinch
721761
case shutdown
722762
}
723763

@@ -736,6 +776,7 @@ struct Command: Codable {
736776
let x: Double?
737777
let y: Double?
738778
let direction: SwipeDirection?
779+
let scale: Double?
739780
let interactiveOnly: Bool?
740781
let compact: Bool?
741782
let depth: Int?

skills/agent-device/SKILL.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
name: agent-device
3-
description: Automates mobile and simulator interactions for iOS and Android devices. Use when navigating apps, taking snapshots/screenshots, tapping, typing, scrolling, or extracting UI info on mobile devices or simulators.
3+
description: Automates mobile and simulator interactions for iOS and Android devices. Use when navigating apps, taking snapshots/screenshots, tapping, typing, scrolling, pinching, or extracting UI info on mobile devices or simulators.
44
---
55

66
# Mobile Automation with agent-device
@@ -102,6 +102,8 @@ agent-device type "text" # Type into focused field
102102
agent-device press 300 500 # Tap by coordinates
103103
agent-device long-press 300 500 800 # Long press (where supported)
104104
agent-device scroll down 0.5
105+
agent-device pinch 2.0 # Zoom in 2x (iOS simulator)
106+
agent-device pinch 0.5 200 400 # Zoom out at coordinates (iOS simulator)
105107
agent-device back
106108
agent-device home
107109
agent-device app-switcher
@@ -139,6 +141,7 @@ agent-device apps --platform android --user-installed
139141

140142
## Best practices
141143

144+
- Pinch (`pinch <scale> [x y]`) is supported on iOS simulators only; scale > 1 zooms in, < 1 zooms out.
142145
- Always snapshot right before interactions; refs invalidate on UI changes.
143146
- Prefer `snapshot -i` to reduce output size.
144147
- On iOS, `xctest` is the default and does not require Accessibility permission.
@@ -153,7 +156,7 @@ agent-device apps --platform android --user-installed
153156
- [references/snapshot-refs.md](references/snapshot-refs.md)
154157
- [references/session-management.md](references/session-management.md)
155158
- [references/permissions.md](references/permissions.md)
156-
- [references/recording.md](references/recording.md)
159+
- [references/video-recording.md](references/video-recording.md)
157160
- [references/coordinate-system.md](references/coordinate-system.md)
158161

159162
## Missing features roadmap (high level)

src/core/dispatch.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,24 @@ export async function dispatchCommand(
224224
await interactor.scrollIntoView(text);
225225
return { text };
226226
}
227+
case 'pinch': {
228+
const scale = Number(positionals[0]);
229+
const x = positionals[1] ? Number(positionals[1]) : undefined;
230+
const y = positionals[2] ? Number(positionals[2]) : undefined;
231+
if (Number.isNaN(scale) || scale <= 0) {
232+
throw new AppError('INVALID_ARGS', 'pinch requires scale > 0');
233+
}
234+
if (device.platform === 'ios' && device.kind === 'simulator') {
235+
await runIosRunnerCommand(
236+
device,
237+
{ command: 'pinch', scale, x, y, appBundleId: context?.appBundleId },
238+
{ verbose: context?.verbose, logPath: context?.logPath, traceLogPath: context?.traceLogPath },
239+
);
240+
} else {
241+
throw new AppError('UNSUPPORTED_OPERATION', 'pinch is only supported on iOS simulators');
242+
}
243+
return { scale, x, y };
244+
}
227245
case 'screenshot': {
228246
const path = outPath ?? `./screenshot-${Date.now()}.png`;
229247
await interactor.screenshot(path);

src/platforms/ios/runner-client.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@ export type RunnerCommand = {
2020
| 'home'
2121
| 'appSwitcher'
2222
| 'alert'
23+
| 'pinch'
2324
| 'shutdown';
2425
appBundleId?: string;
2526
text?: string;
2627
action?: 'get' | 'accept' | 'dismiss';
2728
x?: number;
2829
y?: number;
2930
direction?: 'up' | 'down' | 'left' | 'right';
31+
scale?: number;
3032
interactiveOnly?: boolean;
3133
compact?: boolean;
3234
depth?: number;

website/docs/docs/commands.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ agent-device type "text"
3434
agent-device press 300 500
3535
agent-device long-press 300 500 800
3636
agent-device scroll down 0.5
37+
agent-device pinch 2.0 # zoom in 2x
38+
agent-device pinch 0.5 200 400 # zoom out at coordinates
3739
```
3840

3941
## Find (semantic)

0 commit comments

Comments
 (0)