Skip to content

Commit c3cbc62

Browse files
committed
feat: support ios transform gesture
1 parent 93b04b2 commit c3cbc62

8 files changed

Lines changed: 248 additions & 38 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
#import "RunnerObjCExceptionCatcher.h"
2+
#import "RunnerSynthesizedGesture.h"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#import <Foundation/Foundation.h>
2+
3+
NS_ASSUME_NONNULL_BEGIN
4+
5+
/// Synthesizes multi-touch gestures through private XCTest event classes that
/// are resolved by name at runtime.
@interface RunnerSynthesizedGesture : NSObject

/// Synthesizes one two-finger gesture that pans and scales at the same time.
///
/// Two touches are placed directly above and below a moving center point. The
/// center travels from (x, y) to (x + dx, y + dy) while the distance between
/// the touches changes by the factor `scale`.
///
/// @param application Object that is asked for `interfaceOrientation` and
///                    `processID`; the Swift caller passes its XCUIApplication.
/// @param x           Horizontal coordinate of the gesture center at the start
///                    (presumably screen points — confirm against the caller).
/// @param y           Vertical coordinate of the gesture center at the start.
/// @param dx          Horizontal travel of the center over the whole gesture.
/// @param dy          Vertical travel of the center over the whole gesture.
/// @param scale       Ratio of the final finger spread to the initial one.
/// @param durationMs  Gesture duration in milliseconds.
/// @return nil when the gesture was synthesized, otherwise a human-readable
///         description of why it failed (caught exceptions included).
+ (nullable NSString *)synthesizeTransformWithApplication:(id)application
                                                        x:(double)x
                                                        y:(double)y
                                                       dx:(double)dx
                                                       dy:(double)dy
                                                    scale:(double)scale
                                               durationMs:(double)durationMs;

@end
16+
17+
NS_ASSUME_NONNULL_END
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#import "RunnerSynthesizedGesture.h"
2+
3+
#import <CoreGraphics/CoreGraphics.h>
4+
#import <objc/message.h>
5+
6+
// Distance from the gesture center to each finger before `scale` is applied.
static const double RunnerTransformGestureRadius = 80.0;

// objc_msgSend must be cast to the exact signature of the message being sent.
// One typedef per signature used against the private XCTest classes below.

// Getter returning a 64-bit integer: `interfaceOrientation`, `processID`.
typedef long long (*RunnerMsgSendLongLong)(id, SEL);
// `initWithName:interfaceOrientation:` on the event record class.
typedef id (*RunnerMsgSendInitRecord)(id, SEL, NSString *, long long);
// `initForTouchAtPoint:offset:` on the pointer path class.
typedef id (*RunnerMsgSendInitPath)(id, SEL, CGPoint, NSTimeInterval);
// `moveToPoint:atOffset:` on a pointer path.
typedef void (*RunnerMsgSendPathMove)(id, SEL, CGPoint, NSTimeInterval);
// `liftUpAtOffset:` on a pointer path.
typedef void (*RunnerMsgSendPathOffset)(id, SEL, NSTimeInterval);
// `addPointerEventPath:` on an event record.
typedef void (*RunnerMsgSendAddPath)(id, SEL, id);
// `setTargetProcessID:` on an event record.
typedef void (*RunnerMsgSendSetLongLong)(id, SEL, long long);
// `synthesizeWithError:` on an event record.
typedef BOOL (*RunnerMsgSendSynthesize)(id, SEL, NSError **);

// Builds the complete touch path (down, moves, lift) for one finger.
static id RunnerPointerPath(Class pathClass,
                            CGPoint start,
                            double x,
                            double y,
                            double dx,
                            double dy,
                            double scale,
                            double radius,
                            double durationMs,
                            double side);

// Returns the position of one finger at normalized time `t`.
static CGPoint RunnerPointerPointAt(double x,
                                    double y,
                                    double dx,
                                    double dy,
                                    double scale,
                                    double baseRadius,
                                    double t,
                                    double side);
39+
40+
@implementation RunnerSynthesizedGesture
41+
42+
/// Public entry point: runs the synthesis and converts any Objective-C
/// exception raised along the way into a returned error string, so callers
/// never see an exception.
///
/// @return nil on success, otherwise a failure description. For a caught
///         exception the description is "<name>: <reason>".
+ (NSString * _Nullable)synthesizeTransformWithApplication:(id)application
                                                         x:(double)x
                                                         y:(double)y
                                                        dx:(double)dx
                                                        dy:(double)dy
                                                     scale:(double)scale
                                                durationMs:(double)durationMs {
    NSString *failureMessage = nil;
    @try {
        failureMessage = [self trySynthesizeTransformWithApplication:application
                                                                   x:x
                                                                   y:y
                                                                  dx:dx
                                                                  dy:dy
                                                               scale:scale
                                                          durationMs:durationMs];
    } @catch (NSException *exception) {
        // Fall back to generic text when the exception carries no name/reason.
        NSString *exceptionName = exception.name;
        if (exceptionName == nil) {
            exceptionName = @"NSException";
        }
        NSString *exceptionReason = exception.reason;
        if (exceptionReason == nil) {
            exceptionReason = @"private XCTest event synthesis failed";
        }
        failureMessage = [NSString stringWithFormat:@"%@: %@", exceptionName, exceptionReason];
    }
    return failureMessage;
}
63+
64+
/// Builds a two-finger event record from private XCTest classes and asks it to
/// synthesize itself against the application's process.
///
/// This method does not catch exceptions; messages sent through objc_msgSend
/// to objects that do not implement the selector will raise. The public
/// wrapper is responsible for turning those into error strings.
///
/// @return nil on success, otherwise a description of the step that failed.
+ (NSString * _Nullable)trySynthesizeTransformWithApplication:(id)application
                                                            x:(double)x
                                                            y:(double)y
                                                           dx:(double)dx
                                                           dy:(double)dy
                                                        scale:(double)scale
                                                   durationMs:(double)durationMs {
    Class recordClass = NSClassFromString(@"XCSynthesizedEventRecord");
    Class pathClass = NSClassFromString(@"XCPointerEventPath");
    // Fail fast with an accurate message when the private classes are not
    // present. Without this guard the nil class silently propagates through
    // nil messaging and is reported as a record-creation failure.
    if (recordClass == Nil || pathClass == Nil) {
        return @"private XCTest event synthesis unavailable: XCSynthesizedEventRecord or XCPointerEventPath class not found";
    }

    SEL initRecordSelector = NSSelectorFromString(@"initWithName:interfaceOrientation:");
    SEL addPathSelector = NSSelectorFromString(@"addPointerEventPath:");
    SEL setTargetProcessIDSelector = NSSelectorFromString(@"setTargetProcessID:");
    SEL synthesizeSelector = NSSelectorFromString(@"synthesizeWithError:");

    // NOTE(review): both getters are read through a `long long` return cast;
    // confirm the private `processID` property is not a narrower integer type.
    long long interfaceOrientation =
        ((RunnerMsgSendLongLong)objc_msgSend)(application, NSSelectorFromString(@"interfaceOrientation"));
    long long targetProcessID =
        ((RunnerMsgSendLongLong)objc_msgSend)(application, NSSelectorFromString(@"processID"));
    if (targetProcessID <= 0) {
        return @"private XCTest event synthesis unavailable: could not resolve target process ID";
    }
    double radius = RunnerTransformGestureRadius;

    id record = ((RunnerMsgSendInitRecord)objc_msgSend)(
        [recordClass alloc],
        initRecordSelector,
        @"agent-device-transform",
        interfaceOrientation
    );
    if (record == nil) {
        return @"private XCTest event synthesis failed: could not create event record";
    }
    ((RunnerMsgSendSetLongLong)objc_msgSend)(record, setTargetProcessIDSelector, targetProcessID);

    // One path per finger: -1 places the finger above the center, +1 below it.
    const double sides[] = {-1.0, 1.0};
    const size_t sideCount = sizeof(sides) / sizeof(sides[0]);
    for (size_t index = 0; index < sideCount; index += 1) {
        double side = sides[index];
        CGPoint start = RunnerPointerPointAt(x, y, dx, dy, scale, radius, 0.0, side);
        id path = RunnerPointerPath(pathClass, start, x, y, dx, dy, scale, radius, durationMs, side);
        if (path == nil) {
            return @"private XCTest event synthesis failed: could not create pointer path";
        }
        ((RunnerMsgSendAddPath)objc_msgSend)(record, addPathSelector, path);
    }

    // The BOOL result decides success; the NSError is only consulted on failure.
    NSError *error = nil;
    BOOL ok = ((RunnerMsgSendSynthesize)objc_msgSend)(record, synthesizeSelector, &error);
    if (!ok) {
        NSString *detail = error.localizedDescription ?: @"synthesizeWithError returned false";
        return [NSString stringWithFormat:@"private XCTest event synthesis failed: %@", detail];
    }
    return nil;
}
128+
129+
// Builds the touch path for a single finger: touch down at `start`, a series
// of evenly spaced moves following RunnerPointerPointAt, then lift.
//
// pathClass   Class that is sent `alloc` and `initForTouchAtPoint:offset:`.
// start       Touch-down location (offset 0).
// x, y, dx, dy, scale, radius, side
//             Forwarded unchanged to RunnerPointerPointAt for every frame.
// durationMs  Total gesture time in milliseconds. Non-finite or negative
//             values are treated as 0 so event offsets are never negative,
//             NaN or infinite.
//
// Returns the populated path object, or nil if it could not be created.
static id RunnerPointerPath(
    Class pathClass,
    CGPoint start,
    double x,
    double y,
    double dx,
    double dy,
    double scale,
    double radius,
    double durationMs,
    double side
) {
    // Roughly one move event per 16 ms, never fewer than 3. The upper bound
    // keeps the double-to-int conversion defined and stops absurd durations
    // from generating an unbounded number of events; longer gestures simply
    // space their frames further apart.
    const double frameIntervalMs = 16.0;
    const double minimumFrameCount = 3.0;
    const double maximumFrameCount = 10000.0;

    SEL initPathSelector = NSSelectorFromString(@"initForTouchAtPoint:offset:");
    SEL moveSelector = NSSelectorFromString(@"moveToPoint:atOffset:");
    SEL liftSelector = NSSelectorFromString(@"liftUpAtOffset:");

    id path = ((RunnerMsgSendInitPath)objc_msgSend)([pathClass alloc], initPathSelector, start, 0.0);
    if (path == nil) {
        return nil;
    }

    double safeDurationMs = (isfinite(durationMs) && durationMs > 0.0) ? durationMs : 0.0;
    // Clamp while still a double, then truncate; matches the previous
    // MAX(3, (int)(durationMs / 16.0)) for every in-range duration.
    double requestedFrames = safeDurationMs / frameIntervalMs;
    int frameCount = (int)MIN(MAX(requestedFrames, minimumFrameCount), maximumFrameCount);
    NSTimeInterval durationSeconds = safeDurationMs / 1000.0;

    for (int index = 1; index <= frameCount; index += 1) {
        double t = (double)index / (double)frameCount;
        CGPoint point = RunnerPointerPointAt(x, y, dx, dy, scale, radius, t, side);
        NSTimeInterval offset = durationSeconds * t;
        ((RunnerMsgSendPathMove)objc_msgSend)(path, moveSelector, point, offset);
    }

    // The last move (t == 1) and the lift share the same offset.
    ((RunnerMsgSendPathOffset)objc_msgSend)(path, liftSelector, durationSeconds);
    return path;
}
162+
163+
// Returns where one finger is at normalized time `t` (0 = start, 1 = end).
//
// The gesture center moves linearly from (x, y) to (x + dx, y + dy). The
// finger sits on the vertical line through the center, `side` times the
// current radius away (side is -1 or +1 at the call sites in this file).
//
// The radius is interpolated linearly so that end / start equals `scale`
// without ever exceeding `baseRadius`:
//   scale <  1: baseRadius          -> baseRadius * scale
//   otherwise : baseRadius / scale  -> baseRadius
static CGPoint RunnerPointerPointAt(
    double x,
    double y,
    double dx,
    double dy,
    double scale,
    double baseRadius,
    double t,
    double side
) {
    double fromRadius;
    double toRadius;
    if (scale < 1.0) {
        fromRadius = baseRadius;
        toRadius = baseRadius * scale;
    } else {
        // MAX keeps the divisor at 1.0 when `scale` is NaN.
        fromRadius = baseRadius / MAX(scale, 1.0);
        toRadius = baseRadius;
    }

    double currentRadius = fromRadius + (toRadius - fromRadius) * t;
    double currentCenterX = x + dx * t;
    double currentCenterY = y + dy * t;
    return CGPointMake(currentCenterX, currentCenterY + currentRadius * side);
}
184+
185+
@end

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,28 +1281,27 @@ extension RunnerTests {
12811281
durationMs: Double
12821282
) -> RunnerInteractionOutcome {
12831283
#if os(iOS)
1284-
let holdDuration = max(0.02, min(durationMs / 1000.0, 10.0) / 3.0)
1285-
let panOutcome = performCoordinateDrag(
1286-
app: app,
1284+
if let message = RunnerSynthesizedGesture.synthesizeTransform(
1285+
withApplication: app,
12871286
x: x,
12881287
y: y,
1289-
x2: x + dx,
1290-
y2: y + dy,
1291-
holdDuration: holdDuration
1292-
)
1293-
guard case .performed = panOutcome else {
1294-
return panOutcome
1288+
dx: dx,
1289+
dy: dy,
1290+
scale: scale,
1291+
durationMs: durationMs
1292+
) {
1293+
return .unsupported(message)
1294+
}
1295+
if abs(degrees) > 0.001 {
1296+
return performCoordinateRotateGesture(
1297+
app: app,
1298+
degrees: degrees,
1299+
x: x + dx,
1300+
y: y + dy,
1301+
velocity: degrees >= 0 ? 1 : -1
1302+
)
12951303
}
1296-
1297-
let target = gestureElement(app: app, x: x, y: y)
1298-
target.pinch(withScale: CGFloat(scale), velocity: CGFloat(scale >= 1.0 ? 1.0 : -1.0))
1299-
return performCoordinateRotateGesture(
1300-
app: app,
1301-
degrees: degrees,
1302-
x: x,
1303-
y: y,
1304-
velocity: degrees >= 0 ? 1.0 : -1.0
1305-
)
1304+
return .performed
13061305
#elseif os(tvOS)
13071306
return .unsupported("transformGesture is not supported on tvOS")
13081307
#else
@@ -1361,21 +1360,6 @@ extension RunnerTests {
13611360
#endif
13621361
}
13631362

1364-
#if os(iOS)
1365-
private func gestureElement(app: XCUIApplication, x: Double, y: Double) -> XCUIElement {
1366-
let point = CGPoint(x: x, y: y)
1367-
let matches = app.descendants(matching: .any).allElementsBoundByIndex.filter { element in
1368-
element.exists && element.frame.contains(point) && !element.frame.isEmpty
1369-
}
1370-
if let smallest = matches.min(by: { left, right in
1371-
(left.frame.width * left.frame.height) < (right.frame.width * right.frame.height)
1372-
}) {
1373-
return smallest
1374-
}
1375-
return interactionRoot(app: app)
1376-
}
1377-
#endif
1378-
13791363
private func interactionRoot(app: XCUIApplication) -> XCUIElement {
13801364
let windows = app.windows.allElementsBoundByIndex
13811365
if let window = windows.first(where: { $0.exists && !$0.frame.isEmpty }) {

src/utils/__tests__/args.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,7 @@ test('usageForCommand resolves workflow help topic', () => {
962962
);
963963
assert.match(help, /agent-device clipboard write "some text"/);
964964
assert.match(help, /For gesture-heavy iOS simulator proof videos, prefer --hide-touches/);
965+
assert.match(help, /iOS simulator transform uses private XCTest synthesis for pan\/scale/);
965966
assert.match(help, /Android Gboard handwriting\/stylus UI can capture text/);
966967
assert.match(help, /targetInput\/actualInput details/);
967968
assert.match(help, /Do not keep retrying fill\/type against the same field/);

src/utils/command-schema.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ Command shape:
236236
Snapshot refs look like @e12. After snapshot -i, use the exact @eN ref from that output.
237237
If the exact ref is not known yet, first output snapshot -i, then use a concrete example shape like press @e12 in the next command; do not write @<ref>, @ref, @Label_Name, or @eN placeholders.
238238
Close means agent-device close. App-owned back means back; system back means back --system.
239-
Taps are press or click. Gestures use swipe, longpress, or gesture <pan|fling|pinch|rotate|transform>. Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper.
239+
Taps are press or click. Gestures use swipe, longpress, or gesture <pan|fling|pinch|rotate|transform>. Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper. iOS simulator transform uses private XCTest synthesis for pan/scale and XCTest rotation for degrees; otherwise it reports UNSUPPORTED_OPERATION.
240240
241241
Bootstrap:
242242
agent-device devices --platform ios
@@ -323,7 +323,7 @@ Navigation and gestures:
323323
agent-device gesture pinch 0.5 200 400
324324
agent-device gesture rotate 35 200 420
325325
agent-device gesture transform 200 420 80 -40 2 35 700
326-
iOS simulator transform uses XCTest gesture primitives; verify app metrics instead of assuming requested degrees map exactly to recognizer output.
326+
iOS simulator transform uses private XCTest synthesis for pan/scale and XCTest rotation for degrees; verify app metrics instead of assuming requested values map exactly to recognizer output.
327327
328328
Validation and evidence:
329329
Nearby mutation diff: agent-device diff snapshot -i.

test/skillgym/suites/agent-device-smoke-suite.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1525,7 +1525,7 @@ const SKILL_GUIDANCE_CASES: Case[] = [
15251525
'Platform: Android',
15261526
'Current screen: gesture lab',
15271527
'Target center is x=200 y=420',
1528-
'Need one continuous two-finger gesture without lifting fingers',
1528+
'Need the direct transform command rather than separate gesture commands',
15291529
'Pan delta is dx=80 dy=-40',
15301530
'Zoom scale is 2',
15311531
'Rotation is 35 degrees',
@@ -1540,6 +1540,28 @@ const SKILL_GUIDANCE_CASES: Case[] = [
15401540
plannedCommand('compose-gestures'),
15411541
],
15421542
}),
1543+
makeCase({
1544+
id: 'ios-simulator-gesture-transform',
1545+
contract: [
1546+
'Platform: iOS simulator',
1547+
'Current screen: gesture lab',
1548+
'Target center is x=200 y=420',
1549+
'Need one continuous two-finger gesture without lifting fingers',
1550+
'Pan delta is dx=80 dy=-40',
1551+
'Zoom scale is 2',
1552+
'Rotation is 35 degrees',
1553+
'Duration is 700ms',
1554+
],
1555+
task: 'Plan the direct agent-device command for the combined pan, zoom, and rotate gesture.',
1556+
outputs: [plannedCommand('gesture transform'), /200\s+420\s+80\s+-40\s+2\s+35\s+700/i],
1557+
forbiddenOutputs: [
1558+
plannedCommand('gesture pan'),
1559+
plannedCommand('gesture pinch'),
1560+
plannedCommand('gesture rotate'),
1561+
plannedCommand('rotate-gesture'),
1562+
plannedCommand('swipe'),
1563+
],
1564+
}),
15431565
makeCase({
15441566
id: 'settings-animation-stabilizer',
15451567
contract: [

website/docs/docs/commands.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ On iOS, swipe duration is clamped to a safe range (`16..60ms`) to avoid longpres
280280
`gesture fling` accepts `up|down|left|right x y [distance] [durationMs]` for fast directional throws.
281281
`gesture rotate` accepts `degrees [x] [y] [velocity]`; the degree sign controls direction and velocity controls speed.
282282
`gesture transform` accepts `x y dx dy scale degrees [durationMs]` for one combined pan/zoom/rotate gesture on Android and iOS simulators.
283-
On iOS simulators it is implemented with XCTest gesture primitives, so verify app-level metrics instead of assuming the requested degrees map exactly to recognizer output.
283+
On iOS simulators it uses private XCTest synthesis for pan/scale and XCTest rotation for degrees, so verify app-level metrics instead of assuming the requested values map exactly to recognizer output.
284284
`scroll` accepts either a relative amount (`0.5` means roughly half of the viewport on that axis) or `--pixels <n>` for a fixed-distance gesture. Large distances are clamped to the usable drag band so the gesture stays reliable across Android, iOS, and macOS.
285285
Default snapshot text output is visible-first, so off-screen interactive content is summarized instead of shown as tappable refs.
286286
When a target only appears in an off-screen summary, use `scroll <direction>` and then take a fresh `snapshot -i`. For repeated checks, a small shell loop is enough:

0 commit comments

Comments
 (0)