Skip to content

Commit ca21b60

Browse files
committed
feat: support ios transform gesture
1 parent 93b04b2 commit ca21b60

8 files changed

Lines changed: 283 additions & 38 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
#import "RunnerObjCExceptionCatcher.h"
2+
#import "RunnerSynthesizedGesture.h"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#import <Foundation/Foundation.h>
2+
3+
NS_ASSUME_NONNULL_BEGIN
4+
5+
/// Entry point for synthesizing a two-finger transform (pan + scale) gesture
/// through XCTest's runtime-resolved event-synthesis classes.
@interface RunnerSynthesizedGesture : NSObject

/// Synthesizes a single two-finger gesture that pans and scales at the same time.
///
/// @param application Object describing the target app. The implementation queries it
///        dynamically for `processID` (required) and `interfaceOrientation` (optional).
/// @param x Horizontal coordinate of the gesture center at the start of the gesture.
/// @param y Vertical coordinate of the gesture center at the start of the gesture.
/// @param dx Horizontal distance the gesture center travels over the gesture.
/// @param dy Vertical distance the gesture center travels over the gesture.
/// @param scale Ratio between the final and the initial finger spacing
///        (greater than 1 spreads the fingers apart, less than 1 brings them together).
/// @param durationMs Length of the gesture in milliseconds.
/// @return `nil` when the gesture was synthesized; otherwise a human-readable
///         description of why synthesis was unavailable or failed.
+ (nullable NSString *)synthesizeTransformWithApplication:(id)application
                                                        x:(double)x
                                                        y:(double)y
                                                       dx:(double)dx
                                                       dy:(double)dy
                                                    scale:(double)scale
                                               durationMs:(double)durationMs;

@end
16+
17+
NS_ASSUME_NONNULL_END
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#import "RunnerSynthesizedGesture.h"
2+
3+
#import <CoreGraphics/CoreGraphics.h>
4+
#import <math.h>
5+
#import <objc/message.h>
6+
7+
// Base distance from the gesture center to each synthesized finger, in the same
// coordinate units as the touch points. Passed as `baseRadius` to RunnerPointerPairAt.
static const double RunnerTransformGestureRadius = 80.0;

#pragma mark - objc_msgSend signatures

// objc_msgSend must be cast to the exact signature of the method being invoked.
// One typedef exists per distinct signature used against the runtime-resolved classes.

// Zero-argument getter returning an integer (e.g. `interfaceOrientation`, `processID`).
typedef long long (*RunnerMsgSendLongLong)(id, SEL);
// One-argument integer setter (e.g. `setTargetProcessID:`).
typedef void (*RunnerMsgSendSetLongLong)(id, SEL, long long);

// `initWithName:interfaceOrientation:` on the event record.
typedef id (*RunnerMsgSendInitRecord)(id, SEL, NSString *, long long);
// `addPointerEventPath:` on the event record.
typedef void (*RunnerMsgSendAddPath)(id, SEL, id);
// `synthesizeWithError:` on the event record.
typedef BOOL (*RunnerMsgSendSynthesize)(id, SEL, NSError **);

// `initForTouchAtPoint:offset:` on the pointer path.
typedef id (*RunnerMsgSendInitPath)(id, SEL, CGPoint, NSTimeInterval);
// `moveToPoint:atOffset:` on the pointer path.
typedef void (*RunnerMsgSendPathMove)(id, SEL, CGPoint, NSTimeInterval);
// `liftUpAtOffset:` on the pointer path.
typedef void (*RunnerMsgSendPathOffset)(id, SEL, NSTimeInterval);

#pragma mark - Geometry

// Positions of the two fingers at one instant of the gesture.
typedef struct RunnerPointerPair {
  CGPoint a;
  CGPoint b;
} RunnerPointerPair;

#pragma mark - Forward declarations

// Defined further down in this file, after their first use.
static long long RunnerInterfaceOrientationForApplication(id application);
static long long RunnerProcessIDForApplication(id application);
static RunnerPointerPair RunnerPointerPairAt(double x, double y, double dx, double dy,
                                             double scale, double baseRadius, double t);
34+
35+
@implementation RunnerSynthesizedGesture

#pragma mark - Sampling limits

// Nominal spacing between synthesized touch samples, in milliseconds.
static const double RunnerTransformFrameIntervalMs = 16.0;
// Fewest samples generated per finger, so even very short gestures contain movement.
static const int RunnerTransformMinFrameCount = 3;
// Most samples generated per finger. Longer gestures keep their full duration but are
// sampled more coarsely, so the path length stays bounded.
static const int RunnerTransformMaxFrameCount = 1000;

#pragma mark - Argument helpers

// Returns a failure description when the numeric arguments cannot describe a gesture,
// or nil when they are usable.
static NSString *RunnerTransformArgumentError(double x,
                                              double y,
                                              double dx,
                                              double dy,
                                              double scale,
                                              double durationMs) {
  if (!isfinite(x) || !isfinite(y) || !isfinite(dx) || !isfinite(dy)) {
    return @"private XCTest event synthesis failed: x, y, dx, and dy must be finite numbers";
  }
  // A zero or negative scale would collapse the fingers onto one point or make them cross.
  if (!isfinite(scale) || scale <= 0.0) {
    return @"private XCTest event synthesis failed: scale must be a finite number greater than 0";
  }
  // A negative duration would produce touch offsets that run backwards in time.
  if (!isfinite(durationMs) || durationMs < 0.0) {
    return @"private XCTest event synthesis failed: durationMs must be a finite number >= 0";
  }
  return nil;
}

// Number of samples per finger for a gesture of the given length. The clamp is applied
// while the value is still a double, so the conversion to int can never overflow.
static int RunnerTransformFrameCount(double durationMs) {
  double frames = round(durationMs / RunnerTransformFrameIntervalMs);
  if (!(frames > (double)RunnerTransformMinFrameCount)) {
    // Written as a negated comparison so NaN also lands on the minimum.
    return RunnerTransformMinFrameCount;
  }
  if (frames > (double)RunnerTransformMaxFrameCount) {
    return RunnerTransformMaxFrameCount;
  }
  return (int)frames;
}

#pragma mark - Public API

/// Synthesizes a combined two-finger pan + scale gesture.
///
/// Any Objective-C exception raised while talking to the runtime-resolved XCTest
/// classes is converted into a failure description instead of propagating to the caller.
///
/// @return nil on success, otherwise a human-readable failure description.
+ (NSString * _Nullable)synthesizeTransformWithApplication:(id)application
                                                         x:(double)x
                                                         y:(double)y
                                                        dx:(double)dx
                                                        dy:(double)dy
                                                     scale:(double)scale
                                                durationMs:(double)durationMs {
  @try {
    return [self trySynthesizeTransformWithApplication:application
                                                     x:x
                                                     y:y
                                                    dx:dx
                                                    dy:dy
                                                 scale:scale
                                            durationMs:durationMs];
  } @catch (NSException *exception) {
    NSString *name = exception.name ?: @"NSException";
    NSString *reason = exception.reason ?: @"private XCTest event synthesis failed";
    return [NSString stringWithFormat:@"%@: %@", name, reason];
  }
}

#pragma mark - Synthesis

/// Builds an event record with one pointer path per finger and asks it to synthesize.
///
/// May raise an Objective-C exception; callers go through
/// +synthesizeTransformWithApplication:x:y:dx:dy:scale:durationMs:, which catches it.
///
/// @return nil on success, otherwise a human-readable failure description.
+ (NSString * _Nullable)trySynthesizeTransformWithApplication:(id)application
                                                            x:(double)x
                                                            y:(double)y
                                                           dx:(double)dx
                                                           dy:(double)dy
                                                        scale:(double)scale
                                                   durationMs:(double)durationMs {
  // Reject unusable numbers before touching any private API.
  NSString *argumentError = RunnerTransformArgumentError(x, y, dx, dy, scale, durationMs);
  if (argumentError != nil) {
    return argumentError;
  }

  // The classes are looked up by name at runtime so the file has no link-time
  // dependency on them.
  Class recordClass = NSClassFromString(@"XCSynthesizedEventRecord");
  Class pathClass = NSClassFromString(@"XCPointerEventPath");
  if (recordClass == Nil || pathClass == Nil) {
    return @"private XCTest event synthesis unavailable: missing XCUIAutomation classes";
  }

  SEL initRecordSelector = NSSelectorFromString(@"initWithName:interfaceOrientation:");
  SEL addPathSelector = NSSelectorFromString(@"addPointerEventPath:");
  SEL setTargetProcessIDSelector = NSSelectorFromString(@"setTargetProcessID:");
  SEL synthesizeSelector = NSSelectorFromString(@"synthesizeWithError:");
  SEL initPathSelector = NSSelectorFromString(@"initForTouchAtPoint:offset:");
  SEL moveSelector = NSSelectorFromString(@"moveToPoint:atOffset:");
  SEL liftSelector = NSSelectorFromString(@"liftUpAtOffset:");

  // Verify every selector up front so a missing one is reported as "unavailable"
  // rather than surfacing as an unrecognized-selector exception part-way through.
  BOOL recordIsUsable = [recordClass instancesRespondToSelector:initRecordSelector] &&
                        [recordClass instancesRespondToSelector:addPathSelector] &&
                        [recordClass instancesRespondToSelector:setTargetProcessIDSelector] &&
                        [recordClass instancesRespondToSelector:synthesizeSelector];
  BOOL pathIsUsable = [pathClass instancesRespondToSelector:initPathSelector] &&
                      [pathClass instancesRespondToSelector:moveSelector] &&
                      [pathClass instancesRespondToSelector:liftSelector];
  if (!recordIsUsable || !pathIsUsable) {
    return @"private XCTest event synthesis unavailable: required selectors are missing";
  }

  long long interfaceOrientation = RunnerInterfaceOrientationForApplication(application);
  long long targetProcessID = RunnerProcessIDForApplication(application);
  if (targetProcessID <= 0) {
    return @"private XCTest event synthesis unavailable: could not resolve target process ID";
  }

  id record = ((RunnerMsgSendInitRecord)objc_msgSend)(
      [recordClass alloc],
      initRecordSelector,
      @"agent-device-transform",
      interfaceOrientation);
  if (record == nil) {
    return @"private XCTest event synthesis failed: could not create event record";
  }
  ((RunnerMsgSendSetLongLong)objc_msgSend)(record, setTargetProcessIDSelector, targetProcessID);

  // Both fingers share the same gesture parameters and differ only in which side of
  // the gesture center they sit on.
  double radius = RunnerTransformGestureRadius;
  RunnerPointerPair start = RunnerPointerPairAt(x, y, dx, dy, scale, radius, 0.0);
  id pathA = [self pathWithClass:pathClass
                           start:start.a
                               x:x
                               y:y
                              dx:dx
                              dy:dy
                           scale:scale
                          radius:radius
                      durationMs:durationMs
                          finger:0];
  id pathB = [self pathWithClass:pathClass
                           start:start.b
                               x:x
                               y:y
                              dx:dx
                              dy:dy
                           scale:scale
                          radius:radius
                      durationMs:durationMs
                          finger:1];
  if (pathA == nil || pathB == nil) {
    return @"private XCTest event synthesis failed: could not create pointer paths";
  }

  ((RunnerMsgSendAddPath)objc_msgSend)(record, addPathSelector, pathA);
  ((RunnerMsgSendAddPath)objc_msgSend)(record, addPathSelector, pathB);

  // Success is judged by the BOOL result; the NSError is only read on failure.
  NSError *error = nil;
  BOOL ok = ((RunnerMsgSendSynthesize)objc_msgSend)(record, synthesizeSelector, &error);
  if (!ok) {
    NSString *detail = error.localizedDescription ?: @"synthesizeWithError returned false";
    return [NSString stringWithFormat:@"private XCTest event synthesis failed: %@", detail];
  }
  return nil;
}

#pragma mark - Application introspection

// Reads `interfaceOrientation` from the application object. Falls back to 1 when the
// selector is missing or the reported value is not positive.
static long long RunnerInterfaceOrientationForApplication(id application) {
  SEL interfaceOrientationSelector = NSSelectorFromString(@"interfaceOrientation");
  if (![application respondsToSelector:interfaceOrientationSelector]) {
    return 1;
  }
  long long interfaceOrientation =
      ((RunnerMsgSendLongLong)objc_msgSend)(application, interfaceOrientationSelector);
  return interfaceOrientation > 0 ? interfaceOrientation : 1;
}

// Reads `processID` from the application object. Returns 0 when the selector is missing,
// which the caller treats as "could not resolve target process ID".
static long long RunnerProcessIDForApplication(id application) {
  SEL processIDSelector = NSSelectorFromString(@"processID");
  if (![application respondsToSelector:processIDSelector]) {
    return 0;
  }
  // NOTE(review): this call (and the matching `setTargetProcessID:` call) assumes the
  // property is 64 bits wide. If the private API declares it as a 32-bit int, the upper
  // half of the returned value is not guaranteed - confirm against the XCTest headers.
  return ((RunnerMsgSendLongLong)objc_msgSend)(application, processIDSelector);
}

#pragma mark - Path construction

/// Builds the pointer path for one finger of the gesture.
///
/// @param pathClass Runtime-resolved pointer-path class.
/// @param start Touch-down position for this finger.
/// @param finger 0 selects the first point of each pointer pair, anything else the second.
/// @return The populated path, or nil when the path object could not be created.
+ (id)pathWithClass:(Class)pathClass
              start:(CGPoint)start
                  x:(double)x
                  y:(double)y
                 dx:(double)dx
                 dy:(double)dy
              scale:(double)scale
             radius:(double)radius
         durationMs:(double)durationMs
             finger:(int)finger {
  SEL initPathSelector = NSSelectorFromString(@"initForTouchAtPoint:offset:");
  SEL moveSelector = NSSelectorFromString(@"moveToPoint:atOffset:");
  SEL liftSelector = NSSelectorFromString(@"liftUpAtOffset:");

  id path = ((RunnerMsgSendInitPath)objc_msgSend)([pathClass alloc], initPathSelector, start, 0.0);
  if (path == nil) {
    return nil;
  }

  int frameCount = RunnerTransformFrameCount(durationMs);
  NSTimeInterval durationSeconds = durationMs / 1000.0;
  // The loop bound is inclusive so the final sample lands exactly on t == 1.0. Stopping
  // one frame early would lift the fingers before the requested dx/dy/scale is reached.
  for (int index = 1; index <= frameCount; index += 1) {
    double t = (double)index / (double)frameCount;
    RunnerPointerPair pair = RunnerPointerPairAt(x, y, dx, dy, scale, radius, t);
    CGPoint point = finger == 0 ? pair.a : pair.b;
    NSTimeInterval offset = durationSeconds * t;
    ((RunnerMsgSendPathMove)objc_msgSend)(path, moveSelector, point, offset);
  }

  ((RunnerMsgSendPathOffset)objc_msgSend)(path, liftSelector, durationSeconds);
  return path;
}

#pragma mark - Geometry

// Positions of both fingers at progress `t` (0 = touch-down, 1 = end of gesture).
//
// The gesture center moves linearly from (x, y) to (x + dx, y + dy). The fingers sit
// directly above and below the center, and their distance from it is interpolated
// linearly so that end spacing / start spacing == scale:
//   scale >= 1: baseRadius / scale -> baseRadius
//   scale <  1: baseRadius         -> baseRadius * scale
// Either way the fingers never spread further than baseRadius from the center.
static RunnerPointerPair RunnerPointerPairAt(
    double x,
    double y,
    double dx,
    double dy,
    double scale,
    double baseRadius,
    double t
) {
  double centerX = x + dx * t;
  double centerY = y + dy * t;
  double startRadius = baseRadius / MAX(scale, 1.0);
  double endRadius = baseRadius;
  if (scale < 1.0) {
    startRadius = baseRadius;
    endRadius = baseRadius * scale;
  }
  double radius = startRadius + (endRadius - startRadius) * t;
  RunnerPointerPair pair = {
    .a = CGPointMake(centerX, centerY - radius),
    .b = CGPointMake(centerX, centerY + radius),
  };
  return pair;
}

@end

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,28 +1281,27 @@ extension RunnerTests {
12811281
durationMs: Double
12821282
) -> RunnerInteractionOutcome {
12831283
#if os(iOS)
1284-
let holdDuration = max(0.02, min(durationMs / 1000.0, 10.0) / 3.0)
1285-
let panOutcome = performCoordinateDrag(
1286-
app: app,
1284+
if let message = RunnerSynthesizedGesture.synthesizeTransform(
1285+
withApplication: app,
12871286
x: x,
12881287
y: y,
1289-
x2: x + dx,
1290-
y2: y + dy,
1291-
holdDuration: holdDuration
1292-
)
1293-
guard case .performed = panOutcome else {
1294-
return panOutcome
1288+
dx: dx,
1289+
dy: dy,
1290+
scale: scale,
1291+
durationMs: durationMs
1292+
) {
1293+
return .unsupported(message)
1294+
}
1295+
if abs(degrees) > 0.001 {
1296+
return performCoordinateRotateGesture(
1297+
app: app,
1298+
degrees: degrees,
1299+
x: x + dx,
1300+
y: y + dy,
1301+
velocity: degrees >= 0 ? 1 : -1
1302+
)
12951303
}
1296-
1297-
let target = gestureElement(app: app, x: x, y: y)
1298-
target.pinch(withScale: CGFloat(scale), velocity: CGFloat(scale >= 1.0 ? 1.0 : -1.0))
1299-
return performCoordinateRotateGesture(
1300-
app: app,
1301-
degrees: degrees,
1302-
x: x,
1303-
y: y,
1304-
velocity: degrees >= 0 ? 1.0 : -1.0
1305-
)
1304+
return .performed
13061305
#elseif os(tvOS)
13071306
return .unsupported("transformGesture is not supported on tvOS")
13081307
#else
@@ -1361,21 +1360,6 @@ extension RunnerTests {
13611360
#endif
13621361
}
13631362

1364-
#if os(iOS)
1365-
private func gestureElement(app: XCUIApplication, x: Double, y: Double) -> XCUIElement {
1366-
let point = CGPoint(x: x, y: y)
1367-
let matches = app.descendants(matching: .any).allElementsBoundByIndex.filter { element in
1368-
element.exists && element.frame.contains(point) && !element.frame.isEmpty
1369-
}
1370-
if let smallest = matches.min(by: { left, right in
1371-
(left.frame.width * left.frame.height) < (right.frame.width * right.frame.height)
1372-
}) {
1373-
return smallest
1374-
}
1375-
return interactionRoot(app: app)
1376-
}
1377-
#endif
1378-
13791363
private func interactionRoot(app: XCUIApplication) -> XCUIElement {
13801364
let windows = app.windows.allElementsBoundByIndex
13811365
if let window = windows.first(where: { $0.exists && !$0.frame.isEmpty }) {

src/utils/__tests__/args.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,7 @@ test('usageForCommand resolves workflow help topic', () => {
962962
);
963963
assert.match(help, /agent-device clipboard write "some text"/);
964964
assert.match(help, /For gesture-heavy iOS simulator proof videos, prefer --hide-touches/);
965+
assert.match(help, /iOS simulator transform uses private XCTest synthesis for pan\/scale/);
965966
assert.match(help, /Android Gboard handwriting\/stylus UI can capture text/);
966967
assert.match(help, /targetInput\/actualInput details/);
967968
assert.match(help, /Do not keep retrying fill\/type against the same field/);

src/utils/command-schema.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ Command shape:
236236
Snapshot refs look like @e12. After snapshot -i, use the exact @eN ref from that output.
237237
If the exact ref is not known yet, first output snapshot -i, then use a concrete example shape like press @e12 in the next command; do not write @<ref>, @ref, @Label_Name, or @eN placeholders.
238238
Close means agent-device close. App-owned back means back; system back means back --system.
239-
Taps are press or click. Gestures use swipe, longpress, or gesture <pan|fling|pinch|rotate|transform>. Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper.
239+
Taps are press or click. Gestures use swipe, longpress, or gesture <pan|fling|pinch|rotate|transform>. Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper. iOS simulator transform uses private XCTest synthesis for pan/scale and XCTest rotation for degrees; otherwise it reports UNSUPPORTED_OPERATION.
240240
241241
Bootstrap:
242242
agent-device devices --platform ios
@@ -323,7 +323,7 @@ Navigation and gestures:
323323
agent-device gesture pinch 0.5 200 400
324324
agent-device gesture rotate 35 200 420
325325
agent-device gesture transform 200 420 80 -40 2 35 700
326-
iOS simulator transform uses XCTest gesture primitives; verify app metrics instead of assuming requested degrees map exactly to recognizer output.
326+
iOS simulator transform uses private XCTest synthesis for pan/scale and XCTest rotation for degrees; verify app metrics instead of assuming requested values map exactly to recognizer output.
327327
328328
Validation and evidence:
329329
Nearby mutation diff: agent-device diff snapshot -i.

test/skillgym/suites/agent-device-smoke-suite.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1525,7 +1525,7 @@ const SKILL_GUIDANCE_CASES: Case[] = [
15251525
'Platform: Android',
15261526
'Current screen: gesture lab',
15271527
'Target center is x=200 y=420',
1528-
'Need one continuous two-finger gesture without lifting fingers',
1528+
'Need the direct transform command rather than separate gesture commands',
15291529
'Pan delta is dx=80 dy=-40',
15301530
'Zoom scale is 2',
15311531
'Rotation is 35 degrees',
@@ -1540,6 +1540,28 @@ const SKILL_GUIDANCE_CASES: Case[] = [
15401540
plannedCommand('compose-gestures'),
15411541
],
15421542
}),
1543+
makeCase({
1544+
id: 'ios-simulator-gesture-transform',
1545+
contract: [
1546+
'Platform: iOS simulator',
1547+
'Current screen: gesture lab',
1548+
'Target center is x=200 y=420',
1549+
'Need one continuous two-finger gesture without lifting fingers',
1550+
'Pan delta is dx=80 dy=-40',
1551+
'Zoom scale is 2',
1552+
'Rotation is 35 degrees',
1553+
'Duration is 700ms',
1554+
],
1555+
task: 'Plan the direct agent-device command for the combined pan, zoom, and rotate gesture.',
1556+
outputs: [plannedCommand('gesture transform'), /200\s+420\s+80\s+-40\s+2\s+35\s+700/i],
1557+
forbiddenOutputs: [
1558+
plannedCommand('gesture pan'),
1559+
plannedCommand('gesture pinch'),
1560+
plannedCommand('gesture rotate'),
1561+
plannedCommand('rotate-gesture'),
1562+
plannedCommand('swipe'),
1563+
],
1564+
}),
15431565
makeCase({
15441566
id: 'settings-animation-stabilizer',
15451567
contract: [

website/docs/docs/commands.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ On iOS, swipe duration is clamped to a safe range (`16..60ms`) to avoid longpres
280280
`gesture fling` accepts `up|down|left|right x y [distance] [durationMs]` for fast directional throws.
281281
`gesture rotate` accepts `degrees [x] [y] [velocity]`; the degree sign controls direction and velocity controls speed.
282282
`gesture transform` accepts `x y dx dy scale degrees [durationMs]` for one combined pan/zoom/rotate gesture on Android and iOS simulators.
283-
On iOS simulators it is implemented with XCTest gesture primitives, so verify app-level metrics instead of assuming the requested degrees map exactly to recognizer output.
283+
On iOS simulators it uses private XCTest synthesis for pan/scale and XCTest rotation for degrees, so verify app-level metrics instead of assuming the requested values map exactly to recognizer output.
284284
`scroll` accepts either a relative amount (`0.5` means roughly half of the viewport on that axis) or `--pixels <n>` for a fixed-distance gesture. Large distances are clamped to the usable drag band so the gesture stays reliable across Android, iOS, and macOS.
285285
Default snapshot text output is visible-first, so off-screen interactive content is summarized instead of shown as tappable refs.
286286
When a target only appears in an off-screen summary, use `scroll <direction>` and then take a fresh `snapshot -i`. For repeated checks, a small shell loop is enough:

0 commit comments

Comments
 (0)