Skip to content

Commit 457147a

Browse files
committed
feat: add runtime system and gesture commands
1 parent 2c41225 commit 457147a

15 files changed

Lines changed: 2225 additions & 304 deletions

COMMAND_OWNERSHIP.md

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,29 @@ Their semantics should live in `agent-device/commands` as they migrate.
6969
local file inputs remain command-policy gated.
7070
- `trigger-app-event`: runtime `apps.triggerEvent` implemented with event name
7171
and JSON payload validation.
72+
- `back`: runtime `system.back` implemented with typed in-app/system modes.
73+
- `home`: runtime `system.home` implemented.
74+
- `rotate`: runtime `system.rotate` implemented with explicit orientation
75+
validation.
76+
- `keyboard`: runtime `system.keyboard` implemented with explicit status/get
77+
and dismiss result shapes.
78+
- `clipboard`: runtime `system.clipboard` implemented with read/write result
79+
unions.
80+
- `settings`: runtime `system.settings` implemented as a typed settings-open
81+
primitive.
82+
- `alert`: runtime `system.alert` implemented with explicit status, handled,
83+
and wait result unions.
84+
- `app-switcher`: runtime `system.appSwitcher` implemented.
85+
- `focus`: runtime `interactions.focus` implemented for point, ref, and
86+
selector targets.
87+
- `longpress`: runtime `interactions.longPress` implemented for point, ref, and
88+
selector targets.
89+
- `swipe`: runtime `interactions.swipe` implemented with point, ref, selector,
90+
and viewport-derived directional starts.
91+
- `scroll`: runtime `interactions.scroll` implemented with viewport, point, ref,
92+
and selector targets.
93+
- `pinch`: runtime `interactions.pinch` implemented behind the typed backend
94+
primitive.
7295

7396
## Boundary Requirements
7497

src/__tests__/runtime-conformance.test.ts

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,19 @@ test('command conformance suites run against a fixture backend', async () => {
3131
assert.equal(calls.includes('tap'), true);
3232
assert.equal(calls.includes('fill'), true);
3333
assert.equal(calls.includes('typeText'), true);
34+
assert.equal(calls.includes('focus'), true);
35+
assert.equal(calls.includes('longPress'), true);
36+
assert.equal(calls.includes('swipe'), true);
37+
assert.equal(calls.includes('scroll'), true);
38+
assert.equal(calls.includes('pinch'), true);
39+
assert.equal(calls.includes('pressBack'), true);
40+
assert.equal(calls.includes('pressHome'), true);
41+
assert.equal(calls.includes('rotate'), true);
42+
assert.equal(calls.includes('setKeyboard'), true);
43+
assert.equal(calls.includes('getClipboard'), true);
44+
assert.equal(calls.includes('openSettings'), true);
45+
assert.equal(calls.includes('handleAlert'), true);
46+
assert.equal(calls.includes('openAppSwitcher'), true);
3447
assert.equal(calls.includes('openApp'), true);
3548
assert.equal(calls.includes('closeApp'), true);
3649
assert.equal(calls.includes('listApps'), true);
@@ -78,6 +91,48 @@ function createFixtureBackend(calls: string[]): AgentDeviceBackend {
7891
typeText: async () => {
7992
calls.push('typeText');
8093
},
94+
focus: async () => {
95+
calls.push('focus');
96+
},
97+
longPress: async () => {
98+
calls.push('longPress');
99+
},
100+
swipe: async () => {
101+
calls.push('swipe');
102+
},
103+
scroll: async () => {
104+
calls.push('scroll');
105+
},
106+
pinch: async () => {
107+
calls.push('pinch');
108+
},
109+
pressBack: async () => {
110+
calls.push('pressBack');
111+
},
112+
pressHome: async () => {
113+
calls.push('pressHome');
114+
},
115+
rotate: async () => {
116+
calls.push('rotate');
117+
},
118+
setKeyboard: async (_context, options) => {
119+
calls.push('setKeyboard');
120+
return { action: options.action, visible: false };
121+
},
122+
getClipboard: async () => {
123+
calls.push('getClipboard');
124+
return { text: 'copied' };
125+
},
126+
openSettings: async () => {
127+
calls.push('openSettings');
128+
},
129+
handleAlert: async () => {
130+
calls.push('handleAlert');
131+
return { kind: 'alertStatus', alert: null };
132+
},
133+
openAppSwitcher: async () => {
134+
calls.push('openAppSwitcher');
135+
},
81136
openApp: async () => {
82137
calls.push('openApp');
83138
},

src/__tests__/runtime-interactions.test.ts

Lines changed: 231 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,186 @@ test('runtime typeText validates refs and forwards text to the backend primitive
193193
);
194194
});
195195

196+
test('runtime focus and longPress share selector/ref target resolution', async () => {
197+
const calls: unknown[] = [];
198+
const device = createInteractionDevice(selectorSnapshot(), {
199+
focus: async (_context, point) => {
200+
calls.push({ command: 'focus', point });
201+
return { focused: true };
202+
},
203+
longPress: async (_context, point, options) => {
204+
calls.push({ command: 'longPress', point, durationMs: options?.durationMs });
205+
},
206+
});
207+
208+
const focused = await device.interactions.focus(selector('label=Continue'), {
209+
session: 'default',
210+
});
211+
const longPressed = await device.interactions.longPress(ref('@e1'), {
212+
session: 'default',
213+
durationMs: 750,
214+
});
215+
216+
assert.equal(focused.kind, 'selector');
217+
assert.deepEqual(focused.backendResult, { focused: true });
218+
assert.equal(longPressed.kind, 'ref');
219+
assert.deepEqual(calls, [
220+
{ command: 'focus', point: { x: 60, y: 40 } },
221+
{ command: 'longPress', point: { x: 60, y: 40 }, durationMs: 750 },
222+
]);
223+
});
224+
225+
test('runtime scroll resolves selector targets before calling the backend primitive', async () => {
226+
const calls: unknown[] = [];
227+
const device = createInteractionDevice(selectorSnapshot(), {
228+
scroll: async (_context, target, options) => {
229+
calls.push({ target, options });
230+
return { scrolled: true };
231+
},
232+
});
233+
234+
const selectorResult = await device.interactions.scroll({
235+
session: 'default',
236+
target: selector('label=Continue'),
237+
direction: 'down',
238+
pixels: 120,
239+
});
240+
const viewportResult = await device.interactions.scroll({
241+
direction: 'up',
242+
amount: 0.5,
243+
});
244+
245+
assert.equal(selectorResult.kind, 'selector');
246+
assert.equal(viewportResult.kind, 'viewport');
247+
assert.deepEqual(calls, [
248+
{
249+
target: { kind: 'point', point: { x: 60, y: 40 } },
250+
options: { direction: 'down', pixels: 120 },
251+
},
252+
{
253+
target: { kind: 'viewport' },
254+
options: { direction: 'up', amount: 0.5 },
255+
},
256+
]);
257+
});
258+
259+
test('runtime swipe supports explicit and viewport-derived targets', async () => {
260+
const calls: unknown[] = [];
261+
const device = createInteractionDevice(selectorSnapshot(), {
262+
swipe: async (_context, from, to, options) => {
263+
calls.push({ from, to, durationMs: options?.durationMs });
264+
},
265+
});
266+
267+
const explicit = await device.interactions.swipe({
268+
from: selector('label=Continue'),
269+
to: { x: 200, y: 40 },
270+
durationMs: 300,
271+
session: 'default',
272+
});
273+
const directional = await device.interactions.swipe({
274+
direction: 'left',
275+
distance: 25,
276+
session: 'default',
277+
});
278+
279+
assert.deepEqual(explicit.from, { x: 60, y: 40 });
280+
assert.deepEqual(directional.from, { x: 60, y: 40 });
281+
assert.deepEqual(directional.to, { x: 35, y: 40 });
282+
assert.deepEqual(calls, [
283+
{ from: { x: 60, y: 40 }, to: { x: 200, y: 40 }, durationMs: 300 },
284+
{ from: { x: 60, y: 40 }, to: { x: 35, y: 40 }, durationMs: undefined },
285+
]);
286+
});
287+
288+
test('runtime directional swipe uses the visible viewport instead of off-screen content bounds', async () => {
289+
const calls: unknown[] = [];
290+
const device = createInteractionDevice(snapshotWithOffscreenContent(), {
291+
swipe: async (_context, from, to) => {
292+
calls.push({ from, to });
293+
},
294+
});
295+
296+
const result = await device.interactions.swipe({
297+
direction: 'left',
298+
distance: 25,
299+
session: 'default',
300+
});
301+
302+
assert.deepEqual(result.from, { x: 50, y: 50 });
303+
assert.deepEqual(result.to, { x: 25, y: 50 });
304+
assert.deepEqual(calls, [{ from: { x: 50, y: 50 }, to: { x: 25, y: 50 } }]);
305+
});
306+
307+
test('runtime viewport gestures reject inspect-only macOS surfaces', async () => {
308+
for (const surface of ['desktop', 'menubar'] as const) {
309+
const device = createInteractionDevice(selectorSnapshot(), {
310+
platform: 'macos',
311+
sessionMetadata: { surface },
312+
scroll: async () => {
313+
throw new Error(`${surface} scroll should be rejected before backend call`);
314+
},
315+
swipe: async () => {
316+
throw new Error(`${surface} swipe should be rejected before backend call`);
317+
},
318+
pinch: async () => {
319+
throw new Error(`${surface} pinch should be rejected before backend call`);
320+
},
321+
});
322+
323+
await assert.rejects(
324+
() =>
325+
device.interactions.scroll({
326+
direction: 'down',
327+
target: { kind: 'viewport' },
328+
session: 'default',
329+
}),
330+
new RegExp(`scroll is not supported on macOS ${surface}`),
331+
);
332+
await assert.rejects(
333+
() =>
334+
device.interactions.swipe({
335+
direction: 'left',
336+
session: 'default',
337+
}),
338+
new RegExp(`swipe is not supported on macOS ${surface}`),
339+
);
340+
await assert.rejects(
341+
() =>
342+
device.interactions.pinch({
343+
scale: 1.2,
344+
session: 'default',
345+
}),
346+
new RegExp(`pinch is not supported on macOS ${surface}`),
347+
);
348+
}
349+
});
350+
351+
test('runtime pinch is backend-gated and resolves optional center targets', async () => {
352+
const calls: unknown[] = [];
353+
const unsupported = createInteractionDevice(selectorSnapshot());
354+
await assert.rejects(
355+
() => unsupported.interactions.pinch({ scale: 1.2 }),
356+
/pinch is not supported/,
357+
);
358+
359+
const device = createInteractionDevice(selectorSnapshot(), {
360+
pinch: async (_context, options) => {
361+
calls.push(options);
362+
},
363+
});
364+
365+
const result = await device.interactions.pinch({
366+
scale: 0.8,
367+
center: ref('@e1'),
368+
session: 'default',
369+
});
370+
371+
assert.equal(result.kind, 'pinch');
372+
assert.deepEqual(result.center, { x: 60, y: 40 });
373+
assert.deepEqual(calls, [{ scale: 0.8, center: { x: 60, y: 40 } }]);
374+
});
375+
196376
test('runtime interaction commands are available from the command namespace', async () => {
197377
const device = createInteractionDevice(selectorSnapshot(), {
198378
tap: async () => {},
@@ -233,9 +413,52 @@ function fillableSnapshot(): SnapshotState {
233413
]);
234414
}
235415

416+
function snapshotWithOffscreenContent(): SnapshotState {
417+
return makeSnapshotState([
418+
{
419+
index: 0,
420+
depth: 0,
421+
type: 'Application',
422+
label: 'Example',
423+
rect: { x: 0, y: 0, width: 100, height: 100 },
424+
},
425+
{
426+
index: 1,
427+
depth: 1,
428+
parentIndex: 0,
429+
type: 'Button',
430+
label: 'Visible',
431+
rect: { x: 10, y: 10, width: 20, height: 20 },
432+
hittable: true,
433+
},
434+
{
435+
index: 2,
436+
depth: 1,
437+
parentIndex: 0,
438+
type: 'Button',
439+
label: 'Offscreen',
440+
rect: { x: 10, y: 900, width: 20, height: 20 },
441+
hittable: true,
442+
},
443+
]);
444+
}
445+
236446
function createInteractionDevice(
237447
snapshot: SnapshotState,
238-
overrides: Partial<Pick<AgentDeviceBackend, 'captureSnapshot' | 'tap' | 'fill' | 'typeText'>> & {
448+
overrides: Partial<
449+
Pick<
450+
AgentDeviceBackend,
451+
| 'captureSnapshot'
452+
| 'tap'
453+
| 'fill'
454+
| 'typeText'
455+
| 'focus'
456+
| 'longPress'
457+
| 'scroll'
458+
| 'swipe'
459+
| 'pinch'
460+
>
461+
> & {
239462
platform?: AgentDeviceBackend['platform'];
240463
sessionMetadata?: Record<string, unknown>;
241464
} = {},
@@ -248,6 +471,13 @@ function createInteractionDevice(
248471
tap: async (...args) => await overrides.tap?.(...args),
249472
fill: async (...args) => await overrides.fill?.(...args),
250473
typeText: async (...args) => await overrides.typeText?.(...args),
474+
focus: overrides.focus ? async (...args) => await overrides.focus?.(...args) : undefined,
475+
longPress: overrides.longPress
476+
? async (...args) => await overrides.longPress?.(...args)
477+
: undefined,
478+
scroll: overrides.scroll ? async (...args) => await overrides.scroll?.(...args) : undefined,
479+
swipe: overrides.swipe ? async (...args) => await overrides.swipe?.(...args) : undefined,
480+
pinch: overrides.pinch ? async (...args) => await overrides.pinch?.(...args) : undefined,
251481
} satisfies AgentDeviceBackend,
252482
artifacts: createLocalArtifactAdapter(),
253483
sessions: createMemorySessionStore([

0 commit comments

Comments
 (0)