Skip to content

Commit ee2e421

Browse files
committed
feat: add runtime system and gesture commands
1 parent 2c41225 commit ee2e421

15 files changed

Lines changed: 2125 additions & 304 deletions

COMMAND_OWNERSHIP.md

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,29 @@ Their semantics should live in `agent-device/commands` as they migrate.
6969
local file inputs remain command-policy gated.
7070
- `trigger-app-event`: runtime `apps.triggerEvent` implemented with event name
7171
and JSON payload validation.
72+
- `back`: runtime `system.back` implemented with typed in-app/system modes.
73+
- `home`: runtime `system.home` implemented.
74+
- `rotate`: runtime `system.rotate` implemented with explicit orientation
75+
validation.
76+
- `keyboard`: runtime `system.keyboard` implemented with explicit status/get
77+
and dismiss result shapes.
78+
- `clipboard`: runtime `system.clipboard` implemented with read/write result
79+
unions.
80+
- `settings`: runtime `system.settings` implemented as a typed settings-open
81+
primitive.
82+
- `alert`: runtime `system.alert` implemented with explicit status, handled,
83+
and wait result unions.
84+
- `app-switcher`: runtime `system.appSwitcher` implemented.
85+
- `focus`: runtime `interactions.focus` implemented for point, ref, and
86+
selector targets.
87+
- `longpress`: runtime `interactions.longPress` implemented for point, ref, and
88+
selector targets.
89+
- `swipe`: runtime `interactions.swipe` implemented with point, ref, selector,
90+
and viewport-derived directional starts.
91+
- `scroll`: runtime `interactions.scroll` implemented with viewport, point, ref,
92+
and selector targets.
93+
- `pinch`: runtime `interactions.pinch` implemented behind the typed backend
94+
primitive.
7295

7396
## Boundary Requirements
7497

src/__tests__/runtime-conformance.test.ts

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,19 @@ test('command conformance suites run against a fixture backend', async () => {
3131
assert.equal(calls.includes('tap'), true);
3232
assert.equal(calls.includes('fill'), true);
3333
assert.equal(calls.includes('typeText'), true);
34+
assert.equal(calls.includes('focus'), true);
35+
assert.equal(calls.includes('longPress'), true);
36+
assert.equal(calls.includes('swipe'), true);
37+
assert.equal(calls.includes('scroll'), true);
38+
assert.equal(calls.includes('pinch'), true);
39+
assert.equal(calls.includes('pressBack'), true);
40+
assert.equal(calls.includes('pressHome'), true);
41+
assert.equal(calls.includes('rotate'), true);
42+
assert.equal(calls.includes('setKeyboard'), true);
43+
assert.equal(calls.includes('getClipboard'), true);
44+
assert.equal(calls.includes('openSettings'), true);
45+
assert.equal(calls.includes('handleAlert'), true);
46+
assert.equal(calls.includes('openAppSwitcher'), true);
3447
assert.equal(calls.includes('openApp'), true);
3548
assert.equal(calls.includes('closeApp'), true);
3649
assert.equal(calls.includes('listApps'), true);
@@ -78,6 +91,48 @@ function createFixtureBackend(calls: string[]): AgentDeviceBackend {
7891
typeText: async () => {
7992
calls.push('typeText');
8093
},
94+
focus: async () => {
95+
calls.push('focus');
96+
},
97+
longPress: async () => {
98+
calls.push('longPress');
99+
},
100+
swipe: async () => {
101+
calls.push('swipe');
102+
},
103+
scroll: async () => {
104+
calls.push('scroll');
105+
},
106+
pinch: async () => {
107+
calls.push('pinch');
108+
},
109+
pressBack: async () => {
110+
calls.push('pressBack');
111+
},
112+
pressHome: async () => {
113+
calls.push('pressHome');
114+
},
115+
rotate: async () => {
116+
calls.push('rotate');
117+
},
118+
setKeyboard: async (_context, options) => {
119+
calls.push('setKeyboard');
120+
return { action: options.action, visible: false };
121+
},
122+
getClipboard: async () => {
123+
calls.push('getClipboard');
124+
return { text: 'copied' };
125+
},
126+
openSettings: async () => {
127+
calls.push('openSettings');
128+
},
129+
handleAlert: async () => {
130+
calls.push('handleAlert');
131+
return { kind: 'alertStatus', alert: null };
132+
},
133+
openAppSwitcher: async () => {
134+
calls.push('openAppSwitcher');
135+
},
81136
openApp: async () => {
82137
calls.push('openApp');
83138
},

src/__tests__/runtime-interactions.test.ts

Lines changed: 138 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,123 @@ test('runtime typeText validates refs and forwards text to the backend primitive
193193
);
194194
});
195195

196+
test('runtime focus and longPress share selector/ref target resolution', async () => {
197+
const calls: unknown[] = [];
198+
const device = createInteractionDevice(selectorSnapshot(), {
199+
focus: async (_context, point) => {
200+
calls.push({ command: 'focus', point });
201+
return { focused: true };
202+
},
203+
longPress: async (_context, point, options) => {
204+
calls.push({ command: 'longPress', point, durationMs: options?.durationMs });
205+
},
206+
});
207+
208+
const focused = await device.interactions.focus(selector('label=Continue'), {
209+
session: 'default',
210+
});
211+
const longPressed = await device.interactions.longPress(ref('@e1'), {
212+
session: 'default',
213+
durationMs: 750,
214+
});
215+
216+
assert.equal(focused.kind, 'selector');
217+
assert.deepEqual(focused.backendResult, { focused: true });
218+
assert.equal(longPressed.kind, 'ref');
219+
assert.deepEqual(calls, [
220+
{ command: 'focus', point: { x: 60, y: 40 } },
221+
{ command: 'longPress', point: { x: 60, y: 40 }, durationMs: 750 },
222+
]);
223+
});
224+
225+
test('runtime scroll resolves selector targets before calling the backend primitive', async () => {
226+
const calls: unknown[] = [];
227+
const device = createInteractionDevice(selectorSnapshot(), {
228+
scroll: async (_context, target, options) => {
229+
calls.push({ target, options });
230+
return { scrolled: true };
231+
},
232+
});
233+
234+
const selectorResult = await device.interactions.scroll({
235+
session: 'default',
236+
target: selector('label=Continue'),
237+
direction: 'down',
238+
pixels: 120,
239+
});
240+
const viewportResult = await device.interactions.scroll({
241+
direction: 'up',
242+
amount: 0.5,
243+
});
244+
245+
assert.equal(selectorResult.kind, 'selector');
246+
assert.equal(viewportResult.kind, 'viewport');
247+
assert.deepEqual(calls, [
248+
{
249+
target: { kind: 'point', point: { x: 60, y: 40 } },
250+
options: { direction: 'down', pixels: 120 },
251+
},
252+
{
253+
target: { kind: 'viewport' },
254+
options: { direction: 'up', amount: 0.5 },
255+
},
256+
]);
257+
});
258+
259+
test('runtime swipe supports explicit and viewport-derived targets', async () => {
260+
const calls: unknown[] = [];
261+
const device = createInteractionDevice(selectorSnapshot(), {
262+
swipe: async (_context, from, to, options) => {
263+
calls.push({ from, to, durationMs: options?.durationMs });
264+
},
265+
});
266+
267+
const explicit = await device.interactions.swipe({
268+
from: selector('label=Continue'),
269+
to: { x: 200, y: 40 },
270+
durationMs: 300,
271+
session: 'default',
272+
});
273+
const directional = await device.interactions.swipe({
274+
direction: 'left',
275+
distance: 25,
276+
session: 'default',
277+
});
278+
279+
assert.deepEqual(explicit.from, { x: 60, y: 40 });
280+
assert.deepEqual(directional.from, { x: 60, y: 40 });
281+
assert.deepEqual(directional.to, { x: 35, y: 40 });
282+
assert.deepEqual(calls, [
283+
{ from: { x: 60, y: 40 }, to: { x: 200, y: 40 }, durationMs: 300 },
284+
{ from: { x: 60, y: 40 }, to: { x: 35, y: 40 }, durationMs: undefined },
285+
]);
286+
});
287+
288+
test('runtime pinch is backend-gated and resolves optional center targets', async () => {
289+
const calls: unknown[] = [];
290+
const unsupported = createInteractionDevice(selectorSnapshot());
291+
await assert.rejects(
292+
() => unsupported.interactions.pinch({ scale: 1.2 }),
293+
/pinch is not supported/,
294+
);
295+
296+
const device = createInteractionDevice(selectorSnapshot(), {
297+
pinch: async (_context, options) => {
298+
calls.push(options);
299+
},
300+
});
301+
302+
const result = await device.interactions.pinch({
303+
scale: 0.8,
304+
center: ref('@e1'),
305+
session: 'default',
306+
});
307+
308+
assert.equal(result.kind, 'pinch');
309+
assert.deepEqual(result.center, { x: 60, y: 40 });
310+
assert.deepEqual(calls, [{ scale: 0.8, center: { x: 60, y: 40 } }]);
311+
});
312+
196313
test('runtime interaction commands are available from the command namespace', async () => {
197314
const device = createInteractionDevice(selectorSnapshot(), {
198315
tap: async () => {},
@@ -235,7 +352,20 @@ function fillableSnapshot(): SnapshotState {
235352

236353
function createInteractionDevice(
237354
snapshot: SnapshotState,
238-
overrides: Partial<Pick<AgentDeviceBackend, 'captureSnapshot' | 'tap' | 'fill' | 'typeText'>> & {
355+
overrides: Partial<
356+
Pick<
357+
AgentDeviceBackend,
358+
| 'captureSnapshot'
359+
| 'tap'
360+
| 'fill'
361+
| 'typeText'
362+
| 'focus'
363+
| 'longPress'
364+
| 'scroll'
365+
| 'swipe'
366+
| 'pinch'
367+
>
368+
> & {
239369
platform?: AgentDeviceBackend['platform'];
240370
sessionMetadata?: Record<string, unknown>;
241371
} = {},
@@ -248,6 +378,13 @@ function createInteractionDevice(
248378
tap: async (...args) => await overrides.tap?.(...args),
249379
fill: async (...args) => await overrides.fill?.(...args),
250380
typeText: async (...args) => await overrides.typeText?.(...args),
381+
focus: overrides.focus ? async (...args) => await overrides.focus?.(...args) : undefined,
382+
longPress: overrides.longPress
383+
? async (...args) => await overrides.longPress?.(...args)
384+
: undefined,
385+
scroll: overrides.scroll ? async (...args) => await overrides.scroll?.(...args) : undefined,
386+
swipe: overrides.swipe ? async (...args) => await overrides.swipe?.(...args) : undefined,
387+
pinch: overrides.pinch ? async (...args) => await overrides.pinch?.(...args) : undefined,
251388
} satisfies AgentDeviceBackend,
252389
artifacts: createLocalArtifactAdapter(),
253390
sessions: createMemorySessionStore([

src/__tests__/runtime-public.test.ts

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ const backend = {
4343
getAppState: async (_context, app: string) => ({ bundleId: app, state: 'foreground' as const }),
4444
pushFile: async () => {},
4545
triggerAppEvent: async () => {},
46+
pressHome: async () => {},
4647
} satisfies AgentDeviceBackend;
4748

4849
const artifacts = {
@@ -76,6 +77,7 @@ test('package root exposes command runtime skeleton', async () => {
7677
assert.equal(device.policy.allowLocalInputPaths, false);
7778
assert.equal(typeof device.capture.screenshot, 'function');
7879
assert.equal(typeof device.interactions.click, 'function');
80+
assert.equal(typeof device.system.back, 'function');
7981
assert.equal(typeof device.apps.open, 'function');
8082
const result = await device.capture.screenshot({});
8183
assert.equal(result.path, '/tmp/path.png');
@@ -365,11 +367,24 @@ test('public backend, commands, io, and conformance subpaths are importable', ()
365367
assert.equal(typeof commands.interactions.press, 'function');
366368
assert.equal(typeof commands.interactions.fill, 'function');
367369
assert.equal(typeof commands.interactions.typeText, 'function');
370+
assert.equal(typeof commands.interactions.focus, 'function');
371+
assert.equal(typeof commands.interactions.longPress, 'function');
372+
assert.equal(typeof commands.interactions.swipe, 'function');
373+
assert.equal(typeof commands.interactions.scroll, 'function');
374+
assert.equal(typeof commands.interactions.pinch, 'function');
375+
assert.equal(typeof commands.system.back, 'function');
376+
assert.equal(typeof commands.system.home, 'function');
377+
assert.equal(typeof commands.system.rotate, 'function');
378+
assert.equal(typeof commands.system.keyboard, 'function');
379+
assert.equal(typeof commands.system.clipboard, 'function');
380+
assert.equal(typeof commands.system.settings, 'function');
381+
assert.equal(typeof commands.system.alert, 'function');
382+
assert.equal(typeof commands.system.appSwitcher, 'function');
368383
assert.equal(
369384
commandCatalog.some((entry) => entry.command === 'click' && entry.status === 'implemented'),
370385
true,
371386
);
372-
assert.equal(commandConformanceSuites.length, 4);
387+
assert.equal(commandConformanceSuites.length, 5);
373388
assert.equal(typeof runCommandConformance, 'function');
374389
assert.equal(target.name, 'fake');
375390
});
@@ -421,6 +436,13 @@ test('command router dispatches implemented runtime commands and normalizes erro
421436
assert.equal(typed.ok, true);
422437
assert.equal(typed.ok && 'text' in typed.data ? typed.data.text : undefined, 'hello');
423438

439+
const home = await router.dispatch({
440+
command: 'system.home',
441+
options: {},
442+
});
443+
assert.equal(home.ok, true);
444+
assert.equal(home.ok && 'kind' in home.data ? home.data.kind : undefined, 'systemHome');
445+
424446
const opened = await router.dispatch({
425447
command: 'apps.open',
426448
options: {

0 commit comments

Comments
 (0)