Skip to content

Commit 3b6306d

Browse files
committed
perf: optimize and verify scrollintoview ref path
1 parent 40283f5 commit 3b6306d

7 files changed

Lines changed: 613 additions & 53 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ agent-device press 300 500 --count 12 --interval-ms 45
130130
agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2
131131
agent-device press @e5 --count 5 --double-tap
132132
agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong
133+
agent-device scrollintoview "Sign in"
134+
agent-device scrollintoview @e42
133135
```
134136

135137
## Command Index
@@ -180,6 +182,7 @@ Swipe timing:
180182
- `swipe` accepts optional `durationMs` (default `250`, range `16..10000`).
181183
- Android uses requested swipe duration directly.
182184
- iOS uses a safe normalized duration to avoid longpress side effects.
185+
- `scrollintoview` accepts either plain text or a snapshot ref (`@eN`); ref mode uses geometry-based scrolling.
183186

184187
## Skills
185188
Install the automation skills listed in [SKILL.md](skills/agent-device/SKILL.md).
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import test from 'node:test';
2+
import assert from 'node:assert/strict';
3+
import { type RawSnapshotNode } from '../../utils/snapshot.ts';
4+
import {
5+
buildScrollIntoViewPlan,
6+
isRectWithinSafeViewportBand,
7+
resolveViewportRect,
8+
} from '../scroll-planner.ts';
9+
10+
/**
 * Test helper that builds a snapshot node literal from its three inputs.
 *
 * @param index - value stored as the node's `index`.
 * @param type - value stored as the node's `type`.
 * @param rect - optional rect; when omitted the node's `rect` is `undefined`.
 * @returns an object containing exactly `index`, `type` and `rect`.
 */
function makeNode(index: number, type: string, rect?: RawSnapshotNode['rect']): RawSnapshotNode {
11+
return { index, type, rect };
12+
}
13+
14+
// Scenario: an Application node and a Window node share the rect 390x844 at the origin,
// and a Cell node carries the target rect at y=1700.
// Expectation: resolveViewportRect(nodes, targetRect) deep-equals that 390x844 rect.
test('resolveViewportRect picks containing application/window viewport', () => {
15+
const targetRect = { x: 20, y: 1700, width: 120, height: 40 };
16+
const nodes: RawSnapshotNode[] = [
17+
makeNode(0, 'Application', { x: 0, y: 0, width: 390, height: 844 }),
18+
makeNode(1, 'Window', { x: 0, y: 0, width: 390, height: 844 }),
19+
makeNode(2, 'Cell', targetRect),
20+
];
21+
const viewport = resolveViewportRect(nodes, targetRect);
22+
assert.deepEqual(viewport, { x: 0, y: 0, width: 390, height: 844 });
23+
});
24+
25+
// Scenario: the only node is a Cell with no rect, so the node list offers no rect at all.
// Expectation: resolveViewportRect returns null.
test('resolveViewportRect returns null when no valid viewport can be inferred', () => {
26+
const targetRect = { x: 20, y: 100, width: 120, height: 40 };
27+
const nodes: RawSnapshotNode[] = [makeNode(0, 'Cell', undefined)];
28+
const viewport = resolveViewportRect(nodes, targetRect);
29+
assert.equal(viewport, null);
30+
});
31+
32+
// Scenario: the target rect starts at y=2100 while the viewport is 844 high at the origin.
// Expectation: a plan is returned with direction 'down' and a count greater than 1.
// The exact count is not pinned; only "more than one" is asserted.
test('buildScrollIntoViewPlan computes downward content scroll when target is below safe band', () => {
33+
const targetRect = { x: 20, y: 2100, width: 120, height: 40 };
34+
const viewportRect = { x: 0, y: 0, width: 390, height: 844 };
35+
const plan = buildScrollIntoViewPlan(targetRect, viewportRect);
36+
assert.ok(plan);
37+
assert.equal(plan?.direction, 'down');
38+
assert.ok((plan?.count ?? 0) > 1);
39+
});
40+
41+
// Scenario: the target rect at y=320 is checked against an 844-high viewport at the origin.
// Expectation: buildScrollIntoViewPlan returns null, and isRectWithinSafeViewportBand
// returns true for the same pair of rects.
test('buildScrollIntoViewPlan returns null when already in safe viewport band', () => {
42+
const targetRect = { x: 20, y: 320, width: 120, height: 40 };
43+
const viewportRect = { x: 0, y: 0, width: 390, height: 844 };
44+
const plan = buildScrollIntoViewPlan(targetRect, viewportRect);
45+
assert.equal(plan, null);
46+
assert.equal(isRectWithinSafeViewportBand(targetRect, viewportRect), true);
47+
});

src/daemon/handlers/__tests__/interaction.test.ts

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,186 @@ test('press coordinates does not treat extra trailing args as selector', async (
185185
assert.deepEqual(dispatchCalls[0]?.positionals, ['100', '200']);
186186
assert.equal(sessionStore.get(sessionName)?.actions.length, 1);
187187
});
188+
189+
// Scenario: the stored session snapshot holds an Application node (390x844) and a
// 'Far item' text node at y=2600. The request is `scrollintoview` with positional '@e2'.
// NOTE(review): presumably attachRefs gives the second node the ref 'e2' — confirm against attachRefs.
//
// The dispatch stub answers 'snapshot' with the same two nodes, but with 'Far item'
// moved to y=320, and counts those calls. Every other command is recorded in
// dispatchCalls and answered with { ok: true }.
//
// Expectations:
//  - the response is ok and exactly one 'snapshot' dispatch happened;
//  - exactly one other dispatch happened: 'swipe' with 5 positionals, context.pattern
//    'one-way', context.pauseMs 0 and a numeric context.count greater than 1;
//  - the session records one 'scrollintoview' action whose result has ref 'e2',
//    strategy 'ref-geometry' and verified true.
test('scrollintoview @ref dispatches geometry-based swipe series', async () => {
190+
const sessionStore = makeSessionStore();
191+
const sessionName = 'default';
192+
const session = makeSession(sessionName);
193+
session.snapshot = {
194+
nodes: attachRefs([
195+
{
196+
index: 0,
197+
type: 'Application',
198+
rect: { x: 0, y: 0, width: 390, height: 844 },
199+
},
200+
{
201+
index: 1,
202+
type: 'XCUIElementTypeStaticText',
203+
label: 'Far item',
204+
rect: { x: 20, y: 2600, width: 120, height: 40 },
205+
},
206+
]),
207+
createdAt: Date.now(),
208+
backend: 'xctest',
209+
};
210+
sessionStore.set(sessionName, session);
211+
212+
const dispatchCalls: Array<{
213+
command: string;
214+
positionals: string[];
215+
context: Record<string, unknown> | undefined;
216+
}> = [];
217+
let snapshotCallCount = 0;
218+
const response = await handleInteractionCommands({
219+
req: {
220+
token: 't',
221+
session: sessionName,
222+
command: 'scrollintoview',
223+
positionals: ['@e2'],
224+
flags: {},
225+
},
226+
sessionName,
227+
sessionStore,
228+
contextFromFlags,
229+
dispatch: async (_device, command, positionals, _out, context) => {
230+
if (command === 'snapshot') {
231+
snapshotCallCount += 1;
232+
return {
233+
nodes: [
234+
{ index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } },
235+
{ index: 1, type: 'XCUIElementTypeStaticText', label: 'Far item', rect: { x: 20, y: 320, width: 120, height: 40 } },
236+
],
237+
backend: 'xctest',
238+
};
239+
}
240+
dispatchCalls.push({ command, positionals, context: context as Record<string, unknown> | undefined });
241+
return { ok: true };
242+
},
243+
});
244+
245+
assert.ok(response);
246+
assert.equal(response.ok, true);
247+
assert.equal(snapshotCallCount, 1);
248+
assert.equal(dispatchCalls.length, 1);
249+
assert.equal(dispatchCalls[0]?.command, 'swipe');
250+
assert.equal(dispatchCalls[0]?.positionals.length, 5);
251+
assert.equal(dispatchCalls[0]?.context?.pattern, 'one-way');
252+
assert.equal(dispatchCalls[0]?.context?.pauseMs, 0);
253+
assert.equal(typeof dispatchCalls[0]?.context?.count, 'number');
254+
assert.ok((dispatchCalls[0]?.context?.count as number) > 1);
255+
256+
const stored = sessionStore.get(sessionName);
257+
assert.ok(stored);
258+
assert.equal(stored?.actions.length, 1);
259+
assert.equal(stored?.actions[0]?.command, 'scrollintoview');
260+
const result = (stored?.actions[0]?.result ?? {}) as Record<string, unknown>;
261+
assert.equal(result.ref, 'e2');
262+
assert.equal(result.strategy, 'ref-geometry');
263+
assert.equal(result.verified, true);
264+
});
265+
266+
// Scenario: the stored session snapshot holds an Application node (390x844) and a
// 'Visible item' text node at y=320. The request is `scrollintoview` with positional '@e2'.
// The dispatch stub records every command it receives, including 'snapshot'.
//
// Expectations:
//  - the response is ok and dispatch was never called, so neither a swipe nor a
//    fresh snapshot was requested;
//  - response.data reports attempts 0 and alreadyVisible true.
test('scrollintoview @ref returns immediately when target is already in viewport safe band', async () => {
267+
const sessionStore = makeSessionStore();
268+
const sessionName = 'default';
269+
const session = makeSession(sessionName);
270+
session.snapshot = {
271+
nodes: attachRefs([
272+
{
273+
index: 0,
274+
type: 'Application',
275+
rect: { x: 0, y: 0, width: 390, height: 844 },
276+
},
277+
{
278+
index: 1,
279+
type: 'XCUIElementTypeStaticText',
280+
label: 'Visible item',
281+
rect: { x: 20, y: 320, width: 120, height: 40 },
282+
},
283+
]),
284+
createdAt: Date.now(),
285+
backend: 'xctest',
286+
};
287+
sessionStore.set(sessionName, session);
288+
289+
const dispatchCalls: Array<{ command: string }> = [];
290+
const response = await handleInteractionCommands({
291+
req: {
292+
token: 't',
293+
session: sessionName,
294+
command: 'scrollintoview',
295+
positionals: ['@e2'],
296+
flags: {},
297+
},
298+
sessionName,
299+
sessionStore,
300+
contextFromFlags,
301+
dispatch: async (_device, command) => {
302+
dispatchCalls.push({ command });
303+
return { ok: true };
304+
},
305+
});
306+
307+
assert.ok(response);
308+
assert.equal(response.ok, true);
309+
assert.equal(dispatchCalls.length, 0);
310+
if (response.ok) {
311+
assert.equal(response.data?.attempts, 0);
312+
assert.equal(response.data?.alreadyVisible, true);
313+
}
314+
});
315+
316+
// Scenario: the stored session snapshot holds an Application node (390x844) and a
// 'Far item' text node at y=2600. The request is `scrollintoview` with positional '@e2'.
// The dispatch stub answers 'snapshot' with the same nodes, 'Far item' still at y=2600,
// and answers every other command with { ok: true }.
//
// Expectations: the response is not ok, the error code is 'COMMAND_FAILED', and the
// error message matches /outside viewport/i.
test('scrollintoview @ref fails if target remains outside viewport after scroll', async () => {
317+
const sessionStore = makeSessionStore();
318+
const sessionName = 'default';
319+
const session = makeSession(sessionName);
320+
session.snapshot = {
321+
nodes: attachRefs([
322+
{
323+
index: 0,
324+
type: 'Application',
325+
rect: { x: 0, y: 0, width: 390, height: 844 },
326+
},
327+
{
328+
index: 1,
329+
type: 'XCUIElementTypeStaticText',
330+
label: 'Far item',
331+
rect: { x: 20, y: 2600, width: 120, height: 40 },
332+
},
333+
]),
334+
createdAt: Date.now(),
335+
backend: 'xctest',
336+
};
337+
sessionStore.set(sessionName, session);
338+
339+
const response = await handleInteractionCommands({
340+
req: {
341+
token: 't',
342+
session: sessionName,
343+
command: 'scrollintoview',
344+
positionals: ['@e2'],
345+
flags: {},
346+
},
347+
sessionName,
348+
sessionStore,
349+
contextFromFlags,
350+
dispatch: async (_device, command) => {
351+
if (command === 'snapshot') {
352+
return {
353+
nodes: [
354+
{ index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } },
355+
{ index: 1, type: 'XCUIElementTypeStaticText', label: 'Far item', rect: { x: 20, y: 2600, width: 120, height: 40 } },
356+
],
357+
backend: 'xctest',
358+
};
359+
}
360+
return { ok: true };
361+
},
362+
});
363+
364+
assert.ok(response);
365+
assert.equal(response.ok, false);
366+
if (!response.ok) {
367+
assert.equal(response.error?.code, 'COMMAND_FAILED');
368+
assert.match(response.error?.message ?? '', /outside viewport/i);
369+
}
370+
});

0 commit comments

Comments
 (0)