Skip to content

Commit ff7e837

Browse files
committed
fix: improve Android snapshot fidelity
1 parent 0bc1b1e commit ff7e837

11 files changed

Lines changed: 1217 additions & 261 deletions

File tree

src/daemon/__tests__/screenshot-overlay.test.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,157 @@ test('buildScreenshotOverlayRefs prefers descendant text over generic android re
258258
]);
259259
});
260260

261+
test('buildScreenshotOverlayRefs keeps Android pixel rects aligned with screenshots', () => {
  // The hittable row is expected to come back with exactly the rect it was given.
  const rowRect = { x: 0, y: 2697, width: 1344, height: 223 };

  const state = makeSnapshotState(
    [
      {
        index: 0,
        type: 'android.widget.ScrollView',
        rect: { x: 0, y: 0, width: 1344, height: 2920 },
      },
      {
        index: 1,
        parentIndex: 0,
        type: 'android.widget.LinearLayout',
        hittable: true,
        rect: { ...rowRect },
      },
      {
        index: 2,
        parentIndex: 1,
        type: 'android.widget.TextView',
        label: 'Storage',
        rect: { x: 240, y: 2745, width: 205, height: 81 },
      },
    ],
    { backend: 'android' },
  );

  // The screenshot passed in (1344x2992) is taller than the scroll view (2920).
  const refs = buildScreenshotOverlayRefs(state, 1344, 2992);

  const expectedRow = {
    ref: 'e2',
    label: 'Storage',
    rect: { ...rowRect },
    overlayRect: { ...rowRect },
    center: { x: 672, y: 2809 },
  };
  assert.deepEqual(refs, [expectedRow]);
});
299+
300+
test('buildScreenshotOverlayRefs includes unlabeled Android bottom tab controls', () => {
  // Four hittable, unlabeled view groups laid out in one row (y = 2724, height = 132).
  const tabSlots = [
    { x: 72, width: 192 },
    { x: 436, width: 192 },
    { x: 800, width: 192 },
    { x: 1164, width: 132 },
  ];
  const firstTabIndex = 3;

  const state = makeSnapshotState(
    [
      {
        index: 0,
        type: 'android.widget.FrameLayout',
        rect: { x: 0, y: 0, width: 1344, height: 2992 },
      },
      {
        index: 1,
        parentIndex: 0,
        type: 'android.widget.ScrollView',
        hittable: true,
        rect: { x: 0, y: 159, width: 1344, height: 2593 },
      },
      {
        index: 2,
        parentIndex: 0,
        type: 'android.widget.TextView',
        label: 'Agent Device Tester',
        rect: { x: 54, y: 181, width: 770, height: 86 },
      },
      ...tabSlots.map((slot, offset) => ({
        index: firstTabIndex + offset,
        parentIndex: 0,
        type: 'android.view.ViewGroup',
        hittable: true,
        rect: { x: slot.x, y: 2724, width: slot.width, height: 132 },
      })),
    ],
    { backend: 'android' },
  );

  const refs = buildScreenshotOverlayRefs(state, 1344, 2992);

  // Only the four tab controls are expected to receive refs.
  const refIds = refs.map((entry) => entry.ref);
  assert.deepEqual(refIds, ['e4', 'e5', 'e6', 'e7']);

  const noneLabeled = refs.every((entry) => !entry.label);
  assert.ok(noneLabeled, 'unlabeled Android tab controls should still get visual refs');
});
365+
366+
test('buildScreenshotOverlayRefs trims Android row spacing from unlabeled action containers', () => {
  const state = makeSnapshotState(
    [
      {
        index: 0,
        type: 'android.widget.ScrollView',
        rect: { x: 0, y: 0, width: 1344, height: 2920 },
      },
      {
        // Row is 282 tall; its two text children span y = 495..633,
        // leaving 48 above and 96 below.
        index: 1,
        parentIndex: 0,
        type: 'android.widget.LinearLayout',
        hittable: true,
        rect: { x: 0, y: 447, width: 1344, height: 282 },
      },
      {
        index: 2,
        parentIndex: 1,
        type: 'android.widget.TextView',
        label: 'Google',
        rect: { x: 240, y: 495, width: 190, height: 81 },
      },
      {
        index: 3,
        parentIndex: 1,
        type: 'android.widget.TextView',
        label: 'Services & preferences',
        rect: { x: 240, y: 576, width: 425, height: 57 },
      },
    ],
    { backend: 'android' },
  );

  const refs = buildScreenshotOverlayRefs(state, 1344, 2992);

  // The expected rect keeps the row's top edge but is 234 tall instead of 282.
  const trimmedRow = { x: 0, y: 447, width: 1344, height: 234 };
  assert.deepEqual(refs, [
    {
      ref: 'e2',
      label: 'Google',
      rect: { ...trimmedRow },
      overlayRect: { ...trimmedRow },
      center: { x: 672, y: 564 },
    },
  ]);
});
411+
261412
test('annotateScreenshotWithRefs draws the overlay onto the saved PNG', async () => {
262413
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-screenshot-overlay-'));
263414
const screenshotPath = path.join(root, 'screen.png');
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import type { Rect, SnapshotNode } from '../utils/snapshot.ts';
2+
import { normalizeType } from './snapshot-processing.ts';
3+
4+
/** Predicate evaluated against a single snapshot node. */
type SnapshotNodePredicate = (node: SnapshotNode) => boolean;

/** Arguments accepted by `resolveAndroidOverlaySourceRect`. */
type ResolveAndroidOverlaySourceRectParams = {
  /** Node whose overlay source rect is being resolved. */
  target: SnapshotNode;
  /** Snapshot nodes searched for descendants of `target`. */
  nodes: SnapshotNode[];
  /** Reports whether a node has an actionable role; such targets are left alone. */
  hasActionableRole: SnapshotNodePredicate;
  /** Reports whether a node carries an overlay label. */
  hasOverlayLabel: SnapshotNodePredicate;
};
10+
11+
/**
 * Resolves an adjusted source rect for an Android overlay target.
 *
 * Only targets that have a rect and qualify as unlabeled action containers
 * (hittable, no actionable role, no overlay label) are considered. For those,
 * the result of balancing the row's vertical padding is returned.
 *
 * @param params - Target node, the snapshot nodes, and the two node predicates.
 * @returns The balanced rect, or `null` when the target does not qualify or
 *   balancing yields no rect.
 */
export function resolveAndroidOverlaySourceRect(
  params: ResolveAndroidOverlaySourceRectParams,
): Rect | null {
  const { target, nodes, hasActionableRole, hasOverlayLabel } = params;

  // Without a rect there is nothing to adjust; the predicates are not consulted.
  if (!target.rect) return null;

  const qualifies = isUnlabeledAndroidActionContainer(target, hasActionableRole, hasOverlayLabel);
  return qualifies ? balanceAndroidActionRowRect(target, nodes, hasOverlayLabel) : null;
}
25+
26+
/**
 * Tells whether `node` is hittable yet has neither an actionable role nor an
 * overlay label.
 *
 * The checks run in that order and stop at the first one that fails, so the
 * predicates are only invoked when the earlier checks pass.
 *
 * @param node - Node to classify.
 * @param hasActionableRole - Predicate for "has an actionable role".
 * @param hasOverlayLabel - Predicate for "has an overlay label".
 * @returns `true` only when all three conditions hold.
 */
function isUnlabeledAndroidActionContainer(
  node: SnapshotNode,
  hasActionableRole: (node: SnapshotNode) => boolean,
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): boolean {
  // `hittable` must be exactly `true`; undefined or other values do not count.
  if (node.hittable !== true) return false;
  if (hasActionableRole(node)) return false;
  return !hasOverlayLabel(node);
}
33+
34+
/**
 * Shrinks a row's rect vertically so the space above and below its measured
 * content is equal.
 *
 * The smaller of the two paddings is applied on both sides of the content
 * rect; x and width are taken from the target unchanged.
 *
 * @param target - Row node whose rect is being balanced.
 * @param nodes - Snapshot nodes used to measure the row's content.
 * @param hasOverlayLabel - Predicate forwarded to the content measurement.
 * @returns The balanced rect, or `null` when the target has no rect, no
 *   content rect could be measured, the content sticks out above or below the
 *   row, the paddings already differ by less than 16, or the balanced height
 *   is not strictly between 0 and the row's height.
 */
function balanceAndroidActionRowRect(
  target: SnapshotNode,
  nodes: SnapshotNode[],
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): Rect | null {
  const rowRect = target.rect;
  if (!rowRect) return null;

  const content = measureAndroidActionRowContentRect(target, nodes, hasOverlayLabel);
  if (!content) return null;

  const spaceAbove = content.y - rowRect.y;
  const spaceBelow = rowRect.y + rowRect.height - (content.y + content.height);

  // Content extending past the row vertically cannot be balanced.
  if (spaceAbove < 0 || spaceBelow < 0) return null;

  // Paddings that differ by less than 16 are treated as already balanced.
  const imbalance = Math.abs(spaceBelow - spaceAbove);
  if (imbalance < 16) return null;

  const sharedPadding = Math.min(spaceAbove, spaceBelow);
  const balancedY = Math.round(content.y - sharedPadding);
  const balancedHeight = Math.round(content.height + sharedPadding * 2);

  // Only accept a result that is a real, strictly smaller rect.
  if (balancedHeight <= 0 || balancedHeight >= rowRect.height) return null;

  return {
    x: rowRect.x,
    y: balancedY,
    width: rowRect.width,
    height: balancedHeight,
  };
}
60+
61+
/**
 * Measures the bounding rect of the visual content nested inside a row.
 *
 * A node contributes when it is not the target itself (by `ref`), descends
 * from the target, counts as action-row visual content, has a rect with
 * positive width and height, and that rect lies fully inside the target's.
 *
 * @param target - Row node whose content is measured.
 * @param nodes - Snapshot nodes to scan.
 * @param hasOverlayLabel - Predicate forwarded to the visual-content check.
 * @returns The union of the contributing rects, or `null` when the target has
 *   no rect or fewer than two nodes contribute.
 */
function measureAndroidActionRowContentRect(
  target: SnapshotNode,
  nodes: SnapshotNode[],
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): Rect | null {
  const rowRect = target.rect;
  if (!rowRect) return null;

  // Lookup by node index so parent links can be followed.
  const byIndex = new Map(nodes.map((node) => [node.index, node]));

  const collected: Rect[] = [];
  for (const candidate of nodes) {
    if (candidate.ref === target.ref) continue;
    if (!isDescendantOf(candidate, target, byIndex)) continue;
    if (!isAndroidActionRowVisualContent(candidate, hasOverlayLabel)) continue;

    const candidateRect = candidate.rect;
    if (!hasPositiveRect(candidateRect)) continue;
    if (!rectContains(rowRect, candidateRect)) continue;

    collected.push(candidateRect);
  }

  // A single piece of content is not enough to measure from.
  return collected.length < 2 ? null : unionRects(collected);
}
81+
82+
/**
 * Decides whether a node counts as visual content of an action row.
 *
 * The node's type (empty string when missing) is passed through
 * `normalizeType`. The node qualifies when the result contains `text`, or
 * contains `image` and the node has an overlay label.
 *
 * @param node - Node to classify.
 * @param hasOverlayLabel - Predicate consulted only for image-typed nodes.
 * @returns `true` when the node qualifies as visual content.
 */
function isAndroidActionRowVisualContent(
  node: SnapshotNode,
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): boolean {
  const kind = normalizeType(node.type ?? '');
  if (kind.includes('text')) return true;
  return kind.includes('image') && hasOverlayLabel(node);
}
91+
92+
/**
 * Tells whether `node` sits anywhere below `ancestor` in the tree described by
 * the nodes' `parentIndex` links.
 *
 * The walk follows `parentIndex` through `nodeIndex` and compares each parent's
 * `ref` with the ancestor's. A node is never its own descendant.
 *
 * @param node - Node to start from.
 * @param ancestor - Candidate ancestor, matched by `ref`.
 * @param nodeIndex - Lookup from node index to node.
 * @returns `true` when an ancestor with a matching `ref` is reached; `false`
 *   when the chain ends, a parent is missing from `nodeIndex`, or the parent
 *   links loop back on themselves.
 */
function isDescendantOf(
  node: SnapshotNode,
  ancestor: SnapshotNode,
  nodeIndex: ReadonlyMap<number, SnapshotNode>,
): boolean {
  // Parent indexes already followed. Seeing one twice means the links form a
  // cycle (e.g. a node listed as its own parent), which would otherwise make
  // this loop spin forever.
  const followed = new Set<number>();

  let current = node;
  while (current.parentIndex !== undefined) {
    const parentIndex = current.parentIndex;
    if (followed.has(parentIndex)) return false;
    followed.add(parentIndex);

    const parent = nodeIndex.get(parentIndex);
    if (!parent) return false;
    if (parent.ref === ancestor.ref) return true;
    current = parent;
  }
  return false;
}
106+
107+
/**
 * Type guard for a rect that exists and has a strictly positive width and
 * height.
 *
 * @param rect - Rect to check; may be absent.
 * @returns `true` (narrowing `rect` to `Rect`) when both dimensions exceed 0.
 */
function hasPositiveRect(rect: Rect | undefined): rect is Rect {
  if (!rect) return false;
  return rect.width > 0 && rect.height > 0;
}
110+
111+
/**
 * Tells whether `nested` lies entirely within `container`.
 *
 * Edges may coincide: a rect that touches or equals the container's bounds
 * still counts as contained.
 *
 * @param container - Outer rect.
 * @param nested - Rect expected to fit inside `container`.
 * @returns `true` when all four edges of `nested` are within `container`.
 */
function rectContains(container: Rect, nested: Rect): boolean {
  const containerRight = container.x + container.width;
  const containerBottom = container.y + container.height;
  const nestedRight = nested.x + nested.width;
  const nestedBottom = nested.y + nested.height;

  const leftInside = nested.x >= container.x;
  const topInside = nested.y >= container.y;
  const rightInside = nestedRight <= containerRight;
  const bottomInside = nestedBottom <= containerBottom;

  return leftInside && topInside && rightInside && bottomInside;
}
119+
120+
/**
 * Computes the smallest rect that encloses every rect in `rects`.
 *
 * @param rects - Rects to enclose.
 * @returns The bounding rect, or `null` for an empty list.
 */
function unionRects(rects: Rect[]): Rect | null {
  if (rects.length === 0) return null;

  // Fold every rect into running extremes, starting from "nothing seen yet".
  const bounds = rects.reduce(
    (acc, rect) => ({
      left: Math.min(acc.left, rect.x),
      top: Math.min(acc.top, rect.y),
      right: Math.max(acc.right, rect.x + rect.width),
      bottom: Math.max(acc.bottom, rect.y + rect.height),
    }),
    {
      left: Number.POSITIVE_INFINITY,
      top: Number.POSITIVE_INFINITY,
      right: Number.NEGATIVE_INFINITY,
      bottom: Number.NEGATIVE_INFINITY,
    },
  );

  return {
    x: bounds.left,
    y: bounds.top,
    width: bounds.right - bounds.left,
    height: bounds.bottom - bounds.top,
  };
}

0 commit comments

Comments
 (0)