Skip to content

Commit f56dda6

Browse files
XiaoPengYouCode, Zhongjin Lu, and thymikee
authored
fix(android): parse uiautomator XML attributes with single or double quotes (#37)
* fix(android): parse uiautomator XML attributes with single or double quotes
* fix(android): harden hierarchy parser and tighten tests

---------

Co-authored-by: Zhongjin Lu <zhongjin.lu@mondorobotics.com>
Co-authored-by: Michał Pierzchała <thymikee@gmail.com>
1 parent 8c50cee commit f56dda6

4 files changed

Lines changed: 389 additions & 290 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"prepack": "pnpm build:node && pnpm build:axsnapshot",
2727
"typecheck": "tsc -p tsconfig.json",
2828
"test": "node --test",
29-
"test:unit": "node --test src/core/__tests__/*.test.ts src/daemon/__tests__/*.test.ts src/daemon/handlers/__tests__/*.test.ts",
29+
"test:unit": "node --test src/core/__tests__/*.test.ts src/daemon/__tests__/*.test.ts src/daemon/handlers/__tests__/*.test.ts src/platforms/**/__tests__/*.test.ts",
3030
"test:smoke": "node --test test/integration/smoke-*.test.ts",
3131
"test:integration": "node --test test/integration/*.test.ts"
3232
},
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import test from 'node:test';
2+
import assert from 'node:assert/strict';
3+
import { findBounds, parseUiHierarchy } from '../ui-hierarchy.ts';
4+
5+
// Baseline: the classic uiautomator dump format, every attribute in double quotes.
test('parseUiHierarchy reads double-quoted Android node attributes', () => {
  const xml =
    '<hierarchy><node class="android.widget.TextView" text="Hello" content-desc="Greeting" resource-id="com.demo:id/title" bounds="[10,20][110,60]" clickable="true" enabled="true"/></hierarchy>';

  const { nodes } = parseUiHierarchy(xml, 800, { raw: true });
  assert.equal(nodes.length, 1);

  const [node] = nodes;
  // `text` feeds both value and label; `content-desc` is only a fallback for label.
  assert.equal(node.value, 'Hello');
  assert.equal(node.label, 'Hello');
  assert.equal(node.identifier, 'com.demo:id/title');
  // bounds "[x1,y1][x2,y2]" become an origin + size rectangle.
  assert.deepEqual(node.rect, { x: 10, y: 20, width: 100, height: 40 });
  assert.equal(node.hittable, true);
  assert.equal(node.enabled, true);
});
18+
19+
// Same node as the double-quoted case, but every attribute uses single quotes.
test('parseUiHierarchy reads single-quoted Android node attributes', () => {
  const xml =
    "<hierarchy><node class='android.widget.TextView' text='Hello' content-desc='Greeting' resource-id='com.demo:id/title' bounds='[10,20][110,60]' clickable='true' enabled='true'/></hierarchy>";

  const { nodes } = parseUiHierarchy(xml, 800, { raw: true });
  assert.equal(nodes.length, 1);

  const [node] = nodes;
  assert.equal(node.value, 'Hello');
  assert.equal(node.label, 'Hello');
  assert.equal(node.identifier, 'com.demo:id/title');
  assert.deepEqual(node.rect, { x: 10, y: 20, width: 100, height: 40 });
  assert.equal(node.hittable, true);
  assert.equal(node.enabled, true);
});
32+
33+
// A single tag may alternate between quote styles from one attribute to the next.
test('parseUiHierarchy supports mixed quote styles in one node', () => {
  const xml = `<hierarchy><node class="android.widget.TextView" text='Hello' content-desc="Greeting" resource-id='com.demo:id/title' bounds="[10,20][110,60]"/></hierarchy>`;

  const { nodes } = parseUiHierarchy(xml, 800, { raw: true });
  assert.equal(nodes.length, 1);

  const [node] = nodes;
  assert.equal(node.value, 'Hello');
  assert.equal(node.label, 'Hello');
  assert.equal(node.identifier, 'com.demo:id/title');
});
43+
44+
// findBounds must locate a node by text or content-desc regardless of quote style
// and return the centre point of that node's bounds.
test('findBounds supports single and double quoted attributes', () => {
  const decoy = '<node text="Nothing" content-desc="Irrelevant" bounds="[0,0][10,10]"/>';
  const singleQuoted =
    "<node text='Target from single quote' content-desc='Alt single' bounds='[100,200][300,500]'/>";
  const doubleQuoted =
    '<node text="Target from double quote" content-desc="Alt double" bounds="[50,50][150,250]"/>';
  const xml = ['<hierarchy>', decoy, singleQuoted, doubleQuoted, '</hierarchy>'].join('');

  // Matched through `text` on the single-quoted node.
  assert.deepEqual(findBounds(xml, 'single quote'), { x: 200, y: 350 });
  // Matched (case-insensitively) through `content-desc` on the double-quoted node.
  assert.deepEqual(findBounds(xml, 'alt double'), { x: 100, y: 150 });
});
56+
57+
// `hint-text` ends in "text" and appears first; it must not be mistaken for `text`.
test('parseUiHierarchy ignores attribute-name prefix spoofing', () => {
  const xml =
    "<hierarchy><node class='android.widget.TextView' hint-text='Spoofed' text='Actual' bounds='[10,20][110,60]'/></hierarchy>";

  const { nodes } = parseUiHierarchy(xml, 800, { raw: true });
  assert.equal(nodes.length, 1);

  const [node] = nodes;
  assert.equal(node.value, 'Actual');
});
65+
66+
// Text that merely looks like a bounds attribute inside another attribute's value
// must not be used; only the node's real `bounds` attribute counts.
test('findBounds ignores bounds-like fragments inside other attribute values', () => {
  const node = `<node text='Target' content-desc="metadata bounds='[900,900][1000,1000]'" bounds='[100,200][300,500]'/>`;
  const xml = ['<hierarchy>', node, '</hierarchy>'].join('');

  assert.deepEqual(findBounds(xml, 'target'), { x: 200, y: 350 });
});

src/platforms/android/index.ts

Lines changed: 2 additions & 289 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ import { runCmd, whichCmd } from '../../utils/exec.ts';
33
import { withRetry } from '../../utils/retry.ts';
44
import { AppError } from '../../utils/errors.ts';
55
import type { DeviceInfo } from '../../utils/device.ts';
6-
import type { RawSnapshotNode, Rect, SnapshotOptions } from '../../utils/snapshot.ts';
6+
import type { RawSnapshotNode, SnapshotOptions } from '../../utils/snapshot.ts';
77
import { waitForAndroidBoot } from './devices.ts';
8+
import { findBounds, parseBounds, parseUiHierarchy, readNodeAttributes } from './ui-hierarchy.ts';
89

910
const ALIASES: Record<string, { type: 'intent' | 'package'; value: string }> = {
1011
settings: { type: 'intent', value: 'android.settings.SETTINGS' },
@@ -623,291 +624,3 @@ async function sleep(ms: number): Promise<void> {
623624
/**
 * Restricts `value` to the `[min, max]` range.
 *
 * The upper limit is applied first and the lower limit last, so when
 * `min > max` the result is `min`.
 */
function clampCount(value: number, min: number, max: number): number {
  const cappedAtMax = Math.min(max, value);
  return Math.max(min, cappedAtMax);
}
626-
627-
/**
 * Finds the first `<node …>` element whose `text` or `content-desc` attribute
 * contains `query` (case-insensitive substring match) and returns the centre
 * point of its `bounds` attribute.
 *
 * Attributes are tokenised as whole `name=value` pairs, so:
 * - values may be wrapped in single or double quotes;
 * - an attribute whose name merely ends in the wanted name (`hint-text` vs
 *   `text`) is not confused with it;
 * - `name='…'` fragments inside another attribute's value are ignored;
 * - a `>` inside a quoted value does not terminate the tag early.
 *
 * @param xml   UI hierarchy dump to search.
 * @param query Text to look for; compared lower-cased.
 * @returns The centre of the matching node's bounds (floored to integers),
 *          `{ x: 0, y: 0 }` when the node matches but has no parseable bounds,
 *          or `null` when no node matches.
 */
function findBounds(xml: string, query: string): { x: number; y: number } | null {
  const needle = query.toLowerCase();
  // A node tag: "<node", then any run of non-quote characters or complete quoted values, then ">".
  const nodeTagRegex = /<node(?=[\s/>])(?:[^>"']|"[^"]*"|'[^']*')*>/g;
  // One attribute: name, "=", then a double- or single-quoted value.
  const attributeRegex = /([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const boundsRegex = /^\[(\d+),(\d+)\]\[(\d+),(\d+)\]$/;

  for (const tagMatch of xml.matchAll(nodeTagRegex)) {
    // Each match consumes a full name=value pair, so scanning resumes after the value.
    const attributes = new Map<string, string>();
    for (const attr of tagMatch[0].matchAll(attributeRegex)) {
      const name = attr[1] ?? '';
      // Keep the first occurrence of a repeated attribute name.
      if (!attributes.has(name)) attributes.set(name, attr[2] ?? attr[3] ?? '');
    }

    const text = (attributes.get('text') ?? '').toLowerCase();
    const desc = (attributes.get('content-desc') ?? '').toLowerCase();
    if (!text.includes(needle) && !desc.includes(needle)) continue;

    const bounds = boundsRegex.exec(attributes.get('bounds') ?? '');
    // A matching node without usable bounds still counts as found (origin fallback).
    if (!bounds) return { x: 0, y: 0 };

    const x1 = Number(bounds[1]);
    const y1 = Number(bounds[2]);
    const x2 = Number(bounds[3]);
    const y2 = Number(bounds[4]);
    return { x: Math.floor((x1 + x2) / 2), y: Math.floor((y1 + y2) / 2) };
  }
  return null;
}
652-
653-
/**
 * Converts a UI hierarchy XML dump into a flat, depth-first list of snapshot nodes.
 *
 * The XML is first parsed into a tree. Traversal starts at the node matching
 * `options.scope` when one is found, otherwise at the top-level nodes. Unless
 * `options.raw` is set, each node is filtered through `shouldIncludeAndroidNode`.
 * A skipped node's children are attached to the nearest emitted ancestor.
 *
 * @param xml      UI hierarchy dump.
 * @param maxNodes Maximum number of nodes to emit; traversal stops once reached.
 * @param options  Snapshot options (`raw`, `depth`, `scope`, plus filter flags
 *                 consumed by `shouldIncludeAndroidNode`).
 * @returns The emitted nodes, plus `truncated: true` only when the node limit
 *          cut the traversal short.
 */
function parseUiHierarchy(
  xml: string,
  maxNodes: number,
  options: SnapshotOptions,
): { nodes: RawSnapshotNode[]; truncated?: boolean } {
  const tree = parseUiHierarchyTree(xml);
  const depthLimit = options.depth ?? Number.POSITIVE_INFINITY;
  const scoped = options.scope ? findScopeNode(tree, options.scope) : null;
  const startNodes = scoped ? [scoped] : tree.children;

  const collected: RawSnapshotNode[] = [];
  let hitNodeLimit = false;

  // Memoised "does any descendant respond to taps?" lookup, keyed by node identity.
  const descendantCache = new Map<AndroidNode, boolean>();
  function hasInteractiveDescendant(node: AndroidNode): boolean {
    const known = descendantCache.get(node);
    if (known !== undefined) return known;
    const answer = node.children.some(
      (child) => Boolean(child.hittable) || hasInteractiveDescendant(child),
    );
    descendantCache.set(node, answer);
    return answer;
  }

  function visit(
    node: AndroidNode,
    depth: number,
    parentIndex: number | undefined,
    underHittable: boolean,
    underCollection: boolean,
  ): void {
    if (collected.length >= maxNodes) {
      hitNodeLimit = true;
      return;
    }
    if (depth > depthLimit) return;

    let keep = true;
    if (!options.raw) {
      keep = shouldIncludeAndroidNode(
        node,
        options,
        underHittable,
        hasInteractiveDescendant(node),
        underCollection,
      );
    }

    // When this node is skipped, its children inherit the parent's index.
    let indexForChildren = parentIndex;
    if (keep) {
      indexForChildren = collected.length;
      collected.push({
        index: indexForChildren,
        type: node.type ?? undefined,
        label: node.label ?? undefined,
        value: node.value ?? undefined,
        identifier: node.identifier ?? undefined,
        rect: node.rect,
        enabled: node.enabled,
        hittable: node.hittable,
        depth,
        parentIndex,
      });
    }

    const childUnderHittable = underHittable || Boolean(node.hittable);
    const childUnderCollection = underCollection || isCollectionContainerType(node.type);
    for (const child of node.children) {
      visit(child, depth + 1, indexForChildren, childUnderHittable, childUnderCollection);
      if (hitNodeLimit) return;
    }
  }

  for (const start of startNodes) {
    visit(start, 0, undefined, false, false);
    if (hitNodeLimit) break;
  }

  if (hitNodeLimit) {
    return { nodes: collected, truncated: hitNodeLimit };
  }
  return { nodes: collected };
}
732-
733-
/**
 * Extracts the attributes the snapshot code cares about from one `<node …>` tag.
 *
 * The tag is tokenised once into whole `name=value` pairs, so:
 * - values may use single or double quotes (a value may contain the other quote);
 * - lookups are by exact attribute name, so `hint-text` is never read as `text`;
 * - attribute-like fragments inside another attribute's value are ignored.
 * When a name is repeated, the first occurrence wins.
 *
 * @param node Source text of a single `<node …>` tag.
 * @returns String attributes (`null` when absent) and boolean attributes
 *          (`undefined` when absent, `true` only for the literal value `true`).
 */
function readNodeAttributes(node: string): {
  text: string | null;
  desc: string | null;
  resourceId: string | null;
  className: string | null;
  bounds: string | null;
  clickable?: boolean;
  enabled?: boolean;
  focusable?: boolean;
  focused?: boolean;
} {
  // One attribute: name, "=", then a double- or single-quoted value.
  const attributeRegex = /([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const attributes = new Map<string, string>();
  for (const attr of node.matchAll(attributeRegex)) {
    const name = attr[1] ?? '';
    if (!attributes.has(name)) attributes.set(name, attr[2] ?? attr[3] ?? '');
  }

  const getAttr = (name: string): string | null => attributes.get(name) ?? null;
  const boolAttr = (name: string): boolean | undefined => {
    const raw = attributes.get(name);
    return raw === undefined ? undefined : raw === 'true';
  };

  return {
    text: getAttr('text'),
    desc: getAttr('content-desc'),
    resourceId: getAttr('resource-id'),
    className: getAttr('class'),
    bounds: getAttr('bounds'),
    clickable: boolAttr('clickable'),
    enabled: boolAttr('enabled'),
    focusable: boolAttr('focusable'),
    focused: boolAttr('focused'),
  };
}
766-
767-
/**
 * Turns a bounds string of the form `[x1,y1][x2,y2]` into a rectangle.
 *
 * @param bounds Raw bounds attribute value, or `null` when the attribute is absent.
 * @returns `{ x, y, width, height }` with the origin at `(x1, y1)` and the size
 *          clamped to be non-negative, or `undefined` when `bounds` is empty,
 *          `null`, or does not contain the expected pattern.
 */
function parseBounds(bounds: string | null): Rect | undefined {
  const corners = bounds ? /\[(\d+),(\d+)\]\[(\d+),(\d+)\]/.exec(bounds) : null;
  if (corners === null) return undefined;

  const left = Number(corners[1]);
  const top = Number(corners[2]);
  const right = Number(corners[3]);
  const bottom = Number(corners[4]);

  // Inverted corners collapse to a zero-sized rectangle rather than a negative size.
  const width = Math.max(0, right - left);
  const height = Math.max(0, bottom - top);
  return { x: left, y: top, width, height };
}
777-
778-
/** One element of the parsed UI hierarchy tree, as built by `parseUiHierarchyTree`. */
type AndroidNode = {
  // Identity and text, taken from the node's XML attributes.
  /** The `class` attribute, or `null` when absent. */
  type: string | null;
  /** The `text` attribute, falling back to `content-desc` when text is empty or absent. */
  label: string | null;
  /** The `text` attribute, or `null` when absent. */
  value: string | null;
  /** The `resource-id` attribute, or `null` when absent. */
  identifier: string | null;

  // Geometry and interaction state.
  /** Rectangle parsed from the `bounds` attribute; omitted when unparseable. */
  rect?: Rect;
  /** The `enabled` attribute; omitted when the attribute is absent. */
  enabled?: boolean;
  /** The `clickable` attribute, falling back to `focusable` when clickable is absent. */
  hittable?: boolean;

  // Position in the tree.
  /** Nesting level; the synthetic root is `-1`, so top-level nodes are `0`. */
  depth: number;
  /** Left unset by the tree parser. */
  parentIndex?: number;
  /** Child nodes in document order. */
  children: AndroidNode[];
};
790-
791-
/**
 * Builds a tree of `AndroidNode`s from the `<node>` tags in a UI hierarchy dump.
 *
 * Only `<node …>` opening tags and `</node>` closing tags are considered; all
 * other markup is ignored. Self-closing tags (`…/>`) become leaves. A stray
 * `</node>` never pops the synthetic root.
 *
 * @param xml UI hierarchy dump.
 * @returns A synthetic root (depth `-1`, all fields `null`) whose `children`
 *          are the top-level nodes.
 */
function parseUiHierarchyTree(xml: string): AndroidNode {
  const syntheticRoot: AndroidNode = {
    type: null,
    label: null,
    value: null,
    identifier: null,
    depth: -1,
    children: [],
  };
  // Nodes that have been opened but not yet closed; the last entry is the current parent.
  const openNodes: AndroidNode[] = [syntheticRoot];

  for (const [tag] of xml.matchAll(/<node\b[^>]*>|<\/node>/g)) {
    const isClosingTag = tag.startsWith('</node');
    if (isClosingTag) {
      if (openNodes.length > 1) openNodes.pop();
      continue;
    }

    const attrs = readNodeAttributes(tag);
    const rect = parseBounds(attrs.bounds);
    const parent = openNodes[openNodes.length - 1];
    const child: AndroidNode = {
      type: attrs.className,
      // Empty text falls through to the content description.
      label: attrs.text || attrs.desc,
      value: attrs.text,
      identifier: attrs.resourceId,
      rect,
      enabled: attrs.enabled,
      // Focusable is only consulted when the clickable attribute is missing.
      hittable: attrs.clickable ?? attrs.focusable,
      depth: parent.depth + 1,
      parentIndex: undefined,
      children: [],
    };
    parent.children.push(child);

    const isSelfClosing = tag.endsWith('/>');
    if (!isSelfClosing) openNodes.push(child);
  }

  return syntheticRoot;
}
833-
834-
/**
 * Decides whether a node belongs in a non-raw snapshot.
 *
 * Three modes, checked in order:
 * - `options.interactiveOnly`: keep hittable nodes, plus non-visual nodes with
 *   meaningful text or id that sit under or above something hittable, or
 *   inside a collection container. Structural nodes only qualify inside a
 *   collection.
 * - `options.compact`: keep nodes that are hittable or have meaningful text or id.
 * - default: keep every non-structural, non-image node; structural and image
 *   nodes are kept only when hittable, meaningfully labelled, or an ancestor of
 *   something hittable.
 *
 * "Meaningful" means non-blank and not shaped like a resource id
 * (see `isGenericAndroidId`).
 *
 * @param node               Node under consideration.
 * @param options            Snapshot options selecting the mode.
 * @param ancestorHittable   Whether any ancestor is hittable.
 * @param descendantHittable Whether any descendant is hittable.
 * @param ancestorCollection Whether any ancestor is a collection container.
 */
function shouldIncludeAndroidNode(
  node: AndroidNode,
  options: SnapshotOptions,
  ancestorHittable: boolean,
  descendantHittable: boolean,
  ancestorCollection: boolean,
): boolean {
  const kind = normalizeAndroidType(node.type);
  const structural = isStructuralAndroidType(kind);
  const visual = kind === 'imageview' || kind === 'imagebutton';

  const label = node.label ?? '';
  const identifier = node.identifier ?? '';
  const meaningfulText = label.trim().length > 0 && !isGenericAndroidId(label);
  const meaningfulId = identifier.trim().length > 0 && !isGenericAndroidId(identifier);

  if (options.interactiveOnly) {
    if (node.hittable) return true;
    // Keep text proxies for tappable rows while dropping structural noise.
    if (!meaningfulText && !meaningfulId) return false;
    if (visual) return false;
    if (structural && !ancestorCollection) return false;
    return ancestorHittable || descendantHittable || ancestorCollection;
  }

  if (options.compact) {
    return meaningfulText || meaningfulId || Boolean(node.hittable);
  }

  if (!structural && !visual) return true;
  // Layout and image nodes need a reason to stay.
  return Boolean(node.hittable) || meaningfulText || descendantHittable;
}
868-
869-
/**
 * Reports whether a class name denotes a list-like container: its lower-cased
 * form contains `recyclerview`, `listview`, or `gridview`.
 *
 * @param type Class name, or `null` when unknown.
 * @returns `false` for `null` or an empty string.
 */
function isCollectionContainerType(type: string | null): boolean {
  if (!type) return false;
  const lowered = type.toLowerCase();
  const markers = ['recyclerview', 'listview', 'gridview'];
  return markers.some((marker) => lowered.includes(marker));
}
878-
879-
/**
 * Lower-cases a class name for case-insensitive comparisons.
 *
 * @param type Class name, or `null` when unknown.
 * @returns The lower-cased name, or an empty string for `null` or empty input.
 */
function normalizeAndroidType(type: string | null): string {
  return type ? type.toLowerCase() : '';
}
883-
884-
/**
 * Reports whether a class name denotes a pure layout element. Only the segment
 * after the last `.` is examined: it must be exactly `view` or `viewgroup`, or
 * contain `layout`.
 *
 * The comparison is case-sensitive against lower-case literals, so mixed-case
 * names do not match.
 *
 * @param type Class name to test.
 */
function isStructuralAndroidType(type: string): boolean {
  const simpleName = type.slice(type.lastIndexOf('.') + 1);
  if (simpleName === 'view' || simpleName === 'viewgroup') return true;
  return simpleName.includes('layout');
}
892-
893-
/**
 * Reports whether a string, after trimming, has the shape of a resource id:
 * `<package>:id/<name>`, where the package is word characters and dots and the
 * name is word characters, dots, and hyphens.
 *
 * @param value Text to test.
 * @returns `false` for blank input or anything not matching that shape.
 */
function isGenericAndroidId(value: string): boolean {
  const candidate = value.trim();
  if (candidate.length === 0) return false;
  const resourceIdShape = /^[\w.]+:id\/[\w.-]+$/i;
  return resourceIdShape.test(candidate);
}
898-
899-
/**
 * Breadth-first search for the first descendant of `root` whose label, value,
 * or identifier contains `scope` (case-insensitive substring match).
 *
 * `root` itself is never a candidate; the search starts at its children, so
 * the shallowest match in document order wins.
 *
 * The queue is walked in place instead of being drained with `shift()`, which
 * keeps the search linear in the number of nodes. Children are appended one at
 * a time so very wide nodes cannot exceed the engine's argument-count limit.
 *
 * @param root  Tree root whose descendants are searched.
 * @param scope Text to look for; compared lower-cased.
 * @returns The matching node, or `null` when none matches.
 */
function findScopeNode(root: AndroidNode, scope: string): AndroidNode | null {
  const query = scope.toLowerCase();
  const queue: AndroidNode[] = [...root.children];
  // Array iteration re-reads the length on every step, so nodes appended
  // inside the loop are visited too, in first-in-first-out order.
  for (const node of queue) {
    const label = node.label?.toLowerCase() ?? '';
    const value = node.value?.toLowerCase() ?? '';
    const identifier = node.identifier?.toLowerCase() ?? '';
    if (label.includes(query) || value.includes(query) || identifier.includes(query)) {
      return node;
    }
    for (const child of node.children) {
      queue.push(child);
    }
  }
  return null;
}

0 commit comments

Comments
 (0)