Skip to content

Commit 294a829

Browse files
committed
fix: capture android helper window roots
1 parent bf52267 commit 294a829

9 files changed

Lines changed: 134 additions & 12 deletions

File tree

android-snapshot-helper/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# Android Snapshot Helper
22

33
Small instrumentation APK used to capture Android accessibility snapshots without relying on
4-
`uiautomator dump`'s fixed idle wait behavior.
4+
`uiautomator dump`'s fixed idle wait behavior. The helper enables Android's interactive-window
5+
retrieval flag and serializes every accessible window root returned by `UiAutomation.getWindows()`
6+
so keyboards and system overlays can appear in the same snapshot. If interactive window roots are
7+
unavailable, it falls back to the active-window root.
58

69
The helper is intentionally provider-neutral. Local `adb`, cloud ADB tunnels, and remote device
710
providers can all install and run the same APK as long as they can execute ADB-style operations.
@@ -50,6 +53,8 @@ The final instrumentation result includes:
5053
- `maxDepth`
5154
- `maxNodes`
5255
- `rootPresent`
56+
- `captureMode` (`interactive-windows` or `active-window`)
57+
- `windowCount`
5358
- `nodeCount`
5459
- `truncated`
5560
- `elapsedMs`

android-snapshot-helper/src/main/java/com/callstack/agentdevice/snapshothelper/SnapshotInstrumentation.java

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
package com.callstack.agentdevice.snapshothelper;
22

3+
import android.accessibilityservice.AccessibilityServiceInfo;
34
import android.app.Instrumentation;
45
import android.app.UiAutomation;
56
import android.graphics.Rect;
67
import android.os.Bundle;
78
import android.util.Base64;
89
import android.view.accessibility.AccessibilityNodeInfo;
10+
import android.view.accessibility.AccessibilityWindowInfo;
911
import java.nio.charset.StandardCharsets;
12+
import java.util.List;
1013
import java.util.Locale;
1114
import java.util.concurrent.TimeoutException;
1215

@@ -52,6 +55,8 @@ public void onStart() {
5255
emitChunks(capture.xml);
5356
result.putString("ok", "true");
5457
result.putString("rootPresent", Boolean.toString(capture.rootPresent));
58+
result.putString("captureMode", capture.captureMode);
59+
result.putString("windowCount", Integer.toString(capture.windowCount));
5560
result.putString("nodeCount", Integer.toString(capture.nodeCount));
5661
result.putString("truncated", Boolean.toString(capture.truncated));
5762
result.putString("elapsedMs", Long.toString(System.currentTimeMillis() - startedAtMs));
@@ -70,6 +75,7 @@ private CaptureResult captureXml(
7075
long waitForIdleTimeoutMs, long timeoutMs, int maxDepth, int maxNodes)
7176
throws TimeoutException {
7277
UiAutomation automation = getUiAutomation();
78+
enableInteractiveWindowRetrieval(automation);
7379
if (waitForIdleTimeoutMs > 0) {
7480
try {
7581
// Best-effort settle: avoids empty roots without inheriting UIAutomator's long idle wait.
@@ -79,19 +85,87 @@ private CaptureResult captureXml(
7985
}
8086
}
8187

82-
AccessibilityNodeInfo root = automation.getRootInActiveWindow();
8388
CaptureStats stats = new CaptureStats();
8489
StringBuilder xml = new StringBuilder();
8590
xml.append("<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>");
8691
xml.append("<hierarchy rotation=\"0\">");
87-
if (root != null) {
88-
appendNode(xml, root, 0, 0, maxDepth, maxNodes, stats);
92+
int windowCount = appendInteractiveWindowRoots(xml, automation, maxDepth, maxNodes, stats);
93+
String captureMode = "interactive-windows";
94+
if (windowCount == 0) {
95+
AccessibilityNodeInfo root = automation.getRootInActiveWindow();
96+
try {
97+
if (root != null) {
98+
appendNode(xml, root, 0, 0, maxDepth, maxNodes, stats);
99+
windowCount = 1;
100+
}
101+
captureMode = "active-window";
102+
} finally {
103+
if (root != null) {
104+
root.recycle();
105+
}
106+
}
89107
}
90108
xml.append("</hierarchy>");
91-
if (root != null) {
92-
root.recycle();
109+
return new CaptureResult(
110+
xml.toString(), windowCount > 0, captureMode, windowCount, stats.nodeCount, stats.truncated);
111+
}
112+
113+
private static void enableInteractiveWindowRetrieval(UiAutomation automation) {
114+
AccessibilityServiceInfo serviceInfo;
115+
try {
116+
serviceInfo = automation.getServiceInfo();
117+
} catch (RuntimeException error) {
118+
return;
119+
}
120+
if (serviceInfo == null) {
121+
return;
122+
}
123+
if ((serviceInfo.flags & AccessibilityServiceInfo.FLAG_RETRIEVE_INTERACTIVE_WINDOWS) != 0) {
124+
return;
125+
}
126+
serviceInfo.flags |= AccessibilityServiceInfo.FLAG_RETRIEVE_INTERACTIVE_WINDOWS;
127+
try {
128+
automation.setServiceInfo(serviceInfo);
129+
} catch (RuntimeException ignored) {
130+
// Fall back to active-window capture if the platform rejects dynamic service flags.
131+
}
132+
}
133+
134+
private static int appendInteractiveWindowRoots(
135+
StringBuilder xml,
136+
UiAutomation automation,
137+
int maxDepth,
138+
int maxNodes,
139+
CaptureStats stats) {
140+
List<AccessibilityWindowInfo> windows;
141+
try {
142+
windows = automation.getWindows();
143+
} catch (RuntimeException error) {
144+
return 0;
145+
}
146+
int windowCount = 0;
147+
for (int index = 0; index < windows.size(); index += 1) {
148+
if (stats.nodeCount >= maxNodes) {
149+
stats.truncated = true;
150+
break;
151+
}
152+
AccessibilityWindowInfo window = windows.get(index);
153+
AccessibilityNodeInfo root = null;
154+
try {
155+
root = window.getRoot();
156+
if (root == null) {
157+
continue;
158+
}
159+
appendNode(xml, root, windowCount, 0, maxDepth, maxNodes, stats);
160+
windowCount += 1;
161+
} finally {
162+
if (root != null) {
163+
root.recycle();
164+
}
165+
window.recycle();
166+
}
93167
}
94-
return new CaptureResult(xml.toString(), root != null, stats.nodeCount, stats.truncated);
168+
return windowCount;
95169
}
96170

97171
private void emitChunks(String payload) {
@@ -262,12 +336,22 @@ private static final class CaptureStats {
262336
private static final class CaptureResult {
263337
final String xml;
264338
final boolean rootPresent;
339+
final String captureMode;
340+
final int windowCount;
265341
final int nodeCount;
266342
final boolean truncated;
267343

268-
CaptureResult(String xml, boolean rootPresent, int nodeCount, boolean truncated) {
344+
CaptureResult(
345+
String xml,
346+
boolean rootPresent,
347+
String captureMode,
348+
int windowCount,
349+
int nodeCount,
350+
boolean truncated) {
269351
this.xml = xml;
270352
this.rootPresent = rootPresent;
353+
this.captureMode = captureMode;
354+
this.windowCount = windowCount;
271355
this.nodeCount = nodeCount;
272356
this.truncated = truncated;
273357
}

src/platforms/android/__tests__/snapshot-helper.test.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ test('parseAndroidSnapshotHelperOutput reconstructs XML chunks and metadata', ()
4545
maxDepth: '128',
4646
maxNodes: '5000',
4747
rootPresent: 'true',
48+
captureMode: 'interactive-windows',
49+
windowCount: '2',
4850
nodeCount: '1',
4951
truncated: 'false',
5052
elapsedMs: '42',
@@ -62,6 +64,8 @@ test('parseAndroidSnapshotHelperOutput reconstructs XML chunks and metadata', ()
6264
maxDepth: 128,
6365
maxNodes: 5000,
6466
rootPresent: true,
67+
captureMode: 'interactive-windows',
68+
windowCount: 2,
6569
nodeCount: 1,
6670
truncated: false,
6771
elapsedMs: 42,
@@ -95,12 +99,14 @@ test('parseAndroidSnapshotHelperOutput decodes UTF-8 across byte chunk boundarie
9599
test('parseAndroidSnapshotHelperSnapshot returns shaped nodes', () => {
96100
const output = helperOutput({
97101
chunks: [
98-
'<hierarchy><node text="Continue" class="android.widget.Button" bounds="[1,2][21,42]" clickable="true" /></hierarchy>',
102+
'<hierarchy><node text="Continue" class="android.widget.Button" bounds="[1,2][21,42]" clickable="true" /><node text="Keyboard suggestion" class="android.widget.TextView" bounds="[1,44][121,84]" /></hierarchy>',
99103
],
100104
result: {
101105
ok: 'true',
102106
outputFormat: 'uiautomator-xml',
103-
nodeCount: '1',
107+
captureMode: 'interactive-windows',
108+
windowCount: '2',
109+
nodeCount: '2',
104110
},
105111
});
106112

@@ -109,7 +115,10 @@ test('parseAndroidSnapshotHelperSnapshot returns shaped nodes', () => {
109115
assert.equal(parsed.nodes[0]?.label, 'Continue');
110116
assert.equal(parsed.nodes[0]?.hittable, true);
111117
assert.deepEqual(parsed.nodes[0]?.rect, { x: 1, y: 2, width: 20, height: 40 });
112-
assert.equal(parsed.metadata.nodeCount, 1);
118+
assert.equal(parsed.nodes[1]?.label, 'Keyboard suggestion');
119+
assert.equal(parsed.metadata.captureMode, 'interactive-windows');
120+
assert.equal(parsed.metadata.windowCount, 2);
121+
assert.equal(parsed.metadata.nodeCount, 2);
113122
});
114123

115124
test('parseAndroidSnapshotHelperXml returns shaped nodes from captured helper output', () => {

src/platforms/android/__tests__/snapshot.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ function helperOutput(xml: string): string {
159159
'INSTRUMENTATION_RESULT: maxDepth=128',
160160
'INSTRUMENTATION_RESULT: maxNodes=5000',
161161
'INSTRUMENTATION_RESULT: rootPresent=true',
162+
'INSTRUMENTATION_RESULT: captureMode=interactive-windows',
163+
'INSTRUMENTATION_RESULT: windowCount=1',
162164
'INSTRUMENTATION_RESULT: nodeCount=1',
163165
'INSTRUMENTATION_RESULT: truncated=false',
164166
'INSTRUMENTATION_RESULT: elapsedMs=12',
@@ -250,6 +252,8 @@ test('snapshotAndroid uses configured helper before stock uiautomator', async ()
250252
assert.equal(result.androidSnapshot.backend, 'android-helper');
251253
assert.equal(result.androidSnapshot.helperVersion, '0.13.3');
252254
assert.equal(result.androidSnapshot.installReason, 'current');
255+
assert.equal(result.androidSnapshot.captureMode, 'interactive-windows');
256+
assert.equal(result.androidSnapshot.windowCount, 1);
253257
assert.deepEqual(timeouts, [30000, 13000]);
254258
assert.equal(mockRunCmd.mock.calls.length, 0);
255259
});

src/platforms/android/snapshot-helper.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ export type AndroidSnapshotHelperMetadata = {
8888
maxDepth?: number;
8989
maxNodes?: number;
9090
rootPresent?: boolean;
91+
captureMode?: 'interactive-windows' | 'active-window';
92+
windowCount?: number;
9193
nodeCount?: number;
9294
truncated?: boolean;
9395
elapsedMs?: number;
@@ -439,12 +441,20 @@ function readHelperMetadata(finalResult: Record<string, string>): AndroidSnapsho
439441
maxDepth: readOptionalNumber(finalResult.maxDepth),
440442
maxNodes: readOptionalNumber(finalResult.maxNodes),
441443
rootPresent: readOptionalBoolean(finalResult.rootPresent),
444+
captureMode: readOptionalCaptureMode(finalResult.captureMode),
445+
windowCount: readOptionalNumber(finalResult.windowCount),
442446
nodeCount: readOptionalNumber(finalResult.nodeCount),
443447
truncated: readOptionalBoolean(finalResult.truncated),
444448
elapsedMs: readOptionalNumber(finalResult.elapsedMs),
445449
};
446450
}
447451

452+
/**
 * Narrows a raw instrumentation result value to a known capture mode.
 * Returns `undefined` for a missing value or any string that is not one of the two known modes.
 */
function readOptionalCaptureMode(
  value: string | undefined,
): AndroidSnapshotHelperMetadata['captureMode'] {
  switch (value) {
    case 'interactive-windows':
    case 'active-window':
      return value;
    default:
      return undefined;
  }
}
457+
448458
export function parseAndroidSnapshotHelperSnapshot(
449459
output: string,
450460
options: SnapshotOptions = {},

src/platforms/android/snapshot-types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export type AndroidSnapshotBackendMetadata = {
1111
maxDepth?: number;
1212
maxNodes?: number;
1313
rootPresent?: boolean;
14+
captureMode?: 'interactive-windows' | 'active-window';
15+
windowCount?: number;
1416
nodeCount?: number;
1517
helperTruncated?: boolean;
1618
elapsedMs?: number;

src/platforms/android/snapshot.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ async function captureAndroidUiHierarchy(
117117
maxDepth: capture.metadata.maxDepth,
118118
maxNodes: capture.metadata.maxNodes,
119119
rootPresent: capture.metadata.rootPresent,
120+
captureMode: capture.metadata.captureMode,
121+
windowCount: capture.metadata.windowCount,
120122
nodeCount: capture.metadata.nodeCount,
121123
helperTruncated: capture.metadata.truncated,
122124
elapsedMs: capture.metadata.elapsedMs,

website/docs/docs/client-api.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ const output = await captureAndroidSnapshotWithHelper({
130130
const snapshot = parseAndroidSnapshotHelperXml(output.xml, output.metadata);
131131
```
132132

133+
Helper captures report `metadata.captureMode` as `interactive-windows` when Android returns
134+
interactive window roots, or `active-window` when the helper falls back to
135+
`getRootInActiveWindow()`. `metadata.windowCount` is the number of serialized roots.
136+
133137
## Command methods
134138

135139
Use `client.command.<method>()` for command-level device actions. It uses the same daemon transport path as the higher-level client methods, including session metadata, tenant/run/lease fields, normalized daemon errors, and remote artifact handling.

website/docs/docs/commands.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,9 @@ agent-device get attrs @e1
199199
- Android snapshots use stock UIAutomator by default. To try the released Android snapshot helper
200200
before stock UIAutomator, set both `AGENT_DEVICE_ANDROID_SNAPSHOT_HELPER_APK` and
201201
`AGENT_DEVICE_ANDROID_SNAPSHOT_HELPER_MANIFEST`; helper failures fall back to stock UIAutomator
202-
and include `androidSnapshot.fallbackReason` in typed results.
202+
and include `androidSnapshot.fallbackReason` in typed results. The helper serializes Android
203+
interactive window roots when available, so keyboard and system-overlay nodes can appear alongside
204+
the app root; `androidSnapshot.captureMode` and `androidSnapshot.windowCount` describe the capture.
203205
- `diff snapshot` compares the current snapshot with the previous session baseline and then updates baseline.
204206
- `snapshot --diff` is an alias for `diff snapshot`.
205207

0 commit comments

Comments
 (0)