|
| 1 | +// GH #114 — agent-device envelope CONTRACT tests. |
| 2 | +// |
| 3 | +// Why: handler-side parsers (findRefByTestID, snapshotEnvelopeFailed, |
| 4 | +// the various `env.data?.nodes` consumers in device-interact/device-session/ |
| 5 | +// repair-action) read envelopes emitted by THREE distinct producer tiers: |
| 6 | +// |
| 7 | +// 1. In-tree iOS runner (rn-fast-runner-client.runIOS) |
| 8 | +// 2. In-tree Android runner (rn-android-runner-client.runAndroid) |
| 9 | +// 3. Legacy upstream agent-device dispatch tiers (daemon socket / CLI |
| 10 | +// subprocess / agent-device's internal fast-runner sub-tier) |
| 11 | +// |
| 12 | +// Each producer emits subtly different envelope shapes. Without contract |
| 13 | +// pinning, a future tightening of a handler parser (e.g. require `data.code` |
| 14 | +// on failure) can pass synthetic handler tests yet break real producer |
| 15 | +// output in prod — exactly the gap codex flagged on PR #109 (issue #114). |
| 16 | +// |
| 17 | +// These tests are PRODUCER-CONSUMER pairings: pin the canonical envelope |
| 18 | +// each producer emits for a class of event, then run every consumer parser |
| 19 | +// against it. If either side drifts, the test fails before users do. |
| 20 | + |
| 21 | +import { test } from 'node:test'; |
| 22 | +import assert from 'node:assert/strict'; |
| 23 | +import { |
| 24 | + findRefByTestID, |
| 25 | + snapshotEnvelopeFailed, |
| 26 | +} from '../../dist/tools/device-batch.js'; |
| 27 | + |
| 28 | +// ───────────────────────────────────────────────────────────────────────────── |
| 29 | +// Producer fixtures — canonical envelopes each dispatch tier emits. |
| 30 | +// Sourced from real runner output captured during the iOS-MVP (PR #164) and |
| 31 | +// Android-MVP (PR #165) integrations. Update these when the producer shapes |
| 32 | +// change AND verify all consumer parsers below still handle them. |
| 33 | +// ───────────────────────────────────────────────────────────────────────────── |
| 34 | + |
| 35 | +// In-tree iOS runner: mapRunnerNodesToFlat in rn-fast-runner-client.ts emits |
| 36 | +// `{ref: '@e<n>', type, rect, label?, identifier?, enabled?, hittable?}`; the raw |
| 37 | +// runner JSON's `parentIndex`/`depth` are dropped before MCP consumers see them. |
| 38 | +// Lookup target for the tests below: identifier 'fab-create-task' → ref '@e7'. |
| 39 | +const IN_TREE_IOS_SNAPSHOT_OK = { |
| 40 | + ok: true, |
| 41 | + data: { |
| 42 | + nodes: [ |
| 43 | + { ref: '@e0', type: 'Application', rect: { x: 0, y: 0, width: 393, height: 852 }, label: 'TaskApp', identifier: '', enabled: true, hittable: false }, |
| 44 | + { ref: '@e7', type: 'Button', rect: { x: 320, y: 720, width: 56, height: 56 }, label: 'Create task', identifier: 'fab-create-task', enabled: true, hittable: true }, |
| 45 | + { ref: '@e8', type: 'StaticText', rect: { x: 16, y: 60, width: 200, height: 24 }, label: 'Tasks', identifier: 'header-title', enabled: true, hittable: false }, |
| 46 | + ], |
| 47 | + }, |
| 48 | +}; |
| 49 | + |
| 50 | +// In-tree Android runner: mapRunnerNodesToFlat in rn-android-runner-client.ts |
| 51 | +// emits the same flat-node field set as the iOS runner. The parity test at the |
| 52 | +// end of this file pins the first node's key names only (not value types). |
| 53 | +// Lookup target for the tests below: identifier 'fab-create-task' → ref '@e12'. |
| 54 | +const IN_TREE_ANDROID_SNAPSHOT_OK = { |
| 55 | + ok: true, |
| 56 | + data: { |
| 57 | + nodes: [ |
| 58 | + { ref: '@e0', type: 'FrameLayout', rect: { x: 0, y: 0, width: 1080, height: 2400 }, label: '', identifier: '', enabled: true, hittable: false }, |
| 59 | + { ref: '@e12', type: 'Button', rect: { x: 800, y: 2000, width: 168, height: 168 }, label: 'Create task', identifier: 'fab-create-task', enabled: true, hittable: true }, |
| 60 | + ], |
| 61 | + }, |
| 62 | +}; |
| 63 | + |
| 64 | +// Legacy upstream agent-device daemon (socket) tier: flat-nodes shape carrying |
| 65 | +// only `ref`, `identifier` and `label` (no `type`/`rect`/`enabled`/`hittable`). |
| 66 | +// Consumer parsers only care about `ref` + `identifier`, so the contract holds. |
| 67 | +const LEGACY_DAEMON_SNAPSHOT_OK = { |
| 68 | + ok: true, |
| 69 | + data: { |
| 70 | + nodes: [ |
| 71 | + { ref: 'el-0', identifier: 'fab-create-task', label: 'Create task' }, |
| 72 | + ], |
| 73 | + }, |
| 74 | +}; |
| 75 | + |
| 76 | +// Legacy upstream agent-device CLI tier: currently the same flat-nodes shape as |
| 77 | +// the daemon fixture. Pinned separately so that a future divergence (e.g. the |
| 78 | +// CLI gains a `type` field the daemon lacks) is caught by this contract test. |
| 79 | +const LEGACY_CLI_SNAPSHOT_OK = { |
| 80 | + ok: true, |
| 81 | + data: { |
| 82 | + nodes: [ |
| 83 | + { ref: 'el-0', identifier: 'fab-create-task', label: 'Create task' }, |
| 84 | + ], |
| 85 | + }, |
| 86 | +}; |
| 87 | + |
| 88 | +// Legacy upstream agent-device internal fast-runner sub-tier: a nested |
| 89 | +// `data.tree` with `children` arrays, NOT flat `data.nodes`. findRefByTestID's |
| 90 | +// second branch handles this; if that branch is removed, the lookup test fails. |
| 91 | +const LEGACY_FAST_RUNNER_SNAPSHOT_OK_NESTED = { |
| 92 | + ok: true, |
| 93 | + data: { |
| 94 | + tree: { |
| 95 | + ref: 'app-0', |
| 96 | + identifier: '', |
| 97 | + label: 'Application', |
| 98 | + children: [ |
| 99 | + { |
| 100 | + ref: 'btn-7', |
| 101 | + identifier: 'fab-create-task', |
| 102 | + label: 'Create task', |
| 103 | + children: [], |
| 104 | + }, |
| 105 | + ], |
| 106 | + }, |
| 107 | + }, |
| 108 | +}; |
| 109 | + |
| 110 | +// Failure fixtures. In-tree runners: the client converts raw runner errors |
| 111 | +// through failResult(message, code), so MCP consumers (findRefByTestID etc.) |
| 112 | +// get `{ok:false, error: string, code: string}` — never the raw XCTest/ |
| 113 | +// UIAutomator HTTP payload `error: {message, code}`. The two legacy fixtures |
| 114 | +// below pin a string `error` without `code` (daemon) and with `code` (CLI). |
| 115 | +const IN_TREE_RUNNER_FAILURE_APP_NOT_RUNNING = { |
| 116 | + ok: false, |
| 117 | + error: 'app not running', |
| 118 | + code: 'APP_NOT_RUNNING', |
| 119 | +}; |
| 120 | + |
| 121 | +const LEGACY_DAEMON_FAILURE_NO_DEVICE = { |
| 122 | + ok: false, |
| 123 | + error: 'No iOS simulator booted', |
| 124 | +}; |
| 125 | + |
| 126 | +const LEGACY_CLI_FAILURE_WITH_CODE = { |
| 127 | + ok: false, |
| 128 | + error: 'agent-device CLI exited with code 1', |
| 129 | + code: 'CLI_SPAWN_FAILED', |
| 130 | +}; |
| 131 | + |
| 132 | +// iOS-specific: XCUIElement.typeText quiescence-timeout shim. The text |
| 133 | +// landed in the field, but XCTest's main-thread waitForIdle timed out |
| 134 | +// after the side effect succeeded. The runner returns ok:true plus a meta |
| 135 | +// marker (rn-fast-runner-client.ts emits |
| 136 | +// `okResult({typed, text}, {meta: {sideEffectSucceeded, runnerTimeoutShim}})`). |
| 137 | +// Note the envelope carries no `data.nodes`. snapshotEnvelopeFailed must NOT |
| 138 | +// report it as failed — that would route a successful fill to SNAPSHOT_FAILED. |
| 139 | +const IOS_TYPETEXT_RUNNER_TIMEOUT_SHIM = { |
| 140 | + ok: true, |
| 141 | + data: { typed: true, text: 'hello' }, |
| 142 | + meta: { sideEffectSucceeded: true, runnerTimeoutShim: true }, |
| 143 | +}; |
| 144 | + |
| 145 | +// Snapshot succeeded but returned no nodes at all, so no testID can match. |
| 146 | +// This is "element not present", NOT "snapshot infrastructure failed". |
| 147 | +const IN_TREE_SNAPSHOT_OK_EMPTY = { |
| 148 | + ok: true, |
| 149 | + data: { nodes: [] }, |
| 150 | +}; |
| 151 | + |
| 152 | +// ───────────────────────────────────────────────────────────────────────────── |
| 153 | +// findRefByTestID — must extract ref by identifier from all producer shapes |
| 154 | +// ───────────────────────────────────────────────────────────────────────────── |
| 155 | + |
| 156 | +const SUCCESS_ENVELOPES_WITH_TARGET = [ |
| 157 | + { name: 'in-tree iOS (flat nodes)', env: IN_TREE_IOS_SNAPSHOT_OK, expectedRef: '@e7' }, |
| 158 | + { name: 'in-tree Android (flat nodes)', env: IN_TREE_ANDROID_SNAPSHOT_OK, expectedRef: '@e12' }, |
| 159 | + { name: 'legacy daemon (flat nodes)', env: LEGACY_DAEMON_SNAPSHOT_OK, expectedRef: 'el-0' }, |
| 160 | + { name: 'legacy CLI (flat nodes)', env: LEGACY_CLI_SNAPSHOT_OK, expectedRef: 'el-0' }, |
| 161 | + { name: 'legacy fast-runner (nested)', env: LEGACY_FAST_RUNNER_SNAPSHOT_OK_NESTED, expectedRef: 'btn-7' }, |
| 162 | +]; |
| 163 | + |
| 164 | +for (const { name, env, expectedRef } of SUCCESS_ENVELOPES_WITH_TARGET) { |
| 165 | + test(`findRefByTestID: ${name} — resolves testID 'fab-create-task' to ${expectedRef}`, () => { |
| 166 | + const ref = findRefByTestID(JSON.stringify(env), 'fab-create-task'); |
| 167 | + assert.equal(ref, expectedRef); |
| 168 | + }); |
| 169 | +} |
| 170 | + |
| 171 | +test('findRefByTestID: in-tree snapshot with empty nodes — returns null (testID not present)', () => { |
| 172 | + const ref = findRefByTestID(JSON.stringify(IN_TREE_SNAPSHOT_OK_EMPTY), 'fab-create-task'); |
| 173 | + assert.equal(ref, null); |
| 174 | +}); |
| 175 | + |
| 176 | +test('findRefByTestID: in-tree snapshot with present nodes but no match — returns null', () => { |
| 177 | + const ref = findRefByTestID(JSON.stringify(IN_TREE_IOS_SNAPSHOT_OK), 'nonexistent-testid'); |
| 178 | + assert.equal(ref, null); |
| 179 | +}); |
| 180 | + |
| 181 | +const FAILURE_ENVELOPES = [ |
| 182 | + { name: 'in-tree runner failure (object error)', env: IN_TREE_RUNNER_FAILURE_APP_NOT_RUNNING }, |
| 183 | + { name: 'legacy daemon failure (string error)', env: LEGACY_DAEMON_FAILURE_NO_DEVICE }, |
| 184 | + { name: 'legacy CLI failure (error + code)', env: LEGACY_CLI_FAILURE_WITH_CODE }, |
| 185 | +]; |
| 186 | + |
| 187 | +for (const { name, env } of FAILURE_ENVELOPES) { |
| 188 | + test(`findRefByTestID: ${name} — returns null (refuses to scan failed snapshot)`, () => { |
| 189 | + const ref = findRefByTestID(JSON.stringify(env), 'fab-create-task'); |
| 190 | + assert.equal(ref, null); |
| 191 | + }); |
| 192 | +} |
| 193 | + |
| 194 | +// ───────────────────────────────────────────────────────────────────────────── |
| 195 | +// snapshotEnvelopeFailed — classifies infrastructure failure vs element-absent |
| 196 | +// ───────────────────────────────────────────────────────────────────────────── |
| 197 | + |
| 198 | +for (const { name, env } of SUCCESS_ENVELOPES_WITH_TARGET) { |
| 199 | + test(`snapshotEnvelopeFailed: ${name} — returns false (snapshot succeeded)`, () => { |
| 200 | + assert.equal(snapshotEnvelopeFailed(JSON.stringify(env)), false); |
| 201 | + }); |
| 202 | +} |
| 203 | + |
| 204 | +test('snapshotEnvelopeFailed: empty-nodes success is NOT a failure (element not present is different)', () => { |
| 205 | + // Critical contract: handlers depend on this distinction to route |
| 206 | + // SNAPSHOT_FAILED vs TESTID_NOT_FOUND correctly (Phase 128 #5/#6). |
| 207 | + assert.equal(snapshotEnvelopeFailed(JSON.stringify(IN_TREE_SNAPSHOT_OK_EMPTY)), false); |
| 208 | +}); |
| 209 | + |
| 210 | +test('snapshotEnvelopeFailed: iOS typeText runner-timeout shim is NOT a failure', () => { |
| 211 | + // The shim shape carries ok:true + meta.runnerTimeoutShim. Treating |
| 212 | + // it as failure would route every successful iOS fill to |
| 213 | + // SNAPSHOT_FAILED — would visibly break the iOS device_fill smoke test. |
| 214 | + assert.equal(snapshotEnvelopeFailed(JSON.stringify(IOS_TYPETEXT_RUNNER_TIMEOUT_SHIM)), false); |
| 215 | +}); |
| 216 | + |
| 217 | +for (const { name, env } of FAILURE_ENVELOPES) { |
| 218 | + test(`snapshotEnvelopeFailed: ${name} — returns true`, () => { |
| 219 | + assert.equal(snapshotEnvelopeFailed(JSON.stringify(env)), true); |
| 220 | + }); |
| 221 | +} |
| 222 | + |
| 223 | +// ───────────────────────────────────────────────────────────────────────────── |
| 224 | +// Edge cases — what the parsers do under malformed / null input |
| 225 | +// ───────────────────────────────────────────────────────────────────────────── |
| 226 | + |
| 227 | +test('snapshotEnvelopeFailed: null/undefined input → true (treat missing as failed)', () => { |
| 228 | + // A missing envelope is classified the same way as a failed one. |
| 229 | + for (const missing of [null, undefined]) assert.equal(snapshotEnvelopeFailed(missing), true); |
| 230 | +}); |
| 231 | + |
| 232 | +test('snapshotEnvelopeFailed: empty string → true', () => { |
| 233 | + assert.equal(snapshotEnvelopeFailed(''), true); |
| 234 | +}); |
| 235 | + |
| 236 | +test('snapshotEnvelopeFailed: malformed JSON → true (the parser cannot vouch for the snapshot)', () => { |
| 237 | + assert.equal(snapshotEnvelopeFailed('not-json'), true); |
| 238 | + assert.equal(snapshotEnvelopeFailed('{ truncated'), true); |
| 239 | +}); |
| 240 | + |
| 241 | +test('findRefByTestID: malformed JSON → null (no ref to extract)', () => { |
| 242 | + assert.equal(findRefByTestID('not-json', 'any-testid'), null); |
| 243 | +}); |
| 244 | + |
| 245 | +// ───────────────────────────────────────────────────────────────────────────── |
| 246 | +// Cross-producer parity — same logical event, structurally compatible output |
| 247 | +// ───────────────────────────────────────────────────────────────────────────── |
| 248 | + |
| 249 | +test('parity: in-tree iOS and in-tree Android emit structurally identical flat-nodes shape', () => { |
| 250 | + // Both runners should expose the same `{ok, data: {nodes: [{ref, identifier, label}]}}` |
| 251 | + // shape so handlers can be platform-agnostic. A divergence here |
| 252 | + // (e.g. Android renaming `identifier` to `accessibilityId`) would |
| 253 | + // silently break the iOS-tested handlers on Android. |
| 254 | + const iosKeys = Object.keys(IN_TREE_IOS_SNAPSHOT_OK.data.nodes[0]).sort(); |
| 255 | + const androidKeys = Object.keys(IN_TREE_ANDROID_SNAPSHOT_OK.data.nodes[0]).sort(); |
| 256 | + assert.deepEqual(iosKeys, androidKeys, 'iOS and Android flat-node keys must match'); |
| 257 | +}); |
0 commit comments