From 8c909f6fd2671e8579b9635250c16a512cc19e54 Mon Sep 17 00:00:00 2001 From: Anton Lykhoyda Date: Wed, 20 May 2026 14:11:12 +0200 Subject: [PATCH] =?UTF-8?q?test(envelope-contract):=20pin=20producer?= =?UTF-8?q?=E2=86=92consumer=20envelope=20contracts=20(closes=20#114)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 25 contract tests that pin the envelope shapes each dispatch tier emits against the handler-side parsers that consume them, so a future divergence on either side fails fast in CI rather than slipping through to production. Background: handler integration tests stub `runAgentDevice` via `_setRunAgentDeviceForTest` with synthetic envelopes. Codex flagged on PR #109 (conf 80) that this short-circuits the real wrapper's three dispatch tiers, each of which produces subtly different shapes. The original issue listed agent-device's internal tiers (fast-runner HTTP / daemon socket / CLI), but post-PR #164 (iOS-MVP) and PR #165 (Android- MVP) the surface widened to include the in-tree iOS and Android runner clients too. This PR covers the current surface. Producers pinned: 1. In-tree iOS runner (rn-fast-runner-client.runIOS) — flat nodes with `{ref: '@e', type, rect, label?, identifier?, enabled?, hittable?}` shape after mapRunnerNodesToFlat normalization 2. In-tree Android runner (rn-android-runner-client.runAndroid) — identical flat-node shape (the parity test pins this — a divergence here would silently break platform-agnostic handlers) 3. Legacy upstream agent-device daemon socket — flat-nodes with less metadata 4. Legacy upstream agent-device CLI subprocess — separate fixture even though current shape equals daemon, so a future divergence would surface here 5. Legacy upstream agent-device internal fast-runner sub-tier — nested-tree shape, NOT flat. findRefByTestID's `env.data.tree` branch handles this; removing the branch without warning would fail this test 6. iOS XCUIElement.typeText runner-timeout shim — `{ok:true, data: {typed, text}, meta: {sideEffectSucceeded, runnerTimeoutShim}}`. snapshotEnvelopeFailed must NOT report this as a failure (it would route every successful iOS fill to SNAPSHOT_FAILED otherwise) Consumers exercised: - findRefByTestID (device-batch.ts) — both flat-nodes and nested-tree branches - snapshotEnvelopeFailed (device-batch.ts) — including the critical distinction between empty-nodes success (TESTID_NOT_FOUND) and snapshot-infrastructure failure (SNAPSHOT_FAILED), per Phase 128 #5/#6 - Edge cases: null/undefined/empty/malformed JSON all classified as failed codex-pair caught three fidelity issues during review (MED): my initial fixtures used `ref: 'app-0'` style refs with `parentIndex`/`depth` fields, but the actual mapRunnerNodesToFlat output emits `@e` refs with `type`/`rect`/`enabled`/`hittable`. The failure fixture used the raw HTTP error shape `{error: {message, code}}` instead of the post- failResult `{ok:false, error: string, code: string}` shape MCP consumers actually see. And the comment claimed daemon + CLI were pinned separately but only daemon was. All three fixed before commit — a contract test with the wrong fixtures is worse than no contract test because it gives false confidence. Verified: 1506/1506 cdp-bridge unit tests passing (+25 net new). Co-Authored-By: Claude Opus 4.7 --- .../unit/gh-114-envelope-contract.test.js | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 scripts/cdp-bridge/test/unit/gh-114-envelope-contract.test.js diff --git a/scripts/cdp-bridge/test/unit/gh-114-envelope-contract.test.js b/scripts/cdp-bridge/test/unit/gh-114-envelope-contract.test.js new file mode 100644 index 0000000..82520b8 --- /dev/null +++ b/scripts/cdp-bridge/test/unit/gh-114-envelope-contract.test.js @@ -0,0 +1,257 @@ +// GH #114 — agent-device envelope CONTRACT tests. +// +// Why: handler-side parsers (findRefByTestID, snapshotEnvelopeFailed, +// the various `env.data?.nodes` consumers in device-interact/device-session/ +// repair-action) read envelopes emitted by THREE distinct producer tiers: +// +// 1. In-tree iOS runner (rn-fast-runner-client.runIOS) +// 2. In-tree Android runner (rn-android-runner-client.runAndroid) +// 3. Legacy upstream agent-device dispatch tiers (daemon socket / CLI +// subprocess / agent-device's internal fast-runner sub-tier) +// +// Each producer emits subtly different envelope shapes. Without contract +// pinning, a future tightening of a handler parser (e.g. require `data.code` +// on failure) can pass synthetic handler tests yet break real producer +// output in prod — exactly the gap codex flagged on PR #109 (issue #114). +// +// These tests are PRODUCER-CONSUMER pairings: pin the canonical envelope +// each producer emits for a class of event, then run every consumer parser +// against it. If either side drifts, the test fails before users do. + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + findRefByTestID, + snapshotEnvelopeFailed, +} from '../../dist/tools/device-batch.js'; + +// ───────────────────────────────────────────────────────────────────────────── +// Producer fixtures — canonical envelopes each dispatch tier emits. +// Sourced from real runner output captured during the iOS-MVP (PR #164) and +// Android-MVP (PR #165) integrations. Update these when the producer shapes +// change AND verify all consumer parsers below still handle them. +// ───────────────────────────────────────────────────────────────────────────── + +// In-tree iOS runner: mapRunnerNodesToFlat in rn-fast-runner-client.ts emits +// `{ref: '@e', type, rect, label?, identifier?, enabled?, hittable?}`. +// `parentIndex`/`depth` are NOT in the post-mapping shape — they exist on the +// raw runner JSON but are dropped before MCP consumers see them. +const IN_TREE_IOS_SNAPSHOT_OK = { + ok: true, + data: { + nodes: [ + { ref: '@e0', type: 'Application', rect: { x: 0, y: 0, width: 393, height: 852 }, label: 'TaskApp', identifier: '', enabled: true, hittable: false }, + { ref: '@e7', type: 'Button', rect: { x: 320, y: 720, width: 56, height: 56 }, label: 'Create task', identifier: 'fab-create-task', enabled: true, hittable: true }, + { ref: '@e8', type: 'StaticText', rect: { x: 16, y: 60, width: 200, height: 24 }, label: 'Tasks', identifier: 'header-title', enabled: true, hittable: false }, + ], + }, +}; + +// In-tree Android runner: identical flat-node shape (same field set, same +// types) emitted by mapRunnerNodesToFlat in rn-android-runner-client.ts. +// The parity test below pins this — a divergence would break platform-agnostic +// handlers. +const IN_TREE_ANDROID_SNAPSHOT_OK = { + ok: true, + data: { + nodes: [ + { ref: '@e0', type: 'FrameLayout', rect: { x: 0, y: 0, width: 1080, height: 2400 }, label: '', identifier: '', enabled: true, hittable: false }, + { ref: '@e12', type: 'Button', rect: { x: 800, y: 2000, width: 168, height: 168 }, label: 'Create task', identifier: 'fab-create-task', enabled: true, hittable: true }, + ], + }, +}; + +// Legacy upstream agent-device daemon (socket) tier: flat-nodes shape with +// less metadata than the in-tree runners. Consumer parsers only care about +// `ref` + `identifier`, so the same handler contract holds. +const LEGACY_DAEMON_SNAPSHOT_OK = { + ok: true, + data: { + nodes: [ + { ref: 'el-0', identifier: 'fab-create-task', label: 'Create task' }, + ], + }, +}; + +// Legacy upstream agent-device CLI tier: same flat-nodes shape as the daemon. +// Pinned separately so a future divergence (e.g. CLI starts including a `type` +// field, daemon doesn't) would surface this contract test as the canary. +const LEGACY_CLI_SNAPSHOT_OK = { + ok: true, + data: { + nodes: [ + { ref: 'el-0', identifier: 'fab-create-task', label: 'Create task' }, + ], + }, +}; + +// Legacy upstream-agent-device internal fast-runner sub-tier — nested tree +// shape, NOT flat-nodes. findRefByTestID's second branch handles this; if +// that branch is ever removed without warning, this test fails. +const LEGACY_FAST_RUNNER_SNAPSHOT_OK_NESTED = { + ok: true, + data: { + tree: { + ref: 'app-0', + identifier: '', + label: 'Application', + children: [ + { + ref: 'btn-7', + identifier: 'fab-create-task', + label: 'Create task', + children: [], + }, + ], + }, + }, +}; + +// In-tree runner failure shape AFTER the client converts raw runner errors +// through failResult(message, code). The raw HTTP response from the +// underlying XCTest/UIAutomator has `error: {message, code}`, but MCP +// consumers (findRefByTestID etc.) never see that — they get the post- +// failResult shape `{ok:false, error: string, code: string}` instead. +const IN_TREE_RUNNER_FAILURE_APP_NOT_RUNNING = { + ok: false, + error: 'app not running', + code: 'APP_NOT_RUNNING', +}; + +const LEGACY_DAEMON_FAILURE_NO_DEVICE = { + ok: false, + error: 'No iOS simulator booted', +}; + +const LEGACY_CLI_FAILURE_WITH_CODE = { + ok: false, + error: 'agent-device CLI exited with code 1', + code: 'CLI_SPAWN_FAILED', +}; + +// iOS-specific: XCUIElement.typeText quiescence-timeout shim. The text +// landed in the field but XCTest's main-thread waitForIdle timed out +// after the side effect succeeded. Runner returns ok:true with a meta +// marker (exact shape: rn-fast-runner-client.ts emits +// `okResult({typed, text}, {meta: {sideEffectSucceeded, runnerTimeoutShim}})`). +// snapshotEnvelopeFailed must NOT report this as failed — it would route +// a successful fill to SNAPSHOT_FAILED otherwise. +const IOS_TYPETEXT_RUNNER_TIMEOUT_SHIM = { + ok: true, + data: { typed: true, text: 'hello' }, + meta: { sideEffectSucceeded: true, runnerTimeoutShim: true }, +}; + +// Snapshot succeeded but nothing matched the testID — empty nodes array. +// This is "element not present", NOT "snapshot infrastructure failed". +const IN_TREE_SNAPSHOT_OK_EMPTY = { + ok: true, + data: { nodes: [] }, +}; + +// ───────────────────────────────────────────────────────────────────────────── +// findRefByTestID — must extract ref by identifier from all producer shapes +// ───────────────────────────────────────────────────────────────────────────── + +const SUCCESS_ENVELOPES_WITH_TARGET = [ + { name: 'in-tree iOS (flat nodes)', env: IN_TREE_IOS_SNAPSHOT_OK, expectedRef: '@e7' }, + { name: 'in-tree Android (flat nodes)', env: IN_TREE_ANDROID_SNAPSHOT_OK, expectedRef: '@e12' }, + { name: 'legacy daemon (flat nodes)', env: LEGACY_DAEMON_SNAPSHOT_OK, expectedRef: 'el-0' }, + { name: 'legacy CLI (flat nodes)', env: LEGACY_CLI_SNAPSHOT_OK, expectedRef: 'el-0' }, + { name: 'legacy fast-runner (nested)', env: LEGACY_FAST_RUNNER_SNAPSHOT_OK_NESTED, expectedRef: 'btn-7' }, +]; + +for (const { name, env, expectedRef } of SUCCESS_ENVELOPES_WITH_TARGET) { + test(`findRefByTestID: ${name} — resolves testID 'fab-create-task' to ${expectedRef}`, () => { + const ref = findRefByTestID(JSON.stringify(env), 'fab-create-task'); + assert.equal(ref, expectedRef); + }); +} + +test('findRefByTestID: in-tree snapshot with empty nodes — returns null (testID not present)', () => { + const ref = findRefByTestID(JSON.stringify(IN_TREE_SNAPSHOT_OK_EMPTY), 'fab-create-task'); + assert.equal(ref, null); +}); + +test('findRefByTestID: in-tree snapshot with present nodes but no match — returns null', () => { + const ref = findRefByTestID(JSON.stringify(IN_TREE_IOS_SNAPSHOT_OK), 'nonexistent-testid'); + assert.equal(ref, null); +}); + +const FAILURE_ENVELOPES = [ + { name: 'in-tree runner failure (object error)', env: IN_TREE_RUNNER_FAILURE_APP_NOT_RUNNING }, + { name: 'legacy daemon failure (string error)', env: LEGACY_DAEMON_FAILURE_NO_DEVICE }, + { name: 'legacy CLI failure (error + code)', env: LEGACY_CLI_FAILURE_WITH_CODE }, +]; + +for (const { name, env } of FAILURE_ENVELOPES) { + test(`findRefByTestID: ${name} — returns null (refuses to scan failed snapshot)`, () => { + const ref = findRefByTestID(JSON.stringify(env), 'fab-create-task'); + assert.equal(ref, null); + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// snapshotEnvelopeFailed — classifies infrastructure failure vs element-absent +// ───────────────────────────────────────────────────────────────────────────── + +for (const { name, env } of SUCCESS_ENVELOPES_WITH_TARGET) { + test(`snapshotEnvelopeFailed: ${name} — returns false (snapshot succeeded)`, () => { + assert.equal(snapshotEnvelopeFailed(JSON.stringify(env)), false); + }); +} + +test('snapshotEnvelopeFailed: empty-nodes success is NOT a failure (element not present is different)', () => { + // Critical contract: handlers depend on this distinction to route + // SNAPSHOT_FAILED vs TESTID_NOT_FOUND correctly (Phase 128 #5/#6). + assert.equal(snapshotEnvelopeFailed(JSON.stringify(IN_TREE_SNAPSHOT_OK_EMPTY)), false); +}); + +test('snapshotEnvelopeFailed: iOS typeText runner-timeout shim is NOT a failure', () => { + // The shim shape carries ok:true + meta.runnerTimeoutShim. Treating + // it as failure would route every successful iOS fill to + // SNAPSHOT_FAILED — would visibly break the iOS device_fill smoke test. + assert.equal(snapshotEnvelopeFailed(JSON.stringify(IOS_TYPETEXT_RUNNER_TIMEOUT_SHIM)), false); +}); + +for (const { name, env } of FAILURE_ENVELOPES) { + test(`snapshotEnvelopeFailed: ${name} — returns true`, () => { + assert.equal(snapshotEnvelopeFailed(JSON.stringify(env)), true); + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Edge cases — what the parsers do under malformed / null input +// ───────────────────────────────────────────────────────────────────────────── + +test('snapshotEnvelopeFailed: null/undefined input → true (treat missing as failed)', () => { + assert.equal(snapshotEnvelopeFailed(null), true); + assert.equal(snapshotEnvelopeFailed(undefined), true); +}); + +test('snapshotEnvelopeFailed: empty string → true', () => { + assert.equal(snapshotEnvelopeFailed(''), true); +}); + +test('snapshotEnvelopeFailed: malformed JSON → true (the parser cannot vouch for the snapshot)', () => { + assert.equal(snapshotEnvelopeFailed('not-json'), true); + assert.equal(snapshotEnvelopeFailed('{ truncated'), true); +}); + +test('findRefByTestID: malformed JSON → null (no ref to extract)', () => { + assert.equal(findRefByTestID('not-json', 'any-testid'), null); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Cross-producer parity — same logical event, structurally compatible output +// ───────────────────────────────────────────────────────────────────────────── + +test('parity: in-tree iOS and in-tree Android emit structurally identical flat-nodes shape', () => { + // Both runners should expose the same `{ok, data: {nodes: [{ref, identifier, label}]}}` + // shape so handlers can be platform-agnostic. A divergence here + // (e.g. Android renaming `identifier` to `accessibilityId`) would + // silently break the iOS-tested handlers on Android. + const iosKeys = Object.keys(IN_TREE_IOS_SNAPSHOT_OK.data.nodes[0]).sort(); + const androidKeys = Object.keys(IN_TREE_ANDROID_SNAPSHOT_OK.data.nodes[0]).sort(); + assert.deepEqual(iosKeys, androidKeys, 'iOS and Android flat-node keys must match'); +});