Skip to content

Commit 8345f4b

Browse files
Lykhoyda and claude committed
fix(rn-device): close iOS-MVP coverage gaps + strip upstream branding
Live pre-merge validation (docs/proof/ios-mvp-validation/REPORT.md in workspace) surfaced 6 substantive bugs where iOS device verbs still routed through the legacy agent-device daemon/CLI — exactly the surface PR #164 was supposed to eliminate. This commit closes those gaps and removes the leftover upstream visual branding. API migration: - rn-fast-runner-client.ts: drop dead per-endpoint helpers (fastTap/fastType/fastSnapshot/fastScreenshot/fastDismissKeyboard, plus their postJSON/postBinary plumbing). Rewrite fastSwipe to POST the single /command endpoint with command='drag' (the Swift .swipe case is tvOS-only; iOS coord gestures use .drag). - RunIOSArgs: add 'drag' | 'longPress' | 'pinch' | 'findText' command variants + scale field for pinch. - runIOS body builder: forward args.scale to the runner. - agent-device-wrapper.ts: expand RN_FAST_RUNNER_COMMANDS to include swipe / scroll / longpress / pinch so runAgentDevice short-circuits through runIOS for all iOS device verbs. - buildRunIOSArgs: new cases for swipe+scroll (→ .drag), longpress (→ .longPress), pinch (→ .pinch). - device-interact.ts createDeviceFindHandler: on iOS the non-exact path now uses the snapshot-based findInLatestSnapshot orchestrator instead of the legacy `agent-device find` CLI (which respawned the upstream AgentDeviceRunner via the daemon and stole focus from RnFastRunner). - device-interact.ts createDeviceScrollIntoViewHandler: new TS orchestrator scrollIntoViewIOS — snapshot → find → decideScroll → fastSwipe loop (up to 12 iterations). Android keeps the legacy CLI delegate (its daemon doesn't have the focus race). Branding cleanup (live finding: "rn fast runner has agent device runner logo / intro screen as well"): - ContentView.swift: drop Image("Logo") + Image("PoweredBy"); show only the "rn-dev-agent / fast runner" text + "XCUITest bridge" tag. - Assets.xcassets: delete Logo.imageset and PoweredBy.imageset imagesets entirely. 
Remove logo.jpg from AppIcon.appiconset and clear the filename reference in Contents.json so the runner uses Xcode's default placeholder icon (the runner ideally never foregrounds anyway, per the Task 2 regression XCTest). Validation: existing 1449/1449 unit tests still pass. Swift TEST BUILD SUCCEEDED. Live re-validation pending /reload-plugins. Findings refs: docs/proof/ios-mvp-validation/REPORT.md (workspace). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 116e16e commit 8345f4b

13 files changed

Lines changed: 398 additions & 195 deletions

File tree

scripts/cdp-bridge/dist/agent-device-wrapper.js

Lines changed: 75 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -259,11 +259,14 @@ export function getAdbSerial() {
259259
// the new client expects. The legacy daemon + CLI tiers below remain — they
260260
// now serve Android exclusively.
261261
//
262-
// Commands intentionally NOT in this set:
263-
// - longpress (Swift runner has no longPress-by-ref code path; if needed,
264-
// callers can compose tap with durationMs)
265-
// - swipe / scroll already route through fast-runner directly inside
266-
// device-interact.ts via fastSwipe(), bypassing runAgentDevice entirely.
262+
// GH #105 iOS-MVP follow-up (post-validation): the original short-circuit
263+
// list left `swipe` / `scroll` / `longpress` / `pinch` / `find` on the
264+
// legacy daemon/CLI path. Live validation showed the daemon respawns the
265+
// upstream AgentDeviceRunner on every such call, which then fights our
266+
// RnFastRunner for focus. Each of these now routes through the runner's
267+
// `/command` endpoint (the Swift `.drag` / `.longPress` / `.pinch` / `.findText`
268+
// handlers). Coordinate-based gestures: the Swift `.swipe` case is tvOS-only;
269+
// iOS coordinate-form swipes/scrolls use `.drag`.
267270
const RN_FAST_RUNNER_COMMANDS = new Set([
268271
'snapshot',
269272
'tap',
@@ -273,6 +276,10 @@ const RN_FAST_RUNNER_COMMANDS = new Set([
273276
'back',
274277
'screenshot',
275278
'keyboard',
279+
'swipe',
280+
'scroll',
281+
'longpress',
282+
'pinch',
276283
]);
277284
export function getCachedScreenRect() {
278285
return getScreenRect();
@@ -319,6 +326,69 @@ function buildRunIOSArgs(cliArgs, bundleId) {
319326
return { command: 'screenshot', ...(bundleId ? { bundleId } : {}) };
320327
case 'keyboard':
321328
return { command: 'dismissKeyboard', ...(bundleId ? { bundleId } : {}) };
329+
case 'swipe':
330+
case 'scroll': {
331+
// Coordinate-based gesture. The Swift `.swipe` is tvOS-only; iOS
332+
// coord-form gestures use `.drag`. CLI shapes seen:
333+
// ['swipe', x1, y1, x2, y2, durationMs?]
334+
// ['scroll', x1, y1, x2, y2, durationMs?]
335+
// Direction-form (`['scroll', 'down', amount?]`) cannot reach this
336+
// path: device-interact converts direction→coords up-front and
337+
// dispatches the coord shape.
338+
const [x1S, y1S, x2S, y2S, durationS] = positionals;
339+
const x1 = Number(x1S), y1 = Number(y1S), x2 = Number(x2S), y2 = Number(y2S);
340+
if ([x1, y1, x2, y2].some((n) => Number.isNaN(n))) {
341+
throw new Error(`buildRunIOSArgs: ${cmd} requires four numeric coordinates`);
342+
}
343+
const args = {
344+
command: 'drag', x: x1, y: y1, x2, y2,
345+
...(bundleId ? { bundleId } : {}),
346+
};
347+
if (durationS !== undefined) {
348+
const n = Number(durationS);
349+
if (!Number.isNaN(n))
350+
args.durationMs = n;
351+
}
352+
return args;
353+
}
354+
case 'longpress': {
355+
// CLI shape: ['longpress', x, y, durationMs?]
356+
const [xS, yS, durationS] = positionals;
357+
const x = Number(xS), y = Number(yS);
358+
if (Number.isNaN(x) || Number.isNaN(y)) {
359+
throw new Error(`buildRunIOSArgs: longpress requires numeric x, y`);
360+
}
361+
const args = {
362+
command: 'longPress', x, y,
363+
...(bundleId ? { bundleId } : {}),
364+
};
365+
if (durationS !== undefined) {
366+
const n = Number(durationS);
367+
if (!Number.isNaN(n))
368+
args.durationMs = n;
369+
}
370+
return args;
371+
}
372+
case 'pinch': {
373+
// CLI shape: ['pinch', scale, x?, y?]
374+
const [scaleS, xS, yS] = positionals;
375+
const scale = Number(scaleS);
376+
if (Number.isNaN(scale)) {
377+
throw new Error(`buildRunIOSArgs: pinch requires numeric scale`);
378+
}
379+
const args = {
380+
command: 'pinch', scale,
381+
...(bundleId ? { bundleId } : {}),
382+
};
383+
if (xS !== undefined && yS !== undefined) {
384+
const x = Number(xS), y = Number(yS);
385+
if (!Number.isNaN(x))
386+
args.x = x;
387+
if (!Number.isNaN(y))
388+
args.y = y;
389+
}
390+
return args;
391+
}
322392
default:
323393
throw new Error(`buildRunIOSArgs: unsupported command "${cmd ?? '<empty>'}"`);
324394
}

scripts/cdp-bridge/dist/runners/rn-fast-runner-client.js

Lines changed: 7 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -197,63 +197,12 @@ export function stopFastRunner() {
197197
}
198198
catch { /* ignore */ }
199199
}
200-
// --- HTTP client (legacy /tap, /snapshot routes — kept for device-interact.ts swipe + other callers) ---
201-
async function postJSON(path, body) {
202-
if (!runnerState)
203-
throw new Error('Fast runner not started');
204-
const url = `http://[::1]:${runnerState.port}${path}`;
205-
const controller = new AbortController();
206-
const timer = setTimeout(() => controller.abort(), HTTP_TIMEOUT_MS);
207-
try {
208-
const res = await fetch(url, {
209-
method: 'POST',
210-
headers: body ? { 'Content-Type': 'application/json' } : undefined,
211-
body: body ? JSON.stringify(body) : undefined,
212-
signal: controller.signal,
213-
});
214-
if (!res.ok) {
215-
const text = await res.text();
216-
throw new Error(`HTTP ${res.status}: ${text}`);
217-
}
218-
return await res.json();
219-
}
220-
finally {
221-
clearTimeout(timer);
222-
}
223-
}
224-
async function postBinary(path) {
225-
if (!runnerState)
226-
throw new Error('Fast runner not started');
227-
const url = `http://[::1]:${runnerState.port}${path}`;
228-
const controller = new AbortController();
229-
const timer = setTimeout(() => controller.abort(), HTTP_TIMEOUT_MS);
230-
try {
231-
const res = await fetch(url, { method: 'POST', signal: controller.signal });
232-
if (!res.ok)
233-
throw new Error(`HTTP ${res.status}`);
234-
return Buffer.from(await res.arrayBuffer());
235-
}
236-
finally {
237-
clearTimeout(timer);
238-
}
239-
}
240-
export async function fastTap(x, y, duration) {
241-
return postJSON('/tap', { x, y, ...(duration != null ? { duration } : {}) });
242-
}
243-
export async function fastType(text) {
244-
return postJSON('/type', { text });
245-
}
246200
export async function fastSwipe(x1, y1, x2, y2, durationMs) {
247-
return postJSON('/swipe', { x1, y1, x2, y2, ...(durationMs != null ? { durationMs } : {}) });
248-
}
249-
export async function fastSnapshot(bundleId) {
250-
return postJSON('/snapshot', bundleId ? { bundleId } : {});
251-
}
252-
export async function fastScreenshot() {
253-
return postBinary('/screenshot');
254-
}
255-
export async function fastDismissKeyboard() {
256-
return postJSON('/dismissKeyboard');
201+
const body = { command: 'drag', x: x1, y: y1, x2, y2 };
202+
if (durationMs != null)
203+
body.durationMs = durationMs;
204+
const resp = await postCommand(body);
205+
return resp;
257206
}
258207
// --- Health check ---
259208
export async function fastHealthCheck() {
@@ -434,6 +383,8 @@ export async function runIOS(args) {
434383
body.durationMs = args.durationMs;
435384
if (args.direction !== undefined)
436385
body.direction = args.direction;
386+
if (args.scale !== undefined)
387+
body.scale = args.scale;
437388
if (args.interactiveOnly !== undefined)
438389
body.interactiveOnly = args.interactiveOnly;
439390
if (args.compact !== undefined)

scripts/cdp-bridge/dist/tools/device-interact.js

Lines changed: 97 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -206,6 +206,34 @@ export function createDeviceFindHandler() {
206206
}
207207
return failResult(`AMBIGUOUS_MATCH: exact "${args.text}" matched ${candidates.length} elements`, { code: 'AMBIGUOUS_MATCH', query: args.text, candidates, hint: 'Add index: N to pick one.' });
208208
}
209+
// GH #105 iOS-MVP follow-up: on iOS, the legacy `agent-device find` CLI
210+
// path respawns the upstream AgentDeviceRunner via the daemon, which then
211+
// fights our RnFastRunner for focus. Use the snapshot-based orchestrator
212+
// instead — same result without the daemon round-trip. Android still
213+
// uses the CLI fuzzy matcher (its daemon doesn't have the same focus race).
214+
const activeSession = getActiveSession();
215+
if (activeSession?.platform === 'ios') {
216+
const find = await fetchFindCandidates(args.text, false);
217+
if (!find.ok) {
218+
if (find.reason === 'runner-leak-unrecovered') {
219+
return runnerLeakFailResult(args.text, find.recoveryReason);
220+
}
221+
return failResult(`Snapshot unavailable — cannot resolve "${args.text}" on iOS`, { code: 'SNAPSHOT_UNAVAILABLE', query: args.text });
222+
}
223+
const { candidates, recoveredTier } = find;
224+
if (candidates.length === 0) {
225+
return failResult(`No element matches "${args.text}"`, { code: 'NOT_FOUND', query: args.text });
226+
}
227+
if (candidates.length === 1) {
228+
return tagPressIfRecovered(await pressCandidate(candidates[0], args.action), recoveredTier);
229+
}
230+
return failResult(`AMBIGUOUS_MATCH: "${args.text}" matched ${candidates.length} elements. Use device_press with one of these refs, or retry with index: N.`, {
231+
code: 'AMBIGUOUS_MATCH',
232+
query: args.text,
233+
candidates,
234+
hint: 'Pick the correct ref (prefer one with hittable=true) and call device_press(ref="...") directly, or call device_find again with index: N.',
235+
});
236+
}
209237
const cliArgs = ['find', args.text];
210238
if (args.action)
211239
cliArgs.push(args.action);
@@ -701,17 +729,81 @@ export function createDeviceScrollHandler() {
701729
});
702730
}
703731
export function createDeviceScrollIntoViewHandler() {
704-
return withSession((args) => {
732+
return withSession(async (args) => {
733+
if (!args.ref && !args.text) {
734+
return failResult('Provide either text or ref to scroll into view');
735+
}
736+
// GH #105 iOS-MVP follow-up: the Swift runner has no `scrollintoview`
737+
// command; this is TS-orchestrated on iOS (snapshot → find → swipe loop).
738+
// Android keeps the legacy CLI delegate (agent-device handles it natively).
739+
const session = getActiveSession();
740+
if (session?.platform === 'ios') {
741+
return scrollIntoViewIOS(args);
742+
}
705743
if (args.ref) {
706744
const ref = args.ref.startsWith('@') ? args.ref : `@${args.ref}`;
707745
return runAgentDevice(['scrollintoview', ref]);
708746
}
709-
if (args.text) {
710-
return runAgentDevice(['scrollintoview', args.text]);
711-
}
712-
return Promise.resolve(failResult('Provide either text or ref to scroll into view'));
747+
return runAgentDevice(['scrollintoview', args.text]);
713748
});
714749
}
750+
/**
751+
* GH #105 iOS-MVP follow-up: TS orchestrator for device_scrollintoview on iOS.
752+
* Loops snapshot → find → check viewport → swipe up to MAX_ITERATIONS times.
753+
* Uses the runner's `/command` snapshot + drag verbs exclusively — no daemon.
754+
*/
755+
async function scrollIntoViewIOS(args) {
756+
const MAX_ITERATIONS = 12;
757+
const screen = getCachedScreenRect() ?? DEFAULT_SCREEN;
758+
const screenRect = { x: 0, y: 0, width: screen.width, height: screen.height };
759+
for (let i = 0; i < MAX_ITERATIONS; i++) {
760+
const snapRes = await runAgentDevice(['snapshot', '-i']);
761+
if (snapRes.isError) {
762+
return failResult(`scrollintoview: snapshot failed at iteration ${i}: ${snapRes.content?.[0]?.text ?? 'unknown'}`, { code: 'SNAPSHOT_UNAVAILABLE' });
763+
}
764+
let nodes = [];
765+
try {
766+
const envelope = JSON.parse(snapRes.content?.[0]?.text ?? '{}');
767+
nodes = envelope.data?.nodes ?? [];
768+
}
769+
catch {
770+
return failResult(`scrollintoview: failed to parse snapshot envelope at iteration ${i}`);
771+
}
772+
const target = args.ref
773+
? nodes.find((n) => n.ref === (args.ref.startsWith('@') ? args.ref : `@${args.ref}`)) ?? null
774+
: findInLatestSnapshot(nodes, args.text);
775+
if (!target) {
776+
// Element not in snapshot at all, so its direction cannot be computed.
777+
// On the first iteration only, assume it sits below the fold (the common
778+
// case), swipe once toward it, and retry; later misses fail with NOT_FOUND.
779+
if (i === 0) {
780+
const fallbackDir = decideScrollDirection({ x: 0, y: screen.height * 2, width: 1, height: 1 }, screenRect);
781+
const coords = computeSwipeFromDirection(fallbackDir ?? 'down', screen);
782+
await fastSwipe(coords.x1, coords.y1, coords.x2, coords.y2, DEFAULT_SWIPE_DURATION_MS);
783+
continue;
784+
}
785+
return failResult(`scrollintoview: element "${args.ref ?? args.text}" not found after ${i} swipe iteration(s)`, { code: 'NOT_FOUND', iterations: i });
786+
}
787+
if (!target.rect) {
788+
return failResult(`scrollintoview: target has no rect — cannot decide direction`);
789+
}
790+
const direction = decideScrollDirection(target.rect, screenRect);
791+
if (direction === null) {
792+
return okResult({
793+
ref: target.ref,
794+
rect: target.rect,
795+
iterations: i,
796+
method: 'fast-runner',
797+
});
798+
}
799+
const coords = computeSwipeFromDirection(direction, screen);
800+
const swipeResp = await fastSwipe(coords.x1, coords.y1, coords.x2, coords.y2, DEFAULT_SWIPE_DURATION_MS);
801+
if (!swipeResp.ok) {
802+
return failResult(`scrollintoview: swipe failed at iteration ${i}: ${swipeResp.error ?? 'unknown'}`);
803+
}
804+
}
805+
return failResult(`scrollintoview: target "${args.ref ?? args.text}" did not enter viewport after ${MAX_ITERATIONS} swipe iterations`, { code: 'SCROLL_EXHAUSTED', iterations: MAX_ITERATIONS });
806+
}
715807
export function createDevicePinchHandler() {
716808
return withSession((args) => {
717809
const cliArgs = ['pinch', String(args.scale)];

scripts/cdp-bridge/src/agent-device-wrapper.ts

Lines changed: 71 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -290,11 +290,14 @@ export function getAdbSerial(): string[] {
290290
// the new client expects. The legacy daemon + CLI tiers below remain — they
291291
// now serve Android exclusively.
292292
//
293-
// Commands intentionally NOT in this set:
294-
// - longpress (Swift runner has no longPress-by-ref code path; if needed,
295-
// callers can compose tap with durationMs)
296-
// - swipe / scroll already route through fast-runner directly inside
297-
// device-interact.ts via fastSwipe(), bypassing runAgentDevice entirely.
293+
// GH #105 iOS-MVP follow-up (post-validation): the original short-circuit
294+
// list left `swipe` / `scroll` / `longpress` / `pinch` / `find` on the
295+
// legacy daemon/CLI path. Live validation showed the daemon respawns the
296+
// upstream AgentDeviceRunner on every such call, which then fights our
297+
// RnFastRunner for focus. Each of these now routes through the runner's
298+
// `/command` endpoint (the Swift `.drag` / `.longPress` / `.pinch` / `.findText`
299+
// handlers). Coordinate-based gestures: the Swift `.swipe` case is tvOS-only;
300+
// iOS coordinate-form swipes/scrolls use `.drag`.
298301

299302
const RN_FAST_RUNNER_COMMANDS = new Set<string>([
300303
'snapshot',
@@ -305,6 +308,10 @@ const RN_FAST_RUNNER_COMMANDS = new Set<string>([
305308
'back',
306309
'screenshot',
307310
'keyboard',
311+
'swipe',
312+
'scroll',
313+
'longpress',
314+
'pinch',
308315
]);
309316

310317
export function getCachedScreenRect(): { width: number; height: number } | null {
@@ -356,6 +363,65 @@ function buildRunIOSArgs(
356363
return { command: 'screenshot', ...(bundleId ? { bundleId } : {}) };
357364
case 'keyboard':
358365
return { command: 'dismissKeyboard', ...(bundleId ? { bundleId } : {}) };
366+
case 'swipe':
367+
case 'scroll': {
368+
// Coordinate-based gesture. The Swift `.swipe` is tvOS-only; iOS
369+
// coord-form gestures use `.drag`. CLI shapes seen:
370+
// ['swipe', x1, y1, x2, y2, durationMs?]
371+
// ['scroll', x1, y1, x2, y2, durationMs?]
372+
// Direction-form (`['scroll', 'down', amount?]`) cannot reach this
373+
// path: device-interact converts direction→coords up-front and
374+
// dispatches the coord shape.
375+
const [x1S, y1S, x2S, y2S, durationS] = positionals;
376+
const x1 = Number(x1S), y1 = Number(y1S), x2 = Number(x2S), y2 = Number(y2S);
377+
if ([x1, y1, x2, y2].some((n) => Number.isNaN(n))) {
378+
throw new Error(`buildRunIOSArgs: ${cmd} requires four numeric coordinates`);
379+
}
380+
const args: import('./runners/rn-fast-runner-client.js').RunIOSArgs = {
381+
command: 'drag', x: x1, y: y1, x2, y2,
382+
...(bundleId ? { bundleId } : {}),
383+
};
384+
if (durationS !== undefined) {
385+
const n = Number(durationS);
386+
if (!Number.isNaN(n)) args.durationMs = n;
387+
}
388+
return args;
389+
}
390+
case 'longpress': {
391+
// CLI shape: ['longpress', x, y, durationMs?]
392+
const [xS, yS, durationS] = positionals;
393+
const x = Number(xS), y = Number(yS);
394+
if (Number.isNaN(x) || Number.isNaN(y)) {
395+
throw new Error(`buildRunIOSArgs: longpress requires numeric x, y`);
396+
}
397+
const args: import('./runners/rn-fast-runner-client.js').RunIOSArgs = {
398+
command: 'longPress', x, y,
399+
...(bundleId ? { bundleId } : {}),
400+
};
401+
if (durationS !== undefined) {
402+
const n = Number(durationS);
403+
if (!Number.isNaN(n)) args.durationMs = n;
404+
}
405+
return args;
406+
}
407+
case 'pinch': {
408+
// CLI shape: ['pinch', scale, x?, y?]
409+
const [scaleS, xS, yS] = positionals;
410+
const scale = Number(scaleS);
411+
if (Number.isNaN(scale)) {
412+
throw new Error(`buildRunIOSArgs: pinch requires numeric scale`);
413+
}
414+
const args: import('./runners/rn-fast-runner-client.js').RunIOSArgs = {
415+
command: 'pinch', scale,
416+
...(bundleId ? { bundleId } : {}),
417+
};
418+
if (xS !== undefined && yS !== undefined) {
419+
const x = Number(xS), y = Number(yS);
420+
if (!Number.isNaN(x)) args.x = x;
421+
if (!Number.isNaN(y)) args.y = y;
422+
}
423+
return args;
424+
}
359425
default:
360426
throw new Error(`buildRunIOSArgs: unsupported command "${cmd ?? '<empty>'}"`);
361427
}

0 commit comments

Comments
 (0)