From ef0e062ad37b856b6f7ff62173f09ff58a512bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 09:58:53 +0200 Subject: [PATCH 1/6] feat: auto-reverse Android localhost opens --- src/platforms/android/__tests__/index.test.ts | 128 ++++++++++++++++++ src/platforms/android/app-lifecycle.ts | 50 ++++++- src/utils/__tests__/args.test.ts | 6 +- src/utils/command-schema.ts | 8 +- .../suites/agent-device-smoke-suite.ts | 9 +- website/docs/docs/commands.md | 2 +- 6 files changed, 189 insertions(+), 14 deletions(-) diff --git a/src/platforms/android/__tests__/index.test.ts b/src/platforms/android/__tests__/index.test.ts index 35dbf00c9..35af41738 100644 --- a/src/platforms/android/__tests__/index.test.ts +++ b/src/platforms/android/__tests__/index.test.ts @@ -534,6 +534,134 @@ test('openAndroidApp rejects activity override for deep link URLs', async () => ); }); +test('openAndroidApp ensures Android reverse before localhost deep link launch', async () => { + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + const calls: Array< + { kind: 'exec'; args: string[] } | { kind: 'reverse'; local: string; remote: string } + > = []; + + await withAndroidAdbProvider( + { + exec: async (args) => { + calls.push({ kind: 'exec', args }); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + reverse: { + ensure: async (mapping) => { + calls.push({ kind: 'reverse', local: mapping.local, remote: mapping.remote }); + }, + remove: async () => {}, + removeAllOwned: async () => {}, + }, + }, + { serial: 'emulator-5554' }, + async () => await openAndroidApp(device, 'exp://127.0.0.1:8083'), + ); + + assert.deepEqual(calls, [ + { kind: 'reverse', local: 'tcp:8083', remote: 'tcp:8083' }, + { + kind: 'exec', + args: [ + 'shell', + 'am', + 'start', + '-W', + '-a', + 'android.intent.action.VIEW', + '-d', + 'exp://127.0.0.1:8083', + ], + }, + ]); +}); + 
+test('openAndroidApp leaves non-localhost deep links unchanged', async () => { + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + const calls: string[][] = []; + + await withAndroidAdbProvider( + { + exec: async (args) => { + calls.push(args); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + reverse: { + ensure: async () => { + throw new Error('reverse should not run for remote URLs'); + }, + remove: async () => {}, + removeAllOwned: async () => {}, + }, + }, + { serial: 'emulator-5554' }, + async () => await openAndroidApp(device, 'https://example.com:8083/path'), + ); + + assert.deepEqual(calls, [ + [ + 'shell', + 'am', + 'start', + '-W', + '-a', + 'android.intent.action.VIEW', + '-d', + 'https://example.com:8083/path', + ], + ]); +}); + +test('openAndroidApp reports localhost reverse failures with port context', async () => { + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + + await withAndroidAdbProvider( + { + exec: async (args) => { + throw new Error(`unexpected adb exec: ${args.join(' ')}`); + }, + reverse: { + ensure: async () => { + throw new Error('bridge unavailable'); + }, + remove: async () => {}, + removeAllOwned: async () => {}, + }, + }, + { serial: 'emulator-5554' }, + async () => { + await assert.rejects( + () => openAndroidApp(device, 'http://localhost:8081'), + (error: unknown) => { + assert.equal(error instanceof AppError, true); + assert.equal((error as AppError).code, 'COMMAND_FAILED'); + assert.match((error as Error).message, /tcp:8081/); + assert.match((error as Error).message, /reverse/i); + return true; + }, + ); + }, + ); +}); + test('setAndroidSetting appearance toggle flips current mode', async () => { await withMockedAdb( 'agent-device-android-appearance-toggle-', diff --git a/src/platforms/android/app-lifecycle.ts b/src/platforms/android/app-lifecycle.ts 
index 2841b7ad0..429b2f9bc 100644 --- a/src/platforms/android/app-lifecycle.ts +++ b/src/platforms/android/app-lifecycle.ts @@ -9,7 +9,12 @@ import { isDeepLinkTarget } from '../../core/open-target.ts'; import { createAppResolutionCache, type AppResolutionCacheScope } from '../app-resolution-cache.ts'; import { waitForAndroidBoot } from './devices.ts'; import { runAndroidAdb } from './adb.ts'; -import { installAndroidAdbPackage, resolveAndroidAdbProvider } from './adb-executor.ts'; +import { + createAndroidPortReverseManager, + installAndroidAdbPackage, + resolveAndroidAdbProvider, + type AndroidPortReverseEndpoint, +} from './adb-executor.ts'; import { classifyAndroidAppTarget } from './open-target.ts'; import { prepareAndroidInstallArtifact } from './install-artifact.ts'; import { @@ -217,6 +222,48 @@ async function readAndroidFocus( return null; } +function androidLocalhostReverseEndpoint(target: string): AndroidPortReverseEndpoint | null { + let url: URL; + try { + url = new URL(target); + } catch { + return null; + } + + const hostname = url.hostname.toLowerCase(); + if (hostname !== 'localhost' && hostname !== '127.0.0.1' && hostname !== '[::1]') { + return null; + } + if (!url.port) return null; + const port = Number(url.port); + if (!Number.isInteger(port)) return null; + return `tcp:${port}`; +} + +async function ensureAndroidLocalhostReverse(device: DeviceInfo, target: string): Promise { + const endpoint = androidLocalhostReverseEndpoint(target); + if (!endpoint) return; + + const reverse = createAndroidPortReverseManager(resolveAndroidAdbProvider(device)); + try { + await reverse.ensure({ local: endpoint, remote: endpoint }); + } catch (error) { + const causeDetails = error instanceof AppError ? 
error.details : undefined; + throw new AppError( + 'COMMAND_FAILED', + `Failed to ensure Android port reverse ${endpoint} before opening localhost URL`, + { + localPort: endpoint.replace('tcp:', ''), + operation: `adb reverse ${endpoint} ${endpoint}`, + ...(causeDetails?.hint ? { hint: causeDetails.hint } : {}), + ...(causeDetails?.diagnosticId ? { diagnosticId: causeDetails.diagnosticId } : {}), + ...(causeDetails?.logPath ? { logPath: causeDetails.logPath } : {}), + }, + error, + ); + } +} + export async function openAndroidApp( device: DeviceInfo, app: string, @@ -233,6 +280,7 @@ export async function openAndroidApp( 'Activity override is not supported when opening a deep link URL', ); } + await ensureAndroidLocalhostReverse(device, deepLinkTarget); await runAndroidAdb(device, [ 'shell', 'am', diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 742e5f8dd..b419ccc0f 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -861,7 +861,7 @@ test('usage includes agent workflows, config, environment, and examples footers' assert.match(usageText, /agent-facing, token-efficient view for planning and targeting actions/); assert.match(usageText, /Truncated text\/input preview: expand first with snapshot -s @e12/); assert.match(usageText, /React Native apps: read help react-native/); - assert.match(usageText, /adb reverse tcp: tcp: is harmless/); + assert.match(usageText, /localhost URL opens with a port auto-ensure adb reverse/); assert.match(usageText, /Expo Go\/dev clients: use the provided URL when given/); assert.match(usageText, /on iOS prefer open "Expo Go" /); assert.match(usageText, /Install flows: install\/install-from-source first/); @@ -968,7 +968,7 @@ test('usageForCommand resolves workflow help topic', () => { assert.match(help, /provider-native text injection when available/); assert.match(help, /Do not switch to raw adb, clipboard, or paste as an agent fallback/); assert.match(help, /if no 
URL is provided but a target\/app name is provided, open that target/); - assert.match(help, /adb reverse tcp: tcp: before opening the app or URL/); + assert.match(help, /localhost\/127\.0\.0\.1\/\[::1\] with a port auto-ensure adb reverse/); assert.match(help, /do not split clear\/restart/); assert.match(help, /do not write network log headers/); assert.match(help, /agent-device open exp:\/\/127\.0\.0\.1:8081 --platform ios/); @@ -1037,7 +1037,7 @@ test('usageForCommand resolves dogfood help topic', () => { assert.match(help, /Static\/on-load issues can use one screenshot/); assert.match(help, /React Native warning\/error overlays can be real findings/); assert.match(help, /Expo Go\/dev-client shells/); - assert.match(help, /adb reverse tcp: tcp: before opening the app or URL/); + assert.match(help, /localhost\/127\.0\.0\.1\/\[::1\] with a port auto-ensure adb reverse/); assert.match(help, /Keep stateful commands serial within the same session/); assert.match(help, /prefer agent-device open "Expo Go" /); assert.match(help, /dogfood-output\/report\.md/); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 28710d73c..359f3b25c 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -169,7 +169,7 @@ const AGENT_QUICKSTART_LINES = [ 'Anti-pattern: snapshot -i followed by snapshot -i | grep ...; prior refs stay valid until app state changes, and --force-full is the explicit full re-read.', 'Truncated text/input preview: expand first with snapshot -s @e12, not get text.', 'React Native apps: read help react-native for Metro, DevTools routing, and RN-specific blockers; use react-native dismiss-overlay for LogBox/RedBox overlays.', - 'Android RN/Expo Metro: adb reverse tcp: tcp: is harmless and helps the device reach any local Metro port.', + 'Android RN/Expo Metro: direct Android localhost URL opens with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or 
unsupported flows.', 'Expo Go/dev clients: use the provided URL when given; on iOS prefer open "Expo Go" ; Android URL opens infer the foreground package for logs/perf when possible.', 'Install flows: install/install-from-source first, then open the installed id with --relaunch.', 'Text: fill \'id="field-email"\' "qa@example.com" replaces; type appends after press.', @@ -349,7 +349,7 @@ React Native dev loop: agent-device find "Home" Do not use agent-device reload. Use open --relaunch for native startup reset. React Native apps: use help react-native for Metro/Fast Refresh, DevTools routing, and RN-specific blockers; use react-native dismiss-overlay for LogBox/RedBox overlays. - Android RN/Expo Metro: run adb reverse tcp: tcp: before opening the app or URL; it is harmless even if already configured. + Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. Expo Go is a host shell. Use a provided project URL instead of inventing a bundle id; if no URL is provided but a target/app name is provided, open that target and do not inspect project files to find one. On iOS, prefer host + URL when the host shell is known because direct URL open can report success while leaving the runner/shell focused; verify with snapshot -i after opening: agent-device open "Expo Go" exp://127.0.0.1:8081 --platform ios agent-device snapshot -i --platform ios @@ -510,7 +510,7 @@ React Native dev loop: agent-device metro reload agent-device find "Home" Do not use agent-device reload. Use open --relaunch for native startup reset. - Android RN/Expo Metro: run adb reverse tcp: tcp: before opening the app or URL; it is harmless even if already configured. 
+ Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. Expo Go/dev clients are host shells. Use provided project URLs, verify with snapshot -i after opening, and ask instead of inventing app ids or URLs. Help workflow owns the full Expo URL command shapes. Overlays and busy RN UIs: @@ -619,7 +619,7 @@ Coverage: Navigation, forms, empty/error/loading states, offline or retry behavior, permissions, settings, accessibility labels, orientation/keyboard, and obvious performance stalls. React Native warning/error overlays can be real findings or test blockers. Capture them, use react-native dismiss-overlay if unrelated, re-snapshot, and report them. Expo Go/dev-client shells: use the provided exp:// or dev-client URL and record whether the shell, project load, or app UI is being tested. On iOS dogfood, prefer agent-device open "Expo Go" when Expo Go is the known shell, then snapshot -i to confirm the project UI rather than the runner splash. - Android RN/Expo Metro: run adb reverse tcp: tcp: before opening the app or URL; it is harmless even if already configured. + Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. Categories: visual, functional, UX, content, performance, diagnostics, permissions, accessibility. Severity: critical blocks a core flow/data/crashes; high breaks a major feature; medium has friction or workaround; low is polish. 
diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index a2ab28153..efb353d81 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -1175,20 +1175,19 @@ const SKILL_GUIDANCE_CASES: Case[] = [ 'Platform: Android', 'Launch context: Expo Go because the user provided an exp:// project URL', 'Local Metro port: 8082', - 'Project URL after emulator port reverse: exp://127.0.0.1:8082', - 'Android Metro reachability should use adb reverse for any local Metro port', + 'Project URL: exp://127.0.0.1:8082', + 'Direct Android localhost URL opens auto-ensure adb reverse when supported', 'Do not assume every React Native app is Expo; this one is Expo only because an exp:// URL was provided', ], - task: 'Plan the commands to make the Android emulator reach local Metro on port 8082, open the project URL, and verify the app UI with an interactive snapshot.', + task: 'Plan the commands to open the Android Expo project URL on local Metro port 8082 and verify the app UI with an interactive snapshot.', outputs: [ - plannedCommand('adb reverse'), - /tcp:8082\s+tcp:8082/i, plannedCommand('open'), /exp:\/\/127\.0\.0\.1:8082/i, /--platform android/i, /snapshot -i/i, ], forbiddenOutputs: [ + /adb\s+reverse/i, /exp:\/\/10\.0\.2\.2:8082/i, /open\s+(?:"Expo Go"|Expo\s+Go)\s+exp:\/\//i, /com\.(?:expensify|agent|example)/i, diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index 75ad3bc77..e8f910c07 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -603,7 +603,7 @@ agent-device react-devtools profile report @c5 - Keep using `snapshot`, `press`, `fill`, `logs`, `network`, and `perf` for device/app runtime evidence. Use `react-devtools` for React internals. - For React Native apps, overlays, Metro/Fast Refresh blockers, and routing to React DevTools or debugging evidence, start with `agent-device help react-native`. 
- On Android, use `alert get`, `alert wait `, `alert accept`, and `alert dismiss` for runtime permission prompts and native alerts. On iOS, use the same alert commands for XCTest alerts, app-owned modal popups with native blocking markers, and blocking system dialogs. Do not use `settings permission` to answer a dialog already on screen; reserve it for setup or resetting permission state before a flow. -- React Native development builds can connect to the DevTools daemon on port 8097. For Android emulators or physical devices, run `adb reverse tcp:8097 tcp:8097` if the app cannot reach the host. If Metro is local, also run `adb reverse tcp:8081 tcp:8081`. +- React Native development builds can connect to the DevTools daemon on port 8097. For Android emulators or physical devices, run `adb reverse tcp:8097 tcp:8097` if the app cannot reach the host. Direct Android `open` URL targets for local Metro hosts with a port auto-ensure the matching reverse when supported; for app launches or unsupported flows, `adb reverse tcp:8081 tcp:8081` remains harmless. - For Android and iOS sessions connected through a remote bridge profile, `react-devtools` registers a lease-scoped companion tunnel to the sandbox-local DevTools daemon at `127.0.0.1:8097`. Android bridge profiles use the bridge-owned remote `adb reverse` mapping; iOS bridge profiles use the bridge-owned wildcard Metro host tunnel. The CLI keeps the companion alive until `agent-device react-devtools stop` or `agent-device disconnect`. - For remote iOS bridge sessions, open the app once to create the bridge session, run `agent-device react-devtools start`, then relaunch the same bundle id with `agent-device open --platform ios --relaunch` before `wait --connected`. React Native attempts the legacy DevTools websocket during JavaScript startup, so starting DevTools after the first launch can miss that connection attempt. 
- Remote bridge React DevTools assumes the React Native-bundled DevTools behavior in React Native 0.83+. Older browser/Chromium DevTools workflows are not assumed to exist inside remote sandboxes. Expo projects should be verified against the SDK's bundled React Native version before relying on this path; this release does not claim a separately verified Expo SDK version. From 0fc40b42a1a9d32e13a00fa2b28273b3229211e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 10:02:54 +0200 Subject: [PATCH 2/6] test: cover Android localhost reverse edge cases --- src/platforms/android/__tests__/index.test.ts | 90 +++++++++++++++++++ src/platforms/android/app-lifecycle.ts | 5 +- 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/platforms/android/__tests__/index.test.ts b/src/platforms/android/__tests__/index.test.ts index 35af41738..35055db4c 100644 --- a/src/platforms/android/__tests__/index.test.ts +++ b/src/platforms/android/__tests__/index.test.ts @@ -582,6 +582,96 @@ test('openAndroidApp ensures Android reverse before localhost deep link launch', ]); }); +test('openAndroidApp ensures Android reverse before IPv6 localhost deep link launch', async () => { + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + const calls: Array< + { kind: 'exec'; args: string[] } | { kind: 'reverse'; local: string; remote: string } + > = []; + + await withAndroidAdbProvider( + { + exec: async (args) => { + calls.push({ kind: 'exec', args }); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + reverse: { + ensure: async (mapping) => { + calls.push({ kind: 'reverse', local: mapping.local, remote: mapping.remote }); + }, + remove: async () => {}, + removeAllOwned: async () => {}, + }, + }, + { serial: 'emulator-5554' }, + async () => await openAndroidApp(device, 'http://[::1]:8081/status'), + ); + + assert.deepEqual(calls, [ + { kind: 'reverse', 
local: 'tcp:8081', remote: 'tcp:8081' }, + { + kind: 'exec', + args: [ + 'shell', + 'am', + 'start', + '-W', + '-a', + 'android.intent.action.VIEW', + '-d', + 'http://[::1]:8081/status', + ], + }, + ]); +}); + +test('openAndroidApp leaves localhost deep links without a port unchanged', async () => { + const device: DeviceInfo = { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel', + kind: 'emulator', + booted: true, + }; + const calls: string[][] = []; + + await withAndroidAdbProvider( + { + exec: async (args) => { + calls.push(args); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + reverse: { + ensure: async () => { + throw new Error('reverse should not run without a URL port'); + }, + remove: async () => {}, + removeAllOwned: async () => {}, + }, + }, + { serial: 'emulator-5554' }, + async () => await openAndroidApp(device, 'http://localhost/path'), + ); + + assert.deepEqual(calls, [ + [ + 'shell', + 'am', + 'start', + '-W', + '-a', + 'android.intent.action.VIEW', + '-d', + 'http://localhost/path', + ], + ]); +}); + test('openAndroidApp leaves non-localhost deep links unchanged', async () => { const device: DeviceInfo = { platform: 'android', diff --git a/src/platforms/android/app-lifecycle.ts b/src/platforms/android/app-lifecycle.ts index 429b2f9bc..a0df4318b 100644 --- a/src/platforms/android/app-lifecycle.ts +++ b/src/platforms/android/app-lifecycle.ts @@ -41,6 +41,7 @@ const ANDROID_APPS_DISCOVERY_HINT = 'Run agent-device apps --platform android to discover the installed package name, then retry open with that exact package.'; const ANDROID_AMBIGUOUS_APP_HINT = 'Run agent-device apps --platform android to see the exact installed package names before retrying open.'; +const ANDROID_LOCALHOST_HOSTNAMES = new Set(['localhost', '127.0.0.1', '::1', '[::1]']); type AndroidAppResolution = { type: 'intent' | 'package'; value: string }; @@ -231,9 +232,7 @@ function androidLocalhostReverseEndpoint(target: string): AndroidPortReverseEndp } const 
hostname = url.hostname.toLowerCase(); - if (hostname !== 'localhost' && hostname !== '127.0.0.1' && hostname !== '[::1]') { - return null; - } + if (!ANDROID_LOCALHOST_HOSTNAMES.has(hostname)) return null; if (!url.port) return null; const port = Number(url.port); if (!Number.isInteger(port)) return null; From 77f486f34a670cd49fe257fcf133405b0c21093b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 10:05:40 +0200 Subject: [PATCH 3/6] fix: reduce Android reverse error complexity --- src/platforms/android/app-lifecycle.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/platforms/android/app-lifecycle.ts b/src/platforms/android/app-lifecycle.ts index a0df4318b..2e3db407e 100644 --- a/src/platforms/android/app-lifecycle.ts +++ b/src/platforms/android/app-lifecycle.ts @@ -247,17 +247,21 @@ async function ensureAndroidLocalhostReverse(device: DeviceInfo, target: string) try { await reverse.ensure({ local: endpoint, remote: endpoint }); } catch (error) { - const causeDetails = error instanceof AppError ? error.details : undefined; + const details = { + localPort: endpoint.replace('tcp:', ''), + operation: `adb reverse ${endpoint} ${endpoint}`, + }; + if (error instanceof AppError) { + Object.assign(details, { + hint: error.details?.hint, + diagnosticId: error.details?.diagnosticId, + logPath: error.details?.logPath, + }); + } throw new AppError( 'COMMAND_FAILED', `Failed to ensure Android port reverse ${endpoint} before opening localhost URL`, - { - localPort: endpoint.replace('tcp:', ''), - operation: `adb reverse ${endpoint} ${endpoint}`, - ...(causeDetails?.hint ? { hint: causeDetails.hint } : {}), - ...(causeDetails?.diagnosticId ? { diagnosticId: causeDetails.diagnosticId } : {}), - ...(causeDetails?.logPath ? 
{ logPath: causeDetails.logPath } : {}), - }, + details, error, ); } From eeac953d08899e1daa8b623c60d6a5629a419ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 10:13:40 +0200 Subject: [PATCH 4/6] docs: simplify Android Metro reverse guidance --- src/utils/__tests__/args.test.ts | 7 ++++--- src/utils/command-schema.ts | 8 ++++---- test/skillgym/suites/agent-device-smoke-suite.ts | 2 +- website/docs/docs/commands.md | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index b419ccc0f..723cdfd5a 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -861,7 +861,7 @@ test('usage includes agent workflows, config, environment, and examples footers' assert.match(usageText, /agent-facing, token-efficient view for planning and targeting actions/); assert.match(usageText, /Truncated text\/input preview: expand first with snapshot -s @e12/); assert.match(usageText, /React Native apps: read help react-native/); - assert.match(usageText, /localhost URL opens with a port auto-ensure adb reverse/); + assert.match(usageText, /localhost URL opens with a port auto-configure host reachability/); assert.match(usageText, /Expo Go\/dev clients: use the provided URL when given/); assert.match(usageText, /on iOS prefer open "Expo Go" /); assert.match(usageText, /Install flows: install\/install-from-source first/); @@ -968,7 +968,8 @@ test('usageForCommand resolves workflow help topic', () => { assert.match(help, /provider-native text injection when available/); assert.match(help, /Do not switch to raw adb, clipboard, or paste as an agent fallback/); assert.match(help, /if no URL is provided but a target\/app name is provided, open that target/); - assert.match(help, /localhost\/127\.0\.0\.1\/\[::1\] with a port auto-ensure adb reverse/); + assert.match(help, /localhost\/127\.0\.0\.1\/\[::1\] with a port auto-configure/); 
+ assert.match(help, /Manual adb reverse tcp: tcp: is only needed/); assert.match(help, /do not split clear\/restart/); assert.match(help, /do not write network log headers/); assert.match(help, /agent-device open exp:\/\/127\.0\.0\.1:8081 --platform ios/); @@ -1037,7 +1038,7 @@ test('usageForCommand resolves dogfood help topic', () => { assert.match(help, /Static\/on-load issues can use one screenshot/); assert.match(help, /React Native warning\/error overlays can be real findings/); assert.match(help, /Expo Go\/dev-client shells/); - assert.match(help, /localhost\/127\.0\.0\.1\/\[::1\] with a port auto-ensure adb reverse/); + assert.match(help, /direct Android localhost URL opens with a port auto-configure/); assert.match(help, /Keep stateful commands serial within the same session/); assert.match(help, /prefer agent-device open "Expo Go" /); assert.match(help, /dogfood-output\/report\.md/); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 359f3b25c..fea9c6e64 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -169,7 +169,7 @@ const AGENT_QUICKSTART_LINES = [ 'Anti-pattern: snapshot -i followed by snapshot -i | grep ...; prior refs stay valid until app state changes, and --force-full is the explicit full re-read.', 'Truncated text/input preview: expand first with snapshot -s @e12, not get text.', 'React Native apps: read help react-native for Metro, DevTools routing, and RN-specific blockers; use react-native dismiss-overlay for LogBox/RedBox overlays.', - 'Android RN/Expo Metro: direct Android localhost URL opens with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows.', + 'Android RN/Expo Metro: direct Android localhost URL opens with a port auto-configure host reachability.', 'Expo Go/dev clients: use the provided URL when given; on iOS prefer open "Expo Go" ; Android URL opens infer the foreground package for logs/perf when 
possible.', 'Install flows: install/install-from-source first, then open the installed id with --relaunch.', 'Text: fill \'id="field-email"\' "qa@example.com" replaces; type appends after press.', @@ -349,7 +349,7 @@ React Native dev loop: agent-device find "Home" Do not use agent-device reload. Use open --relaunch for native startup reset. React Native apps: use help react-native for Metro/Fast Refresh, DevTools routing, and RN-specific blockers; use react-native dismiss-overlay for LogBox/RedBox overlays. - Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. + Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-configure host reachability. Manual adb reverse tcp: tcp: is only needed for app/package launches or unsupported flows where the app cannot reach local Metro. Expo Go is a host shell. Use a provided project URL instead of inventing a bundle id; if no URL is provided but a target/app name is provided, open that target and do not inspect project files to find one. On iOS, prefer host + URL when the host shell is known because direct URL open can report success while leaving the runner/shell focused; verify with snapshot -i after opening: agent-device open "Expo Go" exp://127.0.0.1:8081 --platform ios agent-device snapshot -i --platform ios @@ -510,7 +510,7 @@ React Native dev loop: agent-device metro reload agent-device find "Home" Do not use agent-device reload. Use open --relaunch for native startup reset. - Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. + Android RN/Expo Metro: direct Android localhost URL opens with a port auto-configure host reachability. 
For app/package launches, use help react-native if the app cannot reach local Metro. Expo Go/dev clients are host shells. Use provided project URLs, verify with snapshot -i after opening, and ask instead of inventing app ids or URLs. Help workflow owns the full Expo URL command shapes. Overlays and busy RN UIs: @@ -619,7 +619,7 @@ Coverage: Navigation, forms, empty/error/loading states, offline or retry behavior, permissions, settings, accessibility labels, orientation/keyboard, and obvious performance stalls. React Native warning/error overlays can be real findings or test blockers. Capture them, use react-native dismiss-overlay if unrelated, re-snapshot, and report them. Expo Go/dev-client shells: use the provided exp:// or dev-client URL and record whether the shell, project load, or app UI is being tested. On iOS dogfood, prefer agent-device open "Expo Go" when Expo Go is the known shell, then snapshot -i to confirm the project UI rather than the runner splash. - Android RN/Expo Metro: direct Android URL opens to localhost/127.0.0.1/[::1] with a port auto-ensure adb reverse when supported; manual adb reverse tcp: tcp: remains harmless before app launches or unsupported flows. + Android RN/Expo Metro: direct Android localhost URL opens with a port auto-configure host reachability. Categories: visual, functional, UX, content, performance, diagnostics, permissions, accessibility. Severity: critical blocks a core flow/data/crashes; high breaks a major feature; medium has friction or workaround; low is polish. 
diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index efb353d81..d28d69dd1 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -1176,7 +1176,7 @@ const SKILL_GUIDANCE_CASES: Case[] = [ 'Launch context: Expo Go because the user provided an exp:// project URL', 'Local Metro port: 8082', 'Project URL: exp://127.0.0.1:8082', - 'Direct Android localhost URL opens auto-ensure adb reverse when supported', + 'Direct Android localhost URL opens auto-configure host reachability', 'Do not assume every React Native app is Expo; this one is Expo only because an exp:// URL was provided', ], task: 'Plan the commands to open the Android Expo project URL on local Metro port 8082 and verify the app UI with an interactive snapshot.', diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index e8f910c07..faafa0112 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -603,7 +603,8 @@ agent-device react-devtools profile report @c5 - Keep using `snapshot`, `press`, `fill`, `logs`, `network`, and `perf` for device/app runtime evidence. Use `react-devtools` for React internals. - For React Native apps, overlays, Metro/Fast Refresh blockers, and routing to React DevTools or debugging evidence, start with `agent-device help react-native`. - On Android, use `alert get`, `alert wait `, `alert accept`, and `alert dismiss` for runtime permission prompts and native alerts. On iOS, use the same alert commands for XCTest alerts, app-owned modal popups with native blocking markers, and blocking system dialogs. Do not use `settings permission` to answer a dialog already on screen; reserve it for setup or resetting permission state before a flow. -- React Native development builds can connect to the DevTools daemon on port 8097. 
For Android emulators or physical devices, run `adb reverse tcp:8097 tcp:8097` if the app cannot reach the host. Direct Android `open` URL targets for local Metro hosts with a port auto-ensure the matching reverse when supported; for app launches or unsupported flows, `adb reverse tcp:8081 tcp:8081` remains harmless. +- React Native development builds can connect to the DevTools daemon on port 8097. For Android emulators or physical devices, run `adb reverse tcp:8097 tcp:8097` if the app cannot reach the host. +- Direct Android `open` URL targets for local Metro hosts with a port auto-configure host reachability. For app/package launches or unsupported flows, run `adb reverse tcp:8081 tcp:8081` if the app cannot reach local Metro. - For Android and iOS sessions connected through a remote bridge profile, `react-devtools` registers a lease-scoped companion tunnel to the sandbox-local DevTools daemon at `127.0.0.1:8097`. Android bridge profiles use the bridge-owned remote `adb reverse` mapping; iOS bridge profiles use the bridge-owned wildcard Metro host tunnel. The CLI keeps the companion alive until `agent-device react-devtools stop` or `agent-device disconnect`. - For remote iOS bridge sessions, open the app once to create the bridge session, run `agent-device react-devtools start`, then relaunch the same bundle id with `agent-device open --platform ios --relaunch` before `wait --connected`. React Native attempts the legacy DevTools websocket during JavaScript startup, so starting DevTools after the first launch can miss that connection attempt. - Remote bridge React DevTools assumes the React Native-bundled DevTools behavior in React Native 0.83+. Older browser/Chromium DevTools workflows are not assumed to exist inside remote sandboxes. Expo projects should be verified against the SDK's bundled React Native version before relying on this path; this release does not claim a separately verified Expo SDK version. 
From f62197c4ec22b15200ebbd6cf453119e692da96c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 10:19:28 +0200 Subject: [PATCH 5/6] docs: prefer scripted SkillGym checks --- AGENTS.md | 8 +++++--- test/skillgym/README.md | 33 +++++++++++---------------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 414db0449..5419d7523 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -191,10 +191,12 @@ Command-only flags (like `find --first`) that don't flow to the platform layer o ## Testing Matrix - Docs/skills only: no tests required unless a more specific rule below applies. - CLI help/guidance changes in `src/utils/command-schema.ts`: run `pnpm exec vitest run src/utils/__tests__/args.test.ts`. -- SkillGym prompt/assertion changes: run the touched `--case` checks. For broad validation, use `pnpm test:skillgym`; use `--tag fixture-smoke` or `--tag skill-guidance` when validating one suite group. +- SkillGym prompt/assertion changes: run `pnpm test:skillgym:case <case-id>`; the script builds local CLI help first. For broad validation, use `pnpm test:skillgym`; append `-- --tag fixture-smoke` or `-- --tag skill-guidance` when validating one suite group. - Non-TS, no behavior impact: no tests unless requested. - Keep tests behavioral; do not assert shapes or cases TypeScript already proves. - Any TS change: `pnpm typecheck` or `pnpm check:quick`. +- Fallow CI failures: reproduce with `pnpm check:fallow --base origin/main` instead of manually estimating complexity/dead-code impact. +- Test-only DI seam CI failures: the workflow enforces this; do not add optional `typeof` DI params in production code. - Tooling/config change (`package.json`, `tsconfig*.json`, `.oxlintrc.json`, `.oxfmtrc.json`): `pnpm check:tooling`. - Daemon handler/shared module change: `pnpm check:unit`. - iOS runner/Swift change: `pnpm build:xcuitest`. 
@@ -226,8 +228,8 @@ Command-only flags (like `find --first`) that don't flow to the platform layer o - For behavior/CLI surface changes and command-planning guidance changes, write or update a SkillGym case in `test/skillgym/suites/agent-device-smoke-suite.ts` that captures the expected agent command plan. - Do not update `skills/**/SKILL.md` for command behavior or workflow guidance unless the user explicitly asks; skills must route to versioned CLI help instead of carrying behavior details. - Keep SkillGym cases behavioral and command-planning oriented. Prefer prompts that assert the user-visible contract and expected command family over brittle exact output, but forbid known bad patterns. -- Build before SkillGym when local CLI help is needed: `pnpm build`, then `pnpm exec skillgym run ... --case <case-id>`. -- Run SkillGym broad validation with `pnpm test:skillgym`; use v0.8 `--tag` filters for focused suite groups. +- Use `pnpm test:skillgym:case <case-id>` for focused SkillGym validation; it runs the environment guard and builds local CLI help before `skillgym run`. +- Run SkillGym broad validation with `pnpm test:skillgym`; append v0.8 filters such as `-- --tag fixture-smoke` for focused suite groups. - Preserve current high-value workflow guidance: - iOS Expo Go dogfood: prefer `agent-device open "Expo Go" --platform ios` when the shell is known, then `snapshot -i` to confirm the project UI rather than the runner splash. - `keyboard dismiss` is the preferred iOS keyboard-dismissal path before manually pressing visible keyboard controls such as `Done`; it remains best-effort and can report unsupported layouts explicitly. 
diff --git a/test/skillgym/README.md b/test/skillgym/README.md index 46c85d3b7..d20298cb2 100644 --- a/test/skillgym/README.md +++ b/test/skillgym/README.md @@ -70,41 +70,30 @@ pnpm install pnpm test:skillgym ``` -If you want to run `skillgym` directly instead of using the convenience script, build the local CLI first so agents can call `node bin/agent-device.mjs help workflow`: +Prefer the package scripts so the environment guard and local CLI build run consistently: ```bash cd /absolute/path/to/agent-device -pnpm build -pnpm exec skillgym run \ - ./test/skillgym/suites/agent-device-smoke-suite.ts \ - --config ./test/skillgym/skillgym.config.ts +pnpm test:skillgym +pnpm test:skillgym:case open-and-snapshot ``` Useful v0.8 filters, reporters, and recovery options: ```bash -pnpm build -pnpm exec skillgym run \ - ./test/skillgym/suites/agent-device-smoke-suite.ts \ - --config ./test/skillgym/skillgym.config.ts \ - --tag fixture-smoke +pnpm test:skillgym -- --tag fixture-smoke +pnpm test:skillgym -- --reporter json +pnpm test:skillgym -- --repeat 3 --repeat-failure 1 +``` -pnpm exec skillgym run \ - ./test/skillgym/suites/agent-device-smoke-suite.ts \ - --config ./test/skillgym/skillgym.config.ts \ - --reporter json +If you need to run `skillgym` directly while developing the runner itself, build first so agents can call `node bin/agent-device.mjs help workflow`: +```bash +pnpm build pnpm exec skillgym run \ ./test/skillgym/suites/agent-device-smoke-suite.ts \ --config ./test/skillgym/skillgym.config.ts \ - --repeat 3 \ - --repeat-failure 1 -``` - -To run one case across the configured Codex and Claude runners: - -```bash -pnpm test:skillgym:case open-and-snapshot + --case open-and-snapshot ``` Use `--reporter github-actions` in CI when you want annotations in GitHub Actions logs. 
From 527b3f3065dc647ceea863d4ff89b20acfdd5961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 26 May 2026 10:36:09 +0200 Subject: [PATCH 6/6] test: clarify Android Metro SkillGym plan --- test/skillgym/suites/agent-device-smoke-suite.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index d28d69dd1..a6f18c3ff 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -1177,9 +1177,11 @@ const SKILL_GUIDANCE_CASES: Case[] = [ 'Local Metro port: 8082', 'Project URL: exp://127.0.0.1:8082', 'Direct Android localhost URL opens auto-configure host reachability', + 'On Android, open the URL target directly; do not use the iOS host-plus-URL form with "Expo Go"', + 'Every agent-device command must target Android explicitly with --platform android', 'Do not assume every React Native app is Expo; this one is Expo only because an exp:// URL was provided', ], - task: 'Plan the commands to open the Android Expo project URL on local Metro port 8082 and verify the app UI with an interactive snapshot.', + task: 'Plan the explicit Android direct-URL commands to open the Expo project URL on local Metro port 8082 and verify the app UI with an interactive snapshot.', outputs: [ plannedCommand('open'), /exp:\/\/127\.0\.0\.1:8082/i,