diff --git a/.github/workflows/ios.yml b/.github/workflows/ios.yml index 671de884f..2e48edc78 100644 --- a/.github/workflows/ios.yml +++ b/.github/workflows/ios.yml @@ -52,37 +52,13 @@ jobs: - name: Build iOS integration artifacts run: pnpm build:xcuitest - - name: Wait for iOS simulator boot + - name: Boot preflight via agent-device run: | set -euo pipefail - - wait_boot() { - local deadline=$(( $(date +%s) + 180 )) - while [ "$(date +%s)" -lt "$deadline" ]; do - if xcrun simctl list devices "$IOS_UDID" | grep -q "(Booted)"; then - return 0 - fi - sleep 2 - done - return 1 - } - - if wait_boot; then - exit 0 - fi - - echo "Initial simulator boot wait timed out; retrying boot once..." - xcrun simctl shutdown "$IOS_UDID" || true - xcrun simctl boot "$IOS_UDID" || true - - if wait_boot; then - exit 0 - fi - - echo "Simulator failed to become ready after retry. Collecting diagnostics..." - xcrun simctl list devices || true - xcrun simctl list runtimes || true - exit 1 + node --experimental-strip-types src/bin.ts boot --platform ios --udid "$IOS_UDID" --json + env: + AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS: "180000" + AGENT_DEVICE_RETRY_LOGS: "1" - name: Run iOS integration test env: diff --git a/.gitignore b/.gitignore index 4ccc68e7e..dc5f8d103 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ dist/ .DS_Store *.log test/screenshots/*.png +test/artifacts/ .build/ .swiftpm/ DerivedData/ diff --git a/README.md b/README.md index 13b95127f..2600d08ad 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Coordinates: - X increases to the right, Y increases downward. ## Command Index -- `open`, `close`, `home`, `back`, `app-switcher` +- `boot`, `open`, `close`, `home`, `back`, `app-switcher` - `snapshot`, `find`, `get` - `click`, `focus`, `type`, `fill`, `press`, `long-press`, `scroll`, `scrollintoview`, `is` - `alert`, `wait`, `screenshot` @@ -186,8 +186,10 @@ App state: Boot diagnostics: - Boot failures include normalized reason codes in `error.details.reason` (JSON mode) and verbose logs. -- Reason codes: `BOOT_TIMEOUT`, `DEVICE_UNAVAILABLE`, `DEVICE_OFFLINE`, `PERMISSION_DENIED`, `TOOL_MISSING`, `BOOT_COMMAND_FAILED`, `UNKNOWN`. +- Reason codes: `IOS_BOOT_TIMEOUT`, `IOS_RUNNER_CONNECT_TIMEOUT`, `ANDROID_BOOT_TIMEOUT`, `ADB_TRANSPORT_UNAVAILABLE`, `CI_RESOURCE_STARVATION_SUSPECTED`, `BOOT_COMMAND_FAILED`, `UNKNOWN`. - Android boot waits fail fast for permission/tooling issues and do not always collapse into timeout errors. +- Use `agent-device boot --platform ios|android` for explicit CI preflight readiness checks. +- Set `AGENT_DEVICE_RETRY_LOGS=1` to print structured retry telemetry (attempt, phase, delay, elapsed/remaining deadline, reason). ## App resolution - Bundle/package identifiers are accepted directly (e.g., `com.apple.Preferences`). diff --git a/src/cli.ts b/src/cli.ts index c39f3e351..df22bc21a 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -105,6 +105,13 @@ export async function runCli(argv: string[]): Promise { if (logTailStopper) logTailStopper(); return; } + if (command === 'boot') { + const platform = (response.data as any)?.platform ?? 'unknown'; + const device = (response.data as any)?.device ?? (response.data as any)?.id ?? 'unknown'; + process.stdout.write(`Boot ready: ${device} (${platform})\n`); + if (logTailStopper) logTailStopper(); + return; + } if (command === 'click') { const ref = (response.data as any)?.ref ?? ''; const x = (response.data as any)?.x; diff --git a/src/core/__tests__/capabilities.test.ts b/src/core/__tests__/capabilities.test.ts index bc7628fcb..5fb17506e 100644 --- a/src/core/__tests__/capabilities.test.ts +++ b/src/core/__tests__/capabilities.test.ts @@ -37,6 +37,7 @@ test('iOS simulator + Android commands reject iOS devices', () => { 'app-switcher', 'apps', 'back', + 'boot', 'click', 'close', 'fill', diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index 631be4921..199ffe90d 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -19,6 +19,7 @@ const COMMAND_CAPABILITY_MATRIX: Record = { 'app-switcher': { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, apps: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, back: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, + boot: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, click: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, close: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, fill: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } }, diff --git a/src/daemon/handlers/__tests__/session.test.ts b/src/daemon/handlers/__tests__/session.test.ts new file mode 100644 index 000000000..e9ebe5ac4 --- /dev/null +++ b/src/daemon/handlers/__tests__/session.test.ts @@ -0,0 +1,122 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { handleSessionCommands } from '../session.ts'; +import { SessionStore } from '../../session-store.ts'; +import type { DaemonRequest, DaemonResponse, SessionState } from '../../types.ts'; + +function makeSessionStore(): SessionStore { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-session-handler-')); + return new SessionStore(path.join(root, 'sessions')); +} + +function makeSession(name: string, device: SessionState['device']): SessionState { + return { + name, + device, + createdAt: Date.now(), + actions: [], + }; +} + +const noopInvoke = async (_req: DaemonRequest): Promise => ({ ok: true, data: {} }); + +test('boot requires session or explicit selector', async () => { + const sessionStore = makeSessionStore(); + const response = await handleSessionCommands({ + req: { + token: 't', + session: 'default', + command: 'boot', + positionals: [], + flags: {}, + }, + sessionName: 'default', + logPath: path.join(os.tmpdir(), 'daemon.log'), + sessionStore, + invoke: noopInvoke, + ensureReady: async () => {}, + }); + assert.ok(response); + assert.equal(response?.ok, false); + if (response && !response.ok) { + assert.equal(response.error.code, 'INVALID_ARGS'); + } +}); + +test('boot rejects unsupported iOS device kind', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'ios-device-session'; + sessionStore.set( + sessionName, + makeSession(sessionName, { + platform: 'ios', + id: 'ios-device-1', + name: 'iPhone Device', + kind: 'device', + booted: true, + }), + ); + const response = await handleSessionCommands({ + req: { + token: 't', + session: sessionName, + command: 'boot', + positionals: [], + flags: {}, + }, + sessionName, + logPath: path.join(os.tmpdir(), 'daemon.log'), + sessionStore, + invoke: noopInvoke, + ensureReady: async () => { + throw new Error('ensureReady should not be called for unsupported boot'); + }, + }); + assert.ok(response); + assert.equal(response?.ok, false); + if (response && !response.ok) { + assert.equal(response.error.code, 'UNSUPPORTED_OPERATION'); + } +}); + +test('boot succeeds for supported device in session', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'android-session'; + sessionStore.set( + sessionName, + makeSession(sessionName, { + platform: 'android', + id: 'emulator-5554', + name: 'Pixel Emulator', + kind: 'emulator', + booted: true, + }), + ); + let ensureCalls = 0; + const response = await handleSessionCommands({ + req: { + token: 't', + session: sessionName, + command: 'boot', + positionals: [], + flags: {}, + }, + sessionName, + logPath: path.join(os.tmpdir(), 'daemon.log'), + sessionStore, + invoke: noopInvoke, + ensureReady: async () => { + ensureCalls += 1; + }, + }); + assert.ok(response); + assert.equal(response?.ok, true); + assert.equal(ensureCalls, 1); + if (response && response.ok) { + assert.equal(response.data?.platform, 'android'); + assert.equal(response.data?.booted, true); + } +}); diff --git a/src/daemon/handlers/session.ts b/src/daemon/handlers/session.ts index ca89efbf7..b81d6ab8a 100644 --- a/src/daemon/handlers/session.ts +++ b/src/daemon/handlers/session.ts @@ -21,9 +21,19 @@ export async function handleSessionCommands(params: { sessionStore: SessionStore; invoke: (req: DaemonRequest) => Promise; dispatch?: typeof dispatchCommand; + ensureReady?: typeof ensureDeviceReady; }): Promise { - const { req, sessionName, logPath, sessionStore, invoke, dispatch: dispatchOverride } = params; + const { + req, + sessionName, + logPath, + sessionStore, + invoke, + dispatch: dispatchOverride, + ensureReady: ensureReadyOverride, + } = params; const dispatch = dispatchOverride ?? dispatchCommand; + const ensureReady = ensureReadyOverride ?? ensureDeviceReady; const command = req.command; if (command === 'session_list') { @@ -82,7 +92,7 @@ export async function handleSessionCommands(params: { }; } const device = session?.device ?? (await resolveTargetDevice(flags)); - await ensureDeviceReady(device); + await ensureReady(device); if (!isCommandSupportedOnDevice('apps', device)) { return { ok: false, error: { code: 'UNSUPPORTED_OPERATION', message: 'apps is not supported on this device' } }; } @@ -106,11 +116,40 @@ export async function handleSessionCommands(params: { return { ok: true, data: { apps } }; } + if (command === 'boot') { + const session = sessionStore.get(sessionName); + const flags = req.flags ?? {}; + if (!session && !flags.platform && !flags.device && !flags.udid && !flags.serial) { + return { + ok: false, + error: { + code: 'INVALID_ARGS', + message: 'boot requires an active session or an explicit device selector (e.g. --platform ios).', + }, + }; + } + const device = session?.device ?? (await resolveTargetDevice(flags)); + if (!isCommandSupportedOnDevice('boot', device)) { + return { ok: false, error: { code: 'UNSUPPORTED_OPERATION', message: 'boot is not supported on this device' } }; + } + await ensureReady(device); + return { + ok: true, + data: { + platform: device.platform, + device: device.name, + id: device.id, + kind: device.kind, + booted: true, + }, + }; + } + if (command === 'appstate') { const session = sessionStore.get(sessionName); const flags = req.flags ?? {}; const device = session?.device ?? (await resolveTargetDevice(flags)); - await ensureDeviceReady(device); + await ensureReady(device); if (device.platform === 'ios') { if (session?.appBundleId) { return { diff --git a/src/platforms/__tests__/boot-diagnostics.test.ts b/src/platforms/__tests__/boot-diagnostics.test.ts index 5a46f12da..518668d55 100644 --- a/src/platforms/__tests__/boot-diagnostics.test.ts +++ b/src/platforms/__tests__/boot-diagnostics.test.ts @@ -1,23 +1,30 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { classifyBootFailure } from '../boot-diagnostics.ts'; +import { bootFailureHint, classifyBootFailure } from '../boot-diagnostics.ts'; import { AppError } from '../../utils/errors.ts'; test('classifyBootFailure maps timeout errors', () => { - const reason = classifyBootFailure({ message: 'bootstatus timed out after 120s' }); - assert.equal(reason, 'BOOT_TIMEOUT'); + const reason = classifyBootFailure({ + message: 'bootstatus timed out after 120s', + context: { platform: 'ios', phase: 'boot' }, + }); + assert.equal(reason, 'IOS_BOOT_TIMEOUT'); }); test('classifyBootFailure maps adb offline errors', () => { - const reason = classifyBootFailure({ stderr: 'error: device offline' }); - assert.equal(reason, 'DEVICE_OFFLINE'); + const reason = classifyBootFailure({ + stderr: 'error: device offline', + context: { platform: 'android', phase: 'transport' }, + }); + assert.equal(reason, 'ADB_TRANSPORT_UNAVAILABLE'); }); test('classifyBootFailure maps tool missing from AppError code', () => { const reason = classifyBootFailure({ error: new AppError('TOOL_MISSING', 'adb not found in PATH'), + context: { platform: 'android', phase: 'transport' }, }); - assert.equal(reason, 'TOOL_MISSING'); + assert.equal(reason, 'ADB_TRANSPORT_UNAVAILABLE'); }); test('classifyBootFailure reads stderr from AppError details', () => { @@ -25,6 +32,20 @@ test('classifyBootFailure reads stderr from AppError details', () => { error: new AppError('COMMAND_FAILED', 'adb failed', { stderr: 'error: device unauthorized', }), + context: { platform: 'android', phase: 'transport' }, + }); + assert.equal(reason, 'ADB_TRANSPORT_UNAVAILABLE'); +}); + +test('bootFailureHint returns actionable guidance', () => { + const hint = bootFailureHint('IOS_RUNNER_CONNECT_TIMEOUT'); + assert.equal(hint.includes('xcodebuild logs'), true); +}); + +test('connect phase does not classify non-timeout errors as connect timeout', () => { + const reason = classifyBootFailure({ + message: 'Runner returned malformed JSON payload', + context: { platform: 'ios', phase: 'connect' }, }); - assert.equal(reason, 'PERMISSION_DENIED'); + assert.equal(reason, 'BOOT_COMMAND_FAILED'); }); diff --git a/src/platforms/android/devices.ts b/src/platforms/android/devices.ts index 2752b54aa..31cb73aa3 100644 --- a/src/platforms/android/devices.ts +++ b/src/platforms/android/devices.ts @@ -2,11 +2,14 @@ import { runCmd, whichCmd } from '../../utils/exec.ts'; import type { ExecResult } from '../../utils/exec.ts'; import { AppError, asAppError } from '../../utils/errors.ts'; import type { DeviceInfo } from '../../utils/device.ts'; -import { Deadline, retryWithPolicy } from '../../utils/retry.ts'; -import { classifyBootFailure } from '../boot-diagnostics.ts'; +import { Deadline, retryWithPolicy, type RetryTelemetryEvent } from '../../utils/retry.ts'; +import { bootFailureHint, classifyBootFailure } from '../boot-diagnostics.ts'; const EMULATOR_SERIAL_PREFIX = 'emulator-'; const ANDROID_BOOT_POLL_MS = 1000; +const RETRY_LOGS_ENABLED = ['1', 'true', 'yes', 'on'].includes( + (process.env.AGENT_DEVICE_RETRY_LOGS ?? '').toLowerCase(), +); function adbArgs(serial: string, args: string[]): string[] { return ['-s', serial, ...args]; @@ -79,8 +82,9 @@ export async function isAndroidBooted(serial: string): Promise { } export async function waitForAndroidBoot(serial: string, timeoutMs = 60000): Promise { - const deadline = Deadline.fromTimeoutMs(timeoutMs); - const maxAttempts = Math.max(1, Math.ceil(timeoutMs / ANDROID_BOOT_POLL_MS)); + const timeoutBudget = timeoutMs; + const deadline = Deadline.fromTimeoutMs(timeoutBudget); + const maxAttempts = Math.max(1, Math.ceil(timeoutBudget / ANDROID_BOOT_POLL_MS)); let lastBootResult: ExecResult | undefined; let timedOut = false; try { @@ -115,41 +119,61 @@ export async function waitForAndroidBoot(serial: string, timeoutMs = 60000): Pro error, stdout: lastBootResult?.stdout, stderr: lastBootResult?.stderr, + context: { platform: 'android', phase: 'boot' }, }); - return reason !== 'PERMISSION_DENIED' && reason !== 'TOOL_MISSING' && reason !== 'BOOT_TIMEOUT'; + return reason !== 'ADB_TRANSPORT_UNAVAILABLE' && reason !== 'ANDROID_BOOT_TIMEOUT'; + }, + }, + { + deadline, + phase: 'boot', + classifyReason: (error) => + classifyBootFailure({ + error, + stdout: lastBootResult?.stdout, + stderr: lastBootResult?.stderr, + context: { platform: 'android', phase: 'boot' }, + }), + onEvent: (event: RetryTelemetryEvent) => { + if (!RETRY_LOGS_ENABLED) return; + process.stderr.write(`[agent-device][retry] ${JSON.stringify(event)}\n`); }, }, - { deadline }, ); } catch (error) { const appErr = asAppError(error); const stdout = lastBootResult?.stdout; const stderr = lastBootResult?.stderr; const exitCode = lastBootResult?.exitCode; - const reason = classifyBootFailure({ + let reason = classifyBootFailure({ error, stdout, stderr, + context: { platform: 'android', phase: 'boot' }, }); + if (reason === 'BOOT_COMMAND_FAILED' && appErr.message === 'Android device is still booting') { + reason = 'ANDROID_BOOT_TIMEOUT'; + } const baseDetails = { serial, - timeoutMs, + timeoutMs: timeoutBudget, elapsedMs: deadline.elapsedMs(), reason, + hint: bootFailureHint(reason), stdout, stderr, exitCode, }; - if (timedOut || reason === 'BOOT_TIMEOUT') { + if (timedOut || reason === 'ANDROID_BOOT_TIMEOUT') { throw new AppError('COMMAND_FAILED', 'Android device did not finish booting in time', baseDetails); } - if (appErr.code === 'TOOL_MISSING' || reason === 'TOOL_MISSING') { + if (appErr.code === 'TOOL_MISSING') { throw new AppError('TOOL_MISSING', appErr.message, { ...baseDetails, ...(appErr.details ?? {}), }); } - if (reason === 'PERMISSION_DENIED' || reason === 'DEVICE_UNAVAILABLE' || reason === 'DEVICE_OFFLINE') { + if (reason === 'ADB_TRANSPORT_UNAVAILABLE') { throw new AppError('COMMAND_FAILED', appErr.message, { ...baseDetails, ...(appErr.details ?? {}), diff --git a/src/platforms/boot-diagnostics.ts b/src/platforms/boot-diagnostics.ts index 7dd592d76..c18140c5e 100644 --- a/src/platforms/boot-diagnostics.ts +++ b/src/platforms/boot-diagnostics.ts @@ -1,22 +1,32 @@ import { asAppError } from '../utils/errors.ts'; export type BootFailureReason = - | 'BOOT_TIMEOUT' - | 'DEVICE_UNAVAILABLE' - | 'DEVICE_OFFLINE' - | 'PERMISSION_DENIED' - | 'TOOL_MISSING' + | 'IOS_BOOT_TIMEOUT' + | 'IOS_RUNNER_CONNECT_TIMEOUT' + | 'ANDROID_BOOT_TIMEOUT' + | 'ADB_TRANSPORT_UNAVAILABLE' + | 'CI_RESOURCE_STARVATION_SUSPECTED' | 'BOOT_COMMAND_FAILED' | 'UNKNOWN'; +type BootDiagnosticContext = { + platform?: 'ios' | 'android'; + phase?: 'boot' | 'connect' | 'transport'; +}; + export function classifyBootFailure(input: { error?: unknown; message?: string; stdout?: string; stderr?: string; + context?: BootDiagnosticContext; }): BootFailureReason { const appErr = input.error ? asAppError(input.error) : null; - if (appErr?.code === 'TOOL_MISSING') return 'TOOL_MISSING'; + const platform = input.context?.platform; + const phase = input.context?.phase; + if (appErr?.code === 'TOOL_MISSING') { + return platform === 'android' ? 'ADB_TRANSPORT_UNAVAILABLE' : 'BOOT_COMMAND_FAILED'; + } const details = (appErr?.details ?? {}) as Record; const detailMessage = typeof details.message === 'string' ? details.message : undefined; const detailStdout = typeof details.stdout === 'string' ? details.stdout : undefined; @@ -45,23 +55,71 @@ export function classifyBootFailure(input: { .join('\n') .toLowerCase(); - if (haystack.includes('timed out') || haystack.includes('timeout')) return 'BOOT_TIMEOUT'; if ( - haystack.includes('device not found') || - haystack.includes('no devices') || - haystack.includes('unable to locate device') || - haystack.includes('invalid device') + platform === 'ios' && + ( + haystack.includes('runner did not accept connection') || + (phase === 'connect' && + ( + haystack.includes('timed out') || + haystack.includes('timeout') || + haystack.includes('econnrefused') || + haystack.includes('connection refused') || + haystack.includes('fetch failed') || + haystack.includes('socket hang up') + )) + ) + ) { + return 'IOS_RUNNER_CONNECT_TIMEOUT'; + } + if (platform === 'ios' && phase === 'boot' && (haystack.includes('timed out') || haystack.includes('timeout'))) { + return 'IOS_BOOT_TIMEOUT'; + } + if (platform === 'android' && phase === 'boot' && (haystack.includes('timed out') || haystack.includes('timeout'))) { + return 'ANDROID_BOOT_TIMEOUT'; + } + if ( + haystack.includes('resource temporarily unavailable') || + haystack.includes('killed: 9') || + haystack.includes('cannot allocate memory') || + haystack.includes('system is low on memory') ) { - return 'DEVICE_UNAVAILABLE'; + return 'CI_RESOURCE_STARVATION_SUSPECTED'; } - if (haystack.includes('offline')) return 'DEVICE_OFFLINE'; if ( - haystack.includes('permission denied') || - haystack.includes('not authorized') || - haystack.includes('unauthorized') + platform === 'android' && + ( + haystack.includes('device not found') || + haystack.includes('no devices') || + haystack.includes('device offline') || + haystack.includes('offline') || + haystack.includes('unauthorized') || + haystack.includes('not authorized') || + haystack.includes('unable to locate device') || + haystack.includes('invalid device') + ) ) { - return 'PERMISSION_DENIED'; + return 'ADB_TRANSPORT_UNAVAILABLE'; } if (appErr?.code === 'COMMAND_FAILED' || haystack.length > 0) return 'BOOT_COMMAND_FAILED'; return 'UNKNOWN'; } + +export function bootFailureHint(reason: BootFailureReason): string { + switch (reason) { + case 'IOS_BOOT_TIMEOUT': + return 'Retry simulator boot and inspect simctl bootstatus logs; in CI consider increasing AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS.'; + case 'IOS_RUNNER_CONNECT_TIMEOUT': + return 'Retry runner startup, inspect xcodebuild logs, and verify simulator responsiveness before command execution.'; + case 'ANDROID_BOOT_TIMEOUT': + return 'Retry emulator startup and verify sys.boot_completed reaches 1; consider increasing startup budget in CI.'; + case 'ADB_TRANSPORT_UNAVAILABLE': + return 'Check adb server/device transport (adb devices -l), restart adb, and ensure the target device is online and authorized.'; + case 'CI_RESOURCE_STARVATION_SUSPECTED': + return 'CI machine may be resource constrained; reduce parallel jobs or use a larger runner.'; + case 'BOOT_COMMAND_FAILED': + return 'Inspect command stderr/stdout for the failing boot phase and retry after environment validation.'; + default: + return 'Retry once and inspect verbose logs for the failing phase.'; + } +} diff --git a/src/platforms/ios/index.ts b/src/platforms/ios/index.ts index 92ef996ae..0688e6561 100644 --- a/src/platforms/ios/index.ts +++ b/src/platforms/ios/index.ts @@ -2,14 +2,21 @@ import { runCmd } from '../../utils/exec.ts'; import type { ExecResult } from '../../utils/exec.ts'; import { AppError } from '../../utils/errors.ts'; import type { DeviceInfo } from '../../utils/device.ts'; -import { Deadline, retryWithPolicy } from '../../utils/retry.ts'; -import { classifyBootFailure } from '../boot-diagnostics.ts'; +import { Deadline, retryWithPolicy, TIMEOUT_PROFILES, type RetryTelemetryEvent } from '../../utils/retry.ts'; +import { bootFailureHint, classifyBootFailure } from '../boot-diagnostics.ts'; const ALIASES: Record = { settings: 'com.apple.Preferences', }; -const IOS_BOOT_TIMEOUT_MS = resolveTimeoutMs(process.env.AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS, 120_000, 5_000); +const IOS_BOOT_TIMEOUT_MS = resolveTimeoutMs( + process.env.AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS, + TIMEOUT_PROFILES.ios_boot.totalMs, + 5_000, +); +const RETRY_LOGS_ENABLED = ['1', 'true', 'yes', 'on'].includes( + (process.env.AGENT_DEVICE_RETRY_LOGS ?? '').toLowerCase(), +); export async function resolveIosApp(device: DeviceInfo, app: string): Promise { const trimmed = app.trim(); @@ -218,8 +225,6 @@ export async function ensureBootedSimulator(device: DeviceInfo): Promise { try { await retryWithPolicy( async () => { - const currentState = await getSimulatorState(device.id); - if (currentState === 'Booted') return; bootResult = await runCmd('xcrun', ['simctl', 'boot', device.id], { allowFailure: true }); const bootOutput = `${bootResult.stdout}\n${bootResult.stderr}`.toLowerCase(); const bootAlreadyDone = @@ -258,11 +263,26 @@ export async function ensureBootedSimulator(device: DeviceInfo): Promise { error, stdout: bootStatusResult?.stdout ?? bootResult?.stdout, stderr: bootStatusResult?.stderr ?? bootResult?.stderr, + context: { platform: 'ios', phase: 'boot' }, }); - return reason !== 'PERMISSION_DENIED' && reason !== 'TOOL_MISSING'; + return reason !== 'IOS_BOOT_TIMEOUT' && reason !== 'CI_RESOURCE_STARVATION_SUSPECTED'; + }, + }, + { + deadline, + phase: 'boot', + classifyReason: (error) => + classifyBootFailure({ + error, + stdout: bootStatusResult?.stdout ?? bootResult?.stdout, + stderr: bootStatusResult?.stderr ?? bootResult?.stderr, + context: { platform: 'ios', phase: 'boot' }, + }), + onEvent: (event: RetryTelemetryEvent) => { + if (!RETRY_LOGS_ENABLED) return; + process.stderr.write(`[agent-device][retry] ${JSON.stringify(event)}\n`); }, }, - { deadline }, ); } catch (error) { const bootStdout = bootResult?.stdout; @@ -275,6 +295,7 @@ export async function ensureBootedSimulator(device: DeviceInfo): Promise { error, stdout: bootstatusStdout ?? bootStdout, stderr: bootstatusStderr ?? bootStderr, + context: { platform: 'ios', phase: 'boot' }, }); throw new AppError('COMMAND_FAILED', 'iOS simulator failed to boot', { platform: 'ios', @@ -282,6 +303,7 @@ export async function ensureBootedSimulator(device: DeviceInfo): Promise { timeoutMs: IOS_BOOT_TIMEOUT_MS, elapsedMs: deadline.elapsedMs(), reason, + hint: bootFailureHint(reason), boot: bootResult ? { exitCode: bootExitCode, stdout: bootStdout, stderr: bootStderr } : undefined, diff --git a/src/platforms/ios/runner-client.ts b/src/platforms/ios/runner-client.ts index 35663f79e..8c8cedd5c 100644 --- a/src/platforms/ios/runner-client.ts +++ b/src/platforms/ios/runner-client.ts @@ -7,6 +7,7 @@ import { runCmd, runCmdStreaming, runCmdBackground, type ExecResult, type ExecBa import { withRetry } from '../../utils/retry.ts'; import type { DeviceInfo } from '../../utils/device.ts'; import net from 'node:net'; +import { bootFailureHint, classifyBootFailure } from '../boot-diagnostics.ts'; export type RunnerCommand = { command: @@ -449,6 +450,12 @@ async function waitForRunner( port, logPath, lastError: lastError ? String(lastError) : undefined, + reason: classifyBootFailure({ + error: lastError, + message: 'Runner did not accept connection', + context: { platform: 'ios', phase: 'connect' }, + }), + hint: bootFailureHint('IOS_RUNNER_CONNECT_TIMEOUT'), }); } @@ -478,11 +485,19 @@ async function postCommandViaSimulator( ); const body = result.stdout as string; if (result.exitCode !== 0) { + const reason = classifyBootFailure({ + message: 'Runner did not accept connection (simctl spawn)', + stdout: result.stdout, + stderr: result.stderr, + context: { platform: 'ios', phase: 'connect' }, + }); throw new AppError('COMMAND_FAILED', 'Runner did not accept connection (simctl spawn)', { port, stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode, + reason, + hint: bootFailureHint(reason), }); } return { status: 200, body }; diff --git a/src/utils/__tests__/retry.test.ts b/src/utils/__tests__/retry.test.ts index 674e922e0..864e7dca2 100644 --- a/src/utils/__tests__/retry.test.ts +++ b/src/utils/__tests__/retry.test.ts @@ -25,3 +25,20 @@ test('retryWithPolicy retries until success', async () => { assert.equal(result, 'ok'); assert.equal(attempts, 3); }); + +test('retryWithPolicy emits telemetry events', async () => { + const events: string[] = []; + await retryWithPolicy( + async ({ attempt }) => { + if (attempt === 1) throw new Error('transient'); + return 'ok'; + }, + { maxAttempts: 2, baseDelayMs: 1, maxDelayMs: 1, jitter: 0 }, + { + phase: 'boot', + classifyReason: () => 'ANDROID_BOOT_TIMEOUT', + onEvent: (event) => events.push(event.event), + }, + ); + assert.deepEqual(events, ['attempt_failed', 'retry_scheduled', 'succeeded']); +}); diff --git a/src/utils/args.ts b/src/utils/args.ts index 65b998d58..71f812ec6 100644 --- a/src/utils/args.ts +++ b/src/utils/args.ts @@ -173,6 +173,7 @@ export function usage(): string { CLI to control iOS and Android devices for AI agents. Commands: + boot Ensure target device/simulator is booted and ready open [app] Boot device/simulator; optionally launch app close [app] Close app or just end session snapshot [-i] [-c] [-d ] [-s ] [--raw] [--backend ax|xctest] diff --git a/src/utils/retry.ts b/src/utils/retry.ts index 231d8612d..fb8262485 100644 --- a/src/utils/retry.ts +++ b/src/utils/retry.ts @@ -22,6 +22,29 @@ export type RetryAttemptContext = { deadline?: Deadline; }; +export type TimeoutProfile = { + startupMs: number; + operationMs: number; + totalMs: number; +}; + +export type RetryTelemetryEvent = { + phase?: string; + event: 'attempt_failed' | 'retry_scheduled' | 'succeeded' | 'exhausted'; + attempt: number; + maxAttempts: number; + delayMs?: number; + elapsedMs?: number; + remainingMs?: number; + reason?: string; +}; + +export const TIMEOUT_PROFILES: Record = { + ios_boot: { startupMs: 120_000, operationMs: 20_000, totalMs: 120_000 }, + ios_runner_connect: { startupMs: 120_000, operationMs: 15_000, totalMs: 120_000 }, + android_boot: { startupMs: 60_000, operationMs: 10_000, totalMs: 60_000 }, +}; + const defaultOptions: Required> = { attempts: 3, baseDelayMs: 200, @@ -58,7 +81,12 @@ export class Deadline { export async function retryWithPolicy( fn: (context: RetryAttemptContext) => Promise, policy: Partial = {}, - options: { deadline?: Deadline } = {}, + options: { + deadline?: Deadline; + phase?: string; + classifyReason?: (error: unknown) => string | undefined; + onEvent?: (event: RetryTelemetryEvent) => void; + } = {}, ): Promise { const merged: RetryPolicy = { maxAttempts: policy.maxAttempts ?? defaultOptions.attempts, @@ -71,17 +99,55 @@ export async function retryWithPolicy( for (let attempt = 1; attempt <= merged.maxAttempts; attempt += 1) { if (options.deadline?.isExpired() && attempt > 1) break; try { - return await fn({ attempt, maxAttempts: merged.maxAttempts, deadline: options.deadline }); + const result = await fn({ attempt, maxAttempts: merged.maxAttempts, deadline: options.deadline }); + options.onEvent?.({ + phase: options.phase, + event: 'succeeded', + attempt, + maxAttempts: merged.maxAttempts, + elapsedMs: options.deadline?.elapsedMs(), + remainingMs: options.deadline?.remainingMs(), + }); + return result; } catch (err) { lastError = err; + const reason = options.classifyReason?.(err); + options.onEvent?.({ + phase: options.phase, + event: 'attempt_failed', + attempt, + maxAttempts: merged.maxAttempts, + elapsedMs: options.deadline?.elapsedMs(), + remainingMs: options.deadline?.remainingMs(), + reason, + }); if (attempt >= merged.maxAttempts) break; if (merged.shouldRetry && !merged.shouldRetry(err, attempt)) break; const delay = computeDelay(merged.baseDelayMs, merged.maxDelayMs, merged.jitter, attempt); const boundedDelay = options.deadline ? Math.min(delay, options.deadline.remainingMs()) : delay; if (boundedDelay <= 0) break; + options.onEvent?.({ + phase: options.phase, + event: 'retry_scheduled', + attempt, + maxAttempts: merged.maxAttempts, + delayMs: boundedDelay, + elapsedMs: options.deadline?.elapsedMs(), + remainingMs: options.deadline?.remainingMs(), + reason, + }); await sleep(boundedDelay); } } + options.onEvent?.({ + phase: options.phase, + event: 'exhausted', + attempt: merged.maxAttempts, + maxAttempts: merged.maxAttempts, + elapsedMs: options.deadline?.elapsedMs(), + remainingMs: options.deadline?.remainingMs(), + reason: options.classifyReason?.(lastError), + }); if (lastError) throw lastError; throw new AppError('COMMAND_FAILED', 'retry failed'); }