From 98cd0337df1bd7af322b7ac029e5e9039aa21442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Sat, 14 Feb 2026 13:33:16 +0100 Subject: [PATCH 1/4] Refactor CLI command schema and strict flag validation --- src/cli.ts | 10 +- src/core/capabilities.ts | 4 + src/core/dispatch.ts | 30 +- src/utils/__tests__/args.test.ts | 68 +++ src/utils/args.ts | 418 +++++++---------- src/utils/command-schema.ts | 739 +++++++++++++++++++++++++++++++ 6 files changed, 982 insertions(+), 287 deletions(-) create mode 100644 src/utils/command-schema.ts diff --git a/src/cli.ts b/src/cli.ts index df22bc21a..0c1197cbf 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,4 +1,4 @@ -import { parseArgs, usage } from './utils/args.ts'; +import { parseArgs, toDaemonFlags, usage } from './utils/args.ts'; import { asAppError, AppError } from './utils/errors.ts'; import { formatSnapshotText, printHumanError, printJson } from './utils/output.ts'; import { readVersion } from './utils/version.ts'; @@ -10,6 +10,9 @@ import path from 'node:path'; export async function runCli(argv: string[]): Promise { const parsed = parseArgs(argv); + for (const warning of parsed.warnings) { + process.stderr.write(`Warning: ${warning}\n`); + } if (parsed.flags.version) { process.stdout.write(`${readVersion()}\n`); @@ -22,6 +25,7 @@ export async function runCli(argv: string[]): Promise { } const { command, positionals, flags } = parsed; + const daemonFlags = toDaemonFlags(flags); const sessionName = flags.session ?? process.env.AGENT_DEVICE_SESSION ?? 'default'; const logTailStopper = flags.verbose && !flags.json ? startDaemonLogTail() : null; try { @@ -34,7 +38,7 @@ export async function runCli(argv: string[]): Promise { session: sessionName, command: 'session_list', positionals: [], - flags: {}, + flags: daemonFlags, }); if (!response.ok) throw new AppError(response.error.code as any, response.error.message); if (flags.json) printJson({ success: true, data: response.data ?? {} }); @@ -47,7 +51,7 @@ export async function runCli(argv: string[]): Promise { session: sessionName, command: command!, positionals, - flags, + flags: daemonFlags, }); if (response.ok) { diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index 61b128db6..a9c52efd4 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -50,3 +50,7 @@ export function isCommandSupportedOnDevice(command: string, device: DeviceInfo): const kind = (device.kind ?? 'unknown') as keyof KindMatrix; return byPlatform[kind] === true; } + +export function listCapabilityCommands(): string[] { + return Object.keys(COMMAND_CAPABILITY_MATRIX).sort(); +} diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index cc4ef6972..a2d58e056 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -17,35 +17,9 @@ import { runIosRunnerCommand } from '../platforms/ios/runner-client.ts'; import { snapshotAx } from '../platforms/ios/ax-snapshot.ts'; import { setIosSetting } from '../platforms/ios/index.ts'; import type { RawSnapshotNode } from '../utils/snapshot.ts'; +import type { DaemonFlags } from '../utils/command-schema.ts'; -export type CommandFlags = { - session?: string; - platform?: 'ios' | 'android'; - device?: string; - udid?: string; - serial?: string; - out?: string; - activity?: string; - verbose?: boolean; - snapshotInteractiveOnly?: boolean; - snapshotCompact?: boolean; - snapshotDepth?: number; - snapshotScope?: string; - snapshotRaw?: boolean; - snapshotBackend?: 'ax' | 'xctest'; - saveScript?: boolean; - relaunch?: boolean; - noRecord?: boolean; - appsFilter?: 'launchable' | 'user-installed' | 'all'; - appsMetadata?: boolean; - count?: number; - intervalMs?: number; - holdMs?: number; - jitterPx?: number; - pauseMs?: number; - pattern?: 'one-way' | 'ping-pong'; - replayUpdate?: boolean; -}; +export type CommandFlags = DaemonFlags; export async function resolveTargetDevice(flags: CommandFlags): Promise { const selector = { diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 6bc95bfb0..6ab7b0e9b 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -1,6 +1,9 @@ import test from 'node:test'; import assert from 'node:assert/strict'; import { parseArgs, usage } from '../args.ts'; +import { AppError } from '../errors.ts'; +import { getCliCommandNames } from '../command-schema.ts'; +import { listCapabilityCommands } from '../../core/capabilities.ts'; test('parseArgs recognizes --relaunch', () => { const parsed = parseArgs(['open', 'settings', '--relaunch']); @@ -61,6 +64,71 @@ test('parseArgs rejects invalid swipe pattern', () => { test('usage includes --relaunch flag', () => { assert.match(usage(), /--relaunch/); + assert.match(usage(), /pinch \[x\] \[y\]/); + assert.match(usage(), /--metadata/); +}); + +test('every capability command has a parser schema entry', () => { + const schemaCommands = new Set(getCliCommandNames()); + for (const command of listCapabilityCommands()) { + assert.equal(schemaCommands.has(command), true, `Missing schema for command: ${command}`); + } +}); + +test('compat mode warns and strips unsupported pilot-command flags', () => { + const parsed = parseArgs(['press', '10', '20', '--depth', '2'], { strictFlags: false }); + assert.equal(parsed.command, 'press'); + assert.equal(parsed.flags.snapshotDepth, undefined); + assert.equal(parsed.warnings.length, 1); + assert.match(parsed.warnings[0], /not supported for command press/); +}); + +test('strict mode rejects unsupported pilot-command flags', () => { + assert.throws( + () => parseArgs(['press', '10', '20', '--depth', '2'], { strictFlags: true }), + (error) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + error.message.includes('not supported for command press'), + ); +}); + +test('snapshot command accepts command-specific flags', () => { + const parsed = parseArgs(['snapshot', '-i', '-c', '--depth', '3', '-s', 'Login'], { strictFlags: true }); + assert.equal(parsed.command, 'snapshot'); + assert.equal(parsed.flags.snapshotInteractiveOnly, true); + assert.equal(parsed.flags.snapshotCompact, true); + assert.equal(parsed.flags.snapshotDepth, 3); + assert.equal(parsed.flags.snapshotScope, 'Login'); +}); + +test('unknown short flags are rejected', () => { + assert.throws( + () => parseArgs(['press', '10', '20', '-x'], { strictFlags: true }), + (error) => error instanceof AppError && error.code === 'INVALID_ARGS' && error.message === 'Unknown flag: -x', + ); +}); + +test('all commands participate in strict command-flag validation', () => { + assert.throws( + () => parseArgs(['open', 'Settings', '--depth', '1'], { strictFlags: true }), + (error) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + error.message.includes('not supported for command open'), + ); +}); + +test('invalid enum/range errors are deterministic', () => { + assert.throws( + () => parseArgs(['snapshot', '--backend', 'foo'], { strictFlags: true }), + (error) => + error instanceof AppError && error.code === 'INVALID_ARGS' && error.message === 'Invalid backend: foo', + ); + assert.throws( + () => parseArgs(['snapshot', '--depth', '-1'], { strictFlags: true }), + (error) => error instanceof AppError && error.code === 'INVALID_ARGS' && error.message === 'Invalid depth: -1', + ); }); test('usage includes swipe and press series options', () => { diff --git a/src/utils/args.ts b/src/utils/args.ts index 6587357ec..f871a90c7 100644 --- a/src/utils/args.ts +++ b/src/utils/args.ts @@ -1,292 +1,198 @@ import { AppError } from './errors.ts'; +import { + buildUsageText, + getCommandSchema, + getFlagDefinition, + GLOBAL_FLAG_KEYS, + isStrictFlagModeEnabled, + type CliFlags, + type FlagDefinition, + type FlagKey, +} from './command-schema.ts'; export type ParsedArgs = { command: string | null; positionals: string[]; - flags: { - json: boolean; - platform?: 'ios' | 'android'; - device?: string; - udid?: string; - serial?: string; - out?: string; - session?: string; - verbose?: boolean; - snapshotInteractiveOnly?: boolean; - snapshotCompact?: boolean; - snapshotDepth?: number; - snapshotScope?: string; - snapshotRaw?: boolean; - snapshotBackend?: 'ax' | 'xctest'; - appsFilter?: 'launchable' | 'user-installed' | 'all'; - appsMetadata?: boolean; - count?: number; - intervalMs?: number; - holdMs?: number; - jitterPx?: number; - pauseMs?: number; - pattern?: 'one-way' | 'ping-pong'; - activity?: string; - saveScript?: boolean; - relaunch?: boolean; - noRecord?: boolean; - replayUpdate?: boolean; - help: boolean; - version: boolean; - }; + flags: CliFlags; + warnings: string[]; }; -export function parseArgs(argv: string[]): ParsedArgs { - const flags: ParsedArgs['flags'] = { json: false, help: false, version: false }; +type ParseArgsOptions = { + strictFlags?: boolean; +}; + +type ParsedFlagRecord = { + key: FlagKey; + token: string; +}; + +export function parseArgs(argv: string[], options?: ParseArgsOptions): ParsedArgs { + const strictFlags = options?.strictFlags ?? isStrictFlagModeEnabled(process.env.AGENT_DEVICE_STRICT_FLAGS); + const flags: CliFlags = { json: false, help: false, version: false }; + let command: string | null = null; const positionals: string[] = []; + const warnings: string[] = []; + const providedFlags: ParsedFlagRecord[] = []; + let parseFlags = true; for (let i = 0; i < argv.length; i += 1) { const arg = argv[i]; - if (arg === '--json') { - flags.json = true; - continue; - } - if (arg === '--help' || arg === '-h') { - flags.help = true; - continue; - } - if (arg === '--version' || arg === '-V') { - flags.version = true; + if (parseFlags && arg === '--') { + parseFlags = false; continue; } - if (arg === '--verbose' || arg === '-v') { - flags.verbose = true; + if (!parseFlags) { + if (!command) command = arg; + else positionals.push(arg); continue; } - if (arg === '-i') { - flags.snapshotInteractiveOnly = true; + const isLongFlag = arg.startsWith('--'); + const isShortFlag = arg.startsWith('-') && arg.length > 1; + if (!isLongFlag && !isShortFlag) { + if (!command) command = arg; + else positionals.push(arg); continue; } - if (arg === '-c') { - flags.snapshotCompact = true; - continue; - } - if (arg === '--raw') { - flags.snapshotRaw = true; - continue; - } - if (arg === '--no-record') { - flags.noRecord = true; - continue; - } - if (arg === '--save-script') { - flags.saveScript = true; - continue; - } - if (arg === '--relaunch') { - flags.relaunch = true; - continue; - } - if (arg === '--update' || arg === '-u') { - flags.replayUpdate = true; - continue; - } - if (arg === '--user-installed') { - flags.appsFilter = 'user-installed'; - continue; - } - if (arg === '--all') { - flags.appsFilter = 'all'; - continue; - } - if (arg === '--metadata') { - flags.appsMetadata = true; - continue; - } - if (arg.startsWith('--backend')) { - const value = arg.includes('=') - ? arg.split('=')[1] - : argv[i + 1]; - if (!arg.includes('=')) i += 1; - if (value !== 'ax' && value !== 'xctest') { - throw new AppError('INVALID_ARGS', `Invalid backend: ${value}`); + + const [token, inlineValue] = isLongFlag ? splitLongFlag(arg) : [arg, undefined]; + const definition = getFlagDefinition(token); + if (!definition) { + if (shouldTreatUnknownDashTokenAsPositional(command, positionals, arg)) { + if (!command) command = arg; + else positionals.push(arg); + continue; } - flags.snapshotBackend = value; - continue; + throw new AppError('INVALID_ARGS', `Unknown flag: ${token}`); } - if (arg.startsWith('--')) { - const [key, valueInline] = arg.split('='); - const value = valueInline ?? argv[i + 1]; - if (!valueInline) i += 1; - switch (key) { - case '--platform': - if (value !== 'ios' && value !== 'android') { - throw new AppError('INVALID_ARGS', `Invalid platform: ${value}`); - } - flags.platform = value; - break; - case '--depth': { - const parsed = Number(value); - if (!Number.isFinite(parsed) || parsed < 0) { - throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`); - } - flags.snapshotDepth = Math.floor(parsed); - break; - } - case '--scope': - flags.snapshotScope = value; - break; - case '--device': - flags.device = value; - break; - case '--udid': - flags.udid = value; - break; - case '--serial': - flags.serial = value; - break; - case '--out': - flags.out = value; - break; - case '--session': - flags.session = value; - break; - case '--activity': - flags.activity = value; - break; - case '--count': - flags.count = parseNumericFlag(key, value); - break; - case '--interval-ms': - flags.intervalMs = parseNumericFlag(key, value); - break; - case '--hold-ms': - flags.holdMs = parseNumericFlag(key, value); - break; - case '--jitter-px': - flags.jitterPx = parseNumericFlag(key, value); - break; - case '--pause-ms': - flags.pauseMs = parseNumericFlag(key, value); - break; - case '--pattern': - if (value !== 'one-way' && value !== 'ping-pong') { - throw new AppError('INVALID_ARGS', `Invalid pattern: ${value}`); - } - flags.pattern = value; - break; - default: - throw new AppError('INVALID_ARGS', `Unknown flag: ${key}`); - } - continue; + const parsed = parseFlagValue(definition, token, inlineValue, argv[i + 1]); + if (parsed.consumeNext) i += 1; + (flags as Record)[definition.key] = parsed.value; + providedFlags.push({ key: definition.key, token }); + } + + const commandSchema = getCommandSchema(command); + const allowedFlagKeys = new Set([ + ...GLOBAL_FLAG_KEYS, + ...(commandSchema?.allowedFlags ?? []), + ]); + const disallowed = providedFlags.filter((entry) => !allowedFlagKeys.has(entry.key)); + if (disallowed.length > 0 && command) { + const unsupported = disallowed.map((entry) => entry.token); + const message = + unsupported.length === 1 + ? `Flag ${unsupported[0]} is not supported for command ${command}.` + : `Flags ${unsupported.join(', ')} are not supported for command ${command}.`; + if (strictFlags) { + throw new AppError('INVALID_ARGS', message); + } + warnings.push(`${message} Enable AGENT_DEVICE_STRICT_FLAGS=1 to fail fast.`); + for (const entry of disallowed) { + delete (flags as Record)[entry.key]; } - if (arg === '-d') { - const value = argv[i + 1]; - i += 1; - const parsed = Number(value); - if (!Number.isFinite(parsed) || parsed < 0) { - throw new AppError('INVALID_ARGS', `Invalid depth: ${value}`); + } + if (commandSchema?.defaults) { + for (const [key, value] of Object.entries(commandSchema.defaults) as Array<[FlagKey, unknown]>) { + if ((flags as Record)[key] === undefined) { + (flags as Record)[key] = value; } - flags.snapshotDepth = Math.floor(parsed); - continue; - } - if (arg === '-s') { - const value = argv[i + 1]; - i += 1; - flags.snapshotScope = value; - continue; } - positionals.push(arg); } + return { command, positionals, flags, warnings }; +} - const command = positionals.shift() ?? null; - return { command, positionals, flags }; +function splitLongFlag(flag: string): [string, string | undefined] { + const equals = flag.indexOf('='); + if (equals === -1) return [flag, undefined]; + return [flag.slice(0, equals), flag.slice(equals + 1)]; } -function parseNumericFlag(name: string, value: string): number { +function parseFlagValue( + definition: FlagDefinition, + token: string, + inlineValue: string | undefined, + nextArg: string | undefined, +): { value: unknown; consumeNext: boolean } { + if (definition.setValue !== undefined) { + if (inlineValue !== undefined) { + throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`); + } + return { value: definition.setValue, consumeNext: false }; + } + if (definition.type === 'boolean') { + if (inlineValue !== undefined) { + throw new AppError('INVALID_ARGS', `Flag ${token} does not take a value.`); + } + return { value: true, consumeNext: false }; + } + + const value = inlineValue ?? nextArg; + if (value === undefined) { + throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`); + } + if (inlineValue === undefined && looksLikeFlagToken(value)) { + throw new AppError('INVALID_ARGS', `Flag ${token} requires a value.`); + } + + if (definition.type === 'string') { + return { value, consumeNext: inlineValue === undefined }; + } + if (definition.type === 'enum') { + if (!definition.enumValues?.includes(value)) { + throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`); + } + return { value, consumeNext: inlineValue === undefined }; + } const parsed = Number(value); if (!Number.isFinite(parsed)) { - throw new AppError('INVALID_ARGS', `Invalid ${name}: ${value}`); + throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`); + } + if (typeof definition.min === 'number' && parsed < definition.min) { + throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`); } - return parsed; + if (typeof definition.max === 'number' && parsed > definition.max) { + throw new AppError('INVALID_ARGS', `Invalid ${labelForFlag(token)}: ${value}`); + } + return { value: Math.floor(parsed), consumeNext: inlineValue === undefined }; } -export function usage(): string { - return `agent-device [args] [--json] +function labelForFlag(token: string): string { + if (token === '--backend') return 'backend'; + if (token === '--platform') return 'platform'; + if (token === '--depth' || token === '-d') return 'depth'; + if (token.startsWith('--')) return token.slice(2); + return token.slice(1); +} -CLI to control iOS and Android devices for AI agents. +function looksLikeFlagToken(value: string): boolean { + if (!value.startsWith('-') || value === '-') return false; + const [token] = value.startsWith('--') ? splitLongFlag(value) : [value, undefined]; + return getFlagDefinition(token) !== undefined; +} + +function shouldTreatUnknownDashTokenAsPositional( + command: string | null, + positionals: string[], + arg: string, +): boolean { + if (!isNegativeNumericToken(arg)) return false; + if (!command) return false; + const schema = getCommandSchema(command); + if (!schema) return true; + if (schema.positionalArgs.length === 0) return false; + if (positionals.length < schema.positionalArgs.length) return true; + return schema.positionalArgs.some((entry) => entry.includes('?')); +} -Commands: - boot Ensure target device/simulator is booted and ready - open [app|url] Boot device/simulator; optionally launch app or deep link URL - close [app] Close app or just end session - reinstall Uninstall + install app from binary path - snapshot [-i] [-c] [-d ] [-s ] [--raw] [--backend ax|xctest] - Capture accessibility tree - -i Interactive elements only - -c Compact output (drop empty structure) - -d Limit snapshot depth - -s Scope snapshot to label/identifier - --raw Raw node output - --backend ax|xctest xctest: default; XCTest snapshot (slower, no permissions) - ax: macOS Accessibility tree (fast, needs permissions) - devices List available devices - apps [--user-installed|--all|--metadata] List installed apps (Android launchable by default, iOS simulator) - appstate Show foreground app/activity - back Navigate back (where supported) - home Go to home screen (where supported) - app-switcher Open app switcher (where supported) - wait |text |@ref| [timeoutMs] - Wait for duration, text, ref, or selector to appear - alert [get|accept|dismiss|wait] [timeout] Inspect or handle alert (iOS simulator) - click <@ref|selector> Click element by snapshot ref or selector - get text <@ref|selector> Return element text by ref or selector - get attrs <@ref|selector> Return element attributes by ref or selector - replay [--update|-u] Replay a recorded session - press [--count N] [--interval-ms I] [--hold-ms H] [--jitter-px J] - Tap/press at coordinates (supports repeated gesture series) - long-press [durationMs] Long press (where supported) - swipe [durationMs] [--count N] [--pause-ms P] [--pattern one-way|ping-pong] - Swipe coordinates with optional repeat pattern - focus Focus input at coordinates - type Type text in focused field - fill | fill <@ref|selector> - Tap then type - scroll [amount] Scroll in direction (0-1 amount) - pinch [x] [y] Pinch/zoom (iOS simulator only) - scrollintoview Scroll until text appears (Android only) - screenshot [path] Capture screenshot - record start [path] Start screen recording - record stop Stop screen recording - trace start [path] Start trace log capture - trace stop [path] Stop trace log capture - find [value] Find by any text (label/value/id) - find text [value] Find by text content - find label