From 87aa82e7e50dd8bfde35ae23b5020c49d322235f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Mon, 1 Jun 2026 17:05:12 +0200 Subject: [PATCH] fix: improve maestro test output --- src/__tests__/cli-network.test.ts | 211 +++++++++++++- src/cli-test.ts | 262 ++++++++++++++++-- src/cli.ts | 36 ++- src/compat/maestro/replay-flow.ts | 7 + src/compat/maestro/types.ts | 1 + .../__tests__/session-test-discovery.test.ts | 9 +- .../__tests__/session-test-suite.test.ts | 10 +- src/daemon/handlers/session-test-discovery.ts | 19 +- src/daemon/handlers/session-test.ts | 15 + src/daemon/types.ts | 11 + src/utils/__tests__/args.test.ts | 1 + src/utils/cli-flags.ts | 3 +- 12 files changed, 547 insertions(+), 38 deletions(-) diff --git a/src/__tests__/cli-network.test.ts b/src/__tests__/cli-network.test.ts index 3bcd3e43b..5805fce82 100644 --- a/src/__tests__/cli-network.test.ts +++ b/src/__tests__/cli-network.test.ts @@ -8,6 +8,7 @@ import { runCliCapture } from './cli-capture.ts'; function makeFailedReplayResult() { return { file: '/tmp/02-fail.ad', + title: 'Checkout failure', session: 'default:test:suite:2', status: 'failed', durationMs: 5, @@ -106,11 +107,14 @@ test('test command prints suite summary and exits non-zero on failures', async ( assert.equal(result.calls[0]?.meta?.requestProgress, 'replay-test'); assert.match(result.stderr, /Running replay suite\.\.\./); assert.doesNotMatch(result.stdout, /PASS \/tmp\/01-pass\.ad/); - assert.match(result.stdout, /FAIL \/tmp\/02-fail\.ad after 2 attempts \(5ms\)/); + assert.match( + result.stdout, + /FAIL "Checkout failure" in 02-fail\.ad after 2 attempts \(total 0\.005s\)/, + ); assert.match(result.stdout, /Replay failed at step 1 \(open Demo\): boom/); assert.match(result.stdout, /artifacts: \/tmp\/test-artifacts\/02-fail/); assert.doesNotMatch(result.stdout, /SKIP \/tmp\/03-skip\.ad/); - assert.match(result.stdout, /Test summary: 1 passed, 1 failed in 25ms/); + assert.match(result.stdout, /Test summary: 1 passed, 1 failed in 0\.025s/); }); test('test command --verbose prints all test statuses', async () => { @@ -119,9 +123,93 @@ test('test command --verbose prints all test statuses', async () => { ); assert.equal(result.code, 1); + assert.equal(result.calls[0]?.meta?.debug, false); assert.match(result.stderr, /Running replay suite\.\.\./); - assert.match(result.stdout, /PASS \/tmp\/01-pass\.ad \(10ms\)/); - assert.match(result.stdout, /SKIP \/tmp\/03-skip\.ad/); + assert.match(result.stdout, /PASS 01-pass\.ad \(0\.01s\)/); + assert.match(result.stdout, /SKIP 03-skip\.ad/); +}); + +test('test command --verbose prints step telemetry for passing tests without debug mode', async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-device-cli-test-verbose-')); + const artifactsDir = path.join(tmpDir, 'auth-flow'); + const attemptDir = path.join(artifactsDir, 'attempt-1'); + await fs.mkdir(attemptDir, { recursive: true }); + await fs.writeFile( + path.join(attemptDir, 'replay-timing.ndjson'), + [ + { + type: 'replay_action_start', + step: 1, + line: 3, + command: '__maestroTapOn', + positionals: ['text="Log in"'], + }, + { + type: 'replay_action_stop', + step: 1, + line: 3, + command: '__maestroTapOn', + ok: true, + durationMs: 250, + }, + { + type: 'replay_action_start', + step: 2, + line: 4, + command: '__maestroAssertVisible', + positionals: ['text="Home"'], + }, + { + type: 'replay_action_stop', + step: 2, + line: 4, + command: '__maestroAssertVisible', + ok: true, + durationMs: 75, + }, + ] + .map((entry) => JSON.stringify(entry)) + .join('\n'), + ); + + try { + const result = await runCliCapture(['test', './suite', '--verbose'], async () => ({ + ok: true, + data: { + total: 1, + executed: 1, + passed: 1, + failed: 0, + skipped: 0, + notRun: 0, + durationMs: 500, + failures: [], + tests: [ + { + file: '/tmp/auth-flow.yml', + title: 'Authentication flow', + session: 'default:test:suite:1', + status: 'passed', + durationMs: 500, + finalAttemptDurationMs: 500, + attempts: 1, + artifactsDir, + replayed: 2, + healed: 0, + }, + ], + }, + })); + + assert.equal(result.code, null); + assert.equal(result.calls[0]?.meta?.debug, false); + assert.match(result.stdout, /PASS "Authentication flow" \(0\.5s\)/); + assert.match(result.stdout, /steps \(attempt 1\):/); + assert.match(result.stdout, /\[ok\] tapOn "text=\\"Log in\\"" \(line 3, 0\.25s\)/); + assert.match(result.stdout, /\[ok\] assertVisible "text=\\"Home\\"" \(line 4, 0\.075s\)/); + } finally { + await fs.rm(tmpDir, { recursive: true, force: true }); + } }); test('test command reports flaky passed-on-retry cases in the default summary', async () => { @@ -138,11 +226,20 @@ test('test command reports flaky passed-on-retry cases in the default summary', failures: [], tests: [ { - file: '/tmp/01-flaky.ad', + file: '/tmp/auth-flow.yml', + title: 'Authentication flow', session: 'default:test:suite:1', status: 'passed', - durationMs: 10, + durationMs: 112151, + finalAttemptDurationMs: 17492, attempts: 2, + attemptFailures: [ + { + attempt: 1, + message: 'Replay failed at step 3 (tapOn "Log in"): selector not found', + durationMs: 94659, + }, + ], }, ], }, @@ -150,8 +247,106 @@ test('test command reports flaky passed-on-retry cases in the default summary', assert.equal(result.code, null); assert.match(result.stderr, /Running replay suite\.\.\./); - assert.match(result.stdout, /FLAKY \/tmp\/01-flaky\.ad after 2 attempts \(10ms\)/); - assert.match(result.stdout, /Test summary: 1 passed, 0 failed, 1 flaky in 25ms/); + assert.doesNotMatch(result.stdout, /FLAKY/); + assert.match(result.stdout, /Test summary: 1 passed, 0 failed, 1 flaky in 0\.025s/); + assert.match(result.stdout, /Flaky tests:/); + assert.match( + result.stdout, + /PASS "Authentication flow" after 2 attempts \(passed attempt 17\.5s, total 112\.2s\)/, + ); + assert.match( + result.stdout, + /attempt 1 failed \(94\.7s\): Replay failed at step 3 \(tapOn "Log in"\): selector not found/, + ); +}); + +test('test command prints failed attempt step telemetry when timing trace exists', async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-device-cli-test-steps-')); + const artifactsDir = path.join(tmpDir, 'checkout-flow'); + const attemptDir = path.join(artifactsDir, 'attempt-2'); + await fs.mkdir(attemptDir, { recursive: true }); + await fs.writeFile( + path.join(attemptDir, 'replay-timing.ndjson'), + [ + { + type: 'replay_action_start', + step: 1, + line: 3, + command: 'open', + positionals: ['Demo'], + }, + { + type: 'replay_action_stop', + step: 1, + line: 3, + command: 'open', + ok: true, + durationMs: 125, + resultTiming: { launchMs: 100 }, + }, + { + type: 'replay_action_start', + step: 2, + line: 4, + command: '__maestroTapOn', + positionals: ['text="Pay"'], + }, + { + type: 'replay_action_stop', + step: 2, + line: 4, + command: '__maestroTapOn', + ok: false, + durationMs: 1500, + errorCode: 'ASSERTION_FAILED', + }, + ] + .map((entry) => JSON.stringify(entry)) + .join('\n'), + ); + + try { + const failedReplayResult = { + file: '/tmp/checkout-flow.yml', + title: 'Checkout flow', + session: 'default:test:suite:1', + status: 'failed', + durationMs: 2000, + attempts: 2, + artifactsDir, + error: { + code: 'ASSERTION_FAILED', + message: 'Replay failed at step 2 (click "Pay"): selector not found', + }, + }; + const result = await runCliCapture(['test', './suite'], async () => ({ + ok: true, + data: { + total: 1, + executed: 1, + passed: 0, + failed: 1, + skipped: 0, + notRun: 0, + durationMs: 2000, + failures: [failedReplayResult], + tests: [failedReplayResult], + }, + })); + + assert.equal(result.code, 1); + assert.match(result.stdout, /steps \(attempt 2\):/); + assert.match( + result.stdout, + /\[ok\] open "Demo" \(line 3, 0\.125s, timing \{"launchMs":100\}\)/, + ); + assert.match( + result.stdout, + /\[FAIL\] tapOn "text=\\"Pay\\"" \(line 4, 1\.50s, ASSERTION_FAILED\)/, + ); + } finally { + await fs.rm(tmpDir, { recursive: true, force: true }); + } }); test('test --maestro forwards Maestro backend and platform for directory suites', async () => { diff --git a/src/cli-test.ts b/src/cli-test.ts index 127eb3c4f..7b54999e0 100644 --- a/src/cli-test.ts +++ b/src/cli-test.ts @@ -40,16 +40,15 @@ function renderReplayTestSummary( for (const entry of data.failures) { renderFailedTestResult(entry); } - for (const entry of flaky) { - renderFlakyTestResult(entry); - } } const durationMs = typeof data.durationMs === 'number' ? data.durationMs : undefined; const flakySuffix = flaky.length > 0 ? `, ${flaky.length} flaky` : ''; + const durationSuffix = durationMs !== undefined ? ` in ${formatDurationSeconds(durationMs)}` : ''; process.stdout.write( - `Test summary: ${data.passed} passed, ${data.failed} failed${flakySuffix}${durationMs !== undefined ? ` in ${durationMs}ms` : ''}\n`, + `Test summary: ${data.passed} passed, ${data.failed} failed${flakySuffix}${durationSuffix}\n`, ); + renderFlakyTestSummary(flaky); return getReplayTestExitCode(data); } @@ -59,30 +58,37 @@ function renderVerboseTestResult(result: ReplaySuiteTestResult): void { return; } - const prefix = replayResultPrefix(result); - const attemptSuffix = - 'attempts' in result && result.attempts > 1 ? ` after ${result.attempts} attempts` : ''; - const durationSuffix = result.durationMs > 0 ? ` (${result.durationMs}ms)` : ''; - process.stdout.write(`${prefix} ${result.file}${attemptSuffix}${durationSuffix}\n`); + const durationSuffix = formatReplayTestDurationSuffix(result); + process.stdout.write( + `${replayResultPrefix(result)} ${replayTestDisplayName(result)}${durationSuffix}\n`, + ); if (result.status === 'skipped') { process.stdout.write(` ${result.message ?? 'skipped'}\n`); } + for (const line of replayTestStepLines(result)) { + process.stdout.write(` ${line}\n`); + } } function renderFailedTestResult( result: Extract, ): void { const attemptSuffix = result.attempts > 1 ? ` after ${result.attempts} attempts` : ''; - const durationSuffix = result.durationMs > 0 ? ` (${result.durationMs}ms)` : ''; - process.stdout.write(`FAIL ${result.file}${attemptSuffix}${durationSuffix}\n`); + const durationSuffix = formatReplayTestDurationSuffix(result); + process.stdout.write( + `FAIL ${replayFailedTestDisplayName(result)}${attemptSuffix}${durationSuffix}\n`, + ); process.stdout.write(` ${result.error?.message ?? 'Unknown test failure'}\n`); for (const line of replayFailureConsoleLines(result)) { process.stdout.write(` ${line}\n`); } + for (const line of replayTestStepLines(result)) { + process.stdout.write(` ${line}\n`); + } } function replayResultPrefix(result: ReplaySuiteTestResult): string { - if (result.status === 'passed') return result.attempts > 1 ? 'FLAKY' : 'PASS'; + if (result.status === 'passed') return 'PASS'; if (result.status === 'skipped') return 'SKIP'; return 'INFO'; } @@ -98,9 +104,155 @@ function replayFailureConsoleLines( ].filter(Boolean); } -function renderFlakyTestResult(result: Extract): void { - const durationSuffix = result.durationMs > 0 ? ` (${result.durationMs}ms)` : ''; - process.stdout.write(`FLAKY ${result.file} after ${result.attempts} attempts${durationSuffix}\n`); +type ReplayActionStartTrace = { + type: 'replay_action_start'; + step: number; + line?: number; + command?: string; + positionals?: unknown[]; +}; + +type ReplayActionStopTrace = { + type: 'replay_action_stop'; + step: number; + line?: number; + command?: string; + ok?: boolean; + durationMs?: number; + errorCode?: string; + resultTiming?: Record; +}; + +function replayTestStepLines(result: ReplaySuiteTestResult): string[] { + if (result.status === 'skipped') return []; + const tracePath = replayTestTimingTracePath(result); + if (!tracePath) return []; + const events = readReplayTimingTrace(tracePath); + if (events.length === 0) return []; + + const starts = new Map(); + const stops: ReplayActionStopTrace[] = []; + for (const event of events) { + if (isReplayActionStartTrace(event)) starts.set(event.step, event); + if (isReplayActionStopTrace(event)) stops.push(event); + } + if (stops.length === 0) return []; + + return [ + `steps (attempt ${result.attempts}):`, + ...stops.map((stop) => renderReplayStepTrace(stop, starts.get(stop.step))), + ]; +} + +function replayTestTimingTracePath( + result: Extract, +): string | undefined { + return result.artifactsDir + ? path.join(result.artifactsDir, `attempt-${result.attempts}`, 'replay-timing.ndjson') + : undefined; +} + +function readReplayTimingTrace(tracePath: string): Record[] { + try { + return fs + .readFileSync(tracePath, 'utf8') + .split(/\r?\n/) + .filter((line) => line.trim().length > 0) + .flatMap((line) => { + try { + const parsed = JSON.parse(line) as unknown; + return isPlainRecord(parsed) ? [parsed] : []; + } catch { + return []; + } + }); + } catch { + return []; + } +} + +function isReplayActionStartTrace(event: Record): event is ReplayActionStartTrace { + return ( + event.type === 'replay_action_start' && + hasTraceStep(event) && + hasOptionalNumber(event, 'line') && + hasOptionalString(event, 'command') && + (event.positionals === undefined || Array.isArray(event.positionals)) + ); +} + +function isReplayActionStopTrace(event: Record): event is ReplayActionStopTrace { + return ( + event.type === 'replay_action_stop' && + hasTraceStep(event) && + hasOptionalNumber(event, 'line') && + hasOptionalString(event, 'command') && + (event.ok === undefined || typeof event.ok === 'boolean') && + hasOptionalNumber(event, 'durationMs') && + hasOptionalString(event, 'errorCode') && + (event.resultTiming === undefined || isPlainRecord(event.resultTiming)) + ); +} + +function hasTraceStep(event: Record): boolean { + return typeof event.step === 'number'; +} + +function hasOptionalNumber(event: Record, key: string): boolean { + return event[key] === undefined || typeof event[key] === 'number'; +} + +function hasOptionalString(event: Record, key: string): boolean { + return event[key] === undefined || typeof event[key] === 'string'; +} + +function renderReplayStepTrace( + stop: ReplayActionStopTrace, + start: ReplayActionStartTrace | undefined, +): string { + const failed = stop.ok === false; + const status = failed ? '[FAIL]' : stop.ok === true ? '[ok]' : '[info]'; + return ` ${status} ${formatReplayStepCommand(start, stop)}${formatReplayStepDetails(stop, start)}`; +} + +function formatReplayStepDetails( + stop: ReplayActionStopTrace, + start: ReplayActionStartTrace | undefined, +): string { + const line = start?.line ?? stop.line; + const details = [ + typeof line === 'number' ? `line ${line}` : '', + typeof stop.durationMs === 'number' ? formatDurationSeconds(stop.durationMs) : '', + stop.errorCode ?? '', + stop.resultTiming ? `timing ${JSON.stringify(stop.resultTiming)}` : '', + ].filter(Boolean); + return details.length > 0 ? ` (${details.join(', ')})` : ''; +} + +function formatReplayStepCommand( + start: ReplayActionStartTrace | undefined, + stop: ReplayActionStopTrace, +): string { + const command = formatReplayStepCommandName(start?.command ?? stop.command); + const positionals = start?.positionals ?? []; + return [command, ...positionals.map(formatReplayStepArg)].join(' '); +} + +function formatReplayStepCommandName(command: string | undefined): string { + if (!command) return 'unknown'; + if (!command.startsWith('__maestro')) return command; + const name = command.slice('__maestro'.length); + return name.length > 0 ? name[0]!.toLowerCase() + name.slice(1) : command; +} + +function formatReplayStepArg(value: unknown): string { + if (typeof value === 'string') return JSON.stringify(value); + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return JSON.stringify(value); +} + +function isPlainRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); } function isFlakyReplayTestResult( @@ -109,6 +261,77 @@ function isFlakyReplayTestResult( return result.status === 'passed' && result.attempts > 1; } +function renderFlakyTestSummary( + results: Array>, +): void { + if (results.length === 0) return; + process.stdout.write('Flaky tests:\n'); + for (const result of results) { + process.stdout.write( + ` PASS ${replayTestDisplayName(result)} after ${result.attempts} attempts${formatFlakyReplayDurationSuffix(result)}\n`, + ); + for (const failure of result.attemptFailures ?? []) { + const attemptDuration = + typeof failure.durationMs === 'number' + ? ` (${formatDurationSeconds(failure.durationMs)})` + : ''; + process.stdout.write( + ` attempt ${failure.attempt} failed${attemptDuration}: ${failure.message}\n`, + ); + } + } +} + +function replayTestDisplayName(result: ReplaySuiteTestResult): string { + const title = replayTestTitle(result); + if (title && title.length > 0) return JSON.stringify(title); + return path.basename(result.file); +} + +function replayFailedTestDisplayName( + result: Extract, +): string { + const title = replayTestTitle(result); + const filename = path.basename(result.file); + return title && title.length > 0 ? `${JSON.stringify(title)} in ${filename}` : filename; +} + +function replayTestCaseName(result: ReplaySuiteTestResult): string { + return replayTestTitle(result) ?? path.basename(result.file); +} + +function replayTestTitle(result: ReplaySuiteTestResult): string | undefined { + const title = result.title?.trim(); + return title && title.length > 0 ? title : undefined; +} + +function formatReplayTestDurationSuffix(result: ReplaySuiteTestResult): string { + if (result.status === 'passed' && result.attempts > 1) { + return formatFlakyReplayDurationSuffix(result); + } + if (result.status === 'failed' && result.attempts > 1 && result.durationMs > 0) { + return ` (total ${formatDurationSeconds(result.durationMs)})`; + } + + const durationMs = + result.status === 'passed' && typeof result.finalAttemptDurationMs === 'number' + ? result.finalAttemptDurationMs + : result.durationMs; + return durationMs > 0 ? ` (${formatDurationSeconds(durationMs)})` : ''; +} + +function formatFlakyReplayDurationSuffix( + result: Extract, +): string { + const timings = [ + typeof result.finalAttemptDurationMs === 'number' + ? `passed attempt ${formatDurationSeconds(result.finalAttemptDurationMs)}` + : '', + result.durationMs > 0 ? `total ${formatDurationSeconds(result.durationMs)}` : '', + ].filter(Boolean); + return timings.length > 0 ? ` (${timings.join(', ')})` : ''; +} + function getReplayTestExitCode(data: ReplaySuiteResult): number { return data.failed > 0 ? 1 : 0; } @@ -144,7 +367,7 @@ function buildReplayJunitXml(suite: ReplaySuiteResult): string { } function renderJUnitTestCase(test: ReplaySuiteTestResult): string[] { - const name = xmlEscape(path.basename(test.file)); + const name = xmlEscape(replayTestCaseName(test)); const className = xmlEscape( path.dirname(test.file) === '.' ? test.file : path.dirname(test.file), ); @@ -239,6 +462,13 @@ function formatJUnitSeconds(durationMs: number): string { return (Math.max(0, durationMs) / 1000).toFixed(3); } +function formatDurationSeconds(durationMs: number): string { + const seconds = Math.max(0, durationMs) / 1000; + if (seconds >= 10) return `${seconds.toFixed(1)}s`; + if (seconds >= 1) return `${seconds.toFixed(2)}s`; + return `${seconds.toFixed(3).replace(/0+$/, '').replace(/\.$/, '')}s`; +} + function xmlEscape(value: string): string { return value .replaceAll('&', '&') diff --git a/src/cli.ts b/src/cli.ts index 3675be0dd..efc2b9ea0 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,4 +1,4 @@ -import { usage, usageForCommand } from './utils/args.ts'; +import { parseRawArgs, usage, usageForCommand } from './utils/args.ts'; import { asAppError, AppError, normalizeError } from './utils/errors.ts'; import { printHumanError, printJson } from './utils/output.ts'; import { readVersion } from './utils/version.ts'; @@ -68,8 +68,7 @@ const REMOTE_MATERIALIZATION_DEFERRED_COMMANDS = new Set([ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): Promise { const requestId = createRequestId(); const version = readVersion(); - const debugEnabled = - argv.includes('--debug') || argv.includes('--verbose') || argv.includes('-v'); + const debugEnabled = isDebugRequested(argv); const jsonRequested = argv.includes('--json'); // Best-effort session guess used only for pre-parse diagnostics scope. // After parse succeeds, request dispatch uses parsed flags/session resolution. @@ -144,6 +143,7 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): } const { command, positionals } = parsed; + const debugOutputEnabled = isParsedDebugRequested(command, parsed.providedFlags); let binding: ReturnType; let flags: typeof parsed.flags; let daemonPaths: ReturnType; @@ -186,7 +186,7 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): if (parsed.flags.json) { printJson({ success: false, error: normalized }); } else { - printHumanError(normalized, { showDetails: parsed.flags.verbose }); + printHumanError(normalized, { showDetails: debugOutputEnabled }); } process.exit(1); return; @@ -231,7 +231,7 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): lockPolicy: binding.lockPolicy, lockPlatform: binding.defaultPlatform, cwd: process.cwd(), - debug: Boolean(currentFlags.verbose), + debug: debugOutputEnabled, }); let parsedBatchSteps: BatchStep[] | undefined; if (command === 'batch') { @@ -284,7 +284,7 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): } const remoteDaemonBaseUrl = effectiveFlags.daemonBaseUrl; logTailStopper = - effectiveFlags.verbose && !effectiveFlags.json && !remoteDaemonBaseUrl + debugOutputEnabled && !effectiveFlags.json && !remoteDaemonBaseUrl ? startDaemonLogTail(daemonPaths.logPath) : null; const client = createAgentDeviceClient(buildClientConfig(effectiveFlags, resolvedRuntime), { @@ -350,8 +350,8 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): error: normalized, }); } else { - printHumanError(normalized, { showDetails: effectiveFlags.verbose }); - if (effectiveFlags.verbose) { + printHumanError(normalized, { showDetails: debugOutputEnabled }); + if (debugOutputEnabled) { try { const logPath = daemonPaths.logPath; if (fs.existsSync(logPath)) { @@ -374,6 +374,26 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS): ); } +function isDebugRequested(argv: string[]): boolean { + try { + const parsed = parseRawArgs(argv); + return isParsedDebugRequested(parsed.command ?? '', parsed.providedFlags); + } catch { + return argv.includes('--debug') || argv.includes('-v') || argv.includes('--verbose'); + } +} + +function isParsedDebugRequested( + command: string, + providedFlags: Array<{ key: FlagKey; token: string }>, +): boolean { + return providedFlags.some( + (entry) => + entry.key === 'verbose' && + (entry.token === '--debug' || entry.token === '-v' || command !== 'test'), + ); +} + function readBatchSteps(flags: ReturnType['flags']): BatchStep[] { let raw = ''; if (flags.steps) { diff --git a/src/compat/maestro/replay-flow.ts b/src/compat/maestro/replay-flow.ts index 05aa9f6a8..5dd55b8b6 100644 --- a/src/compat/maestro/replay-flow.ts +++ b/src/compat/maestro/replay-flow.ts @@ -21,6 +21,12 @@ export function parseMaestroReplayFlow( return parseMaestroReplayFlowInternal(script, createParseContext(options)); } +export function readMaestroFlowName(script: string): string | undefined { + const values = parseYamlDocuments(script); + const { config } = splitMaestroDocuments(values); + return config.name; +} + function parseMaestroReplayFlowInternal( script: string, context: MaestroParseContext, @@ -259,6 +265,7 @@ function normalizeConfig(value: unknown): MaestroFlowConfig { throw new AppError('INVALID_ARGS', 'Maestro flow config must be a YAML map.'); } return { + ...(typeof value.name === 'string' && value.name.length > 0 ? { name: value.name } : {}), ...(typeof value.appId === 'string' && value.appId.length > 0 ? { appId: value.appId } : {}), ...(isPlainRecord(value.env) ? { env: readEnvMap(value.env, 'env') } : {}), ...(Array.isArray(value.onFlowStart) diff --git a/src/compat/maestro/types.ts b/src/compat/maestro/types.ts index 8ea39be92..73e933aec 100644 --- a/src/compat/maestro/types.ts +++ b/src/compat/maestro/types.ts @@ -1,6 +1,7 @@ import type { ParsedReplayScript, ReplayScriptMetadata } from '../../replay/script.ts'; export type MaestroFlowConfig = { + name?: string; appId?: string; env?: Record; onFlowStart?: MaestroCommand[]; diff --git a/src/daemon/handlers/__tests__/session-test-discovery.test.ts b/src/daemon/handlers/__tests__/session-test-discovery.test.ts index f0c572537..e5e0f0d7a 100644 --- a/src/daemon/handlers/__tests__/session-test-discovery.test.ts +++ b/src/daemon/handlers/__tests__/session-test-discovery.test.ts @@ -57,7 +57,10 @@ test('discoverReplayTestEntries rejects empty post-filter suites', () => { test('discoverReplayTestEntries includes Maestro yaml flows for Maestro test suites', () => { const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-test-discovery-maestro-')); - fs.writeFileSync(path.join(root, '01-flow.yaml'), 'appId: demo\n---\n- launchApp\n'); + fs.writeFileSync( + path.join(root, '01-flow.yaml'), + 'appId: demo\nname: Bottom Tabs - Dynamic\n---\n- launchApp\n', + ); fs.writeFileSync(path.join(root, '02-flow.yml'), 'appId: demo\n---\n- launchApp\n'); fs.writeFileSync(path.join(root, '03-flow.ad'), 'open "Demo"\n'); @@ -76,4 +79,8 @@ test('discoverReplayTestEntries includes Maestro yaml flows for Maestro test sui entries.map((entry) => entry.kind), ['run', 'run', 'run'], ); + assert.equal(entries[0]?.kind, 'run'); + if (entries[0]?.kind === 'run') { + assert.equal(entries[0].title, 'Bottom Tabs - Dynamic'); + } }); diff --git a/src/daemon/handlers/__tests__/session-test-suite.test.ts b/src/daemon/handlers/__tests__/session-test-suite.test.ts index 29ed54ce0..46e24d972 100644 --- a/src/daemon/handlers/__tests__/session-test-suite.test.ts +++ b/src/daemon/handlers/__tests__/session-test-suite.test.ts @@ -65,7 +65,7 @@ test('test discovers Maestro YAML suites when replay backend is set', async () = const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-test-suite-maestro-')); fs.writeFileSync( path.join(root, 'auth-flow.yml'), - ['appId: demo.app', '---', '- launchApp', ''].join('\n'), + ['appId: demo.app', 'name: Authentication flow', '---', '- launchApp', ''].join('\n'), ); const invoked: DaemonRequest[] = []; @@ -91,6 +91,7 @@ test('test discovers Maestro YAML suites when replay backend is set', async () = expect(invoked.map((req) => [req.command, req.positionals])).toEqual([['open', ['demo.app']]]); expect(data.passed).toBe(1); expect(data.failed).toBe(0); + expect((data.tests as Array>)[0]?.title).toBe('Authentication flow'); }); test('test emits progress when attempts retry and pass', async () => { @@ -130,6 +131,13 @@ test('test emits progress when attempts retry and pass', async () => { const data = expectOkData(response); expect(data.passed).toBe(1); + expect((data.tests as Array>)[0]?.attemptFailures).toEqual([ + { + attempt: 1, + message: 'Replay failed at step 1 (open "Demo"): first attempt failed', + durationMs: expect.any(Number), + }, + ]); expect(events.map((event) => event.status)).toEqual(['fail', 'pass']); expect(events[0]).toMatchObject({ type: 'replay-test', diff --git a/src/daemon/handlers/session-test-discovery.ts b/src/daemon/handlers/session-test-discovery.ts index 85ecc5113..6032559d9 100644 --- a/src/daemon/handlers/session-test-discovery.ts +++ b/src/daemon/handlers/session-test-discovery.ts @@ -5,6 +5,7 @@ import type { PlatformSelector } from '../../utils/device.ts'; import { resolveRequestTrackingId } from '../request-cancel.ts'; import { SessionStore } from '../session-store.ts'; import { readReplayScriptMetadata, type ReplayScriptMetadata } from '../../replay/script.ts'; +import { readMaestroFlowName } from '../../compat/maestro/replay-flow.ts'; const GLOB_PATTERN_CHARS = /[*?[\]{}]/; @@ -14,6 +15,7 @@ export type ReplayTestDiscoveryEntry = | { kind: 'run'; path: string; + title?: string; metadata: ReplayScriptMetadata; } | { @@ -42,13 +44,14 @@ export function discoverReplayTestEntries(params: { for (const filePath of filePaths) { const script = fs.readFileSync(filePath, 'utf8'); const metadata = readReplayScriptMetadata(script); + const title = readReplayTestTitle(script, filePath, replayBackend); if (!platformFilter) { - entries.push({ kind: 'run', path: filePath, metadata }); + entries.push({ kind: 'run', path: filePath, title, metadata }); continue; } if (!metadata.platform) { if (isMaestroReplayBackend(replayBackend)) { - entries.push({ kind: 'run', path: filePath, metadata }); + entries.push({ kind: 'run', path: filePath, title, metadata }); } else { entries.push({ kind: 'skip', @@ -62,7 +65,7 @@ export function discoverReplayTestEntries(params: { if (!matchesPlatformFilter(platformFilter, metadata.platform)) { continue; } - entries.push({ kind: 'run', path: filePath, metadata }); + entries.push({ kind: 'run', path: filePath, title, metadata }); } const runnableCount = entries.filter((entry) => entry.kind === 'run').length; @@ -177,6 +180,16 @@ function isMaestroReplayBackend(replayBackend: string | undefined): boolean { return replayBackend === 'maestro'; } +function readReplayTestTitle( + script: string, + filePath: string, + replayBackend: string | undefined, +): string | undefined { + return isMaestroReplayBackend(replayBackend) && path.extname(filePath) !== '.ad' + ? readMaestroFlowName(script) + : undefined; +} + function looksLikeGlob(value: string): boolean { return GLOB_PATTERN_CHARS.test(value); } diff --git a/src/daemon/handlers/session-test.ts b/src/daemon/handlers/session-test.ts index 6430f43fc..39fd2d2e2 100644 --- a/src/daemon/handlers/session-test.ts +++ b/src/daemon/handlers/session-test.ts @@ -150,9 +150,14 @@ async function runReplayTestCase( let finalResponse: DaemonResponse | undefined; let finalSessionName = ''; let attempts = 0; + let finalAttemptDurationMs = 0; + const attemptFailures: NonNullable< + Extract['attemptFailures'] + > = []; for (let attemptIndex = 0; attemptIndex <= retries; attemptIndex += 1) { attempts = attemptIndex + 1; + const attemptStartedAt = Date.now(); const testSessionName = buildReplayTestSessionName( sessionName, suiteInvocationId, @@ -181,6 +186,7 @@ async function runReplayTestCase( runReplay, cleanupSession, }); + finalAttemptDurationMs = Date.now() - attemptStartedAt; materializeReplayTestAttemptArtifacts({ response, filePath: entry.path, @@ -192,6 +198,11 @@ async function runReplayTestCase( finalResponse = response; finalSessionName = testSessionName; if (response.ok) break; + attemptFailures.push({ + attempt: attempts, + message: response.error.message, + durationMs: finalAttemptDurationMs, + }); if (isReplayInfrastructureFailure(response)) break; if (attemptIndex >= retries) break; emitRequestProgress({ @@ -222,13 +233,16 @@ async function runReplayTestCase( }); return { file: entry.path, + title: entry.title, session: finalSessionName, status: 'passed', durationMs, + finalAttemptDurationMs, attempts, artifactsDir: testArtifactsDir, replayed: typeof finalResponse.data?.replayed === 'number' ? finalResponse.data.replayed : 0, healed: typeof finalResponse.data?.healed === 'number' ? finalResponse.data.healed : 0, + ...(attemptFailures.length > 0 ? { attemptFailures } : {}), }; } @@ -252,6 +266,7 @@ async function runReplayTestCase( }); return { file: entry.path, + title: entry.title, session: finalSessionName, status: 'failed', durationMs, diff --git a/src/daemon/types.ts b/src/daemon/types.ts index afb2494da..7eeb54df5 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -38,17 +38,21 @@ export type ReplaySuiteTestSkipReason = 'skipped-by-filter'; export type ReplaySuiteTestPassed = { file: string; + title?: string; session: string; status: 'passed'; durationMs: number; + finalAttemptDurationMs?: number; attempts: number; artifactsDir?: string; replayed: number; healed: number; + attemptFailures?: ReplaySuiteAttemptFailure[]; }; export type ReplaySuiteTestFailed = { file: string; + title?: string; session: string; status: 'failed'; durationMs: number; @@ -66,12 +70,19 @@ export type ReplaySuiteTestFailed = { export type ReplaySuiteTestSkipped = { file: string; + title?: string; status: 'skipped'; durationMs: 0; reason: ReplaySuiteTestSkipReason; message: string; }; +export type ReplaySuiteAttemptFailure = { + attempt: number; + message: string; + durationMs?: number; +}; + export type ReplaySuiteTestResult = | ReplaySuiteTestPassed | ReplaySuiteTestFailed diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index e6d0d70e2..318c69708 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -1442,6 +1442,7 @@ test('command usage describes test suite flags', () => { assert.match(help, /--timeout /); assert.match(help, /--retries /); assert.match(help, /--artifacts-dir /); + assert.match(help, /test --verbose prints per-test step timings without debug logs/); }); test('command usage describes delayed typing flags', () => { diff --git a/src/utils/cli-flags.ts b/src/utils/cli-flags.ts index aa0470ddc..d3bbfb710 100644 --- a/src/utils/cli-flags.ts +++ b/src/utils/cli-flags.ts @@ -673,7 +673,8 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ names: ['--debug', '--verbose', '-v'], type: 'boolean', usageLabel: '--debug, --verbose, -v', - usageDescription: 'Enable debug diagnostics and stream daemon/runner logs', + usageDescription: + 'Enable debug diagnostics; test --verbose prints per-test step timings without debug logs', }, { key: 'json',