Skip to content

Commit 5331372

Browse files
committed
refactor: reduce Maestro replay quality debt
1 parent d7a7ce4 commit 5331372

7 files changed

Lines changed: 510 additions & 368 deletions

File tree

src/commands/selector-read.ts

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,7 @@ export const findCommand: RuntimeCommand<FindReadCommandOptions, FindReadCommand
166166
return await waitForFindMatch(runtime, options, locator);
167167
}
168168

169-
const capture = await captureSelectorSnapshot(runtime, options, {
170-
updateSession: true,
171-
scope: shouldScopeFind(locator) ? options.query : undefined,
172-
});
173-
const match = findBestMatchesByLocator(capture.snapshot.nodes, locator, options.query, {
174-
requireRect: false,
175-
}).matches[0];
169+
const { capture, match } = await findFirstLocatorMatch(runtime, options, locator);
176170
if (!match) {
177171
throw new AppError('COMMAND_FAILED', 'find did not match any element');
178172
}
@@ -412,19 +406,28 @@ async function waitForFindMatch(
412406
const timeout = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
413407
const start = now(runtime);
414408
while (now(runtime) - start < timeout) {
415-
const capture = await captureSelectorSnapshot(runtime, options, {
416-
updateSession: true,
417-
scope: shouldScopeFind(locator) ? options.query : undefined,
418-
});
419-
const match = findBestMatchesByLocator(capture.snapshot.nodes, locator, options.query, {
420-
requireRect: false,
421-
}).matches[0];
409+
const { match } = await findFirstLocatorMatch(runtime, options, locator);
422410
if (match) return { kind: 'found', found: true, waitedMs: now(runtime) - start };
423411
await sleep(runtime, POLL_INTERVAL_MS);
424412
}
425413
throw new AppError('COMMAND_FAILED', 'find wait timed out');
426414
}
427415

/**
 * Captures one selector snapshot and picks the first locator match from it.
 *
 * The capture is requested with `updateSession: true`; `options.query` is
 * passed as the capture scope only when `shouldScopeFind(locator)` is truthy.
 * Matching is performed with `requireRect: false`.
 *
 * @param runtime - Runtime forwarded to `captureSelectorSnapshot`.
 * @param options - Find options; `options.query` is used for scoping and matching.
 * @param locator - Locator forwarded to `findBestMatchesByLocator`.
 * @returns The capture together with the first entry of the matcher's
 *   `matches`, which is `undefined` when that list has no first entry.
 */
async function findFirstLocatorMatch(
  runtime: AgentDeviceRuntime,
  options: FindReadCommandOptions,
  locator: FindLocator,
): Promise<{ capture: CapturedSnapshot; match: SnapshotNode | undefined }> {
  const capture = await captureSelectorSnapshot(runtime, options, {
    updateSession: true,
    scope: shouldScopeFind(locator) ? options.query : undefined,
  });
  const { matches } = findBestMatchesByLocator(capture.snapshot.nodes, locator, options.query, {
    requireRect: false,
  });
  return { capture, match: matches[0] };
}
430+
428431
async function waitForSelector(
429432
runtime: AgentDeviceRuntime,
430433
options: WaitCommandOptions,

src/commands/system.ts

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -207,25 +207,20 @@ export const keyboardCommand: RuntimeCommand<
207207
throw new AppError('UNSUPPORTED_OPERATION', 'system.keyboard is not supported by this backend');
208208
}
209209
const action = options.action ?? 'status';
210-
if (
211-
action !== 'status' &&
212-
action !== 'get' &&
213-
action !== 'dismiss' &&
214-
action !== 'enter' &&
215-
action !== 'return'
216-
) {
210+
if (!isKeyboardAction(action)) {
217211
throw new AppError(
218212
'INVALID_ARGS',
219213
'system.keyboard action must be status, get, dismiss, enter, or return',
220214
);
221215
}
222216
const state = await runtime.backend.setKeyboard(toBackendContext(runtime, options), { action });
223217
const formattedBackendResult = toBackendResult(state);
218+
const keyboardState = isKeyboardResult(state) ? state : {};
224219
if (action === 'enter' || action === 'return') {
225220
return {
226221
kind: 'keyboardEnterPressed',
227222
action: 'enter',
228-
state: isKeyboardResult(state) ? state : {},
223+
state: keyboardState,
229224
...(formattedBackendResult ? { backendResult: formattedBackendResult } : {}),
230225
...successText('Keyboard enter pressed'),
231226
};
@@ -235,15 +230,15 @@ export const keyboardCommand: RuntimeCommand<
235230
return {
236231
kind: 'keyboardDismissed',
237232
action,
238-
state: isKeyboardResult(state) ? state : {},
233+
state: keyboardState,
239234
...(formattedBackendResult ? { backendResult: formattedBackendResult } : {}),
240235
...successText(dismissed === false ? 'Keyboard already hidden' : 'Keyboard dismissed'),
241236
};
242237
}
243238
return {
244239
kind: 'keyboardState',
245240
action,
246-
state: isKeyboardResult(state) ? state : {},
241+
state: keyboardState,
247242
...(formattedBackendResult ? { backendResult: formattedBackendResult } : {}),
248243
};
249244
};
@@ -373,25 +368,41 @@ function normalizeAlertResult(
373368
action: BackendAlertAction,
374369
result: BackendAlertResult,
375370
): SystemAlertCommandResult {
376-
if (action === 'get') {
377-
if (result.kind !== 'alertStatus') {
378-
throw new AppError('COMMAND_FAILED', 'system.alert get returned an invalid backend result');
379-
}
380-
return { kind: 'alertStatus', action, alert: result.alert };
371+
switch (action) {
372+
case 'get':
373+
return normalizeAlertStatusResult(result);
374+
case 'wait':
375+
return normalizeAlertWaitResult(result);
376+
default:
377+
return normalizeAlertHandledResult(action, result);
381378
}
382-
if (action === 'wait') {
383-
if (result.kind !== 'alertWait') {
384-
throw new AppError('COMMAND_FAILED', 'system.alert wait returned an invalid backend result');
385-
}
386-
return {
387-
kind: 'alertWait',
388-
action,
389-
alert: result.alert,
390-
...(result.waitedMs !== undefined ? { waitedMs: result.waitedMs } : {}),
391-
...(result.timedOut !== undefined ? { timedOut: result.timedOut } : {}),
392-
...successText(result.alert ? 'Alert visible' : 'Alert wait timed out'),
393-
};
379+
}
380+
/**
 * Builds the `alertStatus` command result for the `get` alert action.
 *
 * @param result - Backend alert result to validate and convert.
 * @returns An `alertStatus` result carrying `result.alert` and `action: 'get'`.
 * @throws AppError with code `COMMAND_FAILED` when `result.kind` is not `alertStatus`.
 */
function normalizeAlertStatusResult(result: BackendAlertResult): SystemAlertCommandResult {
  if (result.kind !== 'alertStatus') {
    throw new AppError('COMMAND_FAILED', 'system.alert get returned an invalid backend result');
  }
  return { kind: 'alertStatus', action: 'get', alert: result.alert };
}
387+
/**
 * Builds the `alertWait` command result for the `wait` alert action.
 *
 * `waitedMs` and `timedOut` are copied only when the backend supplied them
 * (i.e. they are not `undefined`). The success text is 'Alert visible' when
 * `result.alert` is truthy and 'Alert wait timed out' otherwise.
 *
 * @param result - Backend alert result to validate and convert.
 * @returns An `alertWait` result with `action: 'wait'`.
 * @throws AppError with code `COMMAND_FAILED` when `result.kind` is not `alertWait`.
 */
function normalizeAlertWaitResult(result: BackendAlertResult): SystemAlertCommandResult {
  if (result.kind !== 'alertWait') {
    throw new AppError('COMMAND_FAILED', 'system.alert wait returned an invalid backend result');
  }
  return {
    kind: 'alertWait',
    action: 'wait',
    alert: result.alert,
    ...(result.waitedMs !== undefined ? { waitedMs: result.waitedMs } : {}),
    ...(result.timedOut !== undefined ? { timedOut: result.timedOut } : {}),
    ...successText(result.alert ? 'Alert visible' : 'Alert wait timed out'),
  };
}
401+
402+
function normalizeAlertHandledResult(
403+
action: Extract<BackendAlertAction, 'accept' | 'dismiss'>,
404+
result: BackendAlertResult,
405+
): SystemAlertCommandResult {
395406
if (result.kind !== 'alertHandled') {
396407
throw new AppError(
397408
'COMMAND_FAILED',
@@ -408,6 +419,18 @@ function normalizeAlertResult(
408419
};
409420
}
410421

/**
 * Type guard for the keyboard actions accepted by `system.keyboard`.
 *
 * @param action - Candidate action name.
 * @returns `true` only for the exact strings `status`, `get`, `dismiss`,
 *   `enter`, or `return` (comparison is case-sensitive).
 */
function isKeyboardAction(
  action: string,
): action is 'status' | 'get' | 'dismiss' | 'enter' | 'return' {
  switch (action) {
    case 'status':
    case 'get':
    case 'dismiss':
    case 'enter':
    case 'return':
      return true;
    default:
      return false;
  }
}
433+
411434
/**
 * Type guard that accepts any non-null value whose `typeof` is `'object'`.
 *
 * No individual fields are inspected, so arrays and other object values pass
 * as well; `null`, `undefined`, and primitives are rejected.
 */
function isKeyboardResult(value: unknown): value is BackendKeyboardResult {
  if (typeof value !== 'object') return false;
  return value !== null;
}

src/compat/maestro/interactions.ts

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -189,27 +189,56 @@ export function convertSwipe(value: unknown, context: MaestroParseContext): Sess
189189
assertOnlyKeys(value, 'swipe', ['start', 'end', 'direction', 'duration', 'from', 'label']);
190190
const from = value.from ?? (typeof value.label === 'string' ? value.label : undefined);
191191
if (from !== undefined) {
192-
const direction = readSwipeDirection(
193-
typeof value.direction === 'string' ? value.direction : 'up',
194-
);
195-
return action(MAESTRO_RUNTIME_COMMAND.swipeOn, [
196-
maestroSelector(from, 'swipe.from', [], context),
197-
direction,
198-
...swipeDurationPositionals(value),
199-
]);
192+
return convertTargetedSwipe(value, from, context);
200193
}
201194
if (typeof value.direction === 'string') {
202195
return action('scroll', readScrollPositionalsFromDirectionSwipe(value.direction));
203196
}
204-
if (typeof value.start !== 'string' || typeof value.end !== 'string') {
205-
throw unsupportedMaestroSyntax('Only Maestro swipe start/end coordinates are supported.');
206-
}
207-
const start = parseMaestroPoint(value.start);
208-
const end = parseMaestroPoint(value.end);
209-
const durationMs =
210-
typeof value.duration === 'number' && Number.isFinite(value.duration)
211-
? String(Math.max(16, Math.floor(value.duration)))
212-
: undefined;
197+
return convertCoordinateSwipe(value);
198+
}
199+
/**
 * Converts a Maestro swipe that names a source element into a `swipeOn` action.
 *
 * The direction falls back to `'up'` when `value.direction` is not a string.
 * The resulting positionals are: the selector built from `from`, the
 * direction, then whatever `swipeDurationPositionals(value)` yields.
 *
 * @param value - Raw swipe mapping from the Maestro flow.
 * @param from - Source element reference (taken from `from` or `label` by the caller).
 * @param context - Parse context forwarded to `maestroSelector`.
 * @returns The `MAESTRO_RUNTIME_COMMAND.swipeOn` session action.
 */
function convertTargetedSwipe(
  value: Record<string, unknown>,
  from: unknown,
  context: MaestroParseContext,
): SessionAction {
  const requestedDirection = typeof value.direction === 'string' ? value.direction : 'up';
  const direction = readSwipeDirection(requestedDirection);
  return action(MAESTRO_RUNTIME_COMMAND.swipeOn, [
    maestroSelector(from, 'swipe.from', [], context),
    direction,
    ...swipeDurationPositionals(value),
  ]);
}
214+
/**
 * Converts a Maestro swipe given as `start`/`end` coordinates into a session action.
 *
 * Points are read (and validated) before the duration, then both are handed to
 * `convertCoordinateSwipePoints`.
 *
 * @param value - Raw swipe mapping from the Maestro flow.
 * @returns The session action produced by `convertCoordinateSwipePoints`.
 */
function convertCoordinateSwipe(value: Record<string, unknown>): SessionAction {
  const { start, end } = readCoordinateSwipePoints(value);
  const durationMs = readSwipeDurationMs(value.duration);
  return convertCoordinateSwipePoints(start, end, durationMs);
}
220+
/**
 * Parses the `start` and `end` points of a coordinate swipe.
 *
 * @param value - Raw swipe mapping from the Maestro flow.
 * @returns Both points as parsed by `parseMaestroPoint` (`start` is parsed first).
 * @throws The error created by `unsupportedMaestroSyntax` when either `start`
 *   or `end` is not a string.
 */
function readCoordinateSwipePoints(value: Record<string, unknown>): {
  start: ReturnType<typeof parseMaestroPoint>;
  end: ReturnType<typeof parseMaestroPoint>;
} {
  if (typeof value.start !== 'string' || typeof value.end !== 'string') {
    throw unsupportedMaestroSyntax('Only Maestro swipe start/end coordinates are supported.');
  }
  const start = parseMaestroPoint(value.start);
  const end = parseMaestroPoint(value.end);
  return { start, end };
}
230+
/**
 * Normalizes a swipe duration into a millisecond string.
 *
 * @param duration - Raw `duration` value from the Maestro flow.
 * @returns The duration floored to an integer and raised to at least 16,
 *   rendered as a string; `undefined` when `duration` is not a finite number
 *   (non-numbers, `NaN`, and infinities).
 */
function readSwipeDurationMs(duration: unknown): string | undefined {
  // Lower bound applied to every accepted duration.
  const minimumDurationMs = 16;
  if (typeof duration !== 'number' || !Number.isFinite(duration)) {
    return undefined;
  }
  return String(Math.max(minimumDurationMs, Math.floor(duration)));
}
236+
237+
function convertCoordinateSwipePoints(
238+
start: ReturnType<typeof parseMaestroPoint>,
239+
end: ReturnType<typeof parseMaestroPoint>,
240+
durationMs: string | undefined,
241+
): SessionAction {
213242
if (start.kind === 'absolute' && end.kind === 'absolute') {
214243
return action('swipe', [
215244
String(start.x),

src/compat/maestro/replay-flow.ts

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -82,48 +82,57 @@ function optimizeInputTextActions(
8282
actions: SessionAction[],
8383
actionLines: number[],
8484
): { actions: SessionAction[]; actionLines: number[] } {
85-
const maestroTapTimeoutMs = '30000';
8685
const mergedActions: SessionAction[] = [];
8786
const mergedLines: number[] = [];
8887
for (let index = 0; index < actions.length; index += 1) {
8988
const action = actions[index];
90-
const nextAction = actions[index + 1];
91-
const typedAfterTap = readPlainTypeText(nextAction);
92-
if (typedAfterTap !== null) {
93-
const tapSelector = readPlainMaestroTapSelector(action);
94-
const pressEnterAfterType =
95-
actions[index + 2]?.command === MAESTRO_RUNTIME_COMMAND.pressEnter;
96-
if (tapSelector !== null && pressEnterAfterType) {
97-
mergedActions.push({
98-
...action,
99-
command: 'wait',
100-
positionals: [tapSelector, maestroTapTimeoutMs],
101-
});
102-
mergedLines.push(actionLines[index] ?? 1);
103-
mergedActions.push({
104-
...nextAction,
105-
command: 'fill',
106-
positionals: [tapSelector, typedAfterTap],
107-
flags: action.flags,
108-
});
109-
mergedLines.push(actionLines[index] ?? 1);
110-
mergedActions.push(actions[index + 2] as SessionAction);
111-
mergedLines.push(actionLines[index + 2] ?? actionLines[index] ?? 1);
112-
index += 2;
113-
continue;
114-
}
115-
if (tapSelector !== null) {
116-
mergedActions.push(clearMaestroNonHittableTap(action));
117-
mergedLines.push(actionLines[index] ?? 1);
118-
continue;
119-
}
89+
const optimized = optimizeTypedAfterTap(actions, actionLines, index);
90+
if (optimized) {
91+
mergedActions.push(...optimized.actions);
92+
mergedLines.push(...optimized.actionLines);
93+
index += optimized.consumed - 1;
94+
continue;
12095
}
12196
mergedActions.push(action);
12297
mergedLines.push(actionLines[index] ?? 1);
12398
}
12499
return { actions: mergedActions, actionLines: mergedLines };
125100
}
126101

/**
 * Rewrites a plain tap that is immediately followed by plain typed text.
 *
 * Looks at `actions[index]` (the tap) and `actions[index + 1]` (the typed
 * text). When either is not recognised by `readPlainMaestroTapSelector` /
 * `readPlainTypeText`, nothing is rewritten.
 *
 * - If the action after the typed text is not a press-enter command, only the
 *   tap is replaced, by `clearMaestroNonHittableTap(action)`; the typed-text
 *   action is left for the caller to process.
 * - Otherwise the tap, the typed text and the press-enter are replaced by a
 *   `wait` on the tap selector, a `fill` of that selector with the typed text
 *   (carrying the tap's flags), and the original press-enter action.
 *
 * @param actions - Full list of session actions being optimized.
 * @param actionLines - Source line for each action, parallel to `actions`.
 * @param index - Position of the candidate tap action.
 * @returns The replacement actions, their lines, and how many input actions
 *   were consumed; `null` when no rewrite applies.
 */
function optimizeTypedAfterTap(
  actions: SessionAction[],
  actionLines: number[],
  index: number,
): { actions: SessionAction[]; actionLines: number[]; consumed: number } | null {
  // Timeout positional given to the generated `wait` action.
  const maestroTapTimeoutMs = '30000';
  const action = actions[index];
  const nextAction = actions[index + 1];
  const typedAfterTap = readPlainTypeText(nextAction);
  const tapSelector = readPlainMaestroTapSelector(action);
  if (typedAfterTap === null || tapSelector === null) return null;
  const line = actionLines[index] ?? 1;
  const enterAction = actions[index + 2];
  if (enterAction?.command !== MAESTRO_RUNTIME_COMMAND.pressEnter) {
    return { actions: [clearMaestroNonHittableTap(action)], actionLines: [line], consumed: 1 };
  }
  return {
    actions: [
      {
        ...action,
        command: 'wait',
        positionals: [tapSelector, maestroTapTimeoutMs],
      },
      {
        ...nextAction,
        command: 'fill',
        positionals: [tapSelector, typedAfterTap],
        flags: action.flags,
      },
      enterAction,
    ],
    // The generated `wait` and `fill` are both attributed to the tap's line.
    actionLines: [line, line, actionLines[index + 2] ?? line],
    consumed: 3,
  };
}
135+
127136
function clearMaestroNonHittableTap(action: SessionAction): SessionAction {
128137
const maestro = { ...(action.flags?.maestro ?? {}) };
129138
delete maestro.allowNonHittableCoordinateFallback;

0 commit comments

Comments
 (0)