From b72061fcacb9b62e20a0607ab8ace433c42a5431 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 27 Jun 2026 04:16:21 +0000 Subject: [PATCH] fix(android): persist connect-time agent args for stateless CLI commands CLI commands run in separate, stateless processes, so agent behavior options (e.g. screenshotShrinkFactor) passed only to `connect` were lost on the following `act`/`tap` process, which rebuilt the agent without them. As a result `--screenshot-shrink-factor` set at connect time had no effect on `act`, keeping token usage at the unshrunk level. Persist the agent behavior args into the CLI report session at `connect` and merge them back in `ensureAgent` so a later command inherits them. Command-provided args still take priority. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01B2U6dXW6An9rxzpSTd6fhM --- packages/android/src/agent-tools.ts | 24 +++++++++- packages/android/tests/unit-test/cli.test.ts | 45 +++++++++++++++++++ packages/shared/src/agent-tools/base-tools.ts | 18 ++++++++ .../src/agent-tools/cli-report-session.ts | 8 ++++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/packages/android/src/agent-tools.ts b/packages/android/src/agent-tools.ts index 1e3c13eb62..9d40d980c3 100644 --- a/packages/android/src/agent-tools.ts +++ b/packages/android/src/agent-tools.ts @@ -105,9 +105,22 @@ export class AndroidMidsceneTools extends BaseMidsceneTools< debug('Creating Android agent with deviceId:', deviceId || 'auto-detect'); const reportOptions = this.readCliReportAgentOptions(); + // Behavior args (e.g. screenshotShrinkFactor) may have been supplied only + // on `connect`. Because CLI commands run in separate, stateless processes, + // fall back to the values persisted in the report session so options + // survive across `connect` → `act`/`tap`. Command-provided args win. 
+ const persistedBehaviorArgs = extractAgentBehaviorInitArgs( + this.readPersistedAgentInitArgs() as + | Partial<AgentBehaviorInitArgs> + | undefined, + ); + const behaviorArgs: AgentBehaviorInitArgs = { + ...(persistedBehaviorArgs ?? {}), + ...(extractAgentBehaviorInitArgs(initArgs) ?? {}), + }; const agent = await agentFromAdbDevice(deviceId, { autoDismissKeyboard: false, - ...(extractAgentBehaviorInitArgs(initArgs) ?? {}), + ...behaviorArgs, ...(initArgs?.useScrcpy ? { scrcpyConfig: { enabled: true } } : {}), ...(reportOptions ?? {}), }); @@ -133,6 +146,15 @@ export class AndroidMidsceneTools extends BaseMidsceneTools< const reportSession = this.createNewCliReportSession( deviceId ?? 'auto', ); + if (reportSession) { + // Persist behavior args so later stateless commands inherit them. + const behaviorArgs = extractAgentBehaviorInitArgs(initArgs); + if (behaviorArgs && Object.keys(behaviorArgs).length > 0) { + reportSession.agentInitArgs = { + ...behaviorArgs, + } as Record<string, unknown>; + } + } this.commitCliReportSession(reportSession); if (this.agent) { try { diff --git a/packages/android/tests/unit-test/cli.test.ts b/packages/android/tests/unit-test/cli.test.ts index c12a5b3591..63ee5214e6 100644 --- a/packages/android/tests/unit-test/cli.test.ts +++ b/packages/android/tests/unit-test/cli.test.ts @@ -6,6 +6,9 @@ * instance (only the adb agent factory is mocked). This complements the * handler-level unit tests by locking down the CLI argument plumbing.
*/ +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; import { runToolsCLI } from '@midscene/shared/cli'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { agentFromAdbDevice } from '../../src/agent'; @@ -165,6 +168,48 @@ describe('Android CLI integration', () => { }); }); + it('inherits connect-time shrink factor on a later stateless command', async () => { + // CLI commands run in separate processes, so behavior args set on `connect` + // must be persisted to the report session and picked up by the next `act`. + const runDir = mkdtempSync(join(tmpdir(), 'midscene-shrink-')); + vi.stubEnv('MIDSCENE_RUN_DIR', runDir); + + try { + // First process: connect with a shrink factor (no act). + vi.mocked(agentFromAdbDevice).mockResolvedValue(createMockAgent() as any); + await runToolsCLI(new AndroidMidsceneTools(), 'midscene-android', { + stripPrefix: 'android_', + argv: [ + 'connect', + '--device-id', + 'shrink-device', + '--screenshot-shrink-factor', + '2', + ], + }); + + // Second process: act WITHOUT repeating the shrink factor. + const actAgent = createMockAgent(); + vi.mocked(agentFromAdbDevice).mockResolvedValue(actAgent as any); + await runToolsCLI(new AndroidMidsceneTools(), 'midscene-android', { + stripPrefix: 'android_', + argv: ['act', '--prompt', 'open settings'], + }); + + // The act process should still build the agent with the persisted factor. 
+ expect(agentFromAdbDevice).toHaveBeenLastCalledWith( + undefined, + expect.objectContaining({ screenshotShrinkFactor: 2 }), + ); + expect(actAgent.aiAction).toHaveBeenCalledWith('open settings', { + deepThink: false, + }); + } finally { + vi.unstubAllEnvs(); + rmSync(runDir, { recursive: true, force: true }); + } + }); + it('enables scrcpy when --use-scrcpy is provided', async () => { const tools = new AndroidMidsceneTools(); diff --git a/packages/shared/src/agent-tools/base-tools.ts b/packages/shared/src/agent-tools/base-tools.ts index 31f22833ec..e5e06eabdb 100644 --- a/packages/shared/src/agent-tools/base-tools.ts +++ b/packages/shared/src/agent-tools/base-tools.ts @@ -231,6 +231,24 @@ export abstract class BaseMidsceneTools< return readCliReportSession(sessionName)?.reportFileName; } + /** + * Read the agent construction args persisted at `connect` time, so a + * subsequent stateless CLI command can rebuild the agent with the same + * behavior (e.g. `screenshotShrinkFactor`). Returns `undefined` when no + * session exists or it carries no init args. + */ + protected readPersistedAgentInitArgs(): Record<string, unknown> | undefined { + const sessionName = this.getCliReportSessionName(); + if (!sessionName) { + return undefined; + } + const args = readCliReportSession(sessionName)?.agentInitArgs; + if (args && typeof args === 'object' && !Array.isArray(args)) { + return args; + } + return undefined; + } + protected readCliReportAgentOptions(): | { reportFileName: string; diff --git a/packages/shared/src/agent-tools/cli-report-session.ts b/packages/shared/src/agent-tools/cli-report-session.ts index ffbbfdaca7..43e383f88f 100644 --- a/packages/shared/src/agent-tools/cli-report-session.ts +++ b/packages/shared/src/agent-tools/cli-report-session.ts @@ -9,6 +9,14 @@ export interface CliReportSession { reportFileName: string; reportPath: string; createdAt: number; + /** + * Agent construction args captured at `connect` time.
CLI commands are + * stateless between invocations (each runs in its own process), so options + * passed only to `connect` — e.g. `screenshotShrinkFactor` — would otherwise + * be lost on the next `act`/`tap` process. Persisting them here lets the + * follow-up command rebuild the agent with the same behavior. + */ + agentInitArgs?: Record<string, unknown>; } const sessionDirName = 'cli-report-session';