Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion packages/android/src/agent-tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,22 @@ export class AndroidMidsceneTools extends BaseMidsceneTools<

debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
const reportOptions = this.readCliReportAgentOptions();
// Behavior args (e.g. screenshotShrinkFactor) may have been supplied only
// on `connect`. Because CLI commands run in separate, stateless processes,
// fall back to the values persisted in the report session so options
// survive across `connect` → `act`/`tap`. Command-provided args win.
const persistedBehaviorArgs = extractAgentBehaviorInitArgs(
this.readPersistedAgentInitArgs() as
| Partial<AgentBehaviorInitArgs>
| undefined,
);
const behaviorArgs: AgentBehaviorInitArgs = {
...(persistedBehaviorArgs ?? {}),
...(extractAgentBehaviorInitArgs(initArgs) ?? {}),
};
const agent = await agentFromAdbDevice(deviceId, {
autoDismissKeyboard: false,
...(extractAgentBehaviorInitArgs(initArgs) ?? {}),
...behaviorArgs,
...(initArgs?.useScrcpy ? { scrcpyConfig: { enabled: true } } : {}),
...(reportOptions ?? {}),
});
Expand All @@ -133,6 +146,15 @@ export class AndroidMidsceneTools extends BaseMidsceneTools<
const reportSession = this.createNewCliReportSession(
deviceId ?? 'auto',
);
if (reportSession) {
// Persist behavior args so later stateless commands inherit them.
const behaviorArgs = extractAgentBehaviorInitArgs(initArgs);
if (behaviorArgs && Object.keys(behaviorArgs).length > 0) {
reportSession.agentInitArgs = {
...behaviorArgs,
} as Record<string, unknown>;
Comment on lines +153 to +155

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Persist the connected device for stateless Android commands

When midscene-android connect --device-id X --screenshot-shrink-factor 2 is followed by a stateless act/tap without repeating --device-id, this writes only behavior args into the session. ensureAgent() then rebuilds with deviceId still undefined, so on machines with multiple ADB devices the follow-up command can run against whichever device auto-detect returns instead of the device the user just connected to, while still using that device's report session/options.

Useful? React with 👍 / 👎.

}
}
this.commitCliReportSession(reportSession);
if (this.agent) {
try {
Expand Down
45 changes: 45 additions & 0 deletions packages/android/tests/unit-test/cli.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
* instance (only the adb agent factory is mocked). This complements the
* handler-level unit tests by locking down the CLI argument plumbing.
*/
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { runToolsCLI } from '@midscene/shared/cli';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { agentFromAdbDevice } from '../../src/agent';
Expand Down Expand Up @@ -165,6 +168,48 @@ describe('Android CLI integration', () => {
});
});

it('inherits connect-time shrink factor on a later stateless command', async () => {
  // Each CLI invocation is its own process, so anything passed only to
  // `connect` has to travel through the report session stored under
  // MIDSCENE_RUN_DIR. Point that at a throwaway directory for this case.
  const sessionDir = mkdtempSync(join(tmpdir(), 'midscene-shrink-'));
  vi.stubEnv('MIDSCENE_RUN_DIR', sessionDir);

  const adbFactory = vi.mocked(agentFromAdbDevice);
  // A fresh tools instance per call stands in for a fresh process.
  const runCli = (argv: string[]) =>
    runToolsCLI(new AndroidMidsceneTools(), 'midscene-android', {
      stripPrefix: 'android_',
      argv,
    });

  try {
    // "Process" one: connect carrying the shrink factor, nothing else.
    adbFactory.mockResolvedValue(createMockAgent() as any);
    await runCli([
      'connect',
      '--device-id',
      'shrink-device',
      '--screenshot-shrink-factor',
      '2',
    ]);

    // "Process" two: act, with neither the device id nor the factor repeated.
    const actAgent = createMockAgent();
    adbFactory.mockResolvedValue(actAgent as any);
    await runCli(['act', '--prompt', 'open settings']);

    // The agent for `act` must have been built with the factor from `connect`.
    expect(adbFactory).toHaveBeenLastCalledWith(
      undefined,
      expect.objectContaining({ screenshotShrinkFactor: 2 }),
    );
    expect(actAgent.aiAction).toHaveBeenCalledWith('open settings', {
      deepThink: false,
    });
  } finally {
    // Undo the env stub and drop the temp directory even when an assertion fails.
    vi.unstubAllEnvs();
    rmSync(sessionDir, { recursive: true, force: true });
  }
});

it('enables scrcpy when --use-scrcpy is provided', async () => {
const tools = new AndroidMidsceneTools();

Expand Down
18 changes: 18 additions & 0 deletions packages/shared/src/agent-tools/base-tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,24 @@ export abstract class BaseMidsceneTools<
return readCliReportSession(sessionName)?.reportFileName;
}

/**
 * Fetch the `agentInitArgs` stored on the current CLI report session, so a
 * later stateless CLI command can rebuild its agent with the behavior options
 * (e.g. `screenshotShrinkFactor`) that were captured at `connect` time.
 *
 * @returns The stored args object, or `undefined` when there is no session
 *   name, or the session's `agentInitArgs` is absent or is not a non-array
 *   object.
 */
protected readPersistedAgentInitArgs(): Record<string, unknown> | undefined {
  const sessionName = this.getCliReportSessionName();
  const persisted = sessionName
    ? readCliReportSession(sessionName)?.agentInitArgs
    : undefined;

  // Session data comes from outside this process; accept only a real,
  // non-array object and treat anything else as "nothing persisted".
  if (
    persisted == null ||
    typeof persisted !== 'object' ||
    Array.isArray(persisted)
  ) {
    return undefined;
  }
  return persisted;
}

protected readCliReportAgentOptions():
| {
reportFileName: string;
Expand Down
8 changes: 8 additions & 0 deletions packages/shared/src/agent-tools/cli-report-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ export interface CliReportSession {
reportFileName: string;
reportPath: string;
createdAt: number;
/**
* Agent construction args captured at `connect` time. CLI commands are
* stateless between invocations (each runs in its own process), so options
* passed only to `connect` — e.g. `screenshotShrinkFactor` — would otherwise
* be lost on the next `act`/`tap` process. Persisting them here lets the
* follow-up command rebuild the agent with the same behavior.
*/
agentInitArgs?: Record<string, unknown>;
}

const sessionDirName = 'cli-report-session';
Expand Down
Loading