web-infra-dev · yuyutaotao · Jun 27, 2026 · chatgpt-codex-connector · Jun 27, 2026
diff --git a/packages/android/src/agent-tools.ts b/packages/android/src/agent-tools.ts
@@ -105,9 +105,22 @@ export class AndroidMidsceneTools extends BaseMidsceneTools<
 
     debug('Creating Android agent with deviceId:', deviceId || 'auto-detect');
     const reportOptions = this.readCliReportAgentOptions();
+    // Behavior args (e.g. screenshotShrinkFactor) may have been supplied only
+    // on `connect`. Because CLI commands run in separate, stateless processes,
+    // fall back to the values persisted in the report session so options
+    // survive across `connect` → `act`/`tap`. Command-provided args win.
+    const persistedBehaviorArgs = extractAgentBehaviorInitArgs(
+      this.readPersistedAgentInitArgs() as
+        | Partial<AgentBehaviorInitArgs>
+        | undefined,
+    );
+    const behaviorArgs: AgentBehaviorInitArgs = {
+      ...(persistedBehaviorArgs ?? {}),
+      ...(extractAgentBehaviorInitArgs(initArgs) ?? {}),
+    };
     const agent = await agentFromAdbDevice(deviceId, {
       autoDismissKeyboard: false,
-      ...(extractAgentBehaviorInitArgs(initArgs) ?? {}),
+      ...behaviorArgs,
       ...(initArgs?.useScrcpy ? { scrcpyConfig: { enabled: true } } : {}),
       ...(reportOptions ?? {}),
     });
@@ -133,6 +146,15 @@ export class AndroidMidsceneTools extends BaseMidsceneTools<
           const reportSession = this.createNewCliReportSession(
             deviceId ?? 'auto',
           );
+          if (reportSession) {
+            // Persist behavior args so later stateless commands inherit them.
+            const behaviorArgs = extractAgentBehaviorInitArgs(initArgs);
+            if (behaviorArgs && Object.keys(behaviorArgs).length > 0) {
+              reportSession.agentInitArgs = {
+                ...behaviorArgs,
+              } as Record<string, unknown>;
+            }
+          }
           this.commitCliReportSession(reportSession);
           if (this.agent) {
             try {

diff --git a/packages/android/tests/unit-test/cli.test.ts b/packages/android/tests/unit-test/cli.test.ts
@@ -6,6 +6,9 @@
  * instance (only the adb agent factory is mocked). This complements the
  * handler-level unit tests by locking down the CLI argument plumbing.
  */
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
 import { runToolsCLI } from '@midscene/shared/cli';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { agentFromAdbDevice } from '../../src/agent';
@@ -165,6 +168,48 @@ describe('Android CLI integration', () => {
     });
   });
 
+  it('inherits connect-time shrink factor on a later stateless command', async () => {
+    // Simulates two separate CLI processes with two fresh AndroidMidsceneTools:
+    // args given to `connect` must reach the later `act` via the report session.
+    const runDir = mkdtempSync(join(tmpdir(), 'midscene-shrink-'));
+    vi.stubEnv('MIDSCENE_RUN_DIR', runDir);
+
+    try {
+      // Process 1: `connect` with --screenshot-shrink-factor 2; no action is run.
+      vi.mocked(agentFromAdbDevice).mockResolvedValue(createMockAgent() as any);
+      await runToolsCLI(new AndroidMidsceneTools(), 'midscene-android', {
+        stripPrefix: 'android_',
+        argv: [
+          'connect',
+          '--device-id',
+          'shrink-device',
+          '--screenshot-shrink-factor',
+          '2',
+        ],
+      });
+
+      // Process 2: `act` with only --prompt; no shrink factor or device id given.
+      const actAgent = createMockAgent();
+      vi.mocked(agentFromAdbDevice).mockResolvedValue(actAgent as any);
+      await runToolsCLI(new AndroidMidsceneTools(), 'midscene-android', {
+        stripPrefix: 'android_',
+        argv: ['act', '--prompt', 'open settings'],
+      });
+
+      // Last factory call (from `act`): deviceId undefined, factor 2 inherited.
+      expect(agentFromAdbDevice).toHaveBeenLastCalledWith(
+        undefined,
+        expect.objectContaining({ screenshotShrinkFactor: 2 }),
+      );
+      expect(actAgent.aiAction).toHaveBeenCalledWith('open settings', {
+        deepThink: false,
+      });
+    } finally {
+      vi.unstubAllEnvs();
+      rmSync(runDir, { recursive: true, force: true });
+    }
+  });
+
   it('enables scrcpy when --use-scrcpy is provided', async () => {
     const tools = new AndroidMidsceneTools();
 

diff --git a/packages/shared/src/agent-tools/base-tools.ts b/packages/shared/src/agent-tools/base-tools.ts
@@ -231,6 +231,24 @@ export abstract class BaseMidsceneTools<
     return readCliReportSession(sessionName)?.reportFileName;
   }
 
+  /**
+   * Load the agent init args that `connect` stored on the CLI report
+   * session, so a later stateless command can recreate the agent with the
+   * same behavior (e.g. `screenshotShrinkFactor`). Yields `undefined` when
+   * no session is available or the stored value is not a non-array object.
+   */
+  protected readPersistedAgentInitArgs(): Record<string, unknown> | undefined {
+    const name = this.getCliReportSessionName();
+    // Without a session name there is nothing to look up.
+    const stored = name
+      ? readCliReportSession(name)?.agentInitArgs
+      : undefined;
+    // Only a non-array object counts as valid init args.
+    const isPlainObject =
+      !!stored && typeof stored === 'object' && !Array.isArray(stored);
+    return isPlainObject ? stored : undefined;
+  }
+
   protected readCliReportAgentOptions():
     | {
         reportFileName: string;

diff --git a/packages/shared/src/agent-tools/cli-report-session.ts b/packages/shared/src/agent-tools/cli-report-session.ts
@@ -9,6 +9,14 @@ export interface CliReportSession {
   reportFileName: string;
   reportPath: string;
   createdAt: number;
+  /**
+   * Agent construction args captured at `connect` time. CLI commands are
+   * stateless between invocations (each runs in its own process), so options
+   * passed only to `connect` — e.g. `screenshotShrinkFactor` — would otherwise
+   * be lost on the next `act`/`tap` process. Persisting them here lets the
+   * follow-up command rebuild the agent with the same behavior.
+   */
+  agentInitArgs?: Record<string, unknown>;
 }
 
 const sessionDirName = 'cli-report-session';