callstack
diff --git a/‎.github/workflows/perf-nightly.yml‎
Lines changed: 127 additions & 0 deletions b/‎.github/workflows/perf-nightly.yml‎
Lines changed: 127 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎package.json‎
Lines changed: 3 additions & 0 deletions b/‎package.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/perf/cli.ts‎
Lines changed: 81 additions & 0 deletions b/‎scripts/perf/cli.ts‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎scripts/perf/config.ts‎
Lines changed: 87 additions & 0 deletions b/‎scripts/perf/config.ts‎
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,127 @@
+name: Perf Nightly
+
+# End-to-end command perf benchmark (scripts/perf). Scheduled + manual only — perf timing on
+# shared CI runners is noisy, so treat this as a trend/regression signal, not absolute numbers.
+# Reuses the same build artifacts as the device suites: the cached iOS XCUITest runner
+# (setup-apple-replay, ios-runner-prebuilt cache) and the Android replay host, and runs the CLI
+# from source via --experimental-strip-types (no dist build), matching the replay workflows.
+
+on:
+  schedule:
+    - cron: "0 4 * * *"
+  workflow_dispatch:
+    inputs:
+      rounds:
+        description: "Measured rounds per command (samples)"
+        required: false
+        default: "5"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  AGENT_DEVICE_PERF_CLI: "--experimental-strip-types src/bin.ts"
+  PERF_ROUNDS: ${{ github.event.inputs.rounds || '5' }}
+
+jobs:
+  perf-ios:
+    name: iOS Command Perf
+    runs-on: macos-26
+    timeout-minutes: 80
+    env:
+      IOS_RUNTIME_VERSION: "26.2"
+      AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH: ${{ github.workspace }}/.tmp/ios-runner-derived
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Setup Apple replay
+        id: apple-replay
+        uses: ./.github/actions/setup-apple-replay
+        with:
+          derived-path: ${{ env.AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH }}
+          cache-key-prefix: ios-runner-prebuilt
+          cache-key-suffix: -ios-${{ env.IOS_RUNTIME_VERSION }}
+          build-command: sh ./scripts/build-xcuitest-apple.sh
+          xcuitest-platform: ios
+          xcuitest-destination: generic/platform=iOS Simulator
+          clean-derived: "1"
+
+      - name: Boot iOS test simulator
+        uses: ./.github/actions/boot-ios-test-simulator
+        with:
+          runtime-version: ${{ env.IOS_RUNTIME_VERSION }}
+          preferred-device-name: iPhone 17 Pro
+
+      - name: Run iOS command perf benchmark
+        run: |
+          pnpm clean:daemon
+          node --experimental-strip-types scripts/perf/run.ts \
+            --platform ios \
+            --device "iPhone 17 Pro" \
+            --n "$PERF_ROUNDS" --warmup 1 \
+            --out-dir "$GITHUB_WORKSPACE/perf-results"
+
+      - name: Upload iOS perf report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: perf-ios
+          path: perf-results/
+          if-no-files-found: warn
+
+  perf-android:
+    name: Android Command Perf
+    runs-on: ubuntu-latest
+    timeout-minutes: 80
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Setup Android replay host
+        id: android-replay-host
+        uses: ./.github/actions/setup-android-replay-host
+
+      - name: Package npm-bundled Android helpers
+        run: |
+          pnpm package:android-snapshot-helper:npm
+          pnpm package:android-multitouch-helper:npm
+
+      - name: Run Android command perf benchmark
+        uses: reactivecircus/android-emulator-runner@b530d96654c385303d652368551fb075bc2f0b6b # v2.35.0
+        with:
+          api-level: 36
+          arch: x86_64
+          profile: pixel_7
+          target: google_apis_playstore
+          emulator-options: -no-window -gpu swiftshader_indirect -no-snapshot -noaudio -no-boot-anim -no-metrics
+          script: |
+            set -e
+            # Disable animations up front so accessibility dumps don't time out. The harness
+            # also runs `settings animations off`; this is belt-and-suspenders.
+            adb -s emulator-5554 shell settings put global window_animation_scale 0 || true
+            adb -s emulator-5554 shell settings put global transition_animation_scale 0 || true
+            adb -s emulator-5554 shell settings put global animator_duration_scale 0 || true
+            node --experimental-strip-types scripts/perf/run.ts \
+              --platform android \
+              --serial emulator-5554 \
+              --n "$PERF_ROUNDS" --warmup 1 \
+              --out-dir "$GITHUB_WORKSPACE/perf-results"
+
+      - name: Upload Android perf report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: perf-android
+          path: perf-results/
+          if-no-files-found: warn
@@ -1,4 +1,5 @@
 node_modules/
+scripts/perf/.results/
 .pnpm-store/
 .fallow/
 dist/
 
@@ -96,6 +96,9 @@
     "build:macos-helper": "swift build -c release --package-path macos-helper",
     "build:all": "pnpm build:node && pnpm build:xcuitest",
     "ad": "node bin/agent-device.mjs",
+    "perf": "node --experimental-strip-types scripts/perf/run.ts",
+    "perf:ios": "node --experimental-strip-types scripts/perf/run.ts --platform ios",
+    "perf:android": "node --experimental-strip-types scripts/perf/run.ts --platform android",
     "lint": "oxlint . --deny-warnings",
     "format": "oxfmt --write src test skills package.json tsconfig.json tsconfig.lib.json rslib.config.ts vitest.config.ts .github/actions/setup-node-pnpm/action.yml .oxlintrc.json .oxfmtrc.json '!test/skillgym/.skillgym-results/**'",
     "fallow": "fallow --summary",
 
@@ -0,0 +1,81 @@
+import { spawnSync } from 'node:child_process';
+import { performance } from 'node:perf_hooks';
+import { resolveCliArgv, REPO_ROOT } from './config.ts';
+import type { BatchStepSpec } from './scenario.ts';
+import type { CliResult } from './types.ts';
+
+// Cap (64 MiB) on captured child output; passed to spawnSync as `maxBuffer` in invokeCli.
+const MAX_BUFFER = 64 * 1024 * 1024;
+// CLI argv prefix, resolved once at module load via resolveCliArgv().
+const CLI_ARGV = resolveCliArgv();
+
+// Best-effort JSON decode of CLI stdout. Returns undefined for blank or unparseable output.
+function tryParseJson(stdout: string): unknown {
+  const text = stdout.trim();
+  if (text.length === 0) return undefined;
+  try {
+    return JSON.parse(text);
+  } catch {
+    // Whole-output parse failed (e.g. extra lines around the JSON): fall through and retry
+    // on the span from the first '{' to the last '}'.
+  }
+  const open = text.indexOf('{');
+  const close = text.lastIndexOf('}');
+  if (open < 0 || close <= open) return undefined;
+  try {
+    return JSON.parse(text.slice(open, close + 1));
+  } catch {
+    return undefined;
+  }
+}
+
+// A payload counts as failed only when it is an object carrying an explicit `ok: false`.
+function jsonOk(json: unknown): boolean {
+  if (json === null || typeof json !== 'object') return true;
+  return (json as { ok?: unknown }).ok !== false;
+}
+
+// Invoke the CLI once and time it. `args` includes the command + positionals + dash-flags;
+// `baseFlags` carries the isolation + device flags shared by every call. `--json` is always
+// appended last. The child runs under the current Node binary with the argv prefix held in
+// CLI_ARGV, from REPO_ROOT as its working directory.
+//
+// Returns the exit code (-1 when the child produced none), wall-clock milliseconds around the
+// spawn, raw stdout/stderr, the parsed JSON (if any), and `ok` — true only for exit code 0
+// with a payload that does not carry `ok: false`.
+export function invokeCli(args: string[], baseFlags: string[]): CliResult {
+  const full = [...CLI_ARGV, ...args, ...baseFlags, '--json'];
+  const t0 = performance.now();
+  const r = spawnSync(process.execPath, full, {
+    encoding: 'utf8',
+    cwd: REPO_ROOT,
+    maxBuffer: MAX_BUFFER,
+  });
+  const wallClockMs = performance.now() - t0;
+  const stdout = r.stdout ?? '';
+  // spawnSync reports launch failures and maxBuffer overflow through `error`, and signal
+  // kills through `signal`; `status` is then null. Append the reason to stderr so a -1 exit
+  // code is diagnosable instead of silent. With no such condition stderr is passed through
+  // unchanged.
+  const notes: string[] = [];
+  if (r.error) notes.push(`[perf] spawn error: ${r.error.message}`);
+  if (r.signal) notes.push(`[perf] terminated by signal ${r.signal}`);
+  const stderr = [r.stderr ?? '', ...notes].filter((part) => part.length > 0).join('\n');
+  const json = tryParseJson(stdout);
+  const exitCode = r.status ?? -1;
+  return { exitCode, wallClockMs, stdout, stderr, json, ok: exitCode === 0 && jsonOk(json) };
+}
+
+// Run one command as a single-step `batch` so the response carries that step's durationMs.
+export function invokeBatchStep(spec: BatchStepSpec, baseFlags: string[]): CliResult {
+  const steps = JSON.stringify([spec]);
+  const batchArgs = ['batch', '--steps', steps];
+  return invokeCli(batchArgs, baseFlags);
+}
+
+// First entry of `data.results` in a batch response, or undefined when absent or not an object.
+function firstBatchResult(json: unknown): Record<string, unknown> | undefined {
+  const envelope = json as { data?: { results?: unknown[] } } | undefined;
+  const head = envelope?.data?.results?.[0];
+  if (!head || typeof head !== 'object') return undefined;
+  return head as Record<string, unknown>;
+}
+
+// Per-step duration reported by the batch response; undefined when missing or non-numeric.
+export function readBatchStepDurationMs(result: CliResult): number | undefined {
+  const step = firstBatchResult(result.json);
+  const duration = step?.durationMs;
+  if (typeof duration !== 'number') return undefined;
+  return duration;
+}
+
+// Pull `code` and `message` from the response's top-level `error`; either may be undefined.
+export function readBatchStepError(result: CliResult): { code?: string; message?: string } {
+  const payload = result.json as
+    | { error?: { code?: string; message?: string } }
+    | undefined;
+  const failure = payload?.error;
+  return { code: failure?.code, message: failure?.message };
+}
+
+// Proxy for a11y-tree size: snapshot node count (falls back to distinct @eN refs).
+// Returns undefined when the first batch step carries no object `data` payload.
+export function countElements(result: CliResult): number | undefined {
+  const stepData = firstBatchResult(result.json)?.data;
+  // `typeof null` is 'object', so null must be rejected explicitly; otherwise reading
+  // `.nodes` below throws a TypeError for a step whose `data` is null.
+  if (stepData === undefined || stepData === null || typeof stepData !== 'object') {
+    return undefined;
+  }
+  const nodes = (stepData as { nodes?: unknown }).nodes;
+  if (Array.isArray(nodes)) return nodes.length;
+  // No `nodes` array: count unique refs such as "@e12" anywhere in the serialized payload.
+  const matches = JSON.stringify(stepData).match(/@e\d+/g);
+  return matches ? new Set(matches).size : 0;
+}
@@ -0,0 +1,87 @@
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import type { Platform } from './types.ts';
+
+// Directory that contains this module.
+const HERE = path.dirname(fileURLToPath(import.meta.url));
+// Two directory levels above this module; used as the repository root.
+export const REPO_ROOT = path.resolve(HERE, '..', '..');
+// Default CLI entry point: bin/agent-device.mjs under REPO_ROOT.
+export const CLI_BIN = path.join(REPO_ROOT, 'bin', 'agent-device.mjs');
+// Default report directory: `.results` next to this module.
+export const DEFAULT_OUT_DIR = path.join(HERE, '.results');
+
+// Options for one perf run, as produced by parseConfig() from command-line flags.
+export type PerfConfig = {
+  platform: Platform;
+  rounds: number; // measured rounds (samples per command)
+  warmup: number; // leading rounds dropped from stats
+  keepArtifacts: boolean; // keep temp state dir + leave device booted
+  outDir: string;
+  udid?: string; // iOS device override (UDID)
+  device?: string; // device override by name (e.g. "iPhone 17 Pro"); preferred over udid
+  serial?: string; // Android device override
+};
+
+// Node argv prefix used to launch the CLI. The default is the dist binary
+// (bin/agent-device.mjs). A non-blank AGENT_DEVICE_PERF_CLI replaces it, e.g. on CI:
+//   AGENT_DEVICE_PERF_CLI="--experimental-strip-types src/bin.ts"
+// to run from source (matches the device workflows, which skip the dist build).
+// The override is split on whitespace, so individual arguments cannot contain spaces.
+export function resolveCliArgv(): string[] {
+  const raw = process.env.AGENT_DEVICE_PERF_CLI?.trim();
+  return raw ? raw.split(/\s+/) : [CLI_BIN];
+}
+
+// True when AGENT_DEVICE_PERF_CLI is set to a non-blank value.
+export function usesSourceCli(): boolean {
+  const raw = process.env.AGENT_DEVICE_PERF_CLI;
+  return raw !== undefined && raw.trim().length > 0;
+}
+
+// Return the token right after index `i` in argv (the value for `flag`); throws when absent.
+function readValue(argv: string[], i: number, flag: string): string {
+  const value = argv[i + 1];
+  if (value !== undefined) return value;
+  throw new Error(`Missing value for ${flag}`);
+}
+
+// Convert a numeric flag value. Blank input yields NaN — `Number('')` would be 0 — so the
+// integer checks at the end of parseConfig reject it instead of silently accepting zero
+// (e.g. `--warmup "$UNSET_VAR"`).
+function parseCount(raw: string): number {
+  return raw.trim() === '' ? Number.NaN : Number(raw);
+}
+
+// Build a PerfConfig from command-line tokens.
+//
+// Defaults: platform 'ios', 5 rounds, 1 warmup round, artifacts not kept, DEFAULT_OUT_DIR.
+// Flags: --platform <ios|android>, --n/--rounds <int>, --warmup <int>, --keep-artifacts,
+// --out-dir <path> (resolved to an absolute path), --udid, --device, --serial.
+// When a flag is repeated the last occurrence wins; numeric validation runs once at the end.
+//
+// Throws Error on an unknown token, a flag missing its value, an unknown platform, a
+// rounds value that is not an integer >= 1, or a warmup value that is not an integer >= 0.
+export function parseConfig(argv: string[]): PerfConfig {
+  const cfg: PerfConfig = {
+    platform: 'ios',
+    rounds: 5,
+    warmup: 1,
+    keepArtifacts: false,
+    outDir: DEFAULT_OUT_DIR,
+  };
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    // Value-taking flags pass `i++`: readValue sees the flag's index, then the loop skips
+    // over the consumed value.
+    switch (a) {
+      case '--platform': {
+        const v = readValue(argv, i++, a);
+        if (v !== 'ios' && v !== 'android') throw new Error(`Unknown platform: ${v}`);
+        cfg.platform = v;
+        break;
+      }
+      case '--n':
+      case '--rounds':
+        cfg.rounds = parseCount(readValue(argv, i++, a));
+        break;
+      case '--warmup':
+        cfg.warmup = parseCount(readValue(argv, i++, a));
+        break;
+      case '--keep-artifacts':
+        cfg.keepArtifacts = true;
+        break;
+      case '--out-dir':
+        cfg.outDir = path.resolve(readValue(argv, i++, a));
+        break;
+      case '--udid':
+        cfg.udid = readValue(argv, i++, a);
+        break;
+      case '--device':
+        cfg.device = readValue(argv, i++, a);
+        break;
+      case '--serial':
+        cfg.serial = readValue(argv, i++, a);
+        break;
+      default:
+        throw new Error(`Unknown flag: ${a}`);
+    }
+  }
+  if (!Number.isInteger(cfg.rounds) || cfg.rounds < 1) throw new Error('--n must be >= 1');
+  if (!Number.isInteger(cfg.warmup) || cfg.warmup < 0) throw new Error('--warmup must be >= 0');
+  return cfg;
+}
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`node_modules/`
	`2`	`+scripts/perf/.results/`
`2`	`3`	`.pnpm-store/`
`3`	`4`	`.fallow/`
`4`	`5`	`dist/`