Skip to content

Commit 0dbcc16

Browse files
committed
feat: add e2e command perf benchmark harness + nightly CI
Adds scripts/perf, a cheap end-to-end perf benchmark that drives the built CLI through an ordered Settings tour of ~24 commands for N rounds, on a fully isolated daemon/state-dir and self-cleaning device, and emits JSON + Markdown reports. Per-command timing comes from wrapping each batchable command in its own single-step batch (daemon durationMs) plus wall-clock around the process. Wires a scheduled + workflow_dispatch CI job (perf-nightly.yml) that reuses the cached iOS XCUITest runner (setup-apple-replay) and the Android replay host, and runs the CLI from source via --experimental-strip-types (no dist build).
1 parent dc37f86 commit 0dbcc16

12 files changed

Lines changed: 858 additions & 0 deletions

File tree

.github/workflows/perf-nightly.yml

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
name: Perf Nightly

# End-to-end command perf benchmark (scripts/perf). Scheduled + manual only — perf timing on
# shared CI runners is noisy, so treat this as a trend/regression signal, not absolute numbers.
# Reuses the same build artifacts as the device suites: the cached iOS XCUITest runner
# (setup-apple-replay, ios-runner-prebuilt cache) and the Android replay host, and runs the CLI
# from source via --experimental-strip-types (no dist build), matching the replay workflows.

on:
  schedule:
    - cron: "0 4 * * *"
  workflow_dispatch:
    inputs:
      rounds:
        description: "Measured rounds per command (samples)"
        required: false
        default: "5"

permissions:
  contents: read

concurrency:
  group: ci-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  AGENT_DEVICE_PERF_CLI: "--experimental-strip-types src/bin.ts"
  # Scheduled runs have no inputs, so fall back to the same default as the dispatch input.
  PERF_ROUNDS: ${{ github.event.inputs.rounds || '5' }}

jobs:
  perf-ios:
    name: iOS Command Perf
    runs-on: macos-26
    timeout-minutes: 80
    env:
      IOS_RUNTIME_VERSION: "26.2"
      AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH: ${{ github.workspace }}/.tmp/ios-runner-derived
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup toolchain
        uses: ./.github/actions/setup-node-pnpm

      - name: Setup Apple replay
        id: apple-replay
        uses: ./.github/actions/setup-apple-replay
        with:
          derived-path: ${{ env.AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH }}
          cache-key-prefix: ios-runner-prebuilt
          cache-key-suffix: -ios-${{ env.IOS_RUNTIME_VERSION }}
          build-command: sh ./scripts/build-xcuitest-apple.sh
          xcuitest-platform: ios
          xcuitest-destination: generic/platform=iOS Simulator
          clean-derived: "1"

      - name: Boot iOS test simulator
        uses: ./.github/actions/boot-ios-test-simulator
        with:
          runtime-version: ${{ env.IOS_RUNTIME_VERSION }}
          preferred-device-name: iPhone 17 Pro

      - name: Run iOS command perf benchmark
        run: |
          pnpm clean:daemon
          node --experimental-strip-types scripts/perf/run.ts \
            --platform ios \
            --device "iPhone 17 Pro" \
            --n "$PERF_ROUNDS" --warmup 1 \
            --out-dir "$GITHUB_WORKSPACE/perf-results"

      - name: Upload iOS perf report
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: perf-ios
          path: perf-results/
          if-no-files-found: warn

  perf-android:
    name: Android Command Perf
    runs-on: ubuntu-latest
    timeout-minutes: 80
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup toolchain
        uses: ./.github/actions/setup-node-pnpm

      - name: Setup Android replay host
        id: android-replay-host
        uses: ./.github/actions/setup-android-replay-host

      - name: Package npm-bundled Android helpers
        run: |
          pnpm package:android-snapshot-helper:npm
          pnpm package:android-multitouch-helper:npm

      - name: Run Android command perf benchmark
        uses: reactivecircus/android-emulator-runner@b530d96654c385303d652368551fb075bc2f0b6b # v2.35.0
        with:
          api-level: 36
          arch: x86_64
          profile: pixel_7
          target: google_apis_playstore
          emulator-options: -no-window -gpu swiftshader_indirect -no-snapshot -noaudio -no-boot-anim -no-metrics
          # NOTE(review): android-emulator-runner is understood to execute each line of `script`
          # as its own `sh -c` invocation, so a command split with trailing backslashes would be
          # broken apart. The benchmark command is therefore kept on a single line, which is
          # correct whether the script runs per line or as a whole — confirm against the action.
          script: |
            set -e
            # Disable animations up front so accessibility dumps don't time out (the harness
            # also runs `settings animations off`, this is belt-and-suspenders).
            adb -s emulator-5554 shell settings put global window_animation_scale 0 || true
            adb -s emulator-5554 shell settings put global transition_animation_scale 0 || true
            adb -s emulator-5554 shell settings put global animator_duration_scale 0 || true
            node --experimental-strip-types scripts/perf/run.ts --platform android --serial emulator-5554 --n "$PERF_ROUNDS" --warmup 1 --out-dir "$GITHUB_WORKSPACE/perf-results"

      - name: Upload Android perf report
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: perf-android
          path: perf-results/
          if-no-files-found: warn

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
node_modules/
2+
scripts/perf/.results/
23
.pnpm-store/
34
.fallow/
45
dist/

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@
9696
"build:macos-helper": "swift build -c release --package-path macos-helper",
9797
"build:all": "pnpm build:node && pnpm build:xcuitest",
9898
"ad": "node bin/agent-device.mjs",
99+
"perf": "node --experimental-strip-types scripts/perf/run.ts",
100+
"perf:ios": "node --experimental-strip-types scripts/perf/run.ts --platform ios",
101+
"perf:android": "node --experimental-strip-types scripts/perf/run.ts --platform android",
99102
"lint": "oxlint . --deny-warnings",
100103
"format": "oxfmt --write src test skills package.json tsconfig.json tsconfig.lib.json rslib.config.ts vitest.config.ts .github/actions/setup-node-pnpm/action.yml .oxlintrc.json .oxfmtrc.json '!test/skillgym/.skillgym-results/**'",
101104
"fallow": "fallow --summary",

scripts/perf/cli.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import { spawnSync } from 'node:child_process';
2+
import { performance } from 'node:perf_hooks';
3+
import { resolveCliArgv, REPO_ROOT } from './config.ts';
4+
import type { BatchStepSpec } from './scenario.ts';
5+
import type { CliResult } from './types.ts';
6+
7+
// Cap (64 MiB) on captured child output; passed to spawnSync as `maxBuffer`.
const MAX_BUFFER = 64 * 1024 * 1024;
8+
// Argument prefix used to launch the CLI, resolved once when this module loads.
const CLI_ARGV = resolveCliArgv();
9+
10+
// Best-effort JSON extraction from CLI stdout. The whole trimmed output is tried first;
// if that fails, the span from the first `{` to the last `}` is tried so stray text
// around a JSON object is tolerated. Returns undefined for blank or unparseable output.
function tryParseJson(stdout: string): unknown {
  const text = stdout.trim();
  if (text.length === 0) return undefined;

  const candidates = [text];
  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open >= 0 && close > open) candidates.push(text.slice(open, close + 1));

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON; move on to the next candidate (if any).
    }
  }
  return undefined;
}
29+
30+
// A payload is treated as a failure only when it is a non-null object whose `ok`
// property is strictly `false`; anything else (including no payload) passes.
function jsonOk(json: unknown): boolean {
  if (json === null || typeof json !== 'object') return true;
  const flag = (json as { ok?: unknown }).ok;
  return flag !== false;
}
33+
34+
// Invoke the built CLI once. `args` includes the command + positionals + dash-flags;
// `baseFlags` carries the isolation + device flags shared by every call. `--json` is
// always appended last. The result carries the exit code (-1 when the process reported
// none), the wall-clock time around the spawn, raw stdout/stderr, the parsed JSON payload
// (if any) and `ok`, which requires exit code 0 and a payload that is not `ok: false`.
export function invokeCli(args: string[], baseFlags: string[]): CliResult {
  const full = [...CLI_ARGV, ...args, ...baseFlags, '--json'];
  const t0 = performance.now();
  const r = spawnSync(process.execPath, full, {
    encoding: 'utf8',
    cwd: REPO_ROOT,
    maxBuffer: MAX_BUFFER,
  });
  const wallClockMs = performance.now() - t0;
  const stdout = r.stdout ?? '';
  let stderr = r.stderr ?? '';
  const exitCode = r.status ?? -1;

  // spawnSync does not throw: a failed launch (or other child failure) is reported via
  // `error`, and a killed child via `signal` with a null `status`. Surface the cause in
  // stderr so a -1 exit code is diagnosable instead of arriving with empty output.
  let failure: string | undefined;
  if (r.error) {
    failure = `spawn error: ${r.error.message}`;
  } else if (r.status === null && r.signal) {
    failure = `terminated by signal ${r.signal}`;
  }
  if (failure !== undefined) {
    const separator = stderr === '' || stderr.endsWith('\n') ? '' : '\n';
    stderr = `${stderr}${separator}[perf] ${failure}\n`;
  }

  const json = tryParseJson(stdout);
  return { exitCode, wallClockMs, stdout, stderr, json, ok: exitCode === 0 && jsonOk(json) };
}
51+
52+
// Run one command as a single-element `batch` so the response carries that step's own
// durationMs. The step spec is JSON-encoded into the `--steps` argument.
export function invokeBatchStep(spec: BatchStepSpec, baseFlags: string[]): CliResult {
  const steps = JSON.stringify([spec]);
  const batchArgs = ['batch', '--steps', steps];
  return invokeCli(batchArgs, baseFlags);
}
56+
57+
// Returns `data.results[0]` from a parsed batch response when it is a non-null object;
// undefined when the payload is missing, has no results, or the first entry is not an object.
function firstBatchResult(json: unknown): Record<string, unknown> | undefined {
  const payload = json as { data?: { results?: unknown[] } } | undefined;
  const head = payload?.data?.results?.[0];
  if (!head || typeof head !== 'object') return undefined;
  return head as Record<string, unknown>;
}
62+
63+
// Reads `durationMs` from the first batch result; undefined when there is no such
// result or the field is not a number.
export function readBatchStepDurationMs(result: CliResult): number | undefined {
  const step = firstBatchResult(result.json);
  const duration = step === undefined ? undefined : step.durationMs;
  if (typeof duration !== 'number') return undefined;
  return duration;
}
67+
68+
// Extracts `code` and `message` from the top-level `error` object of the parsed CLI
// payload. Both fields are undefined when the payload or its `error` is absent.
export function readBatchStepError(result: CliResult): { code?: string; message?: string } {
  const payload = result.json as { error?: { code?: string; message?: string } } | undefined;
  const failure = payload?.error;
  return { code: failure?.code, message: failure?.message };
}
72+
73+
// Proxy for a11y-tree size: snapshot node count (falls back to distinct @eN refs).
// Returns undefined when the first batch result has no object `data` to inspect, the
// length of `data.nodes` when that is an array, and otherwise the number of distinct
// `@e<digits>` references found in the serialized data (0 when there are none).
export function countElements(result: CliResult): number | undefined {
  const stepData = firstBatchResult(result.json)?.data;
  // `typeof null` is 'object', so null must be excluded explicitly; otherwise a step
  // that reports `data: null` would throw on the property access below.
  if (stepData === null || typeof stepData !== 'object') return undefined;
  const nodes = (stepData as { nodes?: unknown }).nodes;
  if (Array.isArray(nodes)) return nodes.length;
  const matches = JSON.stringify(stepData).match(/@e\d+/g);
  return matches ? new Set(matches).size : 0;
}

scripts/perf/config.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import path from 'node:path';
2+
import { fileURLToPath } from 'node:url';
3+
import type { Platform } from './types.ts';
4+
5+
// Directory containing this module.
const HERE = path.dirname(fileURLToPath(import.meta.url));
6+
// Repository root: two directory levels above this module.
export const REPO_ROOT = path.resolve(HERE, '..', '..');
7+
// Default CLI entry point: bin/agent-device.mjs under the repository root.
export const CLI_BIN = path.join(REPO_ROOT, 'bin', 'agent-device.mjs');
8+
// Default report directory: `.results` next to this module.
export const DEFAULT_OUT_DIR = path.join(HERE, '.results');
9+
10+
/** Options for one benchmark run, as produced by `parseConfig`. */
export type PerfConfig = {
  platform: Platform;
  /** Measured rounds (samples per command). */
  rounds: number;
  /** Leading rounds dropped from stats. */
  warmup: number;
  /** Keep temp state dir + leave device booted. */
  keepArtifacts: boolean;
  outDir: string;
  /** iOS device override (UDID). */
  udid?: string;
  /** Device override by name (e.g. "iPhone 17 Pro"); preferred over udid. */
  device?: string;
  /** Android device override. */
  serial?: string;
};
20+
21+
// Argument prefix used to launch the CLI. By default this is the built dist binary
// (bin/agent-device.mjs). When AGENT_DEVICE_PERF_CLI is set to a non-blank value it is
// split on whitespace and used instead, which lets CI run from source, e.g.:
//   AGENT_DEVICE_PERF_CLI="--experimental-strip-types src/bin.ts"
// (matches the device workflows, which run from source and skip the dist build).
// Because the override is split on whitespace, its arguments cannot contain spaces.
export function resolveCliArgv(): string[] {
  const raw = process.env.AGENT_DEVICE_PERF_CLI;
  const spec = raw === undefined ? '' : raw.trim();
  return spec.length > 0 ? spec.split(/\s+/) : [CLI_BIN];
}
30+
31+
// True when AGENT_DEVICE_PERF_CLI is set to something other than blank whitespace.
export function usesSourceCli(): boolean {
  const raw = process.env.AGENT_DEVICE_PERF_CLI;
  return raw !== undefined && raw.trim().length > 0;
}
34+
35+
// Returns the argument that follows position `i`; throws when `flag` is the last token.
function readValue(argv: string[], i: number, flag: string): string {
  const value = argv[i + 1];
  if (value !== undefined) return value;
  throw new Error(`Missing value for ${flag}`);
}
40+
41+
// Parses benchmark CLI flags into a PerfConfig, starting from the defaults below
// (platform ios, 5 rounds, 1 warmup, artifacts not kept, default output directory).
// Throws on an unknown flag, a flag missing its value, an unknown platform, or a
// `--n`/`--rounds`/`--warmup` value that is not an integer within range.
export function parseConfig(argv: string[]): PerfConfig {
  // `Number('')` and `Number('  ')` are 0, which would let a blank value (for example an
  // unset shell variable expanded inside quotes) pass as `--warmup 0`. Map blank input
  // to NaN so the integer checks at the end reject it.
  const toCount = (raw: string): number => (raw.trim() === '' ? Number.NaN : Number(raw));

  const cfg: PerfConfig = {
    platform: 'ios',
    rounds: 5,
    warmup: 1,
    keepArtifacts: false,
    outDir: DEFAULT_OUT_DIR,
  };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    switch (a) {
      // Value-taking flags pass `i++` so the loop also skips the consumed value.
      case '--platform': {
        const v = readValue(argv, i++, a);
        if (v !== 'ios' && v !== 'android') throw new Error(`Unknown platform: ${v}`);
        cfg.platform = v;
        break;
      }
      case '--n':
      case '--rounds':
        cfg.rounds = toCount(readValue(argv, i++, a));
        break;
      case '--warmup':
        cfg.warmup = toCount(readValue(argv, i++, a));
        break;
      case '--keep-artifacts':
        cfg.keepArtifacts = true;
        break;
      case '--out-dir':
        cfg.outDir = path.resolve(readValue(argv, i++, a));
        break;
      case '--udid':
        cfg.udid = readValue(argv, i++, a);
        break;
      case '--device':
        cfg.device = readValue(argv, i++, a);
        break;
      case '--serial':
        cfg.serial = readValue(argv, i++, a);
        break;
      default:
        throw new Error(`Unknown flag: ${a}`);
    }
  }
  // Validated once after the loop, so only the last occurrence of a repeated flag counts.
  if (!Number.isInteger(cfg.rounds) || cfg.rounds < 1) throw new Error('--n must be >= 1');
  if (!Number.isInteger(cfg.warmup) || cfg.warmup < 0) throw new Error('--warmup must be >= 0');
  return cfg;
}

0 commit comments

Comments
 (0)