callstackincubator
diff --git a/‎README.md‎
Lines changed: 21 additions & 0 deletions b/‎README.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎skills/agent-device/SKILL.md‎
Lines changed: 7 additions & 0 deletions b/‎skills/agent-device/SKILL.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎skills/agent-device/references/perf-metrics.md‎
Lines changed: 53 additions & 0 deletions b/‎skills/agent-device/references/perf-metrics.md‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎src/cli.ts‎
Lines changed: 5 additions & 0 deletions b/‎src/cli.ts‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/core/__tests__/capabilities.test.ts‎
Lines changed: 1 addition & 0 deletions b/‎src/core/__tests__/capabilities.test.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/core/capabilities.ts‎
Lines changed: 1 addition & 0 deletions b/‎src/core/capabilities.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/daemon/handlers/__tests__/session.test.ts‎
Lines changed: 112 additions & 0 deletions b/‎src/daemon/handlers/__tests__/session.test.ts‎
Lines changed: 112 additions & 0 deletions
@@ -17,6 +17,7 @@ The project is in early development and considered experimental. Pull requests a
 - Core commands: `open`, `back`, `home`, `app-switcher`, `press`, `long-press`, `focus`, `type`, `fill`, `scroll`, `scrollintoview`, `wait`, `alert`, `screenshot`, `close`, `reinstall`, `push`.
 - Inspection commands: `snapshot` (accessibility tree), `diff snapshot` (structural baseline diff), `appstate`, `apps`, `devices`.
 - Clipboard commands: `clipboard read`, `clipboard write <text>`.
+- Performance command: `perf` (alias: `metrics`) returns a metrics JSON blob for the active session; startup timing is currently the only metric sampled.
 - App logs: `logs path` returns session log metadata; `logs start` / `logs stop` stream app output; `logs clear` truncates session app logs; `logs clear --restart` resets and restarts stream in one step; `logs doctor` checks readiness; `logs mark` writes timeline markers.
 - Device tooling: `adb` (Android), `simctl`/`devicectl` (iOS via Xcode).
 - Minimal dependencies; TypeScript executed directly on Node 22+ (no build step).
@@ -154,6 +155,7 @@ agent-device scrollintoview @e42
 - `settings faceid match|nonmatch|enroll|unenroll` (iOS simulator only)
 - `settings permission grant|deny|reset camera|microphone|photos|contacts|notifications [full|limited]`
 - `appstate`, `apps`, `devices`, `session list`
+- `perf` (alias: `metrics`)
 
 Push notification simulation:
 
@@ -278,6 +280,25 @@ Assertions:
 - `is` predicates: `visible`, `hidden`, `exists`, `editable`, `selected`, `text`.
 - `is text` uses exact equality.
 
+Performance metrics:
+- `perf` (or `metrics`) requires an active session and returns a JSON metrics blob.
+- Current metric: `startup` sampled from the elapsed wall-clock time around each session `open` command dispatch (`open-command-roundtrip`), unit `ms`.
+- Startup samples are session-scoped and include sample history from recent `open` actions.
+- Platform support for current sampling: iOS simulator, iOS physical device, Android emulator/device.
+- `fps`, `memory`, and `cpu` are not yet implemented in this release; they appear in the output as placeholders with `available: false`.
+- Quick usage:
+
+```bash
+agent-device open Settings --platform ios
+agent-device perf --json
+```
+
+- How to read it:
+  - `metrics.startup.lastDurationMs`: most recent startup sample in milliseconds.
+  - `metrics.startup.samples[]`: recent startup history for this session.
+  - `sampling.startup.method`: currently `open-command-roundtrip`.
+- Caveat: startup here is command-to-launch round-trip timing, not true app time-to-interactive (TTI) or first-interactive telemetry.
+
 Replay update:
 - `replay <path>` runs deterministic replay from `.ad` scripts.
 - `replay -u <path>` attempts selector updates on failures and atomically rewrites the same file.
 
@@ -88,6 +88,7 @@ agent-device is visible 'id="anchor"'
 agent-device appstate
 agent-device clipboard read
 agent-device clipboard write "token"
+agent-device perf --json
 agent-device push <bundle|package> <payload.json|inline-json>
 agent-device get text @e1
 agent-device screenshot out.png
@@ -103,6 +104,11 @@ agent-device trace stop ./trace.log
 agent-device batch --steps-file /tmp/batch-steps.json --json
 ```
 
+### Performance Check
+
+- Use `agent-device perf --json` (or its alias, `agent-device metrics --json`) after `open`.
+- For detailed metric semantics, caveats, and interpretation guidance, see [references/perf-metrics.md](references/perf-metrics.md).
+
 ## Guardrails (High Value Only)
 
 - Re-snapshot after UI mutations (navigation/modal/list changes).
@@ -145,3 +151,4 @@ agent-device batch --steps-file /tmp/batch-steps.json --json
 - [references/video-recording.md](references/video-recording.md)
 - [references/coordinate-system.md](references/coordinate-system.md)
 - [references/batching.md](references/batching.md)
+- [references/perf-metrics.md](references/perf-metrics.md)
@@ -0,0 +1,53 @@
+# Performance Metrics (`perf` / `metrics`)
+
+Use this reference when you need to measure launch performance in agent workflows.
+
+## Quick flow
+
+```bash
+agent-device open Settings --platform ios
+agent-device perf --json
+```
+
+Alias:
+
+```bash
+agent-device metrics --json
+```
+
+## What is measured today
+
+- Session-scoped `startup` timing only.
+- Sampling method: `open-command-roundtrip`.
+- Unit: milliseconds (`ms`).
+- Source: elapsed wall-clock time around each session `open` command dispatch for the active app target.
+
+## Output fields to use
+
+- `metrics.startup.lastDurationMs`: most recent startup sample.
+- `metrics.startup.lastMeasuredAt`: ISO timestamp of most recent sample.
+- `metrics.startup.sampleCount`: number of retained samples.
+- `metrics.startup.samples[]`: recent startup history for the current session.
+- `sampling.startup.method`: current sampling method identifier.
+
+## Platform support (current)
+
+- iOS simulator: supported for startup sampling.
+- iOS physical device: supported for startup sampling.
+- Android emulator/device: supported for startup sampling.
+- `fps`, `memory`, and `cpu`: currently placeholders (`available: false`).
+
+## Interpretation guidance
+
+- Treat startup values as command round-trip timing, not true app first-frame or first-interactive telemetry.
+- Compare like-for-like runs:
+  - same device target
+  - same app build
+  - same workflow/session steps
+- Use multiple runs and compare trend/median, not one-off samples.
+
+## Common pitfalls
+
+- Running `perf` before any `open` in the session returns `metrics.startup.available: false` with a reason, because no startup sample has been captured yet.
+- Comparing values across different devices/runtimes introduces large noise.
+- The current `startup` value is a timing measurement only; do not interpret it as a CPU, FPS, or memory metric.
@@ -368,6 +368,11 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS):
             return;
           }
         }
+        if (command === 'perf') {
+          process.stdout.write(`${JSON.stringify(data, null, 2)}\n`);
+          if (logTailStopper) logTailStopper();
+          return;
+        }
       }
       if (logTailStopper) logTailStopper();
       return;
 
@@ -69,6 +69,7 @@ test('core commands support iOS simulator, iOS device, and Android', () => {
     'longpress',
     'logs',
     'open',
+    'perf',
     'press',
     'record',
     'screenshot',
 
@@ -33,6 +33,7 @@ const COMMAND_CAPABILITY_MATRIX: Record<string, CommandCapability> = {
   logs: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
   longpress: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
   open: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
+  perf: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
   reinstall: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
   press: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
   push: { ios: { simulator: true }, android: { emulator: true, device: true, unknown: true } },
 
@@ -764,6 +764,118 @@ test('clipboard rejects unsupported iOS physical devices', async () => {
   }
 });
 
+test('perf requires an active session', async () => {
+  const sessionStore = makeSessionStore();
+  const response = await handleSessionCommands({
+    req: {
+      token: 't',
+      session: 'default',
+      command: 'perf',
+      positionals: [],
+      flags: {},
+    },
+    sessionName: 'default',
+    logPath: path.join(os.tmpdir(), 'daemon.log'),
+    sessionStore,
+    invoke: noopInvoke,
+  });
+  assert.ok(response);
+  assert.equal(response?.ok, false);
+  if (response && !response.ok) {
+    assert.equal(response.error.code, 'SESSION_NOT_FOUND');
+  }
+});
+
+test('perf returns startup samples captured from open actions', async () => {
+  const sessionStore = makeSessionStore();
+  const sessionName = 'perf-session';
+  const measuredAt = new Date('2026-02-24T10:00:00.000Z').toISOString();
+  const session = makeSession(sessionName, {
+    platform: 'ios',
+    id: 'sim-1',
+    name: 'iPhone 16',
+    kind: 'simulator',
+    booted: true,
+  });
+  session.actions.push({
+    ts: Date.now(),
+    command: 'open',
+    positionals: ['Settings'],
+    flags: {},
+    result: {
+      startup: {
+        durationMs: 184,
+        measuredAt,
+        method: 'open-command-roundtrip',
+        appTarget: 'Settings',
+        appBundleId: 'com.apple.Preferences',
+      },
+    },
+  });
+  sessionStore.set(sessionName, session);
+
+  const response = await handleSessionCommands({
+    req: {
+      token: 't',
+      session: sessionName,
+      command: 'perf',
+      positionals: [],
+      flags: {},
+    },
+    sessionName,
+    logPath: path.join(os.tmpdir(), 'daemon.log'),
+    sessionStore,
+    invoke: noopInvoke,
+  });
+  assert.ok(response);
+  assert.equal(response?.ok, true);
+  if (response && response.ok) {
+    const startup = (response.data?.metrics as any)?.startup;
+    assert.equal(startup?.available, true);
+    assert.equal(startup?.lastDurationMs, 184);
+    assert.equal(startup?.lastMeasuredAt, measuredAt);
+    assert.equal(startup?.method, 'open-command-roundtrip');
+    assert.equal(startup?.sampleCount, 1);
+    assert.equal(Array.isArray(startup?.samples), true);
+  }
+});
+
+test('perf reports startup metric as unavailable when no sample exists', async () => {
+  const sessionStore = makeSessionStore();
+  const sessionName = 'perf-session-empty';
+  sessionStore.set(
+    sessionName,
+    makeSession(sessionName, {
+      platform: 'android',
+      id: 'emulator-5554',
+      name: 'Pixel Emulator',
+      kind: 'emulator',
+      booted: true,
+    }),
+  );
+
+  const response = await handleSessionCommands({
+    req: {
+      token: 't',
+      session: sessionName,
+      command: 'perf',
+      positionals: [],
+      flags: {},
+    },
+    sessionName,
+    logPath: path.join(os.tmpdir(), 'daemon.log'),
+    sessionStore,
+    invoke: noopInvoke,
+  });
+  assert.ok(response);
+  assert.equal(response?.ok, true);
+  if (response && response.ok) {
+    const startup = (response.data?.metrics as any)?.startup;
+    assert.equal(startup?.available, false);
+    assert.match(String(startup?.reason ?? ''), /no startup sample captured yet/i);
+  }
+});
+
 test('open URL on existing iOS session clears stale app bundle id', async () => {
   const sessionStore = makeSessionStore();
   const sessionName = 'ios-session';
Original file line number	Diff line number	Diff line change
`@@ -368,6 +368,11 @@ export async function runCli(argv: string[], deps: CliDeps = DEFAULT_CLI_DEPS):`
`368`	`368`	`return;`
`369`	`369`	`}`
`370`	`370`	`}`
	`371`	`+ if (command === 'perf') {`
	`372`	+ process.stdout.write(`${JSON.stringify(data, null, 2)}\n`);
	`373`	`+ if (logTailStopper) logTailStopper();`
	`374`	`+ return;`
	`375`	`+ }`
`371`	`376`	`}`
`372`	`377`	`if (logTailStopper) logTailStopper();`
`373`	`378`	`return;`