Expensify
diff --git a/‎.claude/skills/agent-device/SKILL.md‎
Lines changed: 80 additions & 12 deletions b/‎.claude/skills/agent-device/SKILL.md‎
Lines changed: 80 additions & 12 deletions
diff --git a/‎.claude/skills/agent-device/flows/README.md‎
Lines changed: 39 additions & 19 deletions b/‎.claude/skills/agent-device/flows/README.md‎
Lines changed: 39 additions & 19 deletions
diff --git a/‎…gent-device/flows/complete-onboarding.ad‎ ‎…vice/flows/macros/complete-onboarding.ad‎.claude/skills/agent-device/flows/complete-onboarding.ad renamed to .claude/skills/agent-device/flows/macros/complete-onboarding.ad
Lines changed: 2 additions & 2 deletions b/‎…gent-device/flows/complete-onboarding.ad‎ ‎…vice/flows/macros/complete-onboarding.ad‎.claude/skills/agent-device/flows/complete-onboarding.ad renamed to .claude/skills/agent-device/flows/macros/complete-onboarding.ad
Lines changed: 2 additions & 2 deletions
diff --git a/‎…ude/skills/agent-device/flows/go-back.ad‎ ‎…lls/agent-device/flows/macros/go-back.ad‎.claude/skills/agent-device/flows/go-back.ad renamed to .claude/skills/agent-device/flows/macros/go-back.ad b/‎…ude/skills/agent-device/flows/go-back.ad‎ ‎…lls/agent-device/flows/macros/go-back.ad‎.claude/skills/agent-device/flows/go-back.ad renamed to .claude/skills/agent-device/flows/macros/go-back.ad
diff --git a/‎.claude/skills/agent-device/flows/macros/send-message.ad‎
Lines changed: 9 additions & 0 deletions b/‎.claude/skills/agent-device/flows/macros/send-message.ad‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎…ude/skills/agent-device/flows/sign-in.ad‎ ‎…lls/agent-device/flows/macros/sign-in.ad‎.claude/skills/agent-device/flows/sign-in.ad renamed to .claude/skills/agent-device/flows/macros/sign-in.ad
Lines changed: 3 additions & 3 deletions b/‎…ude/skills/agent-device/flows/sign-in.ad‎ ‎…lls/agent-device/flows/macros/sign-in.ad‎.claude/skills/agent-device/flows/sign-in.ad renamed to .claude/skills/agent-device/flows/macros/sign-in.ad
Lines changed: 3 additions & 3 deletions
diff --git a/‎…nt-device/flows/create-expense-manual.ad‎ ‎…ice/flows/tests/create-expense-manual.ad‎.claude/skills/agent-device/flows/create-expense-manual.ad renamed to .claude/skills/agent-device/flows/tests/create-expense-manual.ad
Lines changed: 3 additions & 3 deletions b/‎…nt-device/flows/create-expense-manual.ad‎ ‎…ice/flows/tests/create-expense-manual.ad‎.claude/skills/agent-device/flows/create-expense-manual.ad renamed to .claude/skills/agent-device/flows/tests/create-expense-manual.ad
Lines changed: 3 additions & 3 deletions
diff --git a/‎…gent-device/flows/open-create-expense.ad‎ ‎…evice/flows/tests/open-create-expense.ad‎.claude/skills/agent-device/flows/open-create-expense.ad renamed to .claude/skills/agent-device/flows/tests/open-create-expense.ad b/‎…gent-device/flows/open-create-expense.ad‎ ‎…evice/flows/tests/open-create-expense.ad‎.claude/skills/agent-device/flows/open-create-expense.ad renamed to .claude/skills/agent-device/flows/tests/open-create-expense.ad
diff --git a/‎…skills/agent-device/flows/open-report.ad‎ ‎…/agent-device/flows/tests/open-report.ad‎.claude/skills/agent-device/flows/open-report.ad renamed to .claude/skills/agent-device/flows/tests/open-report.ad
Lines changed: 2 additions & 2 deletions b/‎…skills/agent-device/flows/open-report.ad‎ ‎…/agent-device/flows/tests/open-report.ad‎.claude/skills/agent-device/flows/open-report.ad renamed to .claude/skills/agent-device/flows/tests/open-report.ad
Lines changed: 2 additions & 2 deletions
diff --git a/‎…agent-device/flows/open-search-router.ad‎ ‎…device/flows/tests/open-search-router.ad‎.claude/skills/agent-device/flows/open-search-router.ad renamed to .claude/skills/agent-device/flows/tests/open-search-router.ad b/‎…agent-device/flows/open-search-router.ad‎ ‎…device/flows/tests/open-search-router.ad‎.claude/skills/agent-device/flows/open-search-router.ad renamed to .claude/skills/agent-device/flows/tests/open-search-router.ad
@@ -1,30 +1,98 @@
 ---
 name: agent-device
 description: Drive iOS and Android devices for the Expensify App - testing, debugging, performance profiling, bug reproduction, and feature verification. Use when the developer needs to interact with the mobile app on a device.
-allowed-tools: Bash(agent-device *) Bash(npm root *)
+allowed-tools: Bash(agent-device *) Bash(npm root *) Bash(scripts/is-hybrid-app.sh)
 ---
 
 # agent-device
 
-## Pre-flight
+## Pre-flight (auto)
 
-<!-- The line below compares the installed `agent-device --version` to the required minimum 0.13.0 -->
+These checks evaluate at skill load. If any line shows `FAIL`, stop and surface the fix before running any device command.
 
-`agent-device` version check: !`R=0.13.0; V=$(agent-device --version 2>/dev/null); [ -n "$V" ] && [ "$(printf '%s\n%s\n' "$R" "$V" | sort -V | head -1)" = "$R" ] && echo "OK ($V)" || echo "FAIL (need v$R+, got: ${V:-not installed})"`
+`agent-device` version: !`R=0.13.0; V=$(agent-device --version 2>/dev/null); [ -n "$V" ] && [ "$(printf '%s\n%s\n' "$R" "$V" | sort -V | head -1)" = "$R" ] && echo "OK ($V)" || echo "FAIL (need v$R+, got: ${V:-not installed}). Fix: npm install -g agent-device@latest"`
 
-> If the version check above shows `FAIL`, **STOP** and instruct the developer: `npm install -g agent-device@latest`.
+Bundled CLI skills dir: !`D="$(npm root -g)/agent-device/skills/agent-device"; test -s "$D/SKILL.md" && echo "OK ($D)" || echo "FAIL (missing $D/SKILL.md). Fix: npm install -g agent-device@latest"`
 
-Canonical skill reference path (read these files directly for device automation guidance - bootstrap, exploration, verification, debugging): !`echo "$(npm root -g)/agent-device/skills/agent-device"`
+HybridApp mode: !`M=$(scripts/is-hybrid-app.sh 2>/dev/null | tail -1); [ "$M" = "true" ] && echo "OK (HybridApp)" || echo "FAIL (got: ${M:-unknown}). This skill only supports the HybridApp build - ensure the Mobile-Expensify submodule is present."`
 
-## Dev prerequisites
+## Bring-up
 
-Default assumption: dev build from this repo. Before `open <app>`, both must be true:
+Run this sequence the first time the user asks for device interaction in a session, before any `open` / `snapshot` / `replay`.
 
-1. **Metro dev server** running: `npm run start` (background).
-2. **Dev build installed** on target: `npm run ios` or `npm run android` from the repo root.
+### 1. Platform
 
-Skip these only when the developer explicitly targets a non-dev build (e.g., standalone/prod artifact, or a pre-installed release build).
+If the user prompt names `ios` or `android` explicitly, use it. Otherwise ask. Only iOS and Android are supported; reject other platforms.
+
+### 2. Bundle ID
+
+HybridApp dev builds only (the pre-flight gate enforces this).
+
+| Platform  | Bundle ID                       | Build script      |
+| --------- | ------------------------------- | ----------------- |
+| `ios`     | `com.expensify.expensifylite`   | `npm run ios`     |
+| `android` | `org.me.mobiexpensifyg.dev`     | `npm run android` |
+
+### 3. Confirm dev build is installed
+
+```bash
+agent-device apps --user-installed --platform <p> --json
+```
+
+If the resolved bundle ID is missing from the list, **STOP** and instruct the developer to run the matching build script from the table. HybridApp mobile builds **must** be initiated from `Mobile-Expensify/` (per project CLAUDE.md).
+
+### 4. Metro
+
+```bash
+agent-device metro prepare --public-base-url http://localhost:8081 --port 8081 --kind react-native
+```
+
+If `metro prepare` fails, **STOP** and surface the error verbatim.
+
+### 5. Pick a target device
+
+```bash
+agent-device devices --platform <p> --json
+```
+
+- If exactly one device has `booted=true`, use it.
+- If none are booted, choose the default target device (usually the first listed), then continue to step 6 to detect and clear any stale session bound to that device before opening.
+- If multiple are booted, ask the user which.
+
+Capture the device name and (for iOS) the simulator UDID, or (for Android) the serial.
+
+### 6. Session reuse vs reset
+
+```bash
+agent-device session list --json
+```
+
+For each entry whose `device_udid` (iOS) or `serial` (Android) matches the chosen device:
+
+- If the session was created earlier in the **current** Claude invocation, reuse it silently.
+- Otherwise prompt: `reuse` (continue with the existing session) or `reset` (force-close it).
+  - To reset: `agent-device close --shutdown --session <name>`. `--shutdown` also frees the simulator.
+
+### 7. Open
+
+```bash
+agent-device open <bundle-id> --platform <p> --device "<name>"
+```
+
+If `open` errors with "app not installed", revisit step 3.
+
+### 8. Sanity
+
+```bash
+agent-device snapshot -i
+```
+
+Confirm the app rendered. From here, follow the [Agent decision loop](flows/README.md) for repeatable flows or drive interactively.
+
+### Canonical skill references
+
+Read these files directly for device automation guidance (bootstrap, exploration, verification, debugging): !`echo "$(npm root -g)/agent-device/skills/agent-device"`
 
 ## Flows
 
-Repeatable steps (sign-in, onboarding, etc.) are captured as composable `.ad` snippets under [`flows/`](flows/README.md). Before manually tapping through a screen, follow the **Agent decision loop** in [`flows/README.md`](flows/README.md) - it covers discovery, `@pre` filtering, replay, and `@post` verification.
+Repeatable steps (sign-in, onboarding, etc.) are captured as composable `.ad` snippets under [`flows/`](flows/README.md). For interactive usage, propose and run only `flows/macros/` helpers. `flows/tests/` belongs to a separate QA workflow and must not be proposed by this skill; QA/perf runs execute them via `agent-device test <path>`.
@@ -1,18 +1,34 @@
 # Flows
 
-Composable `.ad` snippets - bounded units of work. A flow may span one or multiple screens as long as it represents a coherent, reusable action with clear start (`@pre`) and completion (`@post`) checkpoints. Each flow advertises machine-matchable metadata (`@pre`, `@post`, `@tag`) via `# @`-prefixed comment headers, so an agent can pick the right one from a snapshot.
+## Directory layout
 
-## Agent decision loop
+- `macros/` - reusable helpers for common setup/navigation actions that stop in a navigable state for further interactive work.
+- `tests/` - critical-scenario scripts for QA/perf verification that assert explicit outcomes (for example Sentry spans) and then stop.
+
+Composable `.ad` snippets - bounded units of work. A flow may span one or multiple screens as long as it represents a coherent, reusable action with clear start (`@pre`) and completion (`@post`) checkpoints. Each flow advertises machine-matchable metadata (`@pre`, `@post`, `@tag`, `@param`) via `# @`-prefixed comment headers, while flow type is derived from location (`flows/macros/` or `flows/tests/`).
+
+## Agent decision loop (interactive)
 
 Before manually navigating, use this human-in-the-loop loop:
 
 1. `agent-device snapshot -i` - see current state.
-2. `grep -H '^# @' .claude/skills/agent-device/flows/*.ad` - full catalog in one read.
+2. `grep -H '^# @' .claude/skills/agent-device/flows/macros/*.ad` - interactive catalog.
 3. For each candidate flow, run `agent-device is exists "<selector>"` per `@pre`. Keep flows where every `@pre` passes.
-4. Rank survivors by goal closeness (`@post` overlap with the requested destination when present) and present top candidates to the user with a short "why this flow" note.
+4. Rank survivors by goal closeness and present top macro candidates to the user with a short "why this flow" note:
+   - Prefer flows whose `@post` selectors literally match destination language from the user request (same `text`, `label`, or selector phrase).
 5. Wait for user selection before replaying. **Auto-run is allowed only when there is exactly one survivor and it is an unambiguous match for an explicit user request.**
-6. `agent-device replay <path>`.
-7. If the flow declares `@post`, verify each `@post` with `is exists`. On success, re-enter the loop only if the user's stated goal is not complete; otherwise stop and report completion. On failure, propose peer flow/manual fallback options and ask before continuing. If no `@post` is declared (utility flow), rely on explicit user confirmation or the next snapshot before continuing.
+   - Only propose flows from `flows/macros/` in interactive usage.
+6. Scan the selected flow's `# @param` headers. Ask the user for any missing parameter values, then build explicit CLI args (`-e KEY=VALUE`) for replay.
+7. `agent-device replay <path> -e KEY=VALUE ...`.
+8. If the flow declares `@post`, verify each `@post` with `is exists`. On success, re-enter the loop only if the user's stated goal is not complete; otherwise stop and report completion. On failure, propose peer flow/manual fallback options and ask before continuing. If no `@post` is declared (utility flow), rely on explicit user confirmation or the next snapshot before continuing.
+
+## QA workflow (separate)
+
+`flows/tests/` is reserved for dedicated QA automation and should not be offered to users as part of the interactive helper loop above. Run these flows with the dedicated test runner:
+
+```bash
+agent-device test .claude/skills/agent-device/flows/tests/<name>.ad -e KEY=VALUE ...
+```
 
 ## Metadata header spec
 
@@ -24,21 +40,22 @@ Each flow starts with `# @key value` comment lines. The `.ad` parser treats `#`
 | `@pre`   | 1..N        | Selector that must resolve in the current snapshot. Multiple lines are ANDed.                    |
 | `@post`  | 0..N        | Selector expected after replay. Multiple lines are ANDed. Used for chaining + success.           |
 | `@tag`   | 0..N        | Free-form category (`auth`, `onboarding`, ...) or scoped (`sentry-<spanName>`).                  |
+| `@param` | 0..N        | Runtime input contract: `@param KEY description.` Use with `${KEY}` in flow body.                |
 
 Selector syntax matches the body: `id="..."`, `role="..." label="..."`, `text="..."`, `||` for fallbacks.
 
 ## Parametrization (`agent-device` v0.13.0+)
 
-Lift body literals to named variables via `env` + `${VAR}` interpolation so values can be overridden at runtime without editing the file.
+Declare runtime inputs via metadata (`@param`) and reference them in the body with `${VAR}` interpolation. Values are supplied by caller arguments (`-e`) or shell imports (`AD_VAR_*`) - never by in-file `env` directives.
 
-| Construct          | Where                | Purpose                                                                          |
-| ------------------ | -------------------- | -------------------------------------------------------------------------------- |
-| `env KEY=VALUE`    | Header (after `# @`) | File-level default. Quote values with spaces or `||` chains: `env KEY="a || b"`. |
-| `${KEY}`           | Body                 | Interpolation point. Resolves at replay time.                                    |
-| `${KEY:-fallback}` | Body                 | Use `fallback` if `KEY` is unset.                                                |
-| `\${KEY}`          | Body                 | Literal `${KEY}` (escape).                                                       |
+| Construct          | Where                    | Purpose                                                                          |
+| ------------------ | ------------------------ | -------------------------------------------------------------------------------- |
+| `# @param KEY ...` | Metadata header comments | Declares expected input and documents meaning for the agent/user handoff.         |
+| `${KEY}`           | Body                     | Interpolation point. Resolves at replay time.                                    |
+| `${KEY:-fallback}` | Body                     | Use `fallback` if `KEY` is unset.                                                |
+| `\${KEY}`          | Body                     | Literal `${KEY}` (escape).                                                       |
 
-Resolution precedence (high to low): CLI `-e KEY=VALUE` (repeatable) > shell `AD_KEY=...` (auto-imported, prefix stripped) > file `env` > built-ins (`AD_PLATFORM`, `AD_SESSION`, `AD_FILENAME`, `AD_DEVICE`, `AD_ARTIFACTS`). Unresolved `${X}` errors with `file:line`.
+Resolution precedence (high to low): CLI `-e KEY=VALUE` (repeatable) > shell `AD_VAR_KEY=...` (auto-imported as `KEY`) > built-ins (`AD_PLATFORM`, `AD_SESSION`, `AD_FILENAME`, `AD_DEVICE`, `AD_ARTIFACTS`). Unresolved `${X}` errors with `file:line`.
 
 Override at runtime without editing the file:
 
@@ -52,30 +69,33 @@ agent-device replay <flow>.ad -e EMAIL=other@example.com
 - **No fixed `wait` calls.** `fill`/`press` resolve selectors with retry. Only add `wait <selector>` for real post-action blocks.
 - **Durable selectors.** Prefer `id=...` first, then `role=... label=...`, with `||` fallbacks. Avoid `@eN` refs.
 - **Every flow declares `@desc` and `@pre`.** Add `@post` for outcome-bearing flows; utility flows (for example `go-back`) may omit it. Add `@tag` when applicable.
+- **Choose directory intentionally.** Put reusable setup/navigation steps in `flows/macros/`; put outcome verification scenarios in `flows/tests/`.
 - **Keep scope coherent, not artificially tiny.** Flows can span multiple screens when that sequence is the reusable intent (for example "create and submit manual expense").
 - **Peers share `@pre` and differ on `@post`.** One flow per narrow outcome is better than a mega-flow with conditional branches.
-- **Use `env` for substituted values.** If a literal is interpolated into the body, declare a matching `env` default and reference it as `${VAR}`.
+- **Use `@param` for substituted values.** If a literal is interpolated into the body, declare `# @param KEY description.` and reference it as `${KEY}`.
+- **Do not use `env` directives in repo flows.** Runtime values must come from `-e KEY=VALUE` (preferred) or `AD_VAR_KEY=...`.
+- **Use inline defaults sparingly.** Optional tuning values can use `${KEY:-fallback}` in the body; required values should have no fallback and must be provided by caller input.
 
 ## Recording a new flow
 
 1. Drive the target screen manually.
 2. Start a session with `--save-script`:
    ```bash
-   agent-device open <app> --save-script .claude/skills/agent-device/flows/<name>.ad
+   agent-device open <app> --save-script .claude/skills/agent-device/flows/<kind>/<name>.ad
    ```
 3. Perform the steps.
 4. `agent-device close` - flushes the `.ad`.
 5. Edit the generated file:
    - Delete the `context` line, leading `open ... --relaunch`, trailing `close`, and eyeballing `wait`s.
-   - Add `@desc`, `@pre`, optional `@post`, and `@tag` headers.
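+   - Make sure the file lives under `flows/macros/` or `flows/tests/` (this is the `<kind>` segment of the `--save-script` path; move the file if it was recorded elsewhere), then add `@desc`, `@pre`, optional `@post`, optional `@tag`, and any needed `@param` headers.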
 6. Verify: pre-check from a matching state, replay, post-check.
 
 ## Maintenance
 
 Heal selector drift in place:
 
 ```bash
-agent-device replay -u .claude/skills/agent-device/flows/<name>.ad
+agent-device replay -u .claude/skills/agent-device/flows/<kind>/<name>.ad
 ```
 
-Re-verify `@pre`/`@post` still hold, then commit. Note: `replay -u` is rejected when the script declares `env` directives (rewrite would drop them); strip the `env` block manually before healing, then re-add it.
+Re-verify `@pre`/`@post` still hold, then commit. Note: `replay -u` can rewrite interpolated lines to concrete selectors/values; review diffs and restore `${KEY}` placeholders where needed. Keep runtime inputs in `@param` + `-e`/`AD_VAR_*`; do not reintroduce in-file `env` directives.
@@ -3,8 +3,8 @@
 # @post   text="Home"
 # @post   role="button" label="Search"
 # @tag    onboarding
-env FIRST_NAME=Agent
-env LAST_NAME=Device
+# @param  FIRST_NAME First name to enter on onboarding profile step.
+# @param  LAST_NAME Last name to enter on onboarding profile step.
 
 press "id=\"onboardingPrivateEmailSkipButton\" || role=\"button\" label=\"Skip\" || label=\"Skip\""
 press "role=\"button\" label=\"Something else\" || label=\"Something else\""
 
@@ -0,0 +1,9 @@
+# @desc   Send a chat message from inside an already-open chat. Reusable helper for setting up state in other flows. Does not navigate or open a chat - assumes the composer is visible. For the QA scenario that exercises the ManualSendMessage Sentry span, see flows/tests/send-message.ad.
+# @pre    label="Write something..." editable=true
+# @post   label="Write something..." editable=true
+# @tag    chat
+# @param  MESSAGE Message text to send in the currently open chat.
+
+is exists "label=\"Write something...\" editable=true"
+fill "label=\"Write something...\" editable=true" "${MESSAGE}"
+press "role=\"button\" label=\"Send\" || label=\"Send\""
@@ -1,10 +1,10 @@
 # @desc   Sign in with the shared agent-device test account. Supports both new-account and returning-account outcomes. Caller MUST randomize EMAIL via `-e EMAIL=agent-device-testing+<9digits>@gmail.com` to avoid account flagging.
 # @pre    role="textfield" label="Phone or email"
 # @pre    role="button" label="Continue"
-# @post   text="Welcome || Home"
-# @post   role="button" label="Join || Search"
+# @post   text="Welcome" || text="Home"
+# @post   role="button" label="Join" || role="button" label="Search"
 # @tag    auth
-env EMAIL=agent-device-testing@gmail.com
+# @param  EMAIL Login email. Use randomized alias format `agent-device-testing+<9digits>@gmail.com` to avoid account flagging.
 
 fill "id=\"username\" || role=\"textfield\" label=\"Phone or email\" editable=true || label=\"Phone or email\" editable=true" "${EMAIL}"
 press "role=\"button\" label=\"Continue\" || label=\"Continue\""
@@ -12,9 +12,9 @@
 # @tag    sentry-ManualConfirmationListReady
 # @tag    sentry-ManualConfirmationReceiptLoad
 # @tag    sentry-ManualGeolocationWait
-env AMOUNT=22
-env CURRENCY=PLN
-env MERCHANT="Test Coffee"
+# @param  AMOUNT Whole-number amount to type into the Manual amount input (for example `22`).
+# @param  CURRENCY Currency code used by the submit CTA selector (for example `PLN` or `USD`).
+# @param  MERCHANT Merchant name to populate before submit.
 
 press "role=\"button\" label=\"Open actions menu\""
 press "role=\"button\" label=\"Create expense\" || label=\"Create expense\""
 
@@ -1,12 +1,12 @@
 # @desc   From the Inbox tab, open a chat/report matching TARGET_CHAT in the LHN, with fallback to the first available chat row. Triggers the ManualOpenReport Sentry span (LHN row press -> report screen mounted with composer visible). Assumes the user is signed in with at least one chat in the Inbox.
 # @pre    text="Inbox"
-# @pre    text="Navigates to a chat"
+# @pre    role="button" label="Search"
 # @post   role="textview" label="Write something..."
 # @tag    chat
 # @tag    navigation
 # @tag    perf
 # @tag    sentry-ManualOpenReport
-env TARGET_CHAT="Navigates to a chat"
+# @param  TARGET_CHAT Visible report/chat title to open from Inbox LHN.
 
 find "Inbox" "click"
 find "${TARGET_CHAT}" "click"