diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index c1955a8..326abf2 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,10 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 + npm install -g agent-device@latest cali@0.4.0-2 + - id: install_agent_device_skill + run: | + npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y - uses: eas/download_build id: download_build with: @@ -116,10 +119,6 @@ jobs: run: | bash ./scripts/agent-qa/provision-android-emulator.sh - id: run_agent_qa - env: - AGENT_DEVICE_SESSION: qa-android - AGENT_DEVICE_PLATFORM: android - AGENT_DEVICE_SESSION_LOCK: strip run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" @@ -150,20 +149,16 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 + npm install -g agent-device@latest cali@0.4.0-2 + - id: install_agent_device_skill + run: | + npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y - uses: eas/download_build id: download_build with: build_id: ${{ env.BUILD_ID }} extensions: [app] - - id: provision_ios_simulator - run: | - bash ./scripts/agent-qa/provision-ios-simulator.sh - id: run_agent_qa - env: - AGENT_DEVICE_SESSION: qa-ios - AGENT_DEVICE_PLATFORM: ios - AGENT_DEVICE_SESSION_LOCK: strip run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" diff --git a/README.md b/README.md index 2604ae3..f8f4137 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # EAS agent-device demo -This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows. +This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows with [`cali`](https://github.com/callstackincubator/cali). ## What it does - Reuses compatible Android and iOS simulator builds with `fingerprint` + `get-build` + `repack` - Falls back to a fresh `build` when the fingerprint changes -- Runs a small Node.js QA agent built with the AI SDK `ToolLoopAgent` +- Uses `cali qa` as the mobile QA agent runtime - Uses `agent-device` to drive the Android app and iOS simulator, take screenshots, and summarize findings - Posts one combined mobile QA summary back to the GitHub pull request with `github-comment` - Optionally uploads screenshots to Vercel Blob so the PR comment can link them @@ -17,7 +17,8 @@ This repo is a minimal Expo + CNG example for running AI-assisted Android and iO - [eas.json](./eas.json) - [.eas/workflows/agent-qa-mobile.yml](./.eas/workflows/agent-qa-mobile.yml) -- [scripts/agent-qa/index.ts](./scripts/agent-qa/index.ts) +- [cali.config.json](./cali.config.json) +- [scripts/agent-qa/run-and-export.sh](./scripts/agent-qa/run-and-export.sh) ## Required setup @@ -34,38 +35,39 @@ Optional environment variables for the QA job: - `QA_MODEL`: Override the default model (`openai/gpt-5.4-mini`) - `BLOB_READ_WRITE_TOKEN`: Upload screenshots to Vercel Blob and include public links in the PR comment +The repo-level [cali.config.json](./cali.config.json) points Cali at the bundled [`agent-device`](https://www.npmjs.com/package/agent-device) skills under `./node_modules/agent-device/skills`, so CI does not need a separate `~/.agents/skills` setup. + ## Local smoke test ```bash npm install -npx tsc --noEmit +npx cali qa --help ``` -The workflow runner writes `section.md`, `status.txt`, and `report.json` to `artifacts/qa/` during execution. Temporary screenshots are written outside the workspace and uploaded to Vercel Blob when configured. +The workflow runner writes `section.md`, `status.txt`, `report.json`, and `cali-context.json` to `artifacts/qa/` during execution. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. -To execute the runner directly with Node 24, provide the same environment variables the workflow sets: +To execute the QA command directly, provide the same inputs that the workflow uses: Android: ```bash AI_GATEWAY_API_KEY=... \ -QA_PLATFORM=android \ -APP_PATH=/absolute/path/to/app.apk \ -APPLICATION_ID=dev.expo.easagentdevice \ -BUILD_ID=test-build \ -PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \ -node ./scripts/agent-qa/index.ts +./node_modules/.bin/cali qa \ + --env local-android \ + --artifact /absolute/path/to/app.apk \ + --app-id dev.expo.easagentdevice \ + --device ci-android \ + --prompt "verify the updated welcome title" ``` iOS simulator: ```bash AI_GATEWAY_API_KEY=... \ -QA_PLATFORM=ios \ -APP_PATH=/absolute/path/to/MyApp.app \ -APPLICATION_ID=dev.expo.easagentdevice \ -AGENT_DEVICE_IOS_DEVICE="iPhone 17" \ -BUILD_ID=test-build \ -PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \ -node ./scripts/agent-qa/index.ts +./node_modules/.bin/cali qa \ + --env local-ios \ + --artifact /absolute/path/to/MyApp.app \ + --app-id dev.expo.easagentdevice \ + --device "iPhone 17" \ + --prompt "verify the updated welcome title" ``` diff --git a/cali.config.json b/cali.config.json new file mode 100644 index 0000000..dbea13b --- /dev/null +++ b/cali.config.json @@ -0,0 +1,13 @@ +{ + "skillPaths": [ + "./node_modules/agent-device/skills" + ], + "commands": { + "qa": { + "extraInstructions": [ + "When you need to verify whether text is visible on screen, prefer `snapshot` over `snapshot -i`. Use `snapshot -i` mainly for interactive exploration and choosing refs.", + "When you save screenshots, use short descriptive file names and include matching screenshotLabels so downstream PR comments can label them clearly." + ] + } + } +} diff --git a/package-lock.json b/package-lock.json index 7316351..705039e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "@react-navigation/bottom-tabs": "^7.4.0", "@react-navigation/elements": "^2.6.3", "@react-navigation/native": "^7.1.8", - "@vercel/blob": "^2.3.1", "expo": "^55.0.8", "expo-constants": "~55.0.9", "expo-font": "~55.0.4", @@ -37,22 +36,38 @@ }, "devDependencies": { "@types/react": "~19.2.10", - "agent-device": "^0.10.1", - "ai": "^6.0.116", + "cali": "0.4.0-0", "eslint": "^9.25.0", "eslint-config-expo": "~55.0.0", "typescript": "~5.9.2" } }, + "node_modules/@ai-sdk/anthropic": { + "version": "3.0.68", + "resolved": "https://registry.npmjs.org/@ai-sdk/anthropic/-/anthropic-3.0.68.tgz", + "integrity": "sha512-BAd+fmgYoJMmGw0/uV+jRlXX60PyGxelA6Clp4cK/NI0dsyv9jOOwzQmKNaz2nwb+Jz7HqI7I70KK4XtU5EcXQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "3.0.8", + "@ai-sdk/provider-utils": "4.0.23" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4.1.8" + } + }, "node_modules/@ai-sdk/gateway": { - "version": "3.0.66", - "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.66.tgz", - "integrity": "sha512-SIQ0YY0iMuv+07HLsZ+bB990zUJ6S4ujORAh+Jv1V2KGNn73qQKnGO0JBk+w+Res8YqOFSycwDoWcFlQrVxS4A==", + "version": "3.0.93", + "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.93.tgz", + "integrity": "sha512-8D6C9eEvDq6IgrdlWzpbniahDkoLiieTCrpzH8p/Hw63/0iPnZJ1uZcqxHrDIVDW/+aaGhBXqmx5C7HSd2eMmQ==", "dev": true, "license": "Apache-2.0", "dependencies": { "@ai-sdk/provider": "3.0.8", - "@ai-sdk/provider-utils": "4.0.19", + "@ai-sdk/provider-utils": "4.0.23", "@vercel/oidc": "3.1.0" }, "engines": { @@ -76,9 +91,9 @@ } }, "node_modules/@ai-sdk/provider-utils": { - "version": "4.0.19", - "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.19.tgz", - "integrity": "sha512-3eG55CrSWCu2SXlqq2QCsFjo3+E7+Gmg7i/oRVoSZzIodTuDSfLb3MRje67xE9RFea73Zao7Lm4mADIfUETKGg==", + "version": "4.0.23", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.23.tgz", + "integrity": "sha512-z8GlDaCmRSDlqkMF2f4/RFgWxdarvIbyuk+m6WXT1LYgsnGiXRJGTD2Z1+SDl3LqtFuRtGX1aghYvQLoHL/9pg==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -2219,6 +2234,16 @@ "excpretty": "build/cli.js" } }, + "node_modules/@fastify/busboy": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", + "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -3278,6 +3303,13 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "license": "MIT" }, + "node_modules/@types/tinycolor2": { + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@types/tinycolor2/-/tinycolor2-1.4.6.tgz", + "integrity": "sha512-iEN8J0BoMnsWBqjVbWH/c0G0Hh7O21lpR2/+PrvAVgWdzL7eexIFm4JN/Wn10PTcmNdtS6U67r499mlWMXOxNw==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/yargs": { "version": "17.0.35", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.35.tgz", @@ -3864,22 +3896,6 @@ "win32" ] }, - "node_modules/@vercel/blob": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/@vercel/blob/-/blob-2.3.1.tgz", - "integrity": "sha512-6f9oWC+DbWxIgBLOdqjjn2/REpFrPDB7y5B5HA1ptYkzZaBgL6E34kWrptJvJ7teApJdbAs3I1a5A7z1y8SDHw==", - "license": "Apache-2.0", - "dependencies": { - "async-retry": "^1.3.3", - "is-buffer": "^2.0.5", - "is-node-process": "^1.2.0", - "throttleit": "^2.1.0", - "undici": "^6.23.0" - }, - "engines": { - "node": ">=20.0.0" - } - }, "node_modules/@vercel/oidc": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/@vercel/oidc/-/oidc-3.1.0.tgz", @@ -3956,42 +3972,16 @@ "node": ">= 14" } }, - "node_modules/agent-device": { - "version": "0.10.1", - "resolved": "https://registry.npmjs.org/agent-device/-/agent-device-0.10.1.tgz", - "integrity": "sha512-3k7yoXE4yVtTL0qokeurOtnO3W9bgMcQl88QUtp9dkSqiOHIczm8e0ynfHn1fMtKiV6N+vjdOcu06Z/ZPpBvFw==", - "dev": true, - "license": "MIT", - "dependencies": { - "pngjs": "^7.0.0" - }, - "bin": { - "agent-device": "bin/agent-device.mjs" - }, - "engines": { - "node": ">=22" - } - }, - "node_modules/agent-device/node_modules/pngjs": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz", - "integrity": "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.19.0" - } - }, "node_modules/ai": { - "version": "6.0.116", - "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.116.tgz", - "integrity": "sha512-7yM+cTmyRLeNIXwt4Vj+mrrJgVQ9RMIW5WO0ydoLoYkewIvsMcvUmqS4j2RJTUXaF1HphwmSKUMQ/HypNRGOmA==", + "version": "6.0.153", + "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.153.tgz", + "integrity": "sha512-UlgBe4k0Ja1m1Eufn6FVSsHoF0sc7qwxX35ywJPDogIvBz0pHc+NOmCqiRY904DczNYIuwpZfKBLVz8HXgu3mg==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@ai-sdk/gateway": "3.0.66", + "@ai-sdk/gateway": "3.0.93", "@ai-sdk/provider": "3.0.8", - "@ai-sdk/provider-utils": "4.0.19", + "@ai-sdk/provider-utils": "4.0.23", "@opentelemetry/api": "1.9.0" }, "engines": { @@ -4292,6 +4282,7 @@ "version": "1.3.3", "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", + "dev": true, "license": "MIT", "dependencies": { "retry": "0.13.1" @@ -4728,6 +4719,69 @@ "node": ">= 0.8" } }, + "node_modules/cac": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/cac/-/cac-7.0.0.tgz", + "integrity": "sha512-tixWYgm5ZoOD+3g6UTea91eow5z6AAHaho3g0V9CNSNb45gM8SmflpAc+GRd1InC4AqN/07Unrgp56Y94N9hJQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/cali": { + "version": "0.4.0-0", + "resolved": "https://registry.npmjs.org/cali/-/cali-0.4.0-0.tgz", + "integrity": "sha512-xr6oQOb3k6dgXqkJZ9v3EXk8Ko4jZq9OqmOvAbNt36r1uA6qBitgjhmzfiESXow3mmqP4BE2WPhCPgx+5aCiPA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@ai-sdk/anthropic": "^3.0.64", + "@vercel/blob": "^0.27.0", + "ai": "^6.0.138", + "cac": "^7.0.0", + "cosmiconfig": "^9.0.1", + "dotenv": "^16.4.5", + "gradient-string": "^3.0.0", + "zod": "^4.3.6" + }, + "bin": { + "cali": "dist/index.js" + }, + "engines": { + "node": ">=22" + } + }, + "node_modules/cali/node_modules/@vercel/blob": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@vercel/blob/-/blob-0.27.3.tgz", + "integrity": "sha512-WizeAxzOTmv0JL7wOaxvLIU/KdBcrclM1ZUOdSlIZAxsTTTe1jsyBthStLby0Ueh7FnmKYAjLz26qRJTk5SDkQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "async-retry": "^1.3.3", + "is-buffer": "^2.0.5", + "is-node-process": "^1.2.0", + "throttleit": "^2.1.0", + "undici": "^5.28.4" + }, + "engines": { + "node": ">=16.14" + } + }, + "node_modules/cali/node_modules/undici": { + "version": "5.29.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.29.0.tgz", + "integrity": "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@fastify/busboy": "^2.0.0" + }, + "engines": { + "node": ">=14.0" + } + }, "node_modules/call-bind": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", @@ -5086,6 +5140,33 @@ "url": "https://opencollective.com/core-js" } }, + "node_modules/cosmiconfig": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", + "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "env-paths": "^2.2.1", + "import-fresh": "^3.3.0", + "js-yaml": "^4.1.0", + "parse-json": "^5.2.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/d-fischer" + }, + "peerDependencies": { + "typescript": ">=4.9.5" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, "node_modules/cross-fetch": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz", @@ -5331,6 +5412,19 @@ "node": ">=0.10.0" } }, + "node_modules/dotenv": { + "version": "16.6.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", + "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -5373,6 +5467,33 @@ "node": ">= 0.8" } }, + "node_modules/env-paths": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", + "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/error-ex": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", + "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/error-ex/node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true, + "license": "MIT" + }, "node_modules/error-stack-parser": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/error-stack-parser/-/error-stack-parser-2.1.4.tgz", @@ -5790,6 +5911,7 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -7282,6 +7404,33 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "license": "ISC" }, + "node_modules/gradient-string": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/gradient-string/-/gradient-string-3.0.0.tgz", + "integrity": "sha512-frdKI4Qi8Ihp4C6wZNB565de/THpIaw3DjP5ku87M+N9rNSGmPTjfkq61SdRXB7eCaL8O1hkKDvf6CDMtOzIAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^5.3.0", + "tinygradient": "^1.1.5" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gradient-string/node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, "node_modules/has-bigints": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz", @@ -7667,6 +7816,7 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "dev": true, "funding": [ { "type": "github", @@ -7885,6 +8035,7 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz", "integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==", + "dev": true, "license": "MIT" }, "node_modules/is-number": { @@ -8348,6 +8499,13 @@ "dev": true, "license": "MIT" }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true, + "license": "MIT" + }, "node_modules/json-schema": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", @@ -8722,6 +8880,13 @@ "url": "https://opencollective.com/parcel" } }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true, + "license": "MIT" + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -9820,6 +9985,25 @@ "node": ">=6" } }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/parse-png": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/parse-png/-/parse-png-2.1.0.tgz", @@ -10820,6 +11004,7 @@ "version": "0.13.1", "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "dev": true, "license": "MIT", "engines": { "node": ">= 4" @@ -11724,6 +11909,7 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz", "integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==", + "dev": true, "license": "MIT", "engines": { "node": ">=18" @@ -11732,6 +11918,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/tinycolor2": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", + "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", + "dev": true, + "license": "MIT" + }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -11781,6 +11974,17 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/tinygradient": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/tinygradient/-/tinygradient-1.1.5.tgz", + "integrity": "sha512-8nIfc2vgQ4TeLnk2lFj4tRLvvJwEfQuabdsmvDdQPT0xlk9TaNtpGd6nNRxXoK6vQhN6RSzj+Cnp5tTQmpxmbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/tinycolor2": "^1.4.0", + "tinycolor2": "^1.0.0" + } + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -12034,15 +12238,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/undici": { - "version": "6.24.1", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", - "integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", - "license": "MIT", - "engines": { - "node": ">=18.17" - } - }, "node_modules/undici-types": { "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", diff --git a/package.json b/package.json index 1c647f8..064e622 100644 --- a/package.json +++ b/package.json @@ -5,18 +5,17 @@ "scripts": { "start": "expo start", "reset-project": "node ./scripts/reset-project.js", - "android": "expo start --android", - "ios": "expo start --ios", + "android": "expo run:android", + "ios": "expo run:ios", "web": "expo start --web", "lint": "expo lint", - "agent-qa": "node ./scripts/agent-qa/index.ts" + "agent-qa": "cali qa --env mobile-pr --quiet" }, "dependencies": { "@expo/vector-icons": "^15.0.3", "@react-navigation/bottom-tabs": "^7.4.0", "@react-navigation/elements": "^2.6.3", "@react-navigation/native": "^7.1.8", - "@vercel/blob": "^2.3.1", "expo": "^55.0.8", "expo-constants": "~55.0.9", "expo-font": "~55.0.4", @@ -41,8 +40,7 @@ }, "devDependencies": { "@types/react": "~19.2.10", - "agent-device": "^0.10.1", - "ai": "^6.0.116", + "cali": "0.4.0-0", "eslint": "^9.25.0", "eslint-config-expo": "~55.0.0", "typescript": "~5.9.2" diff --git a/scripts/agent-qa/index.ts b/scripts/agent-qa/index.ts deleted file mode 100644 index 138e494..0000000 --- a/scripts/agent-qa/index.ts +++ /dev/null @@ -1,946 +0,0 @@ -import { execFile as execFileCallback } from 'node:child_process'; -import { existsSync } from 'node:fs'; -import { mkdir, readFile, readdir, stat, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import process from 'node:process'; -import { promisify } from 'node:util'; - -import { put } from '@vercel/blob'; -import { ToolLoopAgent, gateway, jsonSchema } from 'ai'; - -type SkillMetadata = { - name: string; - description: string; - directoryPath: string; - skillFilePath: string; -}; - -type QaPlatform = 'android' | 'ios'; - -type ScreenshotInfo = { - fileName: string; - absolutePath: string; - bytes: number; - label?: string; - blobUrl?: string; - blobDownloadUrl?: string; - blobPathname?: string; - uploadError?: string; -}; - -type ScreenshotLabel = { - fileName: string; - label: string; -}; - -type AgentDeviceTraceEntry = { - command: string; - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; -}; - -type ResultStatus = 'passed' | 'failed' | 'blocked' | 'not_tested' | 'unsure'; - -type ReportInput = { - overallStatus: ResultStatus; - summary: string; - checked?: string[]; - issues?: string[]; - nextSteps?: string[]; - screenshotLabels?: ScreenshotLabel[]; -}; - -type Report = ReportInput & { - generatedAt: string; - model: string; - buildId: string; - workflowUrl: string; - platform: QaPlatform; - platformLabel: string; - prNumber: number; - screenshots: ScreenshotInfo[]; - agentDeviceTrace: AgentDeviceTraceEntry[]; -}; - -type ParsedPr = { - number?: number; - title?: string; - body?: string | null; - draft?: boolean; - labels?: Array<{ name?: string }>; -}; - -type CommandResult = { - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; -}; - -type CommandOptions = { - cwd?: string; - allowFailure?: boolean; -}; - -type ExecFileError = Error & { - stdout?: string; - stderr?: string; - code?: number | string; -}; - -const execFile = promisify(execFileCallback); -const ROOT_DIR = process.cwd(); -const ARTIFACTS_DIR = path.join(ROOT_DIR, 'artifacts', 'qa'); -const SCREENSHOTS_DIR = path.join(tmpdir(), 'agent-qa-screenshots'); -const REPORT_PATH = path.join(ARTIFACTS_DIR, 'report.json'); -const SECTION_PATH = path.join(ARTIFACTS_DIR, 'section.md'); -const STATUS_PATH = path.join(ARTIFACTS_DIR, 'status.txt'); -const AGENT_DEVICE_BIN = 'agent-device'; -const QA_PLATFORM = normalizePlatform(process.env.QA_PLATFORM); -const APP_PATH = process.env.APP_PATH; -const BOOTSTRAP_ERROR = process.env.AGENT_QA_BOOTSTRAP_ERROR; -const BLOB_READ_WRITE_TOKEN = process.env.BLOB_READ_WRITE_TOKEN; -const MODEL_ID = process.env.QA_MODEL || 'openai/gpt-5.4-mini'; -const EMPTY_INPUT_SCHEMA = jsonSchema({ - type: 'object', - properties: {}, - additionalProperties: false, -}); -const SKILL_DIRECTORIES = [ - path.join(ROOT_DIR, 'node_modules', 'agent-device', 'skills'), -]; - -const pr = parseJson(process.env.PR_JSON, {}); -const context = { - platform: QA_PLATFORM, - platformLabel: QA_PLATFORM === 'ios' ? 'iOS' : 'Android', - buildId: process.env.BUILD_ID || '', - buildPath: APP_PATH || '', - prNumber: Number(pr.number || 0), - workflowUrl: process.env.WORKFLOW_URL || '', - applicationId: process.env.APPLICATION_ID || '', - deviceName: - process.env.DEVICE_NAME || - (QA_PLATFORM === 'ios' - ? process.env.AGENT_DEVICE_IOS_DEVICE || '' - : process.env.AGENT_DEVICE_ANDROID_DEVICE || ''), -}; -const agentDeviceTrace: AgentDeviceTraceEntry[] = []; - -function normalizePlatform(value: string | undefined): QaPlatform { - return value === 'ios' ? 'ios' : 'android'; -} - -function parseJson(value: string | undefined, fallback: T): T { - if (!value) { - return fallback; - } - - try { - return JSON.parse(value) as T; - } catch { - return fallback; - } -} - -function trim(value: string, max = 6000): string { - if (value.length <= max) { - return value; - } - - return `${value.slice(0, max)}\n...`; -} - -function humanizeScreenshotLabel(fileName: string): string { - const stem = fileName.replace(/\.[^.]+$/, ''); - const words = stem - .split(/[-_]+/g) - .filter(Boolean) - .map((word) => word.charAt(0).toUpperCase() + word.slice(1)); - return words.join(' ') || fileName; -} - -function stripFrontmatter(content: string): string { - const match = content.match(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/); - return match ? content.slice(match[0].length).trim() : content.trim(); -} - -function parseFrontmatter(content: string): { - name: string; - description: string; -} { - const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/); - if (!match?.[1]) { - throw new Error('No frontmatter found'); - } - - const frontmatter = match[1]; - const nameMatch = frontmatter.match(/^name:\s*(.+)$/m); - const descriptionMatch = frontmatter.match(/^description:\s*(.+)$/m); - const name = nameMatch?.[1]?.trim().replace(/^['"]|['"]$/g, ''); - const description = descriptionMatch?.[1] - ?.trim() - .replace(/^['"]|['"]$/g, ''); - - if (!name || !description) { - throw new Error('Skill frontmatter is missing name or description'); - } - - return { name, description }; -} - -async function discoverSkills(directories: string[]): Promise { - const skills: SkillMetadata[] = []; - const seenNames = new Set(); - - for (const directory of directories) { - let entries; - try { - entries = await readdir(directory, { withFileTypes: true }); - } catch { - continue; - } - - for (const entry of entries) { - if (!entry.isDirectory()) { - continue; - } - - const skillDirectoryPath = path.join(directory, entry.name); - const skillFilePath = path.join(skillDirectoryPath, 'SKILL.md'); - - try { - const content = await readFile(skillFilePath, 'utf8'); - const frontmatter = parseFrontmatter(content); - - if (seenNames.has(frontmatter.name.toLowerCase())) { - continue; - } - - seenNames.add(frontmatter.name.toLowerCase()); - skills.push({ - name: frontmatter.name, - description: frontmatter.description, - directoryPath: skillDirectoryPath, - skillFilePath, - }); - } catch { - continue; - } - } - } - - return skills.sort((left, right) => left.name.localeCompare(right.name)); -} - -function buildSkillsPrompt(skills: SkillMetadata[]): string { - if (skills.length === 0) { - return 'No local skills were discovered for this run.'; - } - - const skillList = skills - .map((skill) => `- ${skill.name}: ${skill.description}`) - .join('\n'); - - return [ - 'Available local skills:', - skillList, - '', - 'Load a skill before relying on its instructions. Use read_skill_file only for files inside the loaded skill directory.', - ].join('\n'); -} - -function findSkill(skills: SkillMetadata[], name: string): SkillMetadata { - const skill = skills.find( - (candidate) => candidate.name.toLowerCase() === name.toLowerCase(), - ); - - if (!skill) { - throw new Error(`Skill not found: ${name}`); - } - - return skill; -} - -function resolveSkillFilePath(skill: SkillMetadata, relativeFilePath: string): string { - const absolutePath = path.resolve(skill.directoryPath, relativeFilePath); - const relativePath = path.relative(skill.directoryPath, absolutePath); - const normalizedRelativePath = relativePath.split(path.sep).join('/'); - - if ( - normalizedRelativePath === '' || - normalizedRelativePath.startsWith('../') || - normalizedRelativePath === '..' - ) { - throw new Error( - `Refusing to read a path outside the skill directory: ${relativeFilePath}`, - ); - } - - return absolutePath; -} - -function ensureRequiredAgentQaEnvs(): void { - if (!process.env.AI_GATEWAY_API_KEY) { - throw new Error( - 'Missing required environment variable: AI_GATEWAY_API_KEY', - ); - } - if (!APP_PATH) { - throw new Error('Missing required environment variable: APP_PATH'); - } - if (!context.applicationId) { - throw new Error('Missing required environment variable: APPLICATION_ID'); - } - if (context.platform === 'ios' && !context.deviceName) { - throw new Error( - 'Missing required environment variable: AGENT_DEVICE_IOS_DEVICE', - ); - } -} - -async function runCommand( - file: string, - args: string[], - options: CommandOptions = {}, -): Promise { - const { cwd = ROOT_DIR, allowFailure = false } = options; - - try { - const result = await execFile(file, args, { - cwd, - env: process.env, - maxBuffer: 20 * 1024 * 1024, - }); - - return { - ok: true, - exitCode: 0, - stdout: result.stdout ?? '', - stderr: result.stderr ?? '', - }; - } catch (unknownError) { - const error = unknownError as ExecFileError; - const stdout = typeof error.stdout === 'string' ? error.stdout : ''; - const stderr = - typeof error.stderr === 'string' ? error.stderr : error.message; - const exitCode = typeof error.code === 'number' ? error.code : 1; - - if (!allowFailure) { - throw new Error( - [`Command failed: ${file} ${args.join(' ')}`, stderr || stdout] - .filter(Boolean) - .join('\n\n'), - ); - } - - return { - ok: false, - exitCode, - stdout, - stderr, - }; - } -} - -async function runAgentDeviceCommand(command: string, args: string[] = []): Promise<{ - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; - json: unknown; -}> { - const result = await runCommand(AGENT_DEVICE_BIN, [command, ...args], { - allowFailure: true, - }); - - agentDeviceTrace.push({ - command: [command, ...args].join(' '), - ok: result.ok, - exitCode: result.exitCode, - stdout: trim(result.stdout, 4000), - stderr: trim(result.stderr, 2000), - }); - - return { - ok: result.ok, - exitCode: result.exitCode, - stdout: trim(result.stdout, 8000), - stderr: trim(result.stderr, 4000), - json: parseJson(result.stdout, null as unknown), - }; -} - -async function ensureArtifactsDir(): Promise { - await mkdir(ARTIFACTS_DIR, { recursive: true }); -} - -async function ensureScreenshotsDir(): Promise { - await mkdir(SCREENSHOTS_DIR, { recursive: true }); -} - -async function listScreenshots(): Promise { - if (!existsSync(SCREENSHOTS_DIR)) { - return []; - } - - const entries = await readdir(SCREENSHOTS_DIR); - const screenshots: ScreenshotInfo[] = []; - for (const entry of entries) { - if (!entry.endsWith('.png')) { - continue; - } - - const absolutePath = path.join(SCREENSHOTS_DIR, entry); - const fileStat = await stat(absolutePath); - screenshots.push({ - fileName: entry, - absolutePath, - bytes: fileStat.size, - }); - } - - return screenshots.sort((left, right) => - left.fileName.localeCompare(right.fileName), - ); -} - -async function uploadScreenshotsToBlob( - screenshots: ScreenshotInfo[], -): Promise { - if (!BLOB_READ_WRITE_TOKEN || screenshots.length === 0) { - return screenshots; - } - - return Promise.all( - screenshots.map(async (screenshot) => { - try { - const fileBuffer = await readFile(screenshot.absolutePath); - const pathnameParts = [ - 'agent-qa', - context.platform, - context.prNumber ? `pr-${context.prNumber}` : 'pr-unknown', - context.buildId || 'local-build', - screenshot.fileName, - ]; - const pathname = pathnameParts.join('/'); - const blob = await put(pathname, fileBuffer, { - access: 'public', - addRandomSuffix: true, - contentType: 'image/png', - token: BLOB_READ_WRITE_TOKEN, - }); - - return { - ...screenshot, - blobUrl: blob.url, - blobDownloadUrl: blob.downloadUrl, - blobPathname: blob.pathname, - }; - } catch (unknownError) { - const error = - unknownError instanceof Error - ? unknownError - : new Error(String(unknownError)); - - console.error( - `Failed to upload screenshot ${screenshot.fileName} to Vercel Blob: ${error.message}`, - ); - - return { - ...screenshot, - uploadError: error.message, - }; - } - }), - ); -} - -async function writeBlockedReport(error: Error): Promise { - const summary: ReportInput = { - overallStatus: 'blocked', - summary: error.message, - checked: [ - `Attempted to run ${context.platformLabel} QA agent on PR changes`, - ], - issues: [error.message], - nextSteps: [ - 'Check the workflow logs for command failures.', - `Verify AI_GATEWAY_API_KEY, ${context.platformLabel} build availability, and ${context.platform === 'ios' ? 'simulator' : 'emulator'} configuration.`, - ], - }; - - await persistReport(summary); -} - -async function persistReport(reportInput: ReportInput) { - await ensureArtifactsDir(); - await ensureScreenshotsDir(); - const screenshotLabelMap = new Map( - (reportInput.screenshotLabels || []) - .filter( - (item): item is ScreenshotLabel => - Boolean(item?.fileName) && Boolean(item?.label), - ) - .map((item) => [item.fileName, item.label.trim()]), - ); - const screenshots = (await uploadScreenshotsToBlob(await listScreenshots())).map( - (screenshot) => ({ - ...screenshot, - label: - screenshotLabelMap.get(screenshot.fileName) || - humanizeScreenshotLabel(screenshot.fileName), - }), - ); - const report: Report = { - generatedAt: new Date().toISOString(), - model: MODEL_ID, - buildId: context.buildId, - workflowUrl: context.workflowUrl, - platform: context.platform, - platformLabel: context.platformLabel, - prNumber: context.prNumber, - screenshots, - agentDeviceTrace: agentDeviceTrace.slice(-20), - ...reportInput, - }; - - await writeFile(REPORT_PATH, `${JSON.stringify(report, null, 2)}\n`, 'utf8'); - await writeFile(SECTION_PATH, trim(renderPlatformSection(report), 16000), 'utf8'); - await writeFile(STATUS_PATH, `${report.overallStatus}\n`, 'utf8'); -} - -function renderScreenshotRows( - screenshots: ScreenshotInfo[], - platformLabel: string, -): string[] { - if (screenshots.length === 0) { - return ['- No screenshots were saved.']; - } - - const screenshotRows = screenshots.map((screenshot) => { - if (screenshot.blobUrl) { - return `| ${screenshot.fileName} |`; - } - - const details = [screenshot.fileName, `${screenshot.bytes} bytes`]; - if (screenshot.uploadError) { - details.push(`upload failed: ${screenshot.uploadError}`); - } - - return details.join(', '); - }); - - if (screenshots.some((screenshot) => screenshot.blobUrl)) { - return [ - `| ${platformLabel} |`, - '| --- |', - ...screenshotRows.filter((row) => row.startsWith('|')), - ]; - } - - return screenshotRows - .filter((value) => !value.startsWith('|')) - .map((row) => `- ${row}`); -} - -function getStatusEmoji(status: ResultStatus): string { - switch (status) { - case 'passed': - return '✅'; - case 'failed': - return '❌'; - case 'blocked': - return '⛔'; - case 'unsure': - return '🤔'; - case 'not_tested': - default: - return '⚪'; - } -} - -function renderPlatformSection(report: Report): string { - const lines = [ - `### ${report.platformLabel}`, - '', - `**Status:** ${getStatusEmoji(report.overallStatus)} ${report.overallStatus}`, - '', - report.summary || 'No summary was provided.', - '', - '### Checked', - ]; - - if (report.checked?.length) { - for (const item of report.checked) { - lines.push(`- ${item}`); - } - } else { - lines.push('- No checks were recorded.'); - } - - lines.push('', '### Issues'); - if (report.issues?.length) { - for (const issue of report.issues) { - lines.push(`- ${issue}`); - } - } else { - lines.push('- No issues noted.'); - } - - lines.push('', '### Screenshots'); - lines.push(...renderScreenshotRows(report.screenshots || [], report.platformLabel)); - - lines.push('', '### Next steps'); - if (report.nextSteps?.length) { - for (const step of report.nextSteps) { - lines.push(`- ${step}`); - } - } else { - lines.push('- No follow-up actions were suggested.'); - } - - lines.push('', '### Metadata'); - lines.push(`- Build ID: \`${report.buildId || 'n/a'}\``); - lines.push(`- Workflow: ${report.workflowUrl || 'n/a'}`); - lines.push('', '### JSON Report', ''); - lines.push('```json'); - lines.push(JSON.stringify(report, null, 2)); - lines.push('```'); - - return `${lines.join('\n')}\n`; -} - -function buildPrompt(skills: SkillMetadata[]): string { - const prTitle = pr.title || 'Untitled PR'; - const prBody = pr.body || 'No PR body was provided.'; - const platformSpecificContext = - context.platform === 'ios' - ? [`- Preferred iOS simulator: ${context.deviceName || 'n/a'}`] - : [`- Preferred Android device: ${context.deviceName || 'n/a'}`]; - const platformSpecificFlow = - context.platform === 'ios' - ? `For iOS simulator runs, the workflow already booted the app on ${context.deviceName}. Do not pass --device, --udid, or --session in normal app commands.` - : `For Android runs, the workflow already booted the app on ${context.deviceName || 'the booted emulator'}.`; - - return [ - `Review this pull request and run a lightweight ${context.platformLabel} QA pass.`, - '', - `PR #${context.prNumber}: ${prTitle}`, - '', - prBody, - '', - 'Execution context:', - `- Build ID: ${context.buildId || 'n/a'}`, - `- Build path: ${context.buildPath || 'n/a'}`, - `- Platform: ${context.platformLabel}`, - `- Application id: ${context.applicationId || 'n/a'}`, - ...platformSpecificContext, - `- Workflow URL: ${context.workflowUrl || 'n/a'}`, - `- Temporary screenshot directory: ${SCREENSHOTS_DIR}`, - '', - buildSkillsPrompt(skills), - '', - platformSpecificFlow, - `You must infer concise acceptance criteria from the PR, test only the highest-signal ${context.platformLabel} flows, load the relevant local skill before relying on it, save temporary screenshots into ${SCREENSHOTS_DIR}/*.png, and call write_report exactly once before finishing.`, - 'When you need to verify that text is actually visible on screen, prefer plain snapshot over snapshot -i. Use snapshot -i mainly for exploration and choosing refs.', - 'Use short, descriptive screenshot file names and include matching screenshotLabels with brief route or state labels like Home, Explore, or Welcome screen.', - 'If the accessibility tree or snapshot text is inconclusive but the screenshots likely show the changed UI, use overallStatus "unsure" instead of "blocked" or "failed".', - 'Do not end with plain text. Your final action must be a write_report tool call.', - ].join('\n'); -} - -function hasToolActivity( - steps: Array<{ - toolCalls?: Array<{ toolName?: string }>; - toolResults?: Array<{ toolName?: string }>; - }>, - toolName: string, -): boolean { - return steps.some((step) => { - const calledTool = step.toolCalls?.some((call) => call.toolName === toolName); - const completedTool = step.toolResults?.some( - (result) => result.toolName === toolName, - ); - return Boolean(calledTool || completedTool); - }); -} - -async function main(): Promise { - await ensureArtifactsDir(); - await ensureScreenshotsDir(); - ensureRequiredAgentQaEnvs(); - if (BOOTSTRAP_ERROR) { - await writeBlockedReport(new Error(BOOTSTRAP_ERROR)); - return; - } - const skills = await discoverSkills(SKILL_DIRECTORIES); - - const agent = new ToolLoopAgent({ - model: gateway(MODEL_ID), - instructions: [ - `You are a ${context.platformLabel} QA agent running inside EAS Workflows.`, - 'Treat the app and repository as a black box.', - 'Infer a short list of acceptance criteria from PR metadata, focusing on user-visible behavior.', - 'The workflow has already installed and launched the app before the agent starts.', - 'Use the local skills list in the prompt. Load a relevant skill before making non-trivial command choices.', - context.platform === 'ios' - ? `For iOS simulator runs, the workflow already booted and bound the simulator ${context.deviceName}. Do not pass --device, --udid, --serial, or --session in normal app commands.` - : 'For Android runs, the workflow already booted and bound the emulator.', - 'When verifying whether text is visible on screen, prefer plain snapshot. Use snapshot -i mainly for interactive exploration and choosing refs.', - `Take screenshots for meaningful states and save them temporarily in ${SCREENSHOTS_DIR} with .png filenames.`, - 'After any UI transition, refresh your understanding with snapshot or diff snapshot.', - 'Do not inspect repository source files, run git commands, or modify project code. The only allowed filesystem writes are the QA report files and temporary screenshots.', - 'Do not claim success without evidence from tool results.', - 'The workflow pre-binds the mobile target. Avoid explicit routing flags like --device, --udid, --serial, or --session in normal app commands unless you are inspecting device inventory.', - 'When you save screenshots, use short descriptive file names and include matching screenshotLabels in write_report so the PR comment can label them clearly.', - 'If text-based automation evidence is inconclusive but screenshots likely show the relevant UI, report overallStatus as unsure.', - 'If a prerequisite is missing or the environment is broken, mark the relevant checks as blocked.', - 'When you are done with the simulator or emulator session, prefer close --shutdown.', - 'You must call write_report exactly once before you finish.', - 'Never finish by returning plain text. Finish only by calling write_report.', - ].join(' '), - toolChoice: 'required', - prepareStep: async ({ steps, stepNumber }) => { - const hasWrittenReport = hasToolActivity(steps, 'write_report'); - const hasUsedDeviceTools = hasToolActivity(steps, 'agent_device'); - - if (hasWrittenReport || !hasUsedDeviceTools || stepNumber < 6) { - return undefined; - } - - return { - activeTools: ['write_report'], - toolChoice: { type: 'tool', toolName: 'write_report' }, - }; - }, - tools: { - get_pr_context: { - description: - 'Read the GitHub pull request context and workflow metadata for this QA run.', - inputSchema: EMPTY_INPUT_SCHEMA, - execute: async () => ({ - prNumber: context.prNumber, - title: pr.title || '', - body: pr.body || '', - labels: Array.isArray(pr.labels) - ? pr.labels.map((label) => label.name).filter(Boolean) - : [], - draft: Boolean(pr.draft), - buildId: context.buildId, - buildPath: context.buildPath, - workflowUrl: context.workflowUrl, - platform: context.platform, - platformLabel: context.platformLabel, - applicationId: context.applicationId, - deviceName: context.deviceName, - }), - }, - load_skill: { - description: - 'Load a local skill and return its instructions plus the skill directory path.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - name: { - type: 'string', - description: 'Skill name from the available local skills list.', - }, - }, - required: ['name'], - additionalProperties: false, - }), - execute: async ({ name }: { name: string }) => { - const skill = findSkill(skills, name); - const content = await readFile(skill.skillFilePath, 'utf8'); - return { - name: skill.name, - description: skill.description, - skillDirectory: skill.directoryPath, - skillFilePath: skill.skillFilePath, - content: stripFrontmatter(content), - }; - }, - }, - read_skill_file: { - description: - 'Read a text file inside a loaded skill directory, such as references or scripts.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - skillName: { - type: 'string', - description: 'Skill name from the available local skills list.', - }, - path: { - type: 'string', - description: - 'Path relative to the skill directory, such as references/foo.md.', - }, - startLine: { - type: 'integer', - minimum: 1, - description: '1-based line number to start reading from.', - }, - maxLines: { - type: 'integer', - minimum: 1, - maximum: 400, - description: 'Maximum number of lines to read.', - }, - }, - required: ['skillName', 'path'], - additionalProperties: false, - }), - execute: async ({ - skillName, - path: relativeFilePath, - startLine = 1, - maxLines = 200, - }: { - skillName: string; - path: string; - startLine?: number; - maxLines?: number; - }) => { - const skill = findSkill(skills, skillName); - const absolutePath = resolveSkillFilePath(skill, relativeFilePath); - const content = await readFile(absolutePath, 'utf8'); - const lines = content.split('\n'); - const slice = lines.slice( - Math.max(startLine - 1, 0), - Math.max(startLine - 1, 0) + maxLines, - ); - - return { - skillName: skill.name, - absolutePath, - startLine, - endLine: startLine + slice.length - 1, - content: slice.join('\n'), - }; - }, - }, - agent_device: { - description: - 'Run an agent-device command for mobile UI automation and screenshot capture.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - command: { - type: 'string', - description: - 'The first agent-device subcommand to run, such as devices, reinstall, open, snapshot, press, fill, or screenshot.', - }, - args: { - type: 'array', - items: { type: 'string' }, - description: - `Remaining CLI arguments. Use ${SCREENSHOTS_DIR}/*.png for screenshots.`, - }, - }, - required: ['command'], - additionalProperties: false, - }), - execute: async ({ - command, - args = [], - }: { - command: string; - args?: string[]; - }) => runAgentDeviceCommand(command, args), - }, - write_report: { - description: - 'Persist the final QA summary, findings, and screenshot index to artifacts/qa.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - overallStatus: { - type: 'string', - enum: ['passed', 'failed', 'blocked', 'not_tested', 'unsure'], - }, - summary: { - type: 'string', - }, - checked: { - type: 'array', - items: { type: 'string' }, - }, - issues: { - type: 'array', - items: { type: 'string' }, - }, - nextSteps: { - type: 'array', - items: { type: 'string' }, - }, - screenshotLabels: { - type: 'array', - items: { - type: 'object', - properties: { - fileName: { - type: 'string', - description: 'Saved screenshot file name, including .png.', - }, - label: { - type: 'string', - description: - 'Very short route or state label for this screenshot, such as Home or Welcome screen.', - }, - }, - required: ['fileName', 'label'], - additionalProperties: false, - }, - }, - }, - required: ['overallStatus', 'summary'], - additionalProperties: false, - }), - execute: async (input: ReportInput) => persistReport(input), - }, - }, - }); - - const result = await agent.generate({ - prompt: buildPrompt(skills), - }); - - if (result.text) { - console.log(trim(`Agent finished with final text:\n${result.text}`, 4000)); - } - - if (!existsSync(SECTION_PATH)) { - await persistReport({ - overallStatus: 'blocked', - summary: result.text || 'The agent completed without calling write_report.', - checked: [`Produce a ${context.platformLabel} QA report`], - issues: ['The write_report tool was not called by the agent.'], - nextSteps: [ - 'Inspect the workflow logs and tighten the agent instructions.', - ], - }); - console.log( - `Fallback QA report written to ${SECTION_PATH} because write_report was not called.`, - ); - return; - } - - console.log(`QA report written to ${SECTION_PATH}`); -} - -try { - await main(); -} catch (unknownError) { - const message = - unknownError instanceof Error - ? unknownError - : new Error(String(unknownError)); - console.error(message); - await writeBlockedReport(message); - process.exitCode = 1; -} diff --git a/scripts/agent-qa/package.json b/scripts/agent-qa/package.json deleted file mode 100644 index 3dbc1ca..0000000 --- a/scripts/agent-qa/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "module" -} diff --git a/scripts/agent-qa/provision-ios-simulator.sh b/scripts/agent-qa/provision-ios-simulator.sh deleted file mode 100644 index f708763..0000000 --- a/scripts/agent-qa/provision-ios-simulator.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -DEVICE_NAME="${AGENT_DEVICE_IOS_DEVICE:?AGENT_DEVICE_IOS_DEVICE is required}" -export AGENT_DEVICE_DAEMON_TIMEOUT_MS="${AGENT_DEVICE_DAEMON_TIMEOUT_MS:-180000}" -export AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS="${AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS:-180000}" - -agent-device ensure-simulator --platform ios --device "${DEVICE_NAME}" --boot - -if command -v set-env >/dev/null 2>&1; then - set-env AGENT_DEVICE_IOS_DEVICE "${DEVICE_NAME}" -fi diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh old mode 100644 new mode 100755 index 55a3def..9a23056 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -5,6 +5,11 @@ set -uo pipefail APP_PATH_ARG="${1:?APP_PATH argument is required}" QA_PLATFORM_VALUE="${QA_PLATFORM:?QA_PLATFORM is required}" APPLICATION_ID_VALUE="${APPLICATION_ID:?APPLICATION_ID is required}" +OUTPUT_DIR="artifacts/qa" +CONTEXT_PATH="${OUTPUT_DIR}/cali-context.json" +SCREENSHOTS_JSON_PATH="${OUTPUT_DIR}/screenshots.json" + +mkdir -p "${OUTPUT_DIR}" case "${QA_PLATFORM_VALUE}" in ios) @@ -18,40 +23,34 @@ case "${QA_PLATFORM_VALUE}" in ;; esac -set +e export APP_PATH="${APP_PATH_ARG}" - -BOOTSTRAP_ERROR="" -if [ "${QA_PLATFORM_VALUE}" = "android" ]; then - BOOTSTRAP_STEP="install" - agent-device install "${APPLICATION_ID_VALUE}" "${APP_PATH}" -else - BOOTSTRAP_STEP="reinstall" - agent-device reinstall "${APPLICATION_ID_VALUE}" "${APP_PATH}" -fi -BOOTSTRAP_EXIT=$? - -if [ "${BOOTSTRAP_EXIT}" -ne 0 ] && [ "${QA_PLATFORM_VALUE}" = "android" ]; then - BOOTSTRAP_STEP="reinstall" - agent-device reinstall "${APPLICATION_ID_VALUE}" "${APP_PATH}" - BOOTSTRAP_EXIT=$? +export CALI_OUTPUT_DIR="${OUTPUT_DIR}" +DEVICE_NAME_VALUE="${DEVICE_NAME:-}" +if [ -z "${DEVICE_NAME_VALUE}" ]; then + if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then + DEVICE_NAME_VALUE="${AGENT_DEVICE_IOS_DEVICE:-}" + else + DEVICE_NAME_VALUE="${AGENT_DEVICE_ANDROID_DEVICE:-}" + fi fi -if [ "${BOOTSTRAP_EXIT}" -eq 0 ]; then - BOOTSTRAP_STEP="open" - agent-device open "${APPLICATION_ID_VALUE}" --relaunch - BOOTSTRAP_EXIT=$? +set +e +if [ -n "${DEVICE_NAME_VALUE}" ]; then + cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" --device "${DEVICE_NAME_VALUE}" + CONTEXT_EXIT=$? +else + cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" + CONTEXT_EXIT=$? fi -if [ "${BOOTSTRAP_EXIT}" -ne 0 ]; then - BOOTSTRAP_ERROR="Deterministic ${PLATFORM_LABEL} app bootstrap failed during ${BOOTSTRAP_STEP}. See workflow logs above." +if [ "${CONTEXT_EXIT}" -eq 0 ]; then + cali qa --env eas-mobile-pr --quiet --context "${CONTEXT_PATH}" + EXIT_CODE=$? +else + EXIT_CODE="${CONTEXT_EXIT}" fi -export AGENT_QA_BOOTSTRAP_ERROR="${BOOTSTRAP_ERROR}" -npm run agent-qa -EXIT_CODE=$? - -STATUS="$(cat artifacts/qa/status.txt 2>/dev/null || printf blocked)" +STATUS="$(cat "${OUTPUT_DIR}/status.txt" 2>/dev/null || printf blocked)" case "${STATUS}" in passed) STATUS_LABEL="✅ passed" @@ -73,53 +72,58 @@ case "${STATUS}" in ;; esac -if [ -f artifacts/qa/section.md ]; then - SECTION_BODY="$(cat artifacts/qa/section.md)" -else - SECTION_BODY="### ${PLATFORM_LABEL} - -**Status:** ${STATUS_LABEL} - -No ${PLATFORM_LABEL} QA section was produced. -" +if [ ! -f "${OUTPUT_DIR}/report.json" ]; then + FALLBACK_SUMMARY="The Cali QA command failed before it could publish a report. Check the run_agent_qa logs above." + cat > "${OUTPUT_DIR}/status.txt" < "${OUTPUT_DIR}/section.md" < "${OUTPUT_DIR}/top-issue.txt" < "${OUTPUT_DIR}/screenshots.json" </dev/null | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')" +if [ -z "${TOP_ISSUE}" ]; then + if [ "${STATUS}" = "passed" ]; then + TOP_ISSUE="N/A" + else + TOP_ISSUE="No report.json was produced." + fi +fi +if [ -f "${SCREENSHOTS_JSON_PATH}" ]; then SCREENSHOTS_CELL="$( jq -r ' - if (.screenshots | length) == 0 then + if ((.screenshots // []) | length) == 0 then "N/A" else [ - .screenshots[] + (.screenshots // [])[] | if .blobUrl then - "**\((.label // .fileName))**
\"\((.label" + "**\((.label // .fileName // \"Screenshot\"))**
\"\((.label" else - "**\((.label // .fileName))**
\(.fileName) (\(.bytes) bytes)" + "**\((.label // .fileName // \"Screenshot\"))**
\(.fileName // \"screenshot\")" end ] | join("

") end - ' artifacts/qa/report.json + ' "${SCREENSHOTS_JSON_PATH}" )" else - if [ "${STATUS}" = "passed" ]; then - TOP_ISSUE="N/A" - else - TOP_ISSUE="No report.json was produced." - fi SCREENSHOTS_CELL="N/A" fi +SECTION_BODY="$(cat "${OUTPUT_DIR}/section.md" 2>/dev/null || printf '### %s\n\n**Status:** %s\n\nNo %s QA section was produced.\n' "${PLATFORM_LABEL}" "${STATUS_LABEL}" "${PLATFORM_LABEL}")" + set-output status "$STATUS" set-output status_label "$STATUS_LABEL" set-output top_issue "$TOP_ISSUE"