diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
new file mode 100644
index 000000000..8fa8c4387
--- /dev/null
+++ b/.github/workflows/linux.yml
@@ -0,0 +1,152 @@
+name: Linux
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  validate:
+    name: Typecheck & Unit Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Typecheck
+        run: pnpm typecheck
+
+      - name: Unit tests
+        run: pnpm test:unit
+
+  smoke-linux:
+    name: Smoke Tests
+    needs: validate
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      # Force X11 mode (Xvfb) — no Wayland on CI.
+      XDG_SESSION_TYPE: x11
+      DISPLAY: ":99"
+      # Headless GTK: avoid dconf issues, enable accessibility bridge.
+      GSETTINGS_BACKEND: memory
+      NO_AT_BRIDGE: "0"
+      GTK_A11Y: atspi
+      GTK_MODULES: "gail:atk-bridge"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install Linux desktop dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y -qq \
+            xvfb \
+            xdotool \
+            scrot \
+            at-spi2-core \
+            python3-gi \
+            gir1.2-atspi-2.0 \
+            libatk-adaptor \
+            dbus-x11 \
+            gnome-calculator \
+            wmctrl
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Start Xvfb and D-Bus
+        run: |
+          # Start virtual framebuffer (1280x1024, 24-bit color)
+          Xvfb :99 -screen 0 1280x1024x24 &
+          # Poll until the display accepts connections instead of trusting a
+          # fixed sleep (DISPLAY=:99 comes from the job-level env above).
+          tries=0
+          until xdotool getdisplaygeometry >/dev/null 2>&1; do
+            tries=$((tries + 1))
+            if [ "$tries" -ge 20 ]; then
+              echo "::error::Xvfb did not become ready on display :99"
+              exit 1
+            fi
+            sleep 0.5
+          done
+
+          # Start a D-Bus session and export its env vars for subsequent steps.
+          # dbus-launch forks a persistent daemon, so it survives the step.
+          eval "$(dbus-launch --sh-syntax)"
+          echo "DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS" >> "$GITHUB_ENV"
+          echo "DBUS_SESSION_BUS_PID=$DBUS_SESSION_BUS_PID" >> "$GITHUB_ENV"
+
+      - name: Start AT-SPI2 registry
+        run: |
+          # The registry must start AFTER DBUS_SESSION_BUS_ADDRESS is available
+          # (it was written to GITHUB_ENV in the previous step).
+          ATSPI_REG=$(find /usr -name at-spi2-registryd -type f 2>/dev/null | head -1)
+          if [ -z "$ATSPI_REG" ]; then
+            echo "::error::at-spi2-registryd not found. Install at-spi2-core."
+            exit 1
+          fi
+          "$ATSPI_REG" &
+          # Health probe: verify the registry is reachable on the a11y bus.
+          # Retried for up to ~10s because registry start-up time varies by runner.
+          probe() {
+            python3 -c "import gi; gi.require_version('Atspi','2.0'); from gi.repository import Atspi; d=Atspi.get_desktop(0); assert d is not None, 'desktop is None'; print(f'AT-SPI2 OK — {d.get_child_count()} apps')"
+          }
+          tries=0
+          until probe; do
+            tries=$((tries + 1))
+            if [ "$tries" -ge 10 ]; then
+              echo "::error::AT-SPI2 registry started but health probe failed"
+              exit 1
+            fi
+            sleep 1
+          done
+          echo "AT-SPI2 registry healthy"
+
+      - name: Verify environment
+        run: |
+          echo "=== Display ==="
+          xdotool getdisplaygeometry
+          echo "=== D-Bus ==="
+          echo "DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS"
+          echo "=== AT-SPI2 Python bindings ==="
+          python3 -c "import gi; gi.require_version('Atspi', '2.0'); from gi.repository import Atspi; print('OK')"
+          echo "=== AT-SPI2 tree dump (quick test) ==="
+          # Capture the dump first: the default `bash -e` shell has no pipefail,
+          # so a failing dump piped into `head` would never reach a `||` fallback.
+          if dump=$(python3 src/platforms/linux/atspi-dump.py --surface desktop --max-nodes 5); then
+            printf '%s\n' "$dump" | python3 -m json.tool | head -20
+          else
+            echo "::warning::AT-SPI2 tree dump returned no nodes (expected before any app is launched)"
+          fi
+          echo "=== xdotool ==="
+          xdotool version
+
+      - name: Run Linux replay smoke test
+        run: |
+          pnpm clean:daemon
+          node --experimental-strip-types src/bin.ts test test/integration/replays/linux \
+            --retries 3 \
+            --report-junit test/artifacts/replays-linux.junit.xml
+
+      - name: Upload Linux artifacts
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: linux-artifacts
+          if-no-files-found: ignore
+          path: |
+            test/artifacts/**
+            test/screenshots/**
diff --git a/.gitignore b/.gitignore
index 88f9449e4..8ca439447 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@ node_modules/
 .pnpm-store/
 dist/
 .DS_Store
+__pycache__/
+*.pyc
 *.log
 test/screenshots/*.png
 test/artifacts/
diff --git a/package.json b/package.json
index fddfa92d7..10a373a85 100644
--- a/package.json
+++ b/package.json
@@ -45,7 +45,8 @@
     "test:replay:ios": "node --experimental-strip-types src/bin.ts test test/integration/replays/ios/simulator",
     "test:replay:ios-device": "node --experimental-strip-types src/bin.ts test test/integration/replays/ios/device",
     "test:replay:android": "node --experimental-strip-types src/bin.ts test test/integration/replays/android",
-    "test:replay:macos": "node --experimental-strip-types src/bin.ts test test/integration/replays/macos"
+    "test:replay:macos": "node --experimental-strip-types src/bin.ts test test/integration/replays/macos",
+    "test:replay:linux": "node --experimental-strip-types src/bin.ts test test/integration/replays/linux"
   },
   "files": [
     "bin",
@@ -57,6 +58,7 @@
     "!ios-runner/**/*.xcuserstate",
     "macos-helper",
     "!macos-helper/**/.build",
+    "src/platforms/linux/atspi-dump.py",
     "skills",
     "README.md",
     "LICENSE"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 01decd6b6..f1990b81d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -65,24 +65,28 @@ packages:
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
+    libc: [glibc]
 
   '@ast-grep/napi-linux-arm64-musl@0.37.0':
     resolution: {integrity: sha512-LF9sAvYy6es/OdyJDO3RwkX3I82Vkfsng1sqUBcoWC1jVb1wX5YVzHtpQox9JrEhGl+bNp7FYxB4Qba9OdA5GA==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
+    libc: [musl]
 
   '@ast-grep/napi-linux-x64-gnu@0.37.0':
     resolution: {integrity: sha512-TViz5/klqre6aSmJzswEIjApnGjJzstG/SE8VDWsrftMBMYt2PTu3MeluZVwzSqDao8doT/P+6U11dU05UOgxw==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [linux]
+    libc: [glibc]
 
   '@ast-grep/napi-linux-x64-musl@0.37.0':
     resolution: {integrity: 
sha512-/BcCH33S9E3ovOAEoxYngUNXgb+JLg991sdyiNP2bSoYd30a9RHrG7CYwW6fMgua3ijQ474eV6cq9yZO1bCpXg==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@ast-grep/napi-win32-arm64-msvc@0.37.0': resolution: {integrity: sha512-TjQA4cFoIEW2bgjLkaL9yqT4XWuuLa5MCNd0VCDhGRDMNQ9+rhwi9eLOWRaap3xzT7g+nlbcEHL3AkVCD2+b3A==} @@ -205,48 +209,56 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [glibc] '@oxfmt/binding-linux-arm64-musl@0.42.0': resolution: {integrity: sha512-+JA0YMlSdDqmacygGi2REp57c3fN+tzARD8nwsukx9pkCHK+6DkbAA9ojS4lNKsiBjIW8WWa0pBrBWhdZEqfuw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [musl] '@oxfmt/binding-linux-ppc64-gnu@0.42.0': resolution: {integrity: sha512-VfnET0j4Y5mdfCzh5gBt0NK28lgn5DKx+8WgSMLYYeSooHhohdbzwAStLki9pNuGy51y4I7IoW8bqwAaCMiJQg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [ppc64] os: [linux] + libc: [glibc] '@oxfmt/binding-linux-riscv64-gnu@0.42.0': resolution: {integrity: sha512-gVlCbmBkB0fxBWbhBj9rcxezPydsQHf4MFKeHoTSPicOQ+8oGeTQgQ8EeesSybWeiFPVRx3bgdt4IJnH6nOjAA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [riscv64] os: [linux] + libc: [glibc] '@oxfmt/binding-linux-riscv64-musl@0.42.0': resolution: {integrity: sha512-zN5OfstL0avgt/IgvRu0zjQzVh/EPkcLzs33E9LMAzpqlLWiPWeMDZyMGFlSRGOdDjuNmlZBCgj0pFnK5u32TQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [riscv64] os: [linux] + libc: [musl] '@oxfmt/binding-linux-s390x-gnu@0.42.0': resolution: {integrity: sha512-9X6+H2L0qMc2sCAgO9HS03bkGLMKvOFjmEdchaFlany3vNZOjnVui//D8k/xZAtQv2vaCs1reD5KAgPoIU4msA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [s390x] os: [linux] + libc: [glibc] '@oxfmt/binding-linux-x64-gnu@0.42.0': resolution: {integrity: sha512-BajxJ6KQvMMdpXGPWhBGyjb2Jvx4uec0w+wi6TJZ6Tv7+MzPwe0pO8g5h1U0jyFgoaF7mDl6yKPW3ykWcbUJRw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [glibc] '@oxfmt/binding-linux-x64-musl@0.42.0': resolution: {integrity: 
sha512-0wV284I6vc5f0AqAhgAbHU2935B4bVpncPoe5n/WzVZY/KnHgqxC8iSFGeSyLWEgstFboIcWkOPck7tqbdHkzA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [musl] '@oxfmt/binding-openharmony-arm64@0.42.0': resolution: {integrity: sha512-p4BG6HpGnhfgHk1rzZfyR6zcWkE7iLrWxyehHfXUy4Qa5j3e0roglFOdP/Nj5cJJ58MA3isQ5dlfkW2nNEpolw==} @@ -319,48 +331,56 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [glibc] '@oxlint/binding-linux-arm64-musl@1.57.0': resolution: {integrity: sha512-i66WyEPVEvq9bxRUCJ/MP5EBfnTDN3nhwEdFZFTO5MmLLvzngfWEG3NSdXQzTT3vk5B9i6C2XSIYBh+aG6uqyg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [musl] '@oxlint/binding-linux-ppc64-gnu@1.57.0': resolution: {integrity: sha512-oMZDCwz4NobclZU3pH+V1/upVlJZiZvne4jQP+zhJwt+lmio4XXr4qG47CehvrW1Lx2YZiIHuxM2D4YpkG3KVA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [ppc64] os: [linux] + libc: [glibc] '@oxlint/binding-linux-riscv64-gnu@1.57.0': resolution: {integrity: sha512-uoBnjJ3MMEBbfnWC1jSFr7/nSCkcQYa72NYoNtLl1imshDnWSolYCjzb8LVCwYCCfLJXD+0gBLD7fyC14c0+0g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [riscv64] os: [linux] + libc: [glibc] '@oxlint/binding-linux-riscv64-musl@1.57.0': resolution: {integrity: sha512-BdrwD7haPZ8a9KrZhKJRSj6jwCor+Z8tHFZ3PT89Y3Jq5v3LfMfEePeAmD0LOTWpiTmzSzdmyw9ijneapiVHKQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [riscv64] os: [linux] + libc: [musl] '@oxlint/binding-linux-s390x-gnu@1.57.0': resolution: {integrity: sha512-BNs+7ZNsRstVg2tpNxAXfMX/Iv5oZh204dVyb8Z37+/gCh+yZqNTlg6YwCLIMPSk5wLWIGOaQjT0GUOahKYImw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [s390x] os: [linux] + libc: [glibc] '@oxlint/binding-linux-x64-gnu@1.57.0': resolution: {integrity: sha512-AghS18w+XcENcAX0+BQGLiqjpqpaxKJa4cWWP0OWNLacs27vHBxu7TYkv9LUSGe5w8lOJHeMxcYfZNOAPqw2bg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [glibc] '@oxlint/binding-linux-x64-musl@1.57.0': resolution: {integrity: 
sha512-E/FV3GB8phu/Rpkhz5T96hAiJlGzn91qX5yj5gU754P5cmVGXY1Jw/VSjDSlZBCY3VHjsVLdzgdkJaomEmcNOg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [musl] '@oxlint/binding-openharmony-arm64@1.57.0': resolution: {integrity: sha512-xvZ2yZt0nUVfU14iuGv3V25jpr9pov5N0Wr28RXnHFxHCRxNDMtYPHV61gGLhN9IlXM96gI4pyYpLSJC5ClLCQ==} @@ -421,36 +441,42 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-arm64-musl@1.0.0-rc.12': resolution: {integrity: sha512-V6/wZztnBqlx5hJQqNWwFdxIKN0m38p8Jas+VoSfgH54HSj9tKTt1dZvG6JRHcjh6D7TvrJPWFGaY9UBVOaWPw==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [musl] '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.12': resolution: {integrity: sha512-AP3E9BpcUYliZCxa3w5Kwj9OtEVDYK6sVoUzy4vTOJsjPOgdaJZKFmN4oOlX0Wp0RPV2ETfmIra9x1xuayFB7g==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [ppc64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.12': resolution: {integrity: sha512-nWwpvUSPkoFmZo0kQazZYOrT7J5DGOJ/+QHHzjvNlooDZED8oH82Yg67HvehPPLAg5fUff7TfWFHQS8IV1n3og==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [s390x] os: [linux] + libc: [glibc] '@rolldown/binding-linux-x64-gnu@1.0.0-rc.12': resolution: {integrity: sha512-RNrafz5bcwRy+O9e6P8Z/OCAJW/A+qtBczIqVYwTs14pf4iV1/+eKEjdOUta93q2TsT/FI0XYDP3TCky38LMAg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-x64-musl@1.0.0-rc.12': resolution: {integrity: sha512-Jpw/0iwoKWx3LJ2rc1yjFrj+T7iHZn2JDg1Yny1ma0luviFS4mhAIcd1LFNxK3EYu3DHWCps0ydXQ5i/rrJ2ig==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [musl] '@rolldown/binding-openharmony-arm64@1.0.0-rc.12': resolution: {integrity: sha512-vRugONE4yMfVn0+7lUKdKvN4D5YusEiPilaoO2sgUWpCvrncvWgPMzK00ZFFJuiPgLwgFNP5eSiUlv2tfc+lpA==} @@ -523,21 +549,25 @@ packages: resolution: {integrity: 
sha512-vD2+ztbMmeBR65jBlwUZCNIjUzO0exp/LaPSMIhLlqPlk670gMCQ7fmKo3tSgQ9tobfizEA/Atdy3/lW1Rl64A==} cpu: [arm64] os: [linux] + libc: [glibc] '@rspack/binding-linux-arm64-musl@2.0.0-beta.8': resolution: {integrity: sha512-jJ1XB7Yz9YdPRA6MJ35S9/mb+3jeI4p9v78E3dexzCPA3G4X7WXbyOcRbUlYcyOlE5MtX5O19rDexqWlkD9tVw==} cpu: [arm64] os: [linux] + libc: [musl] '@rspack/binding-linux-x64-gnu@2.0.0-beta.8': resolution: {integrity: sha512-qy+fK/tiYw3KvGjTGGMu/mWOdvBYrMO8xva/ouiaRTrx64PPZ6vyqFXOUfHj9rhY5L6aU2NTObpV6HZHcBtmhQ==} cpu: [x64] os: [linux] + libc: [glibc] '@rspack/binding-linux-x64-musl@2.0.0-beta.8': resolution: {integrity: sha512-eJF1IsayHhsURu5Dp6fzdr5jYGeJmoREOZAc9UV3aEqY6zNAcWgZT1RwKCCujJylmHgCTCOuxqdK/VdFJqWDyw==} cpu: [x64] os: [linux] + libc: [musl] '@rspack/binding-wasm32-wasi@2.0.0-beta.8': resolution: {integrity: sha512-HssdOQE8i+nUWoK+NDeD5OSyNxf80k3elKCl/due3WunoNn0h6tUTSZ8QB+bhcT4tjH9vTbibWZIT91avtvUNw==} @@ -1196,24 +1226,28 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] lightningcss-linux-arm64-musl@1.32.0: resolution: {integrity: sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] lightningcss-linux-x64-gnu@1.32.0: resolution: {integrity: sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] lightningcss-linux-x64-musl@1.32.0: resolution: {integrity: sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] lightningcss-win32-arm64-msvc@1.32.0: resolution: {integrity: sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==} diff --git a/src/core/__tests__/capabilities.test.ts b/src/core/__tests__/capabilities.test.ts index e290ea863..5d73f28ea 100644 --- 
a/src/core/__tests__/capabilities.test.ts +++ b/src/core/__tests__/capabilities.test.ts @@ -40,6 +40,14 @@ const macOsDevice: DeviceInfo = { target: 'desktop', }; +const linuxDevice: DeviceInfo = { + platform: 'linux', + id: 'local', + name: 'Linux Desktop', + kind: 'device', + target: 'desktop', +}; + const tvOsSimulator: DeviceInfo = { platform: 'ios', id: 'tv-sim-1', @@ -259,7 +267,59 @@ test('tvOS follows iOS capability matrix by device kind', () => { ); }); +test('Linux supports desktop interaction commands and blocks mobile/unsupported ones', () => { + assertCommandSupport( + [ + 'back', + 'click', + 'clipboard', + 'close', + 'diff', + 'fill', + 'find', + 'focus', + 'get', + 'home', + 'is', + 'longpress', + 'open', + 'press', + 'screenshot', + 'scroll', + 'snapshot', + 'swipe', + 'type', + 'wait', + ], + [{ device: linuxDevice, expected: true, label: 'on Linux' }], + ); + assertCommandSupport( + [ + 'alert', + 'app-switcher', + 'apps', + 'boot', + 'install', + 'install-from-source', + 'keyboard', + 'logs', + 'network', + 'perf', + 'pinch', + 'push', + 'record', + 'reinstall', + 'rotate', + 'scrollintoview', + 'settings', + 'trigger-app-event', + ], + [{ device: linuxDevice, expected: false, label: 'on Linux' }], + ); +}); + test('unknown commands default to supported', () => { assert.equal(isCommandSupportedOnDevice('some-future-cmd', iosSimulator), true); assert.equal(isCommandSupportedOnDevice('some-future-cmd', androidDevice), true); + assert.equal(isCommandSupportedOnDevice('some-future-cmd', linuxDevice), true); }); diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index 15731db98..bfd041abf 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -10,6 +10,7 @@ type KindMatrix = { type CommandCapability = { apple?: KindMatrix; android?: KindMatrix; + linux?: KindMatrix; supports?: (device: DeviceInfo) => boolean; }; @@ -17,6 +18,11 @@ const isNotMacOs = (device: DeviceInfo): boolean => device.platform !== 'macos'; const 
isMacOsOrAppleSimulator = (device: DeviceInfo): boolean => device.platform === 'macos' || device.kind === 'simulator'; +// Linux desktop supports these commands via xdotool/ydotool + AT-SPI2. +// Linux device kind is always 'device' (local desktop). +const LINUX_DEVICE: KindMatrix = { device: true }; +const LINUX_NONE: KindMatrix = {}; + const COMMAND_CAPABILITY_MATRIX: Record = { // Apple simulator-only. alert: { @@ -24,6 +30,7 @@ const COMMAND_CAPABILITY_MATRIX: Record = { // supports() guard excludes iOS physical devices. apple: { simulator: true, device: true }, android: {}, + linux: LINUX_NONE, supports: isMacOsOrAppleSimulator, }, pinch: { @@ -31,174 +38,215 @@ const COMMAND_CAPABILITY_MATRIX: Record = { // supports() guard excludes iOS physical devices. apple: { simulator: true, device: true }, android: {}, + linux: LINUX_NONE, supports: isMacOsOrAppleSimulator, }, 'app-switcher': { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, apps: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, back: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, boot: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, click: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, clipboard: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, supports: (device) => - device.platform === 'android' || device.platform === 'macos' || device.kind === 'simulator', + device.platform === 'android' || device.platform === 'linux' || device.platform === 'macos' || device.kind === 'simulator', }, keyboard: { 
// iOS only supports keyboard dismiss; status/get remains Android-only. apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: (device) => device.platform === 'android' || (device.platform === 'ios' && device.target !== 'tv'), }, close: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, fill: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, diff: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, find: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, focus: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, get: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, is: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, home: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, supports: isNotMacOs, }, logs: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, network: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, longpress: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, open: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, perf: { apple: { simulator: true, device: true }, android: { 
emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, install: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, 'install-from-source': { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, reinstall: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, press: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, push: { apple: { simulator: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: isNotMacOs, }, record: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, rotate: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: (device) => device.platform === 'android' || (device.platform === 'ios' && device.target !== 'tv'), }, screenshot: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, scroll: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, scrollintoview: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, swipe: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, settings: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, supports: (device) => device.platform === 'android' || device.platform === 'macos' || device.kind === 
'simulator', }, snapshot: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, 'trigger-app-event': { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_NONE, }, type: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, wait: { apple: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true }, + linux: LINUX_DEVICE, }, }; export function isCommandSupportedOnDevice(command: string, device: DeviceInfo): boolean { const capability = COMMAND_CAPABILITY_MATRIX[command]; if (!capability) return true; - const byPlatform = isApplePlatform(device.platform) ? capability.apple : capability.android; + const byPlatform = isApplePlatform(device.platform) + ? capability.apple + : device.platform === 'linux' + ? capability.linux + : capability.android; if (!byPlatform) return false; if (capability.supports && !capability.supports(device)) return false; const kind = (device.kind ?? 
'unknown') as keyof KindMatrix; diff --git a/src/core/click-button.ts b/src/core/click-button.ts index e91bbab8d..870ab0b6a 100644 --- a/src/core/click-button.ts +++ b/src/core/click-button.ts @@ -26,13 +26,13 @@ export function getClickButtonValidationError(options: { if (options.commandLabel !== 'click') { return new AppError('INVALID_ARGS', '--button is supported only for click'); } - if (options.platform !== 'macos') { + if (options.platform !== 'macos' && options.platform !== 'linux') { return new AppError( 'UNSUPPORTED_OPERATION', - `click --button ${options.button} is supported only on macOS`, + `click --button ${options.button} is supported only on macOS and Linux`, ); } - if (options.button === 'middle') { + if (options.platform === 'macos' && options.button === 'middle') { return new AppError( 'UNSUPPORTED_OPERATION', 'click --button middle is not supported by the macOS runner yet', diff --git a/src/core/dispatch-resolve.ts b/src/core/dispatch-resolve.ts index 557c4ca67..39fa5dca8 100644 --- a/src/core/dispatch-resolve.ts +++ b/src/core/dispatch-resolve.ts @@ -10,6 +10,7 @@ import { import { listAndroidDevices } from '../platforms/android/devices.ts'; import { ensureAdb } from '../platforms/android/index.ts'; import { findBootableIosSimulator, listAppleDevices } from '../platforms/ios/devices.ts'; +import { listLinuxDevices } from '../platforms/linux/devices.ts'; import { withDiagnosticTimer } from '../utils/diagnostics.ts'; import { resolveAndroidSerialAllowlist, @@ -104,10 +105,15 @@ export async function resolveTargetDevice(flags: ResolveDeviceFlags): Promise> { + if (device.platform === 'linux') { + const linuxResult = await withDiagnosticTimer( + 'snapshot_capture', + async () => + await snapshotLinux(context?.surface), + { backend: 'linux-atspi' }, + ); + return { + nodes: linuxResult.nodes ?? [], + truncated: linuxResult.truncated ?? 
false, + backend: 'linux-atspi', + }; + } if (device.platform !== 'android') { const result = (await withDiagnosticTimer( 'snapshot_capture', diff --git a/src/core/interactors.ts b/src/core/interactors.ts index 87d741b21..3ea0946f1 100644 --- a/src/core/interactors.ts +++ b/src/core/interactors.ts @@ -38,6 +38,24 @@ import { iosRunnerOverrides, resolveAppleBackRunnerCommand, } from '../platforms/ios/interactions.ts'; +import { + pressLinux, + doubleClickLinux, + swipeLinux, + longPressLinux, + focusLinux, + typeLinux, + fillLinux, + scrollLinux, +} from '../platforms/linux/input-actions.ts'; +import { screenshotLinux } from '../platforms/linux/screenshot.ts'; +import { + openLinuxApp, + closeLinuxApp, + backLinux, + homeLinux, +} from '../platforms/linux/app-lifecycle.ts'; +import { readLinuxClipboard, writeLinuxClipboard } from '../platforms/linux/clipboard.ts'; import type { PermissionSettingOptions } from '../platforms/permission-utils.ts'; import type { SessionSurface } from './session-surface.ts'; @@ -134,6 +152,37 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): setSetting: (setting, state, appId, options) => setAndroidSetting(device, setting, state, appId, options), }; + case 'linux': + return { + open: (app) => openLinuxApp(app), + openDevice: () => Promise.resolve(), + close: (app) => closeLinuxApp(app), + tap: (x, y) => pressLinux(x, y), + doubleTap: (x, y) => doubleClickLinux(x, y), + swipe: (x1, y1, x2, y2, durationMs) => swipeLinux(x1, y1, x2, y2, durationMs), + longPress: (x, y, durationMs) => longPressLinux(x, y, durationMs), + focus: (x, y) => focusLinux(x, y), + type: (text, delayMs) => typeLinux(text, delayMs), + fill: (x, y, text, delayMs) => fillLinux(x, y, text, delayMs), + scroll: (direction, options) => scrollLinux(direction, options), + scrollIntoView: () => { + throw new AppError('UNSUPPORTED_OPERATION', 'scrollIntoView not yet supported on Linux'); + }, + screenshot: (outPath) => screenshotLinux(outPath), 
+ back: () => backLinux(), + home: () => homeLinux(), + rotate: () => { + throw new AppError('UNSUPPORTED_OPERATION', 'rotate not supported on Linux'); + }, + appSwitcher: () => { + throw new AppError('UNSUPPORTED_OPERATION', 'appSwitcher not yet supported on Linux'); + }, + readClipboard: () => readLinuxClipboard(), + writeClipboard: (text) => writeLinuxClipboard(text), + setSetting: () => { + throw new AppError('UNSUPPORTED_OPERATION', 'setSetting not supported on Linux'); + }, + }; case 'ios': case 'macos': { const { overrides, runnerOpts } = iosRunnerOverrides(device, runnerContext); diff --git a/src/daemon/__tests__/selectors.test.ts b/src/daemon/__tests__/selectors.test.ts index c2fc13357..3f56e1d1d 100644 --- a/src/daemon/__tests__/selectors.test.ts +++ b/src/daemon/__tests__/selectors.test.ts @@ -261,3 +261,55 @@ test('role selector normalization matches Android class names by leaf type', () assert.ok(resolved); assert.equal(resolved.node.ref, 'a1'); }); + +// ── appName / windowTitle selectors ────────────────────────────────────── + +test('appName selector matches nodes with appName field', () => { + const desktopNodes: SnapshotState['nodes'] = [ + { + ref: 'd1', + index: 0, + type: 'Button', + label: 'OK', + appName: 'Calculator', + windowTitle: 'Main Window', + rect: { x: 0, y: 0, width: 80, height: 30 }, + hittable: true, + }, + { + ref: 'd2', + index: 1, + type: 'Button', + label: 'OK', + appName: 'TextEditor', + windowTitle: 'Untitled', + rect: { x: 0, y: 0, width: 80, height: 30 }, + hittable: true, + }, + ]; + + // Match by appName — should disambiguate two OK buttons + const chain1 = parseSelectorChain('label=OK appname=Calculator'); + const match1 = findSelectorChainMatch(desktopNodes, chain1, { platform: 'linux' }); + assert.ok(match1); + assert.equal(match1.matches, 1); + + // Match by windowTitle + const chain2 = parseSelectorChain('windowtitle=Untitled'); + const match2 = findSelectorChainMatch(desktopNodes, chain2, { platform: 'linux' }); + 
assert.ok(match2); + assert.equal(match2.matches, 1); + + // Case-insensitive key (appName vs appname) and value + const chain3 = parseSelectorChain('appName=calculator'); + const match3 = findSelectorChainMatch(desktopNodes, chain3, { platform: 'linux' }); + assert.ok(match3); + assert.equal(match3.matches, 1); +}); + +test('isSelectorToken recognizes appname and windowtitle', () => { + assert.ok(isSelectorToken('appName=Foo')); + assert.ok(isSelectorToken('appname=Foo')); + assert.ok(isSelectorToken('windowTitle=Bar')); + assert.ok(isSelectorToken('windowtitle=Bar')); +}); diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 7166032e3..278dcd1e3 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -6,7 +6,7 @@ import { handleInteractionCommands, unsupportedRefSnapshotFlags } from '../inter import { SessionStore } from '../../session-store.ts'; import type { SessionState } from '../../types.ts'; import type { CommandFlags } from '../../../core/dispatch.ts'; -import { attachRefs } from '../../../utils/snapshot.ts'; +import { attachRefs, type SnapshotBackend } from '../../../utils/snapshot.ts'; import { buildSnapshotState } from '../snapshot-capture.ts'; vi.mock('../../../core/dispatch.ts', async (importOriginal) => { @@ -68,7 +68,7 @@ async function emulateCaptureSnapshotForSession( [], effectiveFlags.out, contextFromFlags(effectiveFlags, session.appBundleId, session.trace?.outPath), - )) as { nodes?: never[]; truncated?: boolean; backend?: 'xctest' | 'android' | 'macos-helper' }; + )) as { nodes?: never[]; truncated?: boolean; backend?: SnapshotBackend }; const snapshot = buildSnapshotState(snapshotData ?? 
{}, effectiveFlags); session.snapshot = snapshot; sessionStore.set(session.name, session); diff --git a/src/daemon/handlers/__tests__/session.test.ts b/src/daemon/handlers/__tests__/session.test.ts index b2d06781f..35e4d3404 100644 --- a/src/daemon/handlers/__tests__/session.test.ts +++ b/src/daemon/handlers/__tests__/session.test.ts @@ -2621,7 +2621,7 @@ test('open rejects --surface on non-macOS devices', async () => { invoke: noopInvoke, }); - assertInvalidArgsMessage(response, 'surface is only supported on macOS'); + assertInvalidArgsMessage(response, 'surface is only supported on macOS and Linux'); }); test('open on existing macOS frontmost-app session preserves surface without --surface flag', async () => { diff --git a/src/daemon/handlers/session-open-surface.ts b/src/daemon/handlers/session-open-surface.ts index 1b7f1dc50..b511a845f 100644 --- a/src/daemon/handlers/session-open-surface.ts +++ b/src/daemon/handlers/session-open-surface.ts @@ -79,14 +79,30 @@ export function buildNextOpenSession(params: { }; } +const LINUX_SUPPORTED_SURFACES = new Set(['app', 'desktop', 'frontmost-app']); + function resolveOpenSurface( device: DeviceInfo, surfaceFlag: string | undefined, openTarget: string | undefined, ): SessionSurface { + if (device.platform === 'linux') { + if (!surfaceFlag) return 'app'; + const surface = parseSessionSurface(surfaceFlag); + if (!LINUX_SUPPORTED_SURFACES.has(surface)) { + throw new AppError( + 'INVALID_ARGS', + `Linux supports --surface app, desktop, and frontmost-app (got "${surfaceFlag}")`, + ); + } + if (surface !== 'app' && openTarget) { + throw new AppError('INVALID_ARGS', `open --surface ${surface} does not accept an app target`); + } + return surface; + } if (device.platform !== 'macos') { if (surfaceFlag) { - throw new AppError('INVALID_ARGS', 'surface is only supported on macOS'); + throw new AppError('INVALID_ARGS', 'surface is only supported on macOS and Linux'); } return 'app'; } @@ -104,7 +120,7 @@ export function 
resolveRequestedOpenSurface(params: { existingSurface?: SessionSurface; }): SessionSurface { const { device, surfaceFlag, openTarget, existingSurface } = params; - if (device.platform === 'macos' && !surfaceFlag) { + if ((device.platform === 'macos' || device.platform === 'linux') && !surfaceFlag) { return existingSurface ?? 'app'; } return resolveOpenSurface(device, surfaceFlag, openTarget); diff --git a/src/daemon/handlers/session-replay-heal.ts b/src/daemon/handlers/session-replay-heal.ts index b3e10c6ae..e69dc99b2 100644 --- a/src/daemon/handlers/session-replay-heal.ts +++ b/src/daemon/handlers/session-replay-heal.ts @@ -1,5 +1,10 @@ import { dispatchCommand } from '../../core/dispatch.ts'; -import { attachRefs, type RawSnapshotNode, type SnapshotState } from '../../utils/snapshot.ts'; +import { + attachRefs, + type RawSnapshotNode, + type SnapshotBackend, + type SnapshotState, +} from '../../utils/snapshot.ts'; import { pruneGroupNodes } from '../snapshot-processing.ts'; import { buildSelectorChainForNode, @@ -183,7 +188,7 @@ async function captureSnapshotForReplay( })) as { nodes?: RawSnapshotNode[]; truncated?: boolean; - backend?: 'xctest' | 'android' | 'macos-helper'; + backend?: SnapshotBackend; }; const rawNodes = data?.nodes ?? []; const nodes = attachRefs(action.flags?.snapshotRaw ? 
/**
 * Report whether a snapshot came from one of the desktop accessibility
 * backends (`macos-helper` or `linux-atspi`).
 *
 * Any other backend, including `undefined`, yields `false`.
 */
function isDesktopBackend(backend: SnapshotBackend | undefined): boolean {
  switch (backend) {
    case 'macos-helper':
    case 'linux-atspi':
      return true;
    default:
      return false;
  }
}
captureSnapshot(params: CaptureSnapshotParams): Promise<{ export async function captureSnapshotData(params: CaptureSnapshotParams): Promise { const { device, session, flags, outPath, logPath, snapshotScope } = params; + if (device.platform === 'linux') { + const linuxResult = await snapshotLinux(session?.surface); + return shapeDesktopSurfaceSnapshot( + { nodes: linuxResult.nodes, truncated: linuxResult.truncated, backend: 'linux-atspi' }, + { snapshotDepth: flags?.snapshotDepth, snapshotInteractiveOnly: flags?.snapshotInteractiveOnly, snapshotScope }, + ); + } if (device.platform === 'macos' && session?.surface && session.surface !== 'app') { const helperSnapshot = await runMacOsSnapshotAction(session.surface, { bundleId: session.surface === 'menubar' ? session.appBundleId : undefined, }); - return shapeMacOsSurfaceSnapshot(helperSnapshot, { + return shapeDesktopSurfaceSnapshot(helperSnapshot, { snapshotDepth: flags?.snapshotDepth, snapshotInteractiveOnly: flags?.snapshotInteractiveOnly, snapshotScope, @@ -175,7 +188,7 @@ export function buildSnapshotState( data: { nodes?: RawSnapshotNode[]; truncated?: boolean; - backend?: 'xctest' | 'android' | 'macos-helper'; + backend?: SnapshotBackend; }, flags: | (Pick< @@ -216,7 +229,7 @@ export function buildSnapshotVisibility(params: { snapshotRaw?: boolean; }): SnapshotVisibility { const { nodes, backend, snapshotRaw } = params; - if (snapshotRaw || backend === 'macos-helper') { + if (snapshotRaw || isDesktopBackend(backend)) { return { partial: false, visibleNodeCount: nodes.length, @@ -245,7 +258,7 @@ export function buildSnapshotVisibility(params: { }; } -function shapeMacOsSurfaceSnapshot( +function shapeDesktopSurfaceSnapshot( data: SnapshotData, options: { snapshotDepth?: number; diff --git a/src/daemon/selectors-match.ts b/src/daemon/selectors-match.ts index ee6658fb3..d16416f50 100644 --- a/src/daemon/selectors-match.ts +++ b/src/daemon/selectors-match.ts @@ -33,6 +33,10 @@ function matchesTerm(node: 
SnapshotNode, term: SelectorTerm, platform: Platform) return textEquals(node.value, String(term.value)); case 'text': return textEquals(extractNodeText(node), String(term.value)); + case 'appname': + return textEquals(node.appName, String(term.value)); + case 'windowtitle': + return textEquals(node.windowTitle, String(term.value)); case 'visible': return isNodeVisible(node) === Boolean(term.value); case 'hidden': diff --git a/src/daemon/selectors-parse.ts b/src/daemon/selectors-parse.ts index 5d8700a0d..4053f8b8d 100644 --- a/src/daemon/selectors-parse.ts +++ b/src/daemon/selectors-parse.ts @@ -6,6 +6,8 @@ export type SelectorKey = | 'text' | 'label' | 'value' + | 'appname' + | 'windowtitle' | 'visible' | 'hidden' | 'editable' @@ -28,7 +30,7 @@ export type SelectorChain = { selectors: Selector[]; }; -const TEXT_KEYS = new Set(['id', 'role', 'text', 'label', 'value']); +const TEXT_KEYS = new Set(['id', 'role', 'text', 'label', 'value', 'appname', 'windowtitle']); const BOOLEAN_KEYS = new Set([ 'visible', 'hidden', diff --git a/src/platforms/SNAPSHOT_CONTRACT.md b/src/platforms/SNAPSHOT_CONTRACT.md new file mode 100644 index 000000000..71d77eee0 --- /dev/null +++ b/src/platforms/SNAPSHOT_CONTRACT.md @@ -0,0 +1,108 @@ +# Snapshot Traversal Contract + +Each platform backend (Swift/XCTest, Android/UIAutomator, Python/AT-SPI2) produces +a flat array of `RawSnapshotNode` objects. Despite different accessibility frameworks, +all backends must conform to this shared contract so that the downstream pipeline +(filtering, ref assignment, presentation) works identically. 
+ +## Output schema + +Every node in the `nodes` array must include: + +| Field | Type | Required | Notes | +|---------------|-----------------------------------|----------|------------------------------------------| +| `index` | `number` | yes | Sequential, 0-based, pre-order DFS | +| `type` | `string` | yes | Normalized role (see table below) | +| `role` | `string` | no | Raw platform role for debugging | +| `label` | `string \| undefined` | no | Accessible name or description | +| `value` | `string \| undefined` | no | Text content or numeric value | +| `rect` | `{x, y, width, height} \| undef` | no | Screen-absolute bounding rect | +| `enabled` | `boolean \| undefined` | no | | +| `selected` | `boolean \| undefined` | no | | +| `hittable` | `boolean \| undefined` | no | Can receive pointer/touch events | +| `depth` | `number` | yes | Tree depth (root = 0) | +| `parentIndex` | `number \| undefined` | no | Index of parent node; undefined for roots | + +Platform-specific fields (optional, passed through): +- `pid`, `appName`, `windowTitle` (Linux, macOS desktop) +- `identifier`, `subrole` (iOS/macOS) +- `resourceId`, `className` (Android) + +## Traversal rules + +| Parameter | Default | Description | +|-----------------|---------|----------------------------------------------| +| `maxNodes` | 1500 | Stop traversal after this many nodes | +| `maxDepth` | 12 | Do not descend beyond this tree depth | +| `maxApps` | 24 | Desktop only: max top-level apps to traverse | + +- Traversal order is **pre-order depth-first**. +- `index` values are assigned in traversal order (0, 1, 2, …). +- `parentIndex` points to the containing node's `index`. +- When `maxNodes` is reached, set `truncated: true` in the result. +- Backends should skip defunct/inaccessible subtrees gracefully. 
+ ## Surface semantics + + | Surface | Behavior | +|------------------|-------------------------------------------------------| +| `app` | Snapshot the target application's UI tree | +| `frontmost-app` | Snapshot the focused/frontmost application (desktop) | +| `desktop` | Snapshot all visible applications on the desktop | +| `menubar` | macOS only: snapshot the system menu bar | + +## Normalized role types + +All backends must map platform-specific roles to these normalized strings. +The canonical mapping is maintained in: +- **iOS/macOS**: `ios-runner/…/SnapshotTraversal.swift` → `normalizedSnapshotType` +- **Android**: `src/platforms/android/ui-hierarchy.ts` → `normalizeAndroidType` +- **Linux**: `src/platforms/linux/role-map.ts` → `normalizeAtspiRole` + +Common normalized types: `Button`, `StaticText`, `TextField`, `TextArea`, +`CheckBox`, `RadioButton`, `Switch`, `ComboBox`, `Tab`, `TabList`, +`Menu`, `MenuItem`, `MenuBar`, `List`, `ListItem`, `Table`, `Cell`, `Row`, +`Tree`, `TreeItem`, `Group`, `Window`, `Dialog`, `Alert`, `ScrollArea`, +`ScrollBar`, `Slider`, `ProgressBar`, `Image`, `Link`, `Separator`, +`Toolbar`, `StatusBar`, `Tooltip`, `Application`, `Heading`. + +Unmapped roles should be PascalCased (e.g., `"extended table"` → `"ExtendedTable"`). + +## Linux platform notes + +### Surface mapping + +Linux maps session surfaces to AT-SPI2 as follows: + +| Session surface | AT-SPI2 behaviour | +|------------------|---------------------------------------------| +| `app` | Maps to `frontmost-app` (focused window) | +| `frontmost-app` | Traverses the focused application's tree | +| `desktop` | Traverses all visible applications | +| `menubar` | **Not supported** — rejected by `open --surface` with `INVALID_ARGS`; if it still reaches the AT-SPI2 snapshot path it falls back to `desktop` with a diagnostic warning | + +### Supported commands + +Linux supports: `back`, `click`, `close`, `diff`, `fill`, `find`, `focus`, +`get`, `home`, `is`, `longpress`, `open`, `press`, `screenshot`, `scroll`, +`snapshot`, `swipe`, `type`, `wait`. 
+ +Not supported (blocked at capability level): `alert`, `app-switcher`, `apps`, +`boot`, `install`, `keyboard`, `logs`, `network`, `perf`, `pinch`, +`push`, `record`, `reinstall`, `rotate`, `scrollintoview`, `settings`, +`trigger-app-event`. + +### Known limitations + +- Input synthesis uses `xdotool` (X11) or `ydotool` (Wayland) — availability depends on the desktop environment. +- On Wayland without `ydotool`, falls back to `xdotool` with a diagnostic warning (may not work). +- `scrollIntoView` is not yet implemented. +- Clipboard requires `xclip`/`xsel` (X11) or `wl-copy`/`wl-paste` (Wayland). +- Settings operations are not supported. + +## Adding a new platform + +1. Implement a snapshot function returning `{ nodes: RawSnapshotNode[], truncated: boolean }`. +2. Map platform roles to the normalized types listed above. +3. Add unit tests verifying role normalization and node schema conformance. +4. Wire into `snapshot-capture.ts` (`captureSnapshotData`) and `dispatch.ts`. diff --git a/src/platforms/linux/__tests__/atspi-bridge.test.ts b/src/platforms/linux/__tests__/atspi-bridge.test.ts new file mode 100644 index 000000000..ab1609a3c --- /dev/null +++ b/src/platforms/linux/__tests__/atspi-bridge.test.ts @@ -0,0 +1,244 @@ +import { afterAll, beforeEach, test, vi } from 'vitest'; +import assert from 'node:assert/strict'; + +vi.mock('../../../utils/exec.ts', async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, runCmd: vi.fn(), whichCmd: vi.fn() }; +}); + +import { captureAccessibilityTree } from '../atspi-bridge.ts'; +import { runCmd, whichCmd } from '../../../utils/exec.ts'; +import { AppError } from '../../../utils/errors.ts'; + +const mockRunCmd = vi.mocked(runCmd); +const mockWhichCmd = vi.mocked(whichCmd); + +// Stub process.platform to 'linux' for these tests +const originalPlatform = process.platform; +beforeEach(() => { + mockRunCmd.mockReset(); + mockWhichCmd.mockReset(); + mockWhichCmd.mockResolvedValue(true); + 
/**
 * Build the JSON text these tests feed back as the mocked helper's stdout.
 *
 * @param nodes Raw node records to place under the `nodes` key, serialized as given.
 * @param truncated Value for the `truncated` flag; defaults to `false`.
 * @returns A JSON string of the form `{ nodes, truncated, surface: 'desktop' }`.
 */
function makePythonResult(nodes: Record<string, unknown>[], truncated = false): string {
  // The surface is fixed to 'desktop'; tests that assert on `result.surface` rely on this.
  return JSON.stringify({ nodes, truncated, surface: 'desktop' });
}
assert.ok(callArgs.includes('--surface')); + assert.ok(callArgs.includes('frontmost-app')); + assert.ok(callArgs.includes('--max-nodes')); + assert.ok(callArgs.includes('500')); + assert.ok(callArgs.includes('--max-depth')); + assert.ok(callArgs.includes('8')); + assert.ok(callArgs.includes('--max-apps')); + assert.ok(callArgs.includes('10')); +}); + +test('throws TOOL_MISSING when python3 is not found', async () => { + mockWhichCmd.mockResolvedValue(false); + + await assert.rejects( + () => captureAccessibilityTree('desktop'), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'TOOL_MISSING'); + assert.ok(err.message.includes('python3')); + return true; + }, + ); +}); + +test('throws TOOL_MISSING when python3-gi is missing', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 1, + stdout: '', + stderr: "ModuleNotFoundError: No module named 'gi'", + }); + + await assert.rejects( + () => captureAccessibilityTree('desktop'), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'TOOL_MISSING'); + assert.ok(err.message.includes('python3-gi')); + return true; + }, + ); +}); + +test('throws COMMAND_FAILED on non-zero exit with unknown error', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 1, + stdout: '', + stderr: 'Segmentation fault', + }); + + await assert.rejects( + () => captureAccessibilityTree('desktop'), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'COMMAND_FAILED'); + assert.ok(err.message.includes('Segmentation fault')); + return true; + }, + ); +}); + +test('throws COMMAND_FAILED on invalid JSON output', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 0, + stdout: 'not json at all', + stderr: '', + }); + + await assert.rejects( + () => captureAccessibilityTree('desktop'), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'COMMAND_FAILED'); + assert.ok(err.message.includes('invalid JSON')); + 
return true; + }, + ); +}); + +test('throws COMMAND_FAILED when Python returns an error field', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 0, + stdout: JSON.stringify({ error: 'Could not get desktop accessible. Is the accessibility bus running?' }), + stderr: '', + }); + + await assert.rejects( + () => captureAccessibilityTree('desktop'), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'COMMAND_FAILED'); + assert.ok(err.message.includes('accessibility bus')); + return true; + }, + ); +}); + +test('handles truncated result', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 0, + stdout: makePythonResult( + [{ index: 0, role: 'frame', label: 'Win', depth: 0, parentIndex: null }], + true, + ), + stderr: '', + }); + + const result = await captureAccessibilityTree('desktop'); + assert.equal(result.truncated, true); +}); + +test('coerces null fields to undefined in node output', async () => { + mockRunCmd.mockResolvedValue({ + exitCode: 0, + stdout: makePythonResult([ + { + index: 0, + role: 'panel', + label: null, + value: null, + rect: null, + enabled: null, + selected: null, + hittable: false, + depth: 0, + parentIndex: null, + pid: null, + appName: null, + windowTitle: null, + }, + ]), + stderr: '', + }); + + const result = await captureAccessibilityTree('desktop'); + const node = result.nodes[0]!; + assert.equal(node.label, undefined); + assert.equal(node.value, undefined); + assert.equal(node.rect, undefined); + assert.equal(node.parentIndex, undefined); + assert.equal(node.pid, undefined); + assert.equal(node.appName, undefined); + assert.equal(node.windowTitle, undefined); +}); diff --git a/src/platforms/linux/__tests__/clipboard.test.ts b/src/platforms/linux/__tests__/clipboard.test.ts new file mode 100644 index 000000000..56f8cc0e7 --- /dev/null +++ b/src/platforms/linux/__tests__/clipboard.test.ts @@ -0,0 +1,121 @@ +import { afterAll, beforeEach, test, vi } from 'vitest'; +import assert from 
/** Make the process environment look like an X11 session (no Wayland display set). */
function setupX11(): void {
  delete process.env['WAYLAND_DISPLAY'];
  process.env['XDG_SESSION_TYPE'] = 'x11';
}

/** Make the process environment look like a Wayland session on display `wayland-0`. */
function setupWayland(): void {
  Object.assign(process.env, {
    XDG_SESSION_TYPE: 'wayland',
    WAYLAND_DISPLAY: 'wayland-0',
  });
}
mockWhichCmd.mockImplementation(async (cmd) => cmd === 'xclip'); + mockRunCmd.mockResolvedValue({ exitCode: 0, stdout: '', stderr: '' }); + + await writeLinuxClipboard('hello'); + assert.equal(mockRunCmd.mock.calls[0]![0], 'xclip'); + assert.equal((mockRunCmd.mock.calls[0]![2] as Record).stdin, 'hello'); +}); + +test('readLinuxClipboard throws TOOL_MISSING when no tool on X11', async () => { + setupX11(); + mockWhichCmd.mockResolvedValue(false); + + await assert.rejects( + () => readLinuxClipboard(), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'TOOL_MISSING'); + assert.ok(err.message.includes('xclip')); + return true; + }, + ); +}); + +// ── Wayland clipboard ──────────────────────────────────────────────────── + +test('readLinuxClipboard uses wl-paste on Wayland', async () => { + setupWayland(); + mockWhichCmd.mockImplementation(async (cmd) => cmd === 'wl-paste'); + mockRunCmd.mockResolvedValue({ exitCode: 0, stdout: 'wayland content', stderr: '' }); + + const result = await readLinuxClipboard(); + assert.equal(result, 'wayland content'); + assert.equal(mockRunCmd.mock.calls[0]![0], 'wl-paste'); +}); + +test('writeLinuxClipboard uses wl-copy on Wayland', async () => { + setupWayland(); + mockWhichCmd.mockImplementation(async (cmd) => cmd === 'wl-copy' || cmd === 'wl-paste'); + mockRunCmd.mockResolvedValue({ exitCode: 0, stdout: '', stderr: '' }); + + await writeLinuxClipboard('copied'); + assert.equal(mockRunCmd.mock.calls[0]![0], 'wl-copy'); + assert.ok((mockRunCmd.mock.calls[0]![1] as string[]).includes('copied')); +}); + +test('readLinuxClipboard throws TOOL_MISSING when no tool on Wayland', async () => { + setupWayland(); + mockWhichCmd.mockResolvedValue(false); + + await assert.rejects( + () => readLinuxClipboard(), + (err: unknown) => { + assert.ok(err instanceof AppError); + assert.equal(err.code, 'TOOL_MISSING'); + assert.ok(err.message.includes('wl-paste')); + return true; + }, + ); +}); diff --git 
/**
 * Shared fixture: pretend to run on Linux with the given session type, clear the
 * cached input-tool choice, and make `whichCmd` report only `tool` as present.
 * Every `runCmd` call resolves with a successful, empty result.
 */
function primeInputFixture(sessionType: string, waylandDisplay: string | undefined, tool: string): void {
  Object.defineProperty(process, 'platform', { value: 'linux' });
  process.env['XDG_SESSION_TYPE'] = sessionType;
  if (waylandDisplay === undefined) {
    delete process.env['WAYLAND_DISPLAY'];
  } else {
    process.env['WAYLAND_DISPLAY'] = waylandDisplay;
  }
  resetInputToolCache();
  mockWhichCmd.mockImplementation(async (cmd) => cmd === tool);
  mockRunCmd.mockResolvedValue({ exitCode: 0, stdout: '', stderr: '' });
}

/** X11 session in which only `xdotool` is reported as available. */
function setupXdotool(): void {
  primeInputFixture('x11', undefined, 'xdotool');
}

/** Wayland session (`wayland-0`) in which only `ydotool` is reported as available. */
function setupYdotool(): void {
  primeInputFixture('wayland', 'wayland-0', 'ydotool');
}

/** Project every recorded runCmd invocation down to its [command, args] pair. */
function calls(): Array<[string, string[]]> {
  return mockRunCmd.mock.calls.map((invocation) => {
    const command = invocation[0];
    const argv = invocation[1] as string[];
    return [command, argv];
  });
}
c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('type') && args.includes('--delay') && args.includes('50') && args.includes('hello'))); +}); + +test('typeLinux omits --delay when delayMs is 0', async () => { + setupXdotool(); + await typeLinux('test', 0); + const c = calls(); + const typeCall = c.find(([cmd, args]) => cmd === 'xdotool' && args.includes('type')); + assert.ok(typeCall); + assert.ok(!typeCall[1].includes('--delay')); +}); + +test('scrollLinux uses xdotool button 4 for up, 5 for down', async () => { + setupXdotool(); + await scrollLinux('up'); + let c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('click') && args.includes('4'))); + + mockRunCmd.mockClear(); + resetInputToolCache(); + setupXdotool(); + await scrollLinux('down'); + c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('click') && args.includes('5'))); +}); + +test('scrollLinux converts pixels to click count', async () => { + setupXdotool(); + await scrollLinux('down', { pixels: 150 }); + const c = calls(); + // 150 / 15 = 10 clicks + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('--repeat') && args.includes('10'))); +}); + +test('swipeLinux performs mousedown, mousemove, mouseup via xdotool', async () => { + setupXdotool(); + await swipeLinux(0, 0, 100, 100, 10); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('mousedown'))); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('mousemove') && args.includes('100'))); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('mouseup'))); +}); + +test('focusLinux delegates to pressLinux', async () => { + setupXdotool(); + await focusLinux(30, 40); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('mousemove') && args.includes('30') && args.includes('40'))); + assert.ok(c.some(([cmd, args]) => cmd === 
'xdotool' && args.includes('click') && args.includes('1'))); +}); + +// ── ydotool tests ──────────────────────────────────────────────────────── + +test('pressLinux uses ydotool mousemove + click on Wayland', async () => { + setupYdotool(); + await pressLinux(100, 200); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'ydotool' && args.includes('mousemove') && args.includes('--absolute'))); + assert.ok(c.some(([cmd, args]) => cmd === 'ydotool' && args.includes('click') && args.includes('0xC0'))); +}); + +test('sendKey uses ydotool with scancodes', async () => { + setupYdotool(); + await sendKey('alt+Left', ['56:1', '105:1', '105:0', '56:0']); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'ydotool' && args.includes('key') && args.includes('56:1'))); +}); + +test('typeLinux uses ydotool type', async () => { + setupYdotool(); + await typeLinux('hello'); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'ydotool' && args.includes('type') && args.includes('hello'))); +}); + +test('scrollLinux uses ydotool mousemove --wheel for vertical scroll', async () => { + setupYdotool(); + await scrollLinux('up'); + const c = calls(); + assert.ok(c.some(([cmd, args]) => cmd === 'ydotool' && args.includes('mousemove') && args.includes('--wheel') && args.includes('-y'))); +}); + +// ── fillLinux tests ────────────────────────────────────────────────────── + +test('fillLinux clicks, selects all, then types on X11', async () => { + setupXdotool(); + await fillLinux(50, 50, 'new text', 0); + const c = calls(); + // Should click, then ctrl+a, then type + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('click') && args.includes('1'))); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('key') && args.includes('ctrl+a'))); + assert.ok(c.some(([cmd, args]) => cmd === 'xdotool' && args.includes('type') && args.includes('new text'))); +}); diff --git a/src/platforms/linux/__tests__/role-map.test.ts 
b/src/platforms/linux/__tests__/role-map.test.ts
new file mode 100644
index 000000000..db0df8249
--- /dev/null
+++ b/src/platforms/linux/__tests__/role-map.test.ts
@@ -0,0 +1,40 @@
+import { test } from 'vitest';
+import assert from 'node:assert/strict';
+import { normalizeAtspiRole } from '../role-map.ts';
+
+test('maps common AT-SPI2 roles to normalized types', () => {
+  assert.equal(normalizeAtspiRole('push button'), 'Button');
+  assert.equal(normalizeAtspiRole('toggle button'), 'Button');
+  assert.equal(normalizeAtspiRole('label'), 'StaticText');
+  assert.equal(normalizeAtspiRole('text'), 'TextField');
+  assert.equal(normalizeAtspiRole('entry'), 'TextField');
+  assert.equal(normalizeAtspiRole('check box'), 'CheckBox');
+  assert.equal(normalizeAtspiRole('radio button'), 'RadioButton');
+  assert.equal(normalizeAtspiRole('menu item'), 'MenuItem');
+  assert.equal(normalizeAtspiRole('frame'), 'Window');
+  assert.equal(normalizeAtspiRole('dialog'), 'Dialog');
+  assert.equal(normalizeAtspiRole('panel'), 'Group');
+  assert.equal(normalizeAtspiRole('list'), 'List');
+  assert.equal(normalizeAtspiRole('list item'), 'ListItem');
+  assert.equal(normalizeAtspiRole('slider'), 'Slider');
+  assert.equal(normalizeAtspiRole('image'), 'Image');
+  assert.equal(normalizeAtspiRole('link'), 'Link');
+  assert.equal(normalizeAtspiRole('application'), 'Application');
+  assert.equal(normalizeAtspiRole('combo box'), 'ComboBox');
+  assert.equal(normalizeAtspiRole('page tab'), 'Tab');
+  assert.equal(normalizeAtspiRole('scroll bar'), 'ScrollBar');
+  assert.equal(normalizeAtspiRole('separator'), 'Separator');
+  assert.equal(normalizeAtspiRole('tool tip'), 'Tooltip');
+});
+
+test('normalizes role name case and whitespace', () => {
+  assert.equal(normalizeAtspiRole('Push Button'), 'Button');
+  assert.equal(normalizeAtspiRole('LABEL'), 'StaticText');
+  assert.equal(normalizeAtspiRole(' menu item '), 'MenuItem');
+});
+
+test('falls back to PascalCase for unmapped roles', () => {
+  assert.equal(normalizeAtspiRole('custom widget'), 'CustomWidget');
+  assert.equal(normalizeAtspiRole('some-fancy_role'), 'SomeFancyRole');
+  assert.equal(normalizeAtspiRole('unknown'), 'Unknown');
+});
diff --git a/src/platforms/linux/app-lifecycle.ts b/src/platforms/linux/app-lifecycle.ts
new file mode 100644
index 000000000..ef2c3556f
--- /dev/null
+++ b/src/platforms/linux/app-lifecycle.ts
@@ -0,0 +1,67 @@
+import { runCmd, whichCmd } from '../../utils/exec.ts';
+import { emitDiagnostic } from '../../utils/diagnostics.ts';
+import { sendKey } from './input-actions.ts';
+
+/**
+ * Open an application or URL on Linux.
+ *
+ * Accepts:
+ * - A URL or absolute path (handed to xdg-open)
+ * - A binary name on PATH, or anything else xdg-open can resolve
+ */
+export async function openLinuxApp(app: string): Promise<void> {
+  // URLs or file paths: use xdg-open
+  if (app.includes('://') || app.startsWith('/')) {
+    await runCmd('xdg-open', [app]);
+    return;
+  }
+
+  // Try launching as a binary first
+  if (await whichCmd(app)) {
+    // Fire-and-forget: the promise is deliberately not awaited; a rejection is only reported
+    runCmd(app, [], { allowFailure: true }).catch((err) => {
+      emitDiagnostic({
+        level: 'warn',
+        phase: 'linux_app_launch',
+        data: { app, error: String(err) },
+      });
+    });
+    // Give it a moment to start
+    await new Promise((resolve) => setTimeout(resolve, 500));
+    return;
+  }
+
+  // Fallback to xdg-open (handles .desktop file associations)
+  await runCmd('xdg-open', [app], { allowFailure: true });
+}
+
+/**
+ * Close an application by name on Linux.
+ *
+ * Asks wmctrl to close the matching window; falls back to pkill when wmctrl is missing or fails.
+ */
+export async function closeLinuxApp(app: string): Promise<void> {
+  if (await whichCmd('wmctrl')) {
+    const closed = await runCmd('wmctrl', ['-c', app], { allowFailure: true });
+    if (closed.exitCode === 0) return; // non-zero exit: nothing was closed, so fall through to pkill
+  }
+
+  // Fallback: send SIGTERM via pkill (exact process name match)
+  await runCmd('pkill', ['-x', app], { allowFailure: true });
+}
+
+/**
+ * Send Alt+Left arrow to go back (standard browser/app back navigation).
+ */
+export async function backLinux(): Promise<void> {
+  // ydotool key codes as code:state pairs (1 = press, 0 = release): Alt=56, Left=105
+  await sendKey('alt+Left', ['56:1', '105:1', '105:0', '56:0']);
+}
+
+/**
+ * Show desktop (minimize all windows) via Super+D.
+ */
+export async function homeLinux(): Promise<void> {
+  // ydotool key codes as code:state pairs (1 = press, 0 = release): Super=125, D=32
+  await sendKey('super+d', ['125:1', '32:1', '32:0', '125:0']);
+}
diff --git a/src/platforms/linux/atspi-bridge.ts b/src/platforms/linux/atspi-bridge.ts
new file mode 100644
index 000000000..ad8d79c61
--- /dev/null
+++ b/src/platforms/linux/atspi-bridge.ts
@@ -0,0 +1,181 @@
+/**
+ * AT-SPI2 bridge — shells out to a Python helper that uses PyGObject
+ * to traverse the accessibility tree.
+ *
+ * This avoids the fragile node-gtk native addon (ABI mismatches,
+ * compilation on CI, etc.) in favour of python3-gi which is the
+ * reference GObject Introspection consumer and is trivially available
+ * on every Linux distro.
+ */
+
+import path from 'node:path';
+import fs from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { AppError } from '../../utils/errors.ts';
+import { runCmd, whichCmd } from '../../utils/exec.ts';
+import type { RawSnapshotNode } from '../../utils/snapshot.ts';
+import { normalizeAtspiRole } from './role-map.ts';
+
+// ── Limits (matching macOS helper's SnapshotTraversalLimits) ────────────
+const MAX_DESKTOP_APPS = 24;
+const MAX_NODES = 1500;
+const MAX_DEPTH = 12;
+
+const SCRIPT_NAME = 'atspi-dump.py';
+
+let cachedScriptPath: string | null = null;
+
+/** Resolve atspi-dump.py relative to this module, checking both source and dist layouts.
*/ +function resolveScriptPath(): string { + if (cachedScriptPath) return cachedScriptPath; + const thisDir = path.dirname(fileURLToPath(import.meta.url)); + + // Walk upward looking for the script — handles both: + // src/platforms/linux/ (source) + // dist/src/ (bundled, .py lives in package root under src/platforms/linux/) + let dir = thisDir; + for (let i = 0; i < 5; i++) { + const candidate = path.join(dir, 'src', 'platforms', 'linux', SCRIPT_NAME); + if (fs.existsSync(candidate)) { cachedScriptPath = candidate; return candidate; } + // Also check same-directory (running from source dir directly) + if (i === 0) { + const sameDir = path.join(dir, SCRIPT_NAME); + if (fs.existsSync(sameDir)) { cachedScriptPath = sameDir; return sameDir; } + } + dir = path.dirname(dir); + } + + throw new AppError( + 'TOOL_MISSING', + `Cannot find ${SCRIPT_NAME}. Ensure the agent-device package is installed correctly.`, + ); +} + +// ── Public types ──────────────────────────────────────────────────────── + +export type TraversalOptions = { + maxNodes?: number; + maxDepth?: number; + maxApps?: number; +}; + +export type SnapshotSurface = 'desktop' | 'frontmost-app'; + +type PythonNode = { + index: number; + role: string; + label?: string; + value?: string; + rect?: { x: number; y: number; width: number; height: number }; + enabled?: boolean; + selected?: boolean; + hittable?: boolean; + depth: number; + parentIndex?: number; + pid?: number; + appName?: string; + windowTitle?: string; +}; + +type PythonResult = { + nodes: PythonNode[]; + truncated: boolean; + surface: string; + error?: string; +}; + +// ── Public API ────────────────────────────────────────────────────────── + +export async function captureAccessibilityTree( + surface: SnapshotSurface, + options: TraversalOptions = {}, +): Promise<{ + nodes: RawSnapshotNode[]; + truncated: boolean; + surface: SnapshotSurface; +}> { + if (process.platform !== 'linux') { + throw new AppError( + 'UNSUPPORTED_PLATFORM', + 'AT-SPI2 
bridge is only available on Linux', + ); + } + + if (!(await whichCmd('python3'))) { + throw new AppError( + 'TOOL_MISSING', + 'python3 is required for AT-SPI2 accessibility snapshots on Linux.', + ); + } + + const maxNodes = options.maxNodes ?? MAX_NODES; + const maxDepth = options.maxDepth ?? MAX_DEPTH; + const maxApps = options.maxApps ?? MAX_DESKTOP_APPS; + + const scriptPath = resolveScriptPath(); + const args = [ + scriptPath, + '--surface', surface, + '--max-nodes', String(maxNodes), + '--max-depth', String(maxDepth), + '--max-apps', String(maxApps), + ]; + + const result = await runCmd('python3', args, { + allowFailure: true, + timeoutMs: 30_000, + }); + + if (result.exitCode !== 0) { + const stderr = result.stderr.trim(); + if (stderr.includes('No module named') || stderr.includes('gi.require_version')) { + throw new AppError( + 'TOOL_MISSING', + 'AT-SPI2 Python bindings not found. Install python3-gi and gir1.2-atspi-2.0.', + { cause: stderr }, + ); + } + throw new AppError( + 'COMMAND_FAILED', + `AT-SPI2 snapshot failed (exit ${result.exitCode}): ${stderr || result.stdout}`, + ); + } + + let parsed: PythonResult; + try { + parsed = JSON.parse(result.stdout); + } catch { + throw new AppError( + 'COMMAND_FAILED', + `AT-SPI2 snapshot returned invalid JSON: ${result.stdout.slice(0, 200)}`, + ); + } + + if (parsed.error) { + throw new AppError('COMMAND_FAILED', `AT-SPI2: ${parsed.error}`); + } + + // Map Python output to RawSnapshotNode with normalized roles + const nodes: RawSnapshotNode[] = (parsed.nodes ?? []).map((n) => ({ + index: n.index, + type: normalizeAtspiRole(n.role), + role: n.role, + label: n.label ?? undefined, + value: n.value ?? undefined, + rect: n.rect ?? undefined, + enabled: n.enabled ?? undefined, + selected: n.selected ?? undefined, + hittable: n.hittable ?? undefined, + depth: n.depth, + parentIndex: n.parentIndex ?? undefined, + pid: n.pid ?? undefined, + appName: n.appName ?? undefined, + windowTitle: n.windowTitle ?? 
undefined, + })); + + return { + nodes, + truncated: parsed.truncated, + surface, + }; +} diff --git a/src/platforms/linux/atspi-dump.py b/src/platforms/linux/atspi-dump.py new file mode 100644 index 000000000..401a38c08 --- /dev/null +++ b/src/platforms/linux/atspi-dump.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +""" +AT-SPI2 accessibility tree dumper. + +Traverses the AT-SPI2 accessibility tree and outputs JSON to stdout. +Used by agent-device's Linux platform support as a subprocess. + +Requires: python3-gi, gir1.2-atspi-2.0 +""" + +import json +import sys + +import gi +gi.require_version("Atspi", "2.0") +from gi.repository import Atspi # noqa: E402 + +MAX_NODES = 1500 +MAX_DEPTH = 12 +MAX_DESKTOP_APPS = 24 + +VALID_SURFACES = ("desktop", "frontmost-app") + + +def get_rect(accessible): + try: + component = accessible.get_component_iface() + if not component: + return None + extents = component.get_extents(Atspi.CoordType.SCREEN) + if not extents: + return None + if extents.width <= 0 or extents.height <= 0: + return None + return { + "x": extents.x, + "y": extents.y, + "width": extents.width, + "height": extents.height, + } + except Exception: + return None + + +def get_text_value(accessible): + try: + text_iface = accessible.get_text_iface() + if not text_iface: + return None + count = text_iface.get_character_count() + if count <= 0: + return None + value = text_iface.get_text(0, count) + return value if value else None + except Exception: + return None + + +def get_numeric_value(accessible): + try: + value_iface = accessible.get_value_iface() + if not value_iface: + return None + current = value_iface.get_current_value() + if current is None: + return None + return str(current) + except Exception: + return None + + +def has_state(state_set, state_type): + try: + return state_set.contains(state_type) + except Exception: + return False + + +def traverse_node(accessible, depth, parent_index, ctx, app_info, window_title=None): + if len(ctx["nodes"]) >= 
ctx["max_nodes"] or depth > ctx["max_depth"] or not accessible: + return + + try: + role_name = accessible.get_role_name() or "unknown" + except Exception: + role_name = "unknown" + + try: + name = accessible.get_name() or "" + except Exception: + name = "" + + try: + description = accessible.get_description() or "" + except Exception: + description = "" + + label = name or description or None + rect = get_rect(accessible) + + try: + state_set = accessible.get_state_set() + except Exception: + state_set = None + + enabled = has_state(state_set, Atspi.StateType.ENABLED) if state_set else None + selected = has_state(state_set, Atspi.StateType.SELECTED) if state_set else None + visible = has_state(state_set, Atspi.StateType.VISIBLE) if state_set else True + showing = has_state(state_set, Atspi.StateType.SHOWING) if state_set else True + hittable = (enabled is not False) and visible and showing and (rect is not None) + + current_window_title = window_title + if current_window_title is None and role_name in ("frame", "window", "dialog"): + current_window_title = label + + nodes = ctx["nodes"] + node_index = len(nodes) + value = get_text_value(accessible) or get_numeric_value(accessible) + + node = { + "index": node_index, + "role": role_name, + "label": label, + "value": value, + "rect": rect, + "enabled": enabled, + "selected": selected, + "hittable": hittable, + "depth": depth, + "parentIndex": parent_index, + "pid": app_info.get("pid"), + "appName": app_info.get("appName"), + "windowTitle": current_window_title, + } + nodes.append(node) + + try: + child_count = accessible.get_child_count() + except Exception: + return + + for i in range(child_count): + if len(nodes) >= ctx["max_nodes"]: + break + try: + child = accessible.get_child_at_index(i) + if child: + traverse_node( + child, depth + 1, node_index, ctx, app_info, + current_window_title + ) + except Exception: + pass + + +def find_focused_application(desktop, app_count): + for i in range(app_count): + try: + app 
= desktop.get_child_at_index(i) + if not app: + continue + child_count = app.get_child_count() + for j in range(child_count): + try: + win = app.get_child_at_index(j) + if not win: + continue + state_set = win.get_state_set() + if state_set and has_state(state_set, Atspi.StateType.ACTIVE): + return app + except Exception: + pass + except Exception: + pass + + # Fallback: first app with children + for i in range(app_count): + try: + app = desktop.get_child_at_index(i) + if app and app.get_child_count() > 0: + return app + except Exception: + pass + return None + + +def get_app_info(app): + try: + app_name = app.get_name() or None + except Exception: + app_name = None + try: + pid = app.get_process_id() + except Exception: + pid = None + return {"appName": app_name, "pid": pid} + + +def capture(surface, max_nodes=MAX_NODES, max_depth=MAX_DEPTH, max_apps=MAX_DESKTOP_APPS): + desktop = Atspi.get_desktop(0) + if not desktop: + return {"error": "Could not get desktop accessible. Is the accessibility bus running?"} + + app_count = desktop.get_child_count() + ctx = {"nodes": [], "max_nodes": max_nodes, "max_depth": max_depth} + + if surface == "frontmost-app": + focused = find_focused_application(desktop, app_count) + if focused: + traverse_node(focused, 0, None, ctx, get_app_info(focused)) + else: + apps_to_traverse = min(app_count, max_apps) + for i in range(apps_to_traverse): + if len(ctx["nodes"]) >= max_nodes: + break + try: + app = desktop.get_child_at_index(i) + if not app or app.get_child_count() == 0: + continue + traverse_node(app, 0, None, ctx, get_app_info(app)) + except Exception: + pass + + nodes = ctx["nodes"] + return { + "nodes": nodes, + "truncated": len(nodes) >= max_nodes, + "surface": surface, + } + + +def parse_int_arg(value, name): + try: + n = int(value) + if n < 0: + raise ValueError(f"negative value") + return n + except ValueError as e: + json.dump({"error": f"Invalid value for {name}: '{value}' ({e})"}, sys.stdout) + sys.exit(1) + + +def main(): 
+ try: + surface = "desktop" + max_nodes = MAX_NODES + max_depth = MAX_DEPTH + max_apps = MAX_DESKTOP_APPS + + args = sys.argv[1:] + i = 0 + while i < len(args): + if args[i] == "--surface" and i + 1 < len(args): + surface = args[i + 1] + i += 2 + elif args[i] == "--max-nodes" and i + 1 < len(args): + max_nodes = parse_int_arg(args[i + 1], "--max-nodes") + i += 2 + elif args[i] == "--max-depth" and i + 1 < len(args): + max_depth = parse_int_arg(args[i + 1], "--max-depth") + i += 2 + elif args[i] == "--max-apps" and i + 1 < len(args): + max_apps = parse_int_arg(args[i + 1], "--max-apps") + i += 2 + else: + i += 1 + + if surface not in VALID_SURFACES: + json.dump( + {"error": f"Unknown surface '{surface}'. Valid: {', '.join(VALID_SURFACES)}"}, + sys.stdout, + ) + sys.exit(1) + + result = capture(surface, max_nodes, max_depth, max_apps) + json.dump(result, sys.stdout, ensure_ascii=False) + except SystemExit: + raise + except Exception as e: + json.dump({"error": f"Unexpected error: {e}"}, sys.stdout) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/platforms/linux/clipboard.ts b/src/platforms/linux/clipboard.ts new file mode 100644 index 000000000..ff706012a --- /dev/null +++ b/src/platforms/linux/clipboard.ts @@ -0,0 +1,67 @@ +import { runCmd, whichCmd } from '../../utils/exec.ts'; +import { AppError } from '../../utils/errors.ts'; +import { isWayland } from './linux-env.ts'; + +type ClipboardTool = 'wl-clipboard' | 'xclip' | 'xsel'; + +let cachedTool: { tool: ClipboardTool; display: 'wayland' | 'x11' } | null = null; + +async function resolveClipboardTool(): Promise<{ tool: ClipboardTool; display: 'wayland' | 'x11' }> { + if (cachedTool) return cachedTool; + + if (isWayland()) { + // wl-clipboard provides both wl-paste and wl-copy + if (await whichCmd('wl-paste')) { cachedTool = { tool: 'wl-clipboard', display: 'wayland' }; return cachedTool; } + throw new AppError( + 'TOOL_MISSING', + 'wl-paste (wl-clipboard) is required for clipboard access 
on Wayland. Install via your package manager.', + ); + } + + if (await whichCmd('xclip')) { cachedTool = { tool: 'xclip', display: 'x11' }; return cachedTool; } + if (await whichCmd('xsel')) { cachedTool = { tool: 'xsel', display: 'x11' }; return cachedTool; } + throw new AppError( + 'TOOL_MISSING', + 'xclip or xsel is required for clipboard access on X11. Install via your package manager.', + ); +} + +/** Reset cached tool (for testing). */ +export function resetClipboardToolCache(): void { + cachedTool = null; +} + +export async function readLinuxClipboard(): Promise { + const { tool } = await resolveClipboardTool(); + + switch (tool) { + case 'wl-clipboard': { + const result = await runCmd('wl-paste', ['--no-newline'], { allowFailure: true, timeoutMs: 5000 }); + return result.stdout; + } + case 'xclip': { + const result = await runCmd('xclip', ['-selection', 'clipboard', '-o'], { allowFailure: true, timeoutMs: 5000 }); + return result.stdout; + } + case 'xsel': { + const result = await runCmd('xsel', ['--clipboard', '--output'], { allowFailure: true, timeoutMs: 5000 }); + return result.stdout; + } + } +} + +export async function writeLinuxClipboard(text: string): Promise { + const { tool } = await resolveClipboardTool(); + + switch (tool) { + case 'wl-clipboard': + await runCmd('wl-copy', ['--', text], { allowFailure: false, timeoutMs: 5000 }); + break; + case 'xclip': + await runCmd('xclip', ['-selection', 'clipboard'], { allowFailure: false, timeoutMs: 5000, stdin: text }); + break; + case 'xsel': + await runCmd('xsel', ['--clipboard', '--input'], { allowFailure: false, timeoutMs: 5000, stdin: text }); + break; + } +} diff --git a/src/platforms/linux/devices.ts b/src/platforms/linux/devices.ts new file mode 100644 index 000000000..62e76086f --- /dev/null +++ b/src/platforms/linux/devices.ts @@ -0,0 +1,19 @@ +import { hostname } from 'node:os'; +import type { DeviceInfo } from '../../utils/device.ts'; + +export async function listLinuxDevices(): Promise { + if 
(process.platform !== 'linux') { + return []; + } + + return [ + { + platform: 'linux', + id: 'local', + name: hostname(), + kind: 'device', + target: 'desktop', + booted: true, + }, + ]; +} diff --git a/src/platforms/linux/index.ts b/src/platforms/linux/index.ts new file mode 100644 index 000000000..b47c1ce43 --- /dev/null +++ b/src/platforms/linux/index.ts @@ -0,0 +1,17 @@ +export { listLinuxDevices } from './devices.ts'; +export { snapshotLinux } from './snapshot.ts'; +export { screenshotLinux } from './screenshot.ts'; +export { + pressLinux, + rightClickLinux, + middleClickLinux, + doubleClickLinux, + longPressLinux, + focusLinux, + swipeLinux, + scrollLinux, + typeLinux, + fillLinux, +} from './input-actions.ts'; +export { openLinuxApp, closeLinuxApp, backLinux, homeLinux } from './app-lifecycle.ts'; +export { readLinuxClipboard, writeLinuxClipboard } from './clipboard.ts'; diff --git a/src/platforms/linux/input-actions.ts b/src/platforms/linux/input-actions.ts new file mode 100644 index 000000000..de4445f57 --- /dev/null +++ b/src/platforms/linux/input-actions.ts @@ -0,0 +1,200 @@ +import { runCmd } from '../../utils/exec.ts'; +import { ensureInputTool } from './linux-env.ts'; +import type { ScrollDirection } from '../../core/scroll-gesture.ts'; + +// ── Low-level wrappers ───────────────────────────────────────────────── + +/** Per-action timeout — prevents hung xdotool/ydotool from blocking indefinitely. */ +const INPUT_TIMEOUT_MS = 10_000; + +async function xdotool(...args: string[]): Promise { + await runCmd('xdotool', args, { allowFailure: false, timeoutMs: INPUT_TIMEOUT_MS }); +} + +async function ydotool(...args: string[]): Promise { + await runCmd('ydotool', args, { allowFailure: false, timeoutMs: INPUT_TIMEOUT_MS }); +} + +/** Move the pointer to (x, y) using the detected input tool. 
*/
+async function moveTo(x: number, y: number): Promise<void> {
+  const { tool } = await ensureInputTool();
+  if (tool === 'xdotool') {
+    await xdotool('mousemove', '--sync', String(x), String(y));
+  } else {
+    await ydotool('mousemove', '--absolute', '-x', String(x), '-y', String(y));
+  }
+}
+
+/**
+ * Send a key combination via the detected input tool.
+ * Both `combo` (xdotool keysym notation) and `scancodes` (ydotool
+ * key:state pairs) must be provided — ydotool requires scancodes.
+ */
+export async function sendKey(combo: string, scancodes: string[]): Promise<void> {
+  const { tool } = await ensureInputTool();
+  if (tool === 'xdotool') {
+    await xdotool('key', '--clearmodifiers', combo);
+  } else {
+    await ydotool('key', ...scancodes);
+  }
+}
+
+// ── Mouse actions ───────────────────────────────────────────────────────
+
+// ydotool v1 click codes (ydotool's own packed format, not evdev BTN_* codes):
+// low bits pick the button (0 = left, 1 = right, 2 = middle); 0x40 = press, 0x80 = release.
+// 0xC0 / 0xC1 / 0xC2 therefore mean a full press+release of left / right / middle.
+
+async function clickButton(x: number, y: number, xdoBtn: string, ydoCode: string): Promise<void> {
+  await moveTo(x, y);
+  const { tool } = await ensureInputTool();
+  if (tool === 'xdotool') {
+    await xdotool('click', xdoBtn);
+  } else {
+    await ydotool('click', ydoCode);
+  }
+}
+
+export async function pressLinux(x: number, y: number): Promise<void> {
+  await clickButton(x, y, '1', '0xC0');
+}
+
+export async function rightClickLinux(x: number, y: number): Promise<void> {
+  await clickButton(x, y, '3', '0xC1');
+}
+
+export async function middleClickLinux(x: number, y: number): Promise<void> {
+  await clickButton(x, y, '2', '0xC2');
+}
+
+export async function doubleClickLinux(x: number, y: number): Promise<void> {
+  const { tool } = await ensureInputTool();
+  await moveTo(x, y);
+  if (tool === 'xdotool') {
+    await xdotool('click', '--repeat', '2', '1');
+  } else {
+    await ydotool('click', '0xC0');
+    await ydotool('click', '0xC0');
+  }
+}
+
+export async function longPressLinux(
+  x: number,
+  y: number,
+  durationMs = 800,
+): Promise<void> {
+  const { tool } = await ensureInputTool();
+  await moveTo(x, y);
+  if (tool === 'xdotool') {
+    await xdotool('mousedown', '1');
+    await sleep(durationMs);
+    await xdotool('mouseup', '1');
+  } else {
+    // ydotool v1 has no --down/--up flags: 0x40 = left button press only, 0x80 = release only
+    await ydotool('click', '0x40');
+    await sleep(durationMs);
+    await ydotool('click', '0x80');
+  }
+}
+
+export async function focusLinux(x: number, y: number): Promise<void> {
+  await pressLinux(x, y);
+}
+
+// ── Swipe / scroll ──────────────────────────────────────────────────────
+
+export async function swipeLinux(
+  x1: number,
+  y1: number,
+  x2: number,
+  y2: number,
+  durationMs = 300,
+): Promise<void> {
+  const { tool } = await ensureInputTool();
+  await moveTo(x1, y1);
+  if (tool === 'xdotool') {
+    await xdotool('mousedown', '1');
+    await xdotool('mousemove', '--sync', String(x2), String(y2));
+    await sleep(durationMs);
+    await xdotool('mouseup', '1');
+  } else {
+    // ydotool v1 drag: press left (0x40), move, release left (0x80); --down/--up are not v1 flags
+    await ydotool('click', '0x40');
+    await ydotool('mousemove', '--absolute', '-x', String(x2), '-y', String(y2));
+    await sleep(durationMs);
+    await ydotool('click', '0x80');
+  }
+}
+
+const DEFAULT_SCROLL_CLICKS = 5;
+
+export async function scrollLinux(
+  direction: ScrollDirection,
+  options?: { amount?: number; pixels?: number },
+): Promise<void> {
+  const { tool } = await ensureInputTool();
+
+  // Translate amount/pixels into a discrete click count.
+  // xdotool button clicks scroll ~15px each (3 lines × 5px).
+  // ydotool wheel units are ~40px each.
+  let scrollCount = DEFAULT_SCROLL_CLICKS;
+  if (options?.pixels != null) {
+    scrollCount = tool === 'xdotool'
+      ? Math.max(1, Math.round(options.pixels / 15))
+      : Math.max(1, Math.round(options.pixels / 40));
+  } else if (options?.amount != null) {
+    // amount is a fraction (0–1+) of the viewport; scale relative to default
+    scrollCount = Math.max(1, Math.round(DEFAULT_SCROLL_CLICKS * (options.amount / 0.6)));
+  }
+
+  // xdotool: button 4=up, 5=down, 6=left, 7=right
+  if (tool === 'xdotool') {
+    const button = direction === 'up' ? '4' : direction === 'down' ? '5' : direction === 'left' ? '6' : '7';
+    await xdotool('click', '--repeat', String(scrollCount), button);
+  } else {
+    // ydotool: signed wheel values — NOTE(review): confirm the sign (evdev REL_WHEEL is positive for scroll-up)
+    if (direction === 'up' || direction === 'down') {
+      const value = direction === 'up' ? String(-scrollCount) : String(scrollCount);
+      await ydotool('mousemove', '--wheel', '-y', value);
+    } else {
+      const value = direction === 'left' ? String(-scrollCount) : String(scrollCount);
+      await ydotool('mousemove', '--wheel', '-x', value);
+    }
+  }
+}
+
+// ── Keyboard actions ────────────────────────────────────────────────────
+
+export async function typeLinux(text: string, delayMs = 0): Promise<void> {
+  const { tool } = await ensureInputTool();
+  if (tool === 'xdotool') {
+    const args = ['type'];
+    if (delayMs > 0) args.push('--delay', String(delayMs));
+    args.push('--clearmodifiers', '--', text);
+    await xdotool(...args);
+  } else {
+    await ydotool('type', ...(delayMs > 0 ? ['--key-delay', String(delayMs)] : []), '--', text);
+  }
+}
+
+export async function fillLinux(
+  x: number,
+  y: number,
+  text: string,
+  delayMs = 0,
+): Promise<void> {
+  // Click to focus the field
+  await pressLinux(x, y);
+  await sleep(100);
+  // Select all existing text (Ctrl+A scancodes: Ctrl=29, A=30)
+  await sendKey('ctrl+a', ['29:1', '30:1', '30:0', '29:0']);
+  await sleep(50);
+  // Type replacement text
+  await typeLinux(text, delayMs);
+}
+
+// ── Utilities ───────────────────────────────────────────────────────────
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
diff --git a/src/platforms/linux/linux-env.ts b/src/platforms/linux/linux-env.ts
new file mode 100644
index 000000000..ae80ff80d
--- /dev/null
+++ b/src/platforms/linux/linux-env.ts
@@ -0,0 +1,60 @@
+/**
+ * Shared Linux environment detection — display server and input tool.
+ *
+ * Results are cached after the first probe so that every action
+ * (press, type, scroll…) does not re-run `which` on every call.
+ */
+
+import { whichCmd } from '../../utils/exec.ts';
+import { AppError } from '../../utils/errors.ts';
+
+export type DisplayServer = 'wayland' | 'x11';
+export type InputTool = 'xdotool' | 'ydotool';
+
+export function detectDisplayServer(): DisplayServer {
+  if (process.env['WAYLAND_DISPLAY']) return 'wayland'; // any non-empty value counts
+  if (process.env['XDG_SESSION_TYPE'] === 'wayland') return 'wayland';
+  return 'x11'; // default when neither variable indicates Wayland
+}
+
+export function isWayland(): boolean {
+  return detectDisplayServer() === 'wayland';
+}
+
+// ── Cached input tool resolution ───────────────────────────────────────
+
+let cachedInputTool: { tool: InputTool; display: DisplayServer } | null = null;
+
+export async function ensureInputTool(): Promise<{
+  tool: InputTool;
+  display: DisplayServer;
+}> {
+  if (cachedInputTool) return cachedInputTool;
+
+  const display = detectDisplayServer();
+
+  if (display === 'wayland') {
+    if (await whichCmd('ydotool')) {
+      cachedInputTool = { tool: 'ydotool', display };
+      return cachedInputTool;
+    }
+    throw new AppError(
+      'TOOL_MISSING',
+      'ydotool is required for input synthesis on Wayland (xdotool does not work on Wayland). Install it via your package manager.',
+    );
+  }
+
+  if (await whichCmd('xdotool')) {
+    cachedInputTool = { tool: 'xdotool', display };
+    return cachedInputTool;
+  }
+  throw new AppError(
+    'TOOL_MISSING',
+    'xdotool is required for input synthesis on X11. Install it via your package manager.',
+  );
+}
+
+/** Clear the cached probe result so the next ensureInputTool() call re-detects (for testing). */
+export function resetInputToolCache(): void {
+  cachedInputTool = null;
+}
diff --git a/src/platforms/linux/role-map.ts b/src/platforms/linux/role-map.ts
new file mode 100644
index 000000000..67499e676
--- /dev/null
+++ b/src/platforms/linux/role-map.ts
@@ -0,0 +1,165 @@
+/**
+ * Maps AT-SPI2 role names (as returned by `Atspi.Accessible.get_role_name()`)
+ * to the normalized snapshot `type` strings used by the rest of the system.
+ *
+ * The mapping follows the same conventions as the macOS helper's
+ * `normalizedSnapshotType` (SnapshotTraversal.swift) and the Android
+ * `normalizeAndroidType` (ui-hierarchy.ts).
+ */
+
+const ATSPI_ROLE_MAP: Record<string, string> = {
+  // Buttons
+  'push button': 'Button',
+  'toggle button': 'Button',
+  'push button menu': 'Button',
+
+  // Text
+  label: 'StaticText',
+  static: 'StaticText',
+  caption: 'StaticText',
+  text: 'TextField',
+  entry: 'TextField',
+  'password text': 'TextField',
+  'spin button': 'TextField',
+  terminal: 'TextArea',
+  'document text': 'TextArea',
+  paragraph: 'TextArea',
+
+  // Windows & frames
+  frame: 'Window',
+  window: 'Window',
+  dialog: 'Dialog',
+  alert: 'Alert',
+  'file chooser': 'Dialog',
+  'color chooser': 'Dialog',
+  'font chooser': 'Dialog',
+
+  // Containers & layout
+  panel: 'Group',
+  filler: 'Group',
+  section: 'Group',
+  form: 'Group',
+  grouping: 'Group',
+  'layered pane': 'Group',
+  'glass pane': 'Group',
+  'root pane': 'Group',
+  'option pane': 'Group',
+  'internal frame': 'Group',
+  'desktop frame': 'Group',
+  'block quote': 'Group',
+  article: 'Group',
+  comment: 'Group',
+  landmark: 'Group',
+  log: 'Group',
+  marquee: 'Group',
+  math: 'Group',
+  notification: 'Group',
+  'content deletion': 'Group',
+  'content insertion': 'Group',
+  mark: 'Group',
+  suggestion: 'Group',
+
+  // Scrolling
+  'scroll pane': 'ScrollArea',
+  'scroll bar': 'ScrollBar',
+
+  // Menus
+  'menu bar': 'MenuBar',
+  menu: 'Menu',
+  'popup menu': 'Menu',
+  'menu item': 'MenuItem',
+  'check menu item': 'MenuItem',
+  'radio menu item': 'MenuItem',
+  'tearoff menu item': 'MenuItem',
+
+  // Toggle/selection
+  'check box': 'CheckBox',
+  'radio button': 'RadioButton',
+  switch: 'Switch',
+
+  // Combo/dropdown
+  'combo box': 'ComboBox',
+
+  // Tabs
+  'page tab': 'Tab',
+  'page tab list': 'TabList',
+
+  // Tables
+  table: 'Table',
+  'tree table': 'Table',
+  'table cell': 'Cell',
+  'table row': 'Row',
+  'table column header': 'Cell',
+  'table row header': 'Cell',
+  'column header': 'Cell',
+  'row header': 'Cell',
+
+  // Lists (description lists are reported as plain lists)
+  list: 'List',
+  'list item': 'ListItem',
+  'list box': 'List',
+  'description list': 'List',
+  'description term': 'ListItem',
+  'description value': 'ListItem',
+
+  // Trees
+  tree: 'Tree',
+  'tree item': 'TreeItem',
+
+  // Toolbars & status
+  'tool bar': 'Toolbar',
+  'status bar': 'StatusBar',
+  'info bar': 'StatusBar',
+
+  // Sliders & progress
+  slider: 'Slider',
+  'progress bar': 'ProgressBar',
+  'level bar': 'ProgressBar',
+
+  // Media
+  image: 'Image',
+  icon: 'Image',
+  animation: 'Image',
+  canvas: 'Image',
+  'drawing area': 'Image',
+  video: 'Video',
+  audio: 'Audio',
+
+  // Links
+  link: 'Link',
+  hyperlink: 'Link',
+
+  // Separators
+  separator: 'Separator',
+
+  // Application
+  application: 'Application',
+
+  // Misc
+  'tool tip': 'Tooltip',
+  timer: 'Timer',
+  heading: 'Heading',
+  footnote: 'Footnote',
+  'title bar': 'TitleBar',
+  'date editor': 'DateEditor',
+  rating: 'Slider',
+};
+
+/**
+ * Convert a raw AT-SPI2 role name (e.g., "push button", "menu item") to
+ * the normalized type used in snapshot nodes.
+ *
+ * Falls back to PascalCase of the raw role name when no explicit mapping exists.
+ */
+export function normalizeAtspiRole(roleName: string): string {
+  const normalized = roleName.toLowerCase().trim();
+  const mapped = Object.prototype.hasOwnProperty.call(ATSPI_ROLE_MAP, normalized) ? ATSPI_ROLE_MAP[normalized] : undefined; // own keys only: "constructor" etc. must not hit Object.prototype
+  if (mapped) return mapped;
+
+  // Fallback: convert "some role name" to "SomeRoleName"
+  return normalized
+    .split(/[\s_-]+/)
+    .filter(Boolean)
+    .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
+    .join('');
+}
diff --git a/src/platforms/linux/screenshot.ts b/src/platforms/linux/screenshot.ts
new file mode 100644
index 000000000..d27ddfc91
--- /dev/null
+++ b/src/platforms/linux/screenshot.ts
@@ -0,0 +1,59 @@
+import { runCmd, whichCmd } from '../../utils/exec.ts';
+import { AppError } from '../../utils/errors.ts';
+import { isWayland } from './linux-env.ts';
+
+type ScreenshotTool = 'grim' | 'gnome-screenshot' | 'scrot' | 'import';
+
+let cachedTool: { tool: ScreenshotTool; display: 'wayland' | 'x11' } | null = null;
+
+async function resolveScreenshotTool(): Promise<{ tool: ScreenshotTool; display: 'wayland' | 'x11' }> {
+  if (cachedTool) return cachedTool;
+
+  if (isWayland()) {
+    if (await whichCmd('grim')) { cachedTool = { tool: 'grim', display: 'wayland' }; return cachedTool; }
+    if (await whichCmd('gnome-screenshot')) { cachedTool = { tool: 'gnome-screenshot', display: 'wayland' }; return cachedTool; }
+    throw new AppError(
+      'TOOL_MISSING',
+      'grim or gnome-screenshot is required for screenshots on Wayland. Install via your package manager.',
+    );
+  }
+
+  if (await whichCmd('scrot')) { cachedTool = { tool: 'scrot', display: 'x11' }; return cachedTool; }
+  if (await whichCmd('import')) { cachedTool = { tool: 'import', display: 'x11' }; return cachedTool; }
+  if (await whichCmd('gnome-screenshot')) { cachedTool = { tool: 'gnome-screenshot', display: 'x11' }; return cachedTool; }
+  throw new AppError(
+    'TOOL_MISSING',
+    'scrot, import (ImageMagick), or gnome-screenshot is required for screenshots on X11. Install via your package manager.',
+  );
+}
+
+/** Reset cached tool (for testing).
*/
+export function resetScreenshotToolCache(): void {
+  cachedTool = null;
+}
+
+/**
+ * Capture a screenshot of the Linux desktop and write it to `outPath`.
+ *
+ * The capture program is whichever one `resolveScreenshotTool` selects:
+ * - Wayland: `grim`, falling back to `gnome-screenshot`
+ * - X11: `scrot`, then `import` (ImageMagick), then `gnome-screenshot`
+ *
+ * @param outPath Destination file path handed to the capture program.
+ * @returns A promise that settles once the capture command has finished.
+ * @throws Propagates errors from `resolveScreenshotTool` (no tool installed)
+ *   and from `runCmd`.
+ */
+export async function screenshotLinux(outPath: string): Promise<void> {
+  const { tool } = await resolveScreenshotTool();
+
+  switch (tool) {
+    case 'grim':
+      await runCmd('grim', [outPath]);
+      break;
+    case 'scrot': {
+      // Current scrot releases do not overwrite an existing file unless
+      // --overwrite is passed; they save under a suffixed name instead, which
+      // would leave a stale image at outPath. Removing the target first gives
+      // the same result on every scrot version (older ones lack the flag).
+      const { rm } = await import('node:fs/promises');
+      await rm(outPath, { force: true });
+      await runCmd('scrot', [outPath]);
+      break;
+    }
+    case 'import':
+      // `-window root` captures the whole X11 root window rather than
+      // prompting for a selection.
+      await runCmd('import', ['-window', 'root', outPath]);
+      break;
+    case 'gnome-screenshot':
+      await runCmd('gnome-screenshot', ['-f', outPath]);
+      break;
+  }
+}
diff --git a/src/platforms/linux/snapshot.ts b/src/platforms/linux/snapshot.ts
new file mode 100644
index 000000000..6231d4c72
--- /dev/null
+++ b/src/platforms/linux/snapshot.ts
@@ -0,0 +1,38 @@
+import type { RawSnapshotNode } from '../../utils/snapshot.ts';
+import { captureAccessibilityTree, type SnapshotSurface } from './atspi-bridge.ts';
+import type { SessionSurface } from '../../core/session-surface.ts';
+import { emitDiagnostic } from '../../utils/diagnostics.ts';
+
+/**
+ * Map the session-level surface to an AT-SPI2 surface.
+ * Linux supports 'desktop' and 'frontmost-app'. The 'app' surface
+ * (used for in-app XCTest sessions) is treated as 'frontmost-app' on Linux.
+ * The 'menubar' surface is not yet supported; it falls back to 'desktop'.
+ */
+function resolveLinuxSurface(surface: SessionSurface | undefined): SnapshotSurface {
+  switch (surface) {
+    case 'frontmost-app':
+    case 'app':
+      return 'frontmost-app';
+    case 'menubar':
+      // Not available on Linux: report it, then use the desktop surface.
+      emitDiagnostic({
+        level: 'warn',
+        phase: 'linux_snapshot',
+        data: { message: 'menubar surface is not supported on Linux, falling back to desktop' },
+      });
+      return 'desktop';
+    default:
+      // 'desktop', undefined and any other value all resolve to the desktop surface.
+      return 'desktop';
+  }
+}
+
+/**
+ * Capture the accessibility tree for the requested session surface.
+ *
+ * @param surface Session-level surface; translated by `resolveLinuxSurface`.
+ * @returns The `nodes` and `truncated` fields of the `captureAccessibilityTree` result.
+ */
+export async function snapshotLinux(
+  surface: SessionSurface | undefined,
+): Promise<{
+  nodes: RawSnapshotNode[];
+  truncated?: boolean;
+}> {
+  const { nodes, truncated } = await captureAccessibilityTree(resolveLinuxSurface(surface));
+
+  return { nodes, truncated };
+}
diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts
index 235f40f86..d17ee3b96 100644
--- a/src/utils/__tests__/args.test.ts
+++ b/src/utils/__tests__/args.test.ts
@@ -912,7 +912,7 @@ test('command usage shows command and global flags separately', () => {
   assert.match(help, /Command flags:/);
   assert.match(help, /--pattern one-way\|ping-pong/);
   assert.match(help, /Global flags:/);
-  assert.match(help, /--platform ios\|macos\|android\|apple/);
+  assert.match(help, /--platform ios\|macos\|android\|linux\|apple/);
 });
 
 test('back command usage documents explicit mode flags', () => {
diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts
index 274ead96b..e0cf1efa5 100644
--- a/src/utils/command-schema.ts
+++ b/src/utils/command-schema.ts
@@ -17,7 +17,7 @@ export type CliFlags = {
   sessionLock?: 'reject' | 'strip';
   sessionLocked?: boolean;
   sessionLockConflicts?: 'reject' | 'strip';
-  platform?: 'ios' | 'macos' | 'android' | 'apple';
+  platform?: 'ios' | 'macos' | 'android' | 'linux' | 'apple';
   target?: 'mobile' | 'tv' | 'desktop';
   device?: string;
   udid?: string;
@@ -284,8 +284,8 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [
     key: 'platform',
     names:
['--platform'], type: 'enum', - enumValues: ['ios', 'macos', 'android', 'apple'], - usageLabel: '--platform ios|macos|android|apple', + enumValues: ['ios', 'macos', 'android', 'linux', 'apple'], + usageLabel: '--platform ios|macos|android|linux|apple', usageDescription: 'Platform to target (`apple` aliases the Apple automation backend)', }, { diff --git a/src/utils/device.ts b/src/utils/device.ts index a9f6e9f00..ef516e7de 100644 --- a/src/utils/device.ts +++ b/src/utils/device.ts @@ -1,7 +1,7 @@ import { AppError } from './errors.ts'; export type ApplePlatform = 'ios' | 'macos'; -export type Platform = ApplePlatform | 'android'; +export type Platform = ApplePlatform | 'android' | 'linux'; export type PlatformSelector = Platform | 'apple'; export type DeviceKind = 'simulator' | 'emulator' | 'device'; export type DeviceTarget = 'mobile' | 'tv' | 'desktop'; diff --git a/src/utils/snapshot.ts b/src/utils/snapshot.ts index 6f786be0c..f1cde15bc 100644 --- a/src/utils/snapshot.ts +++ b/src/utils/snapshot.ts @@ -45,11 +45,13 @@ export type SnapshotNode = RawSnapshotNode & { ref: string; }; +export type SnapshotBackend = 'xctest' | 'android' | 'macos-helper' | 'linux-atspi'; + export type SnapshotState = { nodes: SnapshotNode[]; createdAt: number; truncated?: boolean; - backend?: 'xctest' | 'android' | 'macos-helper'; + backend?: SnapshotBackend; comparisonSafe?: boolean; }; diff --git a/test/integration/replays/linux/01-desktop-smoke.ad b/test/integration/replays/linux/01-desktop-smoke.ad new file mode 100644 index 000000000..c707cfdb2 --- /dev/null +++ b/test/integration/replays/linux/01-desktop-smoke.ad @@ -0,0 +1,12 @@ +# Smoke test for Linux desktop automation on CI. +# Opens gnome-calculator, takes a snapshot, and validates the +# accessibility tree contains expected calculator UI elements. 
+context platform=linux +open gnome-calculator +wait 3000 +screenshot "./test/screenshots/replays/linux-calculator.png" +snapshot +# Verify calculator-specific elements: a window titled Calculator, +# or digit buttons that only a calculator app would expose. +is exists "label=Calculator || label=0 || label=1 || label=5" +snapshot -i