agenticoding · grzegorznowak · Jun 13, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+tests/snapshots/**/*.txt text eol=lf
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,80 @@
+# Cross-platform CI for pi-agenticoding
+#
+# Runs type checks, a security audit, and the unit and E2E suites on Linux (the minimum
+# Node.js version required by pi coding agent, plus the latest) and on macOS and
+# Windows (latest only). Snapshot tests verify TUI render output against golden files.
+
+name: test
+
+on:
+  push:
+    branches: ["main"]
+    paths-ignore: ["*.md", "**/docs/**"]  # skip runs for docs-only changes
+  pull_request:
+    branches: ["main"]
+    paths-ignore: ["*.md", "**/docs/**"]  # keep in sync with the push filter
+
+permissions:
+  contents: read  # read-only token for every job in this workflow
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow + ref
+  cancel-in-progress: true  # a newer run on the same ref cancels the older one
+
+jobs:
+  # ── Cross-platform test matrix ──────────────────────────────────────
+  # Node 22 (minimum) is tested only on Linux — the primary platform and the only one
+  # guaranteed to have the oldest toolchain. Node 24 (latest) runs on Linux, macOS, and
+  # Windows to catch regressions in the newest runtime. This asymmetry is intentional: it
+  # balances CI cost with meaningful coverage while ensuring the minimum version works
+  # correctly on the platform most likely to encounter toolchain edge cases.
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false          # report every combination, don't cancel
+      matrix:
+        include:
+          - os: ubuntu-latest
+            node-version: "22"    # minimum version on primary platform
+          - os: ubuntu-latest
+            node-version: "24"    # latest on primary platform
+          - os: macos-latest
+            node-version: "24"    # latest on macOS
+          - os: windows-latest
+            node-version: "24"    # latest on Windows
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        with:
+          node-version: ${{ matrix.node-version }}
+          cache: "npm"  # restores the npm download cache between runs
+
+      - run: npm ci  # clean, lockfile-exact install
+
+      # Uniform pre-flight checks — type errors and security issues on every platform
+      - name: Type check
+        run: npx tsc --noEmit
+
+      - name: Security audit
+        run: npm audit --audit-level=moderate
+
+      # Unit suite (unit tests + snapshot tests + property-based tests)
+      - name: Unit tests
+        run: npm test
+
+      # E2E tests — process-isolated child-process harness (stdin/stdout, no PTY).
+      # Verified cross-platform: runs on Linux, macOS, and Windows.
+      # See https://github.com/agenticoding/pi-agenticoding/issues/12
+      - name: E2E tests
+        run: npm run test:e2e
+
+      # Upload the snapshot directory for debugging (kept 30 days); runs after failures, not cancels.
+      - name: Upload test results
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: test-results-${{ matrix.os }}-node-${{ matrix.node-version }}
+          path: |
+            tests/snapshots/
+          retention-days: 30
diff --git a/.gitignore b/.gitignore
@@ -142,8 +142,7 @@ vite.config.js.timestamp-*
 vite.config.ts.timestamp-*
 .vite/
 
-# Lockfiles (library package — consumers manage their own)
-package-lock.json
+# package-lock.json is committed for reproducible CI installs (it is excluded from the published package)
 
 # Agenticoding local config (credentials, API keys)
 .chunkhound.json

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -7,14 +7,15 @@ Welcome! This project welcomes focused, well-validated contributions. Use coding
 - **Use code research first** — understand the surrounding module responsibilities before editing.
 - **Make minimal changes** — prefer targeted edits that reuse existing mechanisms.
 - **Match existing patterns** — keep naming, lifecycle hooks, tool contracts, and TUI behavior consistent with the current code.
-- **Preserve context-management semantics** — changes to `spawn`, `ledger`, or `handoff` should keep the agent workflow predictable across session resets and compaction.
+- **Preserve context-management semantics** — changes to `spawn`, `notebook`, or `handoff` should keep the agent workflow predictable across session resets and compaction.
+- **Import `spawn/renderer.ts` statically, never dynamically** — it registers the frame scheduler into the singleton container at module evaluation time. Switching to `await import()` will silently break test isolation because the test harness cannot overwrite the singleton before registration.
 - **AI-agent generated contributions are welcome** — include enough human intent and validation context in the PR for reviewers to trust the result.
 
 ## Suggested Workflow
 
 1. **Research the area**
-   - Identify the relevant primitive: spawn, ledger, handoff, watchdog, or extension wiring.
-   - Read nearby tests in `agenticoding.test.ts` before changing behavior.
+   - Identify the relevant primitive: spawn, notebook, handoff, watchdog, or extension wiring.
+   - Read the relevant suite in `tests/unit/` before changing behavior.
 
 2. **Plan the smallest safe change**
    - Reuse existing state and lifecycle hooks when possible.
@@ -38,6 +39,29 @@ Before submitting, check that your change:
 - Handles reset, cancellation, and stale-session cases where relevant.
 - Keeps docs aligned with the package version and installed behavior.
 
+## Tests
+
+- `npm test` — runs the unit suite under `tests/unit/` via the in-repo Node test runner.
+- `npm run test:snapshots:check` — runs only the render-snapshot tests; fails on any drift in `tests/snapshots/`.
+- `npm run test:snapshots:update` — rewrites the golden files in `tests/snapshots/` after an intentional render change. Review the diff carefully: snapshot updates are the only signal that catches unintended UI regressions.
+- `npm run test:e2e` — runs the process-isolated end-to-end suite under `tests/e2e/`.
+
+## CI
+
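+Pull requests are automatically tested via GitHub Actions. A cross-platform matrix runs on pull requests targeting `main` and on pushes to `main`; changes that touch only top-level `*.md` files or files under a `docs/` directory do not trigger it: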
+
+| OS | Node | Runs |
+|---|---|---|
+| Ubuntu | 22 (minimum) | Type check, security audit, unit tests, E2E tests |
+| Ubuntu | 24 | Type check, security audit, unit tests, E2E tests |
+| macOS | 24 | Type check, security audit, unit tests, E2E tests |
+| Windows | 24 | Type check, security audit, unit tests, E2E tests |
+
+Node 22 (minimum) is tested only on Linux — the primary platform and the only one guaranteed to have the oldest toolchain. macOS and Windows test Node 24 (latest) to catch regressions in the newest runtime while balancing CI cost.
+
+Snapshot golden files in `tests/snapshots/` are stored with LF line endings (enforced by `.gitattributes`). The `normalizeEOL` helper in the snapshot test file normalizes `\r\n` to `\n` on read, so Windows developers get correct comparisons even if their working tree has CRLF. If you update snapshots, the CI matrix validates them on all platforms.
+The E2E suite runs on all platforms including Windows (verified in issue #12).
+
 ## Community
 
 Use GitHub Issues for bug reports and feature requests. Keep discussions concrete: describe the agent workflow you expected, what happened instead, and any reproduction steps.