Smart-AI-Memory · silversurfer562 · May 13, 2026 · May 13, 2026
diff --git a/.github/workflows/cross-repo-compat.yml b/.github/workflows/cross-repo-compat.yml
@@ -54,6 +54,16 @@ jobs:
         run: |
           python -c "import attune_help; print(f'attune_help {attune_help.__version__} from {attune_help.__file__}')"
 
+      - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite
+        run: |
+          if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+            echo "::error::ANTHROPIC_API_KEY is set in the default test environment."
+            echo "This can cause non-'live'-marked tests to leak real API calls."
+            echo "Either unset the key, or mark the test with @pytest.mark.live."
+            exit 1
+          fi
+        shell: bash
+
       - name: Run tests
         run: |
           python -m pytest tests/ -v
diff --git a/.github/workflows/rag-gate.yml b/.github/workflows/rag-gate.yml
@@ -18,6 +18,15 @@ name: RAG Gate
 #   No workflow-level sleep needed.
 #
 # Cost: ~10 answer calls + ~10 judge calls (~$0.10–0.30 per run).
+# Hard cap: ATTUNE_CI_EVAL_KEY has a $20/month limit in the Anthropic
+# console — calls beyond that are rejected by the API, not by us.
+#
+# Triggers:
+#   - workflow_dispatch — manual run (e.g. pre-publish check).
+#   - schedule (weekly Mon 13:00 UTC) — drift detection between releases.
+#   - pull_request with label "live-eval" — opt-in PR gate so contributors
+#     can validate eval-affecting changes before merge without paying for
+#     every push. See specs/test-strategy-pass-2/ for the rationale.
 
 on:
   workflow_dispatch:
@@ -32,9 +41,11 @@ on:
   schedule:
     - cron: "0 13 * * 1"
 
-  # Uncomment to gate every push to main (adds ~$0.20/push to API costs):
-  # push:
-  #   branches: [main]
+  # Opt-in PR gate: runs only when the "live-eval" label is applied;
+  # later pushes do not re-run it. Costs ~$0.20 per labeling; the hard
+  # cap is the $20/month Anthropic console limit on ATTUNE_CI_EVAL_KEY.
+  pull_request:
+    types: [labeled]
 
 permissions:
   contents: read
@@ -44,6 +55,11 @@ jobs:
     name: RAG smoke eval
     runs-on: ubuntu-latest
     timeout-minutes: 15
+    # Any label added to a PR fires this workflow, so skip the job unless
+    # that label is "live-eval". Non-pull_request events always run.
+    if: >-
+      github.event_name != 'pull_request' ||
+      github.event.label.name == 'live-eval'
 
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -65,11 +81,27 @@ jobs:
 
       - name: Run smoke eval
+        id: smoke_eval
         env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          # Prefer the dedicated CI eval key (capped at $20/month in the
+          # Anthropic console). Falls back to the legacy ANTHROPIC_API_KEY
+          # secret while the rotation is in flight; remove the fallback
+          # once ATTUNE_CI_EVAL_KEY is provisioned.
+          ANTHROPIC_API_KEY: ${{ secrets.ATTUNE_CI_EVAL_KEY || secrets.ANTHROPIC_API_KEY }}
           NO_COLOR: "1"
         run: |
           python benchmarks/smoke_eval.py --out smoke_results.json
 
+      - name: Estimate cost
+        # Skip when the eval step never started (an earlier step failed):
+        # no API calls were made, so there is no spend to report.
+        if: always() && steps.smoke_eval.outcome != 'skipped'
+        run: |
+          # Approximate per-run cost: ~10 answer calls + ~10 judge calls.
+          # Refine when smoke_eval.py emits token counts in its output.
+          # v1: emit a fixed range as a CI notice so future spend reviews
+          # have a stdout breadcrumb.
+          echo "::notice::Estimated cost for this run: \$0.10–0.30 (10 answer + 10 judge calls)."
+
       - name: Upload eval results
         if: always()
         uses: actions/upload-artifact@v4

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -36,6 +36,16 @@ jobs:
       - name: Run ruff
         run: python -m ruff check src/ tests/
 
+      - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite
+        shell: bash
+        run: |
+          # Key absent or empty: the default suite is safe, pass the step.
+          [[ -z "${ANTHROPIC_API_KEY:-}" ]] && exit 0
+          echo "::error::ANTHROPIC_API_KEY is set in the default test environment."
+          echo "This can cause non-'live'-marked tests to leak real API calls."
+          echo "Either unset the key, or mark the test with @pytest.mark.live."
+          exit 1
+
       - name: Run tests (with coverage on ubuntu x py3.11)
         run: |
           if [ "${{ matrix.os }}" = "ubuntu-latest" ] && [ "${{ matrix.python-version }}" = "3.11" ]; then

diff --git a/pyproject.toml b/pyproject.toml
@@ -104,7 +104,7 @@ select = ["E", "F", "W", "I", "UP", "BLE"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "-ra"
+addopts = "-ra -m 'not live'"  # default runs deselect tests marked "live"
 asyncio_mode = "auto"  # pytest-asyncio: auto-collect @pytest.mark.asyncio functions
 markers = [
     "live: opt-in tests that hit the live Anthropic API. Skipped by default; require ANTHROPIC_API_KEY and any test-specific env flags.",

diff --git a/tests/README.md b/tests/README.md
@@ -23,21 +23,23 @@ pytest tests/test_generated_templates_golden.py --snapshot-update
 pytest -m live
 ```
 
-## LLM mocking standard
-
-Three autouse fixtures in `conftest.py` form the reference pattern:
-
-1. `_lenient_polish_by_default` — sets
-   `ATTUNE_AUTHOR_STRICT_POLISH=false` and strips `ANTHROPIC_API_KEY`
-   so a misconfigured test never reaches the network.
-2. `_reset_rag_pipeline` — clears the module-level RagPipeline singleton
-   between tests, so a leaked patch from one test doesn't poison
-   subsequent tests.
-3. Per-test patches use `unittest.mock.patch("anthropic.Anthropic")` at
-   the **import boundary**, never at the call site.
-
-The `live` marker gates real-API tests (`pytest -m live`), so they
-never run by default.
+## LLM mocking standard, `live` marker, CI guard, cost policy
+
+See **`testing-conventions.md`** in the attune workspace umbrella for
+the canonical reference. attune-author is the **reference
+implementation** the workspace doc describes — the three autouse
+fixtures live in this layer's `conftest.py`:
+
+- `_lenient_polish_by_default` — sets `ATTUNE_AUTHOR_STRICT_POLISH=false`
+  and strips `ANTHROPIC_API_KEY`.
+- `_reset_rag_pipeline` — clears the module-level `RagPipeline`
+  singleton between tests.
+- Per-test patches use `unittest.mock.patch("anthropic.Anthropic")` at
+  the **import boundary**.
+
+The `live` marker gates real-API tests (`pytest -m live`); they never
+run by default. CI's real-API path is `rag-gate.yml` — scheduled weekly,
+runnable manually, and opt-in on PRs via the `live-eval` label.
 
 ## Test layout