From 7ff891629b1a6afbf86ae46f87e4aa5e641a3660 Mon Sep 17 00:00:00 2001 From: GeneAI Date: Fri, 15 May 2026 13:56:59 -0400 Subject: [PATCH] test(conftest): isolate polish cache per test, harden API-key delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related test-isolation gaps that surfaced as snapshot flakes: 1. The autouse polish fixture set ``ATTUNE_AUTHOR_STRICT_POLISH=false`` and deleted ``ANTHROPIC_API_KEY``, but did not redirect ``ATTUNE_AUTHOR_POLISH_CACHE``. Every test shared the dev machine's real ``~/.attune/polish_cache``. A prior live ``regenerate`` run would populate the cache with polished output; subsequent golden-snapshot tests would then observe LLM-rewritten content instead of the deterministic Jinja fallback, depending on which tests had run before — flaky between machines and between sessions on the same machine. Point the cache at a fresh per-test tmp directory, created with ``tmp_path_factory.mktemp`` inside the function-scoped autouse fixture. 2. ``monkeypatch.delenv`` raises ``KeyError`` when the var is not already set, which the fixture worked around with an ``os.environ`` membership check. Use ``raising=False`` instead and drop the now-unused ``os`` import. Repro of the flake (on a dev machine with ``.env`` carrying a live key and ``~/.attune/polish_cache`` populated from a real run): pytest tests/test_generated_templates_golden.py # FAILED test_task_template_matches_snapshot — snapshot shows raw # Jinja output, observed value is the LLM-polished rewrite After this commit, the test suite is hermetic w.r.t. polish state regardless of the host's cache or env. 
Co-Authored-By: Claude Opus 4.7 --- tests/conftest.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e3da073..cebba06 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os from collections.abc import Iterator from pathlib import Path @@ -10,7 +9,10 @@ @pytest.fixture(autouse=True) -def _lenient_polish_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]: +def _lenient_polish_by_default( + monkeypatch: pytest.MonkeyPatch, + tmp_path_factory: pytest.TempPathFactory, +) -> Iterator[None]: """Disable strict polish mode for every test by default. Polish is strict in production (a missing API key raises), @@ -21,13 +23,26 @@ def _lenient_polish_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None test has mocked ``_call_llm``. Tests that specifically exercise strict-mode behavior override it with their own ``patch.dict`` block. + + The polish cache directory is also pointed at a per-test + tmp directory. Without this, every test would share the + dev machine's real ``~/.attune/polish_cache``, so a + previous live ``regenerate`` run can populate the cache + with polished output and cause golden-snapshot tests to + silently observe LLM-rewritten content instead of the + deterministic Jinja-only fallback. The behavior is + environment-dependent and surfaces as flakes between + machines (or between sessions on the same machine). """ monkeypatch.setenv("ATTUNE_AUTHOR_STRICT_POLISH", "false") + monkeypatch.setenv( + "ATTUNE_AUTHOR_POLISH_CACHE", + str(tmp_path_factory.mktemp("polish_cache")), + ) # Also make sure the tests never accidentally call the # real Anthropic API with a key picked up from the dev # machine's environment — a huge cost/latency hazard. - if "ANTHROPIC_API_KEY" in os.environ: - monkeypatch.delenv("ANTHROPIC_API_KEY") + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) yield