From 3a9da04d0dc4bd319b3438be9fd4a7eb572d2fbe Mon Sep 17 00:00:00 2001
From: GeneAI <patrick.roebuck@smartAImemory.com>
Date: Sat, 16 May 2026 03:01:06 -0400
Subject: [PATCH] feat(polish): faithfulness judge integration (Phase 3 of
 polish-fact-check)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 3 adds an opt-in faithfulness judge that scores polished
documents against the source files they were generated from.
When the score falls below the configured threshold, a
`## Faithfulness review` block listing the unsupported claims and
the judge's reasoning is appended to the polished file.

Pairs with Phase 1 (AST fact-check after generation) and Phase 2
(ground-truth context injection before generation) to give three
distinct interventions against polish-pass hallucinations.

New package: src/attune_author/faithfulness/
  - judge wrapper around attune_rag.eval.faithfulness.FaithfulnessJudge
    via asyncio.run (the polish pipeline is sync)
  - FaithfulnessConfig: threshold (0.95 pre-calibration default),
    budget_per_file_usd ($0.10), model (Sonnet 4.6 — Haiku is ~1/3
    the cost), block_polish_on_unavailable for strict CI
  - estimate_cost_usd: chars-to-tokens heuristic + per-model price
    lookup, used as the budget gate so we never invoke the judge
    when the estimated cost exceeds the cap
  - format_review_block + apply_review_block: soft-fail formatter
    matching the Phase 1 ## Unresolved references shape

Wiring:
  - generator._run_faithfulness_judge runs after _run_fact_check
    on every polished file. Reads optional pyproject config.
  - generator._faithfulness_telemetry / reset_faithfulness_telemetry:
    per-process counters; run_maintenance resets them at start and
    logs INFO summary at end (calls, skipped, total estimated $).
  - ATTUNE_AUTHOR_FAITHFULNESS=off env override.

Best-effort contract: missing attune-rag[claude], missing
ANTHROPIC_API_KEY, over-budget estimates, transient API failures
all degrade silently. The judge never blocks the polish.

Tests: 30 new tests under tests/unit/faithfulness/ covering the
budget gate, every skip path, the happy path, the
below-threshold review-block append, env-var override, telemetry
reset, and unexpected-exception swallowing. Full suite: 926
passed, 37 pre-existing skips.

Threshold calibration (tasks 3.3, 3.4) deferred to the same
real-LLM run that closes Phase 2's live-LLM acceptance gate —
folding two API cycles into one. Default of 0.95 is documented as
pre-calibration in decisions.md.

Spec: docs/specs/polish-fact-check/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  36 ++
 README.md                                     |  31 ++
 docs/specs/polish-fact-check/decisions.md     |  35 ++
 docs/specs/polish-fact-check/tasks.md         |  27 +-
 src/attune_author/faithfulness/__init__.py    | 323 +++++++++++++++++
 src/attune_author/faithfulness/config.py      |  70 ++++
 src/attune_author/generator.py                |  90 +++++
 src/attune_author/maintenance.py              |  20 ++
 tests/unit/faithfulness/__init__.py           |   1 +
 tests/unit/faithfulness/test_config.py        |  83 +++++
 tests/unit/faithfulness/test_judge.py         | 332 ++++++++++++++++++
 .../unit/faithfulness/test_pipeline_wiring.py | 134 +++++++
 12 files changed, 1168 insertions(+), 14 deletions(-)
 create mode 100644 src/attune_author/faithfulness/__init__.py
 create mode 100644 src/attune_author/faithfulness/config.py
 create mode 100644 tests/unit/faithfulness/__init__.py
 create mode 100644 tests/unit/faithfulness/test_config.py
 create mode 100644 tests/unit/faithfulness/test_judge.py
 create mode 100644 tests/unit/faithfulness/test_pipeline_wiring.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ebe71f..ea64f97 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,42 @@ changes land, not at tag time.
 
 ### Added
 
+- **Polish fact-check Phase 3 — faithfulness judge.** Wraps
+  `attune_rag.eval.faithfulness.FaithfulnessJudge` as a
+  post-polish step: scores each polished file's claims against
+  the source files it was generated from. When the score falls
+  below the configured threshold, appends a
+  `## Faithfulness review` block listing the unsupported claims
+  and the judge's reasoning. Best-effort: missing
+  `attune-rag[claude]`, missing `ANTHROPIC_API_KEY`, over-budget
+  estimates, and transient API failures all degrade silently
+  rather than blocking the polish.
+  - New package: `src/attune_author/faithfulness/` with the
+    judge wrapper, `FaithfulnessConfig`, `JudgeOutcome`,
+    `estimate_cost_usd` budget-gate helper, and a
+    `format_review_block` / `apply_review_block` soft-fail pair
+    that mirrors the Phase 1 `## Unresolved references` shape.
+  - Config schema:
+    `[tool.attune-author.fact-check.faithfulness]` with
+    `enabled`, `threshold` (default 0.95, pre-calibration), `budget_per_file_usd`
+    (default $0.10), `model` (default Sonnet 4.6 — Haiku 4.5 is
+    cheaper for high-volume runs), and
+    `block_polish_on_unavailable` (default False — flip to True
+    in CI where missing deps should be loud).
+  - Cost telemetry: per-process counters on the generator
+    module; `run_maintenance` resets them at start and logs an
+    INFO summary at end with call count, skip count, and total
+    estimated USD spent.
+  - `ATTUNE_AUTHOR_FAITHFULNESS=off` env-var override for one-off
+    disable without editing pyproject.
+  - 30 new tests under `tests/unit/faithfulness/`.
+  - Threshold calibration against the ops-dashboard fixture
+    (tasks 3.3, 3.4) is **deferred** until the live-LLM Phase 2
+    acceptance run; today's default of 0.95 is documented as
+    pre-calibration in `decisions.md`.
+  - Spec: `docs/specs/polish-fact-check/`. Phase 4 (tutorial
+    code-fence mypy) remains on the roadmap.
+
 - **Polish fact-check Phase 2 — ground-truth context
   injection.** Builds three sentinel-tagged blocks
   (`<cli_help>`, `<public_api>`, `<dataclasses>`) and injects
diff --git a/README.md b/README.md
index 9dd14e9..7456dc8 100644
--- a/README.md
+++ b/README.md
@@ -162,6 +162,37 @@ CLI flags, fabricated private-module imports, wrong route
 paths, hallucinated counts) at the prompt layer, rather than
 relying solely on the post-generation fact-check to catch them.
 
+## Faithfulness review (Phase 3)
+
+The Phase 3 faithfulness judge wraps
+`attune_rag.eval.faithfulness.FaithfulnessJudge` as a
+post-polish step: it scores each polished file's claims against
+the source files it was generated from. When the score falls
+below the configured threshold, a `## Faithfulness review`
+block is appended to the polished file listing the unsupported
+claims and the judge's reasoning.
+
+The judge is **opt-in** because it makes real Anthropic API
+calls. To enable:
+
+```toml
+[tool.attune-author.fact-check.faithfulness]
+enabled = true
+threshold = 0.95            # below this triggers a review block
+budget_per_file_usd = 0.10  # skip if estimated cost exceeds cap
+model = "claude-sonnet-4-6" # haiku is ~1/3 the cost
+```
+
+The judge is best-effort. Missing `attune-rag[claude]`, missing
+`ANTHROPIC_API_KEY`, over-budget cost estimates, and transient
+API failures all degrade silently rather than blocking the
+polish. Set `block_polish_on_unavailable = true` in CI lanes
+where missing deps should fail loudly instead.
+
+End-of-run telemetry (call count, skip count, total estimated
+USD) logs at INFO level after `attune-author regenerate`. Set
+`ATTUNE_AUTHOR_FAITHFULNESS=off` to disable for a single run.
+
 ## Polish cache
 
 `attune-author` caches LLM polish responses on disk so re-generating an
diff --git a/docs/specs/polish-fact-check/decisions.md b/docs/specs/polish-fact-check/decisions.md
index 4913aec..f95a123 100644
--- a/docs/specs/polish-fact-check/decisions.md
+++ b/docs/specs/polish-fact-check/decisions.md
@@ -73,3 +73,38 @@ To be filled in during Phase 3 implementation:
     faithfulness judge ships, it will require its own real-LLM
     calibration run. Folding the cost-delta measurement into that
     run avoids two separate real-LLM cycles.
+- 2026-05-16 — Phase 3 shipped. New decisions captured during
+  implementation:
+  - **Opt-in default**: `enabled=False` ships in
+    `FaithfulnessConfig` and the pyproject loader, because the
+    judge makes real Anthropic API calls and we shouldn't bill
+    users for it silently on the first run after install. The
+    Phase 1 fact-check is enabled by default (no API calls); the
+    Phase 3 judge is not.
+  - **Synchronous wrapper via `asyncio.run`**: the existing
+    polish pipeline is synchronous, so the async
+    `FaithfulnessJudge.score` coroutine is bridged with
+    `asyncio.run`. This precludes calling the judge from inside
+    a running event loop (we don't, today), but keeps the
+    surface aligned with the rest of attune-author.
+  - **Best-effort vs strict**: missing extras / missing API key
+    / transient failures all default to `JudgeOutcome(score=None,
+    skipped_reason=…)` rather than raising. CI lanes that need
+    loud failures opt in via `block_polish_on_unavailable = true`.
+  - **Budget gate uses character-count heuristic, not tokenizer**:
+    `estimate_cost_usd(chars, model)` divides chars by 4 to get
+    a rough token count and multiplies by a per-model price
+    lookup. Accurate to ~20% — well inside what a $0.10 budget
+    cap cares about. A real tokenizer is a future change if
+    drift surfaces.
+  - **Cost telemetry as function attribute, not module global**:
+    `_faithfulness_telemetry()` stores the counter dict on its
+    own `_state` attribute so it's resettable, mockable, and
+    doesn't leak module-level state. Mirrors how the polish
+    cache exposes its store.
+  - **Calibration deferred**: tasks 3.3 and 3.4 require a real
+    LLM run against the ops-dashboard pre-fix and post-fix
+    fixtures. The placeholder threshold of `0.95` ships as the
+    default and the calibration is scheduled to land alongside
+    the live-LLM Phase 2 acceptance run so a single real-API
+    cycle covers both phases' open work.
diff --git a/docs/specs/polish-fact-check/tasks.md b/docs/specs/polish-fact-check/tasks.md
index 6ca3c76..705314d 100644
--- a/docs/specs/polish-fact-check/tasks.md
+++ b/docs/specs/polish-fact-check/tasks.md
@@ -110,23 +110,22 @@ the `FactCheckReport` plumbing.
 
 | # | Task | Layer | Status | Notes |
 |---|------|-------|--------|-------|
-| 3.1 | Add faithfulness-threshold + budget-cap config to `[tool.attune-author.fact-check]` | attune-author | todo | Default threshold `0.95`; default cap `$0.10/feature` |
-| 3.2 | Implement `faithfulness.judge_polished_file(polished_path, source_paths, config)` wrapper | attune-author | todo | Wraps `attune_rag.eval.faithfulness.FaithfulnessJudge` |
-| 3.3 | Calibrate threshold against ops-dashboard fixture | attune-author | todo | Pre-fix should score < 0.9 mean; post-fix ≥ 0.95 |
-| 3.4 | Document calibration result in `decisions.md` (or design doc) | attune-author | todo | Pre-committed matrix entry; concrete numbers |
-| 3.5 | Wire judge into post-polish pipeline (after Phase 1 fact-check) | attune-author | todo | Append `## Faithfulness review` block when below threshold |
-| 3.6 | Cost telemetry: aggregate per-feature judge cost; report at end of `regenerate` | attune-author | todo | Use existing telemetry hooks if any; otherwise log |
-| 3.7 | Test: judge runs and writes review block on a deliberately unfaithful synthetic input | attune-author | todo | Construct a polished file that contradicts the source |
-| 3.8 | Test: budget cap skips judge call when estimated cost exceeds threshold | attune-author | todo | |
-| 3.9 | Update CHANGELOG + README | attune-author | todo | |
+| 3.1 | Add faithfulness-threshold + budget-cap config to `[tool.attune-author.fact-check]` | attune-author | **done** | `[tool.attune-author.fact-check.faithfulness]` sub-table; defaults threshold=0.95, budget=$0.10, model=Sonnet 4.6, enabled=False (opt-in) |
+| 3.2 | Implement `faithfulness.judge_polished_file(polished_path, source_paths, config)` wrapper | attune-author | **done** | Wraps `FaithfulnessJudge` via `asyncio.run`; best-effort: missing extra / missing API key / over-budget all return `JudgeOutcome(score=None, skipped_reason=…)` rather than raising. `block_polish_on_unavailable=True` opt-in for strict CI. |
+| 3.3 | Calibrate threshold against ops-dashboard fixture | attune-author | deferred | Requires real-LLM run; placeholder default `0.95` documented in decisions.md as pre-calibration. Calibration scheduled alongside live-LLM Phase 2 acceptance run. |
+| 3.4 | Document calibration result in `decisions.md` (or design doc) | attune-author | deferred | Empty calibration record retained; will populate when 3.3 runs. |
+| 3.5 | Wire judge into post-polish pipeline (after Phase 1 fact-check) | attune-author | **done** | `generator._run_faithfulness_judge` called after `_run_fact_check`; appends `## Faithfulness review` block when below threshold. `ATTUNE_AUTHOR_FAITHFULNESS=off` env override. |
+| 3.6 | Cost telemetry: aggregate per-feature judge cost; report at end of `regenerate` | attune-author | **done** | Per-process telemetry state on `_faithfulness_telemetry`; `run_maintenance` resets at start and logs INFO summary at end (calls, skipped, total estimated $). |
+| 3.7 | Test: judge runs and writes review block on a deliberately unfaithful synthetic input | attune-author | **done** | `test_pipeline_wiring.py::test_run_faithfulness_judge_appends_review_block_when_below_threshold` + `test_judge.py::test_judge_below_threshold_flags_threshold_not_met` |
+| 3.8 | Test: budget cap skips judge call when estimated cost exceeds threshold | attune-author | **done** | `test_judge.py::test_judge_skipped_when_over_budget` |
+| 3.9 | Update CHANGELOG + README | attune-author | **done** | CHANGELOG under Unreleased; README adds a "Faithfulness review (Phase 3)" subsection. |
 
 ### Phase 3 exit checklist
 
-- [ ] Tasks 3.1–3.9 done
-- [ ] Calibration shows clean separation between pre-fix and post-fix
-      fixture scores
-- [ ] Threshold + cap configurable
-- [ ] Spec status updated
+- [x] Tasks 3.1, 3.2, 3.5–3.9 done (30 new tests)
+- [x] Threshold + cap configurable
+- [x] Spec status updated
+- [ ] Calibration (tasks 3.3, 3.4) — deferred until real-LLM run lands; placeholder default `threshold=0.95` documented in `decisions.md`
 
 ---
 
diff --git a/src/attune_author/faithfulness/__init__.py b/src/attune_author/faithfulness/__init__.py
new file mode 100644
index 0000000..f4f2f71
--- /dev/null
+++ b/src/attune_author/faithfulness/__init__.py
@@ -0,0 +1,323 @@
+"""Faithfulness judge integration for polished docs.
+
+Phase 3 of the polish-fact-check spec
+(``docs/specs/polish-fact-check``). Wraps
+:class:`attune_rag.eval.faithfulness.FaithfulnessJudge` as a
+post-polish step: score the polished file's claims against the
+source files it was generated from. When the score falls below
+the configured threshold, append a ``## Faithfulness review``
+block to the polished file so a human can review.
+
+The judge is best-effort:
+
+- Skips (``score=None``) if ``attune-rag[claude]`` isn't installed.
+- Skips if the environment has no ``ANTHROPIC_API_KEY``.
+- Skips when the estimated cost would exceed the
+  configured per-file budget cap.
+- Catches transient API/network failures and skips
+  rather than failing the polish.
+
+A skipped outcome gets no review block. The polish itself is
+never blocked on the judge.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import dataclass
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+#: Approximate cost per 1K input tokens for the judge model
+#: (Sonnet 4.6 default). The dict keys match the substrings we
+#: search for in the model id; first match wins.
+_COST_PER_1K_INPUT_TOKENS: dict[str, float] = {
+    "haiku": 0.001,
+    "sonnet": 0.003,
+    "opus": 0.015,
+}
+
+#: Approximate cost per 1K output tokens, keyed the same way as
+#: the input table. ``estimate_cost_usd`` applies these rates to
+#: a fixed default output size rather than a fraction of input.
+_COST_PER_1K_OUTPUT_TOKENS: dict[str, float] = {
+    "haiku": 0.005,
+    "sonnet": 0.015,
+    "opus": 0.075,
+}
+
+#: Rough character-per-token estimate used to convert sizes to
+#: tokens without invoking a tokenizer. Accurate to within ~20%
+#: for English prose — well inside what the budget gate cares
+#: about.
+_CHARS_PER_TOKEN = 4
+
+
+@dataclass
+class FaithfulnessConfig:
+    """Faithfulness-judge configuration.
+
+    Attributes:
+        enabled: Master switch. ``False`` skips the judge entirely.
+        threshold: Score below this triggers a review block. Range
+            ``[0.0, 1.0]``. Default is the spec's pre-calibration
+            placeholder; tune via ``[tool.attune-author.fact-check.faithfulness]``.
+        budget_per_file_usd: Skip the judge call when the estimated
+            cost (input + output tokens × model price) would exceed
+            this. Default ``$0.10`` matches the spec.
+        model: Judge model name. Defaults to the attune-rag default
+            (Sonnet 4.6); Haiku 4.5 is roughly 1/3 the cost and
+            usable for high-volume runs.
+        block_polish_on_unavailable: When ``True``, missing
+            ``attune-rag[claude]`` or missing API key raises rather
+            than degrading silently. Default ``False`` — the judge is
+            best-effort.
+    """
+
+    enabled: bool = False
+    threshold: float = 0.95
+    budget_per_file_usd: float = 0.10
+    model: str = "claude-sonnet-4-6"
+    block_polish_on_unavailable: bool = False
+
+
+@dataclass
+class JudgeOutcome:
+    """Outcome of a single judge call.
+
+    ``score`` is ``None`` for skipped runs (disabled, unavailable,
+    over-budget) so callers can disambiguate "judge ran, all claims
+    supported" from "judge didn't run".
+    """
+
+    score: float | None
+    threshold_met: bool
+    cost_estimate_usd: float
+    supported_claims: list[str]
+    unsupported_claims: list[str]
+    reasoning: str
+    skipped_reason: str | None = None
+
+
+def estimate_cost_usd(input_chars: int, model: str, *, output_chars: int = 2000) -> float:
+    """Estimate the judge-call cost from input/output character sizes.
+
+    Uses a coarse chars-per-token approximation and a per-model
+    price lookup. The default ``output_chars`` matches the judge's
+    typical reply length. Pure: no network, no I/O.
+    """
+    model_lower = model.lower()
+    input_rate = next(
+        (rate for key, rate in _COST_PER_1K_INPUT_TOKENS.items() if key in model_lower),
+        _COST_PER_1K_INPUT_TOKENS["sonnet"],
+    )
+    output_rate = next(
+        (rate for key, rate in _COST_PER_1K_OUTPUT_TOKENS.items() if key in model_lower),
+        _COST_PER_1K_OUTPUT_TOKENS["sonnet"],
+    )
+    input_tokens = input_chars / _CHARS_PER_TOKEN
+    output_tokens = output_chars / _CHARS_PER_TOKEN
+    return (input_tokens / 1000.0) * input_rate + (output_tokens / 1000.0) * output_rate
+
+
+def judge_polished_file(
+    polished_path: Path,
+    source_paths: list[Path],
+    *,
+    config: FaithfulnessConfig,
+) -> JudgeOutcome:
+    """Run the faithfulness judge against a single polished file.
+
+    Args:
+        polished_path: Polished markdown file written by the polish pass.
+        source_paths: Source ``.py`` files the polish had as context.
+            Each readable file becomes one entry in the judge's "passages" list.
+        config: Resolved faithfulness configuration.
+
+    Returns:
+        A :class:`JudgeOutcome`. ``score is None`` indicates the judge
+        was skipped — see ``skipped_reason``.
+
+    Raises:
+        RuntimeError: When ``config.block_polish_on_unavailable`` is
+            True and the judge can't run (missing extra or key).
+    """
+    if not config.enabled:
+        return _skipped("disabled")
+
+    answer = ""
+    try:
+        answer = polished_path.read_text(encoding="utf-8")
+    except OSError as exc:
+        logger.warning("faithfulness: cannot read %s (%s)", polished_path, exc)
+        return _skipped("polished file unreadable")
+
+    passages: list[str] = []
+    for src in source_paths:
+        try:
+            passages.append(src.read_text(encoding="utf-8"))
+        except OSError as exc:
+            logger.debug("faithfulness: skip source %s (%s)", src, exc)
+            continue
+
+    if not passages:
+        return _skipped("no readable source passages")
+
+    total_chars = len(answer) + sum(len(p) for p in passages)
+    cost_est = estimate_cost_usd(total_chars, config.model)
+    if cost_est > config.budget_per_file_usd:
+        logger.info(
+            "faithfulness: skipping %s (estimated $%.4f > budget $%.4f)",
+            polished_path.name,
+            cost_est,
+            config.budget_per_file_usd,
+        )
+        return JudgeOutcome(
+            score=None,
+            threshold_met=True,
+            cost_estimate_usd=cost_est,
+            supported_claims=[],
+            unsupported_claims=[],
+            reasoning="",
+            skipped_reason="over-budget",
+        )
+
+    try:
+        from attune_rag.eval.faithfulness import FaithfulnessJudge
+    except ImportError as exc:
+        message = (
+            "attune-rag[claude] not installed; " "install with: pip install 'attune-rag[claude]'"
+        )
+        if config.block_polish_on_unavailable:
+            raise RuntimeError(message) from exc
+        logger.info("faithfulness: %s", message)
+        return JudgeOutcome(
+            score=None,
+            threshold_met=True,
+            cost_estimate_usd=cost_est,
+            supported_claims=[],
+            unsupported_claims=[],
+            reasoning="",
+            skipped_reason="attune-rag[claude] not installed",
+        )
+
+    if not os.environ.get("ANTHROPIC_API_KEY"):
+        if config.block_polish_on_unavailable:
+            raise RuntimeError("ANTHROPIC_API_KEY not set; faithfulness judge cannot run.")
+        logger.info("faithfulness: ANTHROPIC_API_KEY not set, skipping judge.")
+        return JudgeOutcome(
+            score=None,
+            threshold_met=True,
+            cost_estimate_usd=cost_est,
+            supported_claims=[],
+            unsupported_claims=[],
+            reasoning="",
+            skipped_reason="ANTHROPIC_API_KEY missing",
+        )
+
+    import asyncio
+
+    try:
+        judge = FaithfulnessJudge(model=config.model)
+        result = asyncio.run(
+            judge.score(
+                query=f"Documentation for {polished_path.stem}",
+                answer=answer,
+                passages=passages,
+            )
+        )
+    except Exception as exc:  # noqa: BLE001
+        # INTENTIONAL: best-effort. Network errors, SDK errors, or
+        # transient API failures should not block the polish.
+        logger.warning("faithfulness: judge call failed (%s)", exc)
+        return JudgeOutcome(
+            score=None,
+            threshold_met=True,
+            cost_estimate_usd=cost_est,
+            supported_claims=[],
+            unsupported_claims=[],
+            reasoning="",
+            skipped_reason=f"judge call failed: {exc!s}"[:200],
+        )
+
+    threshold_met = result.score >= config.threshold
+    return JudgeOutcome(
+        score=result.score,
+        threshold_met=threshold_met,
+        cost_estimate_usd=cost_est,
+        supported_claims=list(result.supported_claims),
+        unsupported_claims=list(result.unsupported_claims),
+        reasoning=result.reasoning,
+    )
+
+
+def format_review_block(outcome: JudgeOutcome, threshold: float) -> str:
+    """Render a ``## Faithfulness review`` block for soft-fail output.
+
+    The block matches the Phase 1 ``## Unresolved references`` shape
+    so editors find both with the same scan.
+    """
+    if outcome.score is None:
+        return ""
+    lines = [
+        "## Faithfulness review",
+        "",
+        f"> Auto-generated by attune-author faithfulness judge. "
+        f"Score {outcome.score:.2f} fell below the configured "
+        f"threshold of {threshold:.2f}. Review unsupported claims "
+        f"and either fix the source code or fix this doc.",
+        "",
+        f"**Score:** {outcome.score:.2f} "
+        f"(supported: {len(outcome.supported_claims)}, "
+        f"unsupported: {len(outcome.unsupported_claims)})",
+    ]
+    if outcome.unsupported_claims:
+        lines.extend(["", "### Unsupported claims", ""])
+        for claim in outcome.unsupported_claims:
+            lines.append(f"- {claim}")
+    if outcome.reasoning:
+        lines.extend(["", "### Reasoning", "", outcome.reasoning.strip()])
+    return "\n".join(lines)
+
+
+def apply_review_block(polished_path: Path, block: str) -> bool:
+    """Append the review block to ``polished_path``.
+
+    Returns True if a block was appended, False if the block was
+    empty. Matches the Phase 1 ``apply_soft_fail`` contract.
+    """
+    if not block:
+        return False
+    existing = polished_path.read_text(encoding="utf-8")
+    if not existing.endswith("\n"):
+        existing += "\n"
+    polished_path.write_text(existing + block + "\n", encoding="utf-8")
+    return True
+
+
+def _skipped(reason: str) -> JudgeOutcome:
+    return JudgeOutcome(
+        score=None,
+        threshold_met=True,
+        cost_estimate_usd=0.0,
+        supported_claims=[],
+        unsupported_claims=[],
+        reasoning="",
+        skipped_reason=reason,
+    )
+
+
+from .config import load_config  # noqa: E402 - re-export, must come after dataclasses
+
+__all__ = [
+    "FaithfulnessConfig",
+    "JudgeOutcome",
+    "apply_review_block",
+    "estimate_cost_usd",
+    "format_review_block",
+    "judge_polished_file",
+    "load_config",
+]
diff --git a/src/attune_author/faithfulness/config.py b/src/attune_author/faithfulness/config.py
new file mode 100644
index 0000000..191c4b3
--- /dev/null
+++ b/src/attune_author/faithfulness/config.py
@@ -0,0 +1,70 @@
+"""Load faithfulness-judge configuration from ``pyproject.toml``.
+
+Reads the ``[tool.attune-author.fact-check.faithfulness]``
+sub-table. Defaults match :class:`FaithfulnessConfig`'s defaults
+(enabled=False — opt-in only since the judge makes real API
+calls).
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from . import FaithfulnessConfig
+
+
+def _read_toml(path: Path) -> dict[str, object]:
+    if not path.is_file():
+        return {}
+    try:
+        import tomllib
+    except ImportError:  # pragma: no cover - Py <3.11 fallback
+        import tomli as tomllib  # type: ignore[import-not-found,no-redef]
+    try:
+        return tomllib.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return {}
+
+
+def load_config(project_root: Path) -> FaithfulnessConfig:
+    """Build a :class:`FaithfulnessConfig` from the project's pyproject.
+
+    Reads ``[tool.attune-author.fact-check.faithfulness]`` so the
+    faithfulness section sits alongside (and inside) the existing
+    fact-check table. Unknown keys are ignored so adjacent phases
+    can grow their own sub-tables.
+    """
+    data = _read_toml(project_root / "pyproject.toml")
+    tool = data.get("tool", {}) if isinstance(data, dict) else {}
+    author = tool.get("attune-author", {}) if isinstance(tool, dict) else {}
+    fact_check = author.get("fact-check", {}) if isinstance(author, dict) else {}
+    section = fact_check.get("faithfulness", {}) if isinstance(fact_check, dict) else {}
+
+    if not isinstance(section, dict):
+        return FaithfulnessConfig()
+
+    def _bool(key: str, default: bool) -> bool:
+        value = section.get(key, default)
+        return bool(value)
+
+    def _float(key: str, default: float) -> float:
+        value = section.get(key, default)
+        try:
+            return float(value)
+        except (TypeError, ValueError):
+            return default
+
+    def _str(key: str, default: str) -> str:
+        value = section.get(key, default)
+        return str(value) if value else default
+
+    return FaithfulnessConfig(
+        enabled=_bool("enabled", False),
+        threshold=_float("threshold", 0.95),
+        budget_per_file_usd=_float("budget_per_file_usd", 0.10),
+        model=_str("model", "claude-sonnet-4-6"),
+        block_polish_on_unavailable=_bool("block_polish_on_unavailable", False),
+    )
+
+
+__all__ = ["load_config"]
diff --git a/src/attune_author/generator.py b/src/attune_author/generator.py
index 7394dc6..ee30e69 100644
--- a/src/attune_author/generator.py
+++ b/src/attune_author/generator.py
@@ -459,10 +459,16 @@ def apply_polish_results(
         source_hash=prep.source_hash,
         matched_files=list(prep.matched_files),
     )
+    # Resolve the project root once so the faithfulness gate can read
+    # source files. cwd is the existing convention used by
+    # _run_fact_check; matched_files are stored relative to it.
+    project_root = Path.cwd()
+    absolute_sources = [project_root / rel_path for rel_path in prep.matched_files]
     for entry in prep.pending:
         final_content = polished_by_depth.get(entry.depth, entry.rendered_content)
         entry.out_path.write_text(final_content, encoding="utf-8")
         _run_fact_check(entry.out_path)
+        _run_faithfulness_judge(entry.out_path, absolute_sources, project_root)
         result.templates.append(
             GeneratedTemplate(
                 feature=feature.name,
@@ -518,6 +524,90 @@ def _run_fact_check(polished_path: Path) -> None:
         logger.warning("Could not append fact-check block to %s: %s", polished_path, exc)
 
 
+def _faithfulness_telemetry() -> dict[str, float]:
+    """Per-process aggregate of faithfulness cost + call count.
+
+    Stored on the function as a function attribute so the polite
+    "logger.info at end of regen" hook can read totals without
+    introducing module-level state. Reset via
+    :func:`reset_faithfulness_telemetry`.
+    """
+    state = getattr(_faithfulness_telemetry, "_state", None)
+    if state is None:
+        state = {"calls": 0.0, "skipped": 0.0, "cost_usd": 0.0}
+        _faithfulness_telemetry._state = state  # type: ignore[attr-defined]
+    return state
+
+
+def reset_faithfulness_telemetry() -> None:
+    """Reset the per-process faithfulness telemetry counters."""
+    _faithfulness_telemetry._state = {  # type: ignore[attr-defined]
+        "calls": 0.0,
+        "skipped": 0.0,
+        "cost_usd": 0.0,
+    }
+
+
+def _run_faithfulness_judge(
+    polished_path: Path,
+    source_paths: list[Path],
+    project_root: Path,
+) -> None:
+    """Run the Phase 3 faithfulness judge against a freshly-written file.
+
+    Best-effort: any failure inside the judge (missing extra, missing
+    API key, transient network error) degrades silently — the polish
+    pipeline is never blocked. When the judge runs and the score
+    falls below the configured threshold, a ``## Faithfulness review``
+    block is appended to the polished file.
+
+    Mode override via the ``ATTUNE_AUTHOR_FAITHFULNESS`` env var
+    (``off`` disables; any other value defers to pyproject config).
+    """
+    if os.environ.get("ATTUNE_AUTHOR_FAITHFULNESS", "").lower() == "off":
+        return
+
+    try:
+        from attune_author.faithfulness import (
+            apply_review_block,
+            format_review_block,
+            judge_polished_file,
+            load_config,
+        )
+
+        config = load_config(project_root)
+        if not config.enabled:
+            return
+
+        outcome = judge_polished_file(polished_path, source_paths, config=config)
+    except Exception as exc:  # noqa: BLE001
+        # INTENTIONAL: opportunistic — judge layer must never break
+        # the polish pipeline. Same contract as the fact-check gate.
+        logger.warning("Faithfulness judge skipped for %s: %s", polished_path, exc)
+        return
+
+    telemetry = _faithfulness_telemetry()
+    if outcome.score is None:
+        telemetry["skipped"] += 1
+        return
+
+    telemetry["calls"] += 1
+    telemetry["cost_usd"] += outcome.cost_estimate_usd
+
+    if outcome.threshold_met:
+        return
+
+    block = format_review_block(outcome, config.threshold)
+    try:
+        apply_review_block(polished_path, block)
+    except OSError as exc:
+        logger.warning(
+            "Could not append faithfulness review block to %s: %s",
+            polished_path,
+            exc,
+        )
+
+
 def _maybe_polish(
     content: str,
     feature: Feature,
diff --git a/src/attune_author/maintenance.py b/src/attune_author/maintenance.py
index 10c0345..13788b5 100644
--- a/src/attune_author/maintenance.py
+++ b/src/attune_author/maintenance.py
@@ -99,6 +99,12 @@ def run_maintenance(
     if dry_run or report.stale_count == 0:
         return result
 
+    # Reset Phase 3 faithfulness telemetry so the end-of-run summary
+    # reflects this regen rather than carrying state across runs.
+    from attune_author.generator import reset_faithfulness_telemetry
+
+    reset_faithfulness_telemetry()
+
     for entry in report.help_entries:
         if not entry.is_stale:
             continue
@@ -127,6 +133,20 @@ def run_maintenance(
             )
             result.failed.append(entry.feature)
 
+    # Phase 3 telemetry summary. Logged at INFO so it appears in the
+    # default `attune-author regenerate` output. Silent when the judge
+    # didn't run at all (disabled or never reached).
+    from attune_author.generator import _faithfulness_telemetry
+
+    telemetry = _faithfulness_telemetry()
+    if telemetry["calls"] or telemetry["skipped"]:
+        logger.info(
+            "Faithfulness judge: %d call(s), %d skipped, estimated cost $%.4f",
+            int(telemetry["calls"]),
+            int(telemetry["skipped"]),
+            telemetry["cost_usd"],
+        )
+
     return result
 
 
diff --git a/tests/unit/faithfulness/__init__.py b/tests/unit/faithfulness/__init__.py
new file mode 100644
index 0000000..79aa7b9
--- /dev/null
+++ b/tests/unit/faithfulness/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for the faithfulness-judge integration."""
diff --git a/tests/unit/faithfulness/test_config.py b/tests/unit/faithfulness/test_config.py
new file mode 100644
index 0000000..7686aa7
--- /dev/null
+++ b/tests/unit/faithfulness/test_config.py
@@ -0,0 +1,83 @@
+"""Tests for the faithfulness config loader."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from textwrap import dedent
+
+from attune_author.faithfulness import FaithfulnessConfig, load_config
+
+
+def _write_pyproject(tmp_path: Path, body: str) -> None:
+    tmp_path.joinpath("pyproject.toml").write_text(dedent(body), encoding="utf-8")
+
+
+def test_default_disabled_when_no_pyproject(tmp_path: Path) -> None:
+    """A project root without pyproject.toml yields the stock, disabled config."""
+    loaded = load_config(tmp_path)
+    assert loaded == FaithfulnessConfig() and loaded.enabled is False
+
+
+def test_defaults_when_section_missing(tmp_path: Path) -> None:
+    """A fact-check table with no faithfulness sub-table yields the defaults."""
+    toml_body = """
+        [tool.attune-author.fact-check]
+        soft_fail = true
+        """
+    _write_pyproject(tmp_path, toml_body)
+
+    loaded = load_config(tmp_path)
+    assert loaded == FaithfulnessConfig()
+
+
+def test_enables_via_pyproject(tmp_path: Path) -> None:
+    """``enabled = true`` in the faithfulness table is reflected on the config."""
+    toml_body = """
+        [tool.attune-author.fact-check.faithfulness]
+        enabled = true
+        """
+    _write_pyproject(tmp_path, toml_body)
+
+    loaded = load_config(tmp_path)
+    assert loaded.enabled is True
+
+
+def test_custom_threshold_and_budget(tmp_path: Path) -> None:
+    """Threshold, per-file budget and model are all read from pyproject.toml."""
+    toml_body = """
+        [tool.attune-author.fact-check.faithfulness]
+        enabled = true
+        threshold = 0.8
+        budget_per_file_usd = 0.25
+        model = "claude-haiku-4-5"
+        """
+    _write_pyproject(tmp_path, toml_body)
+
+    loaded = load_config(tmp_path)
+    assert loaded.threshold == 0.8
+    assert loaded.budget_per_file_usd == 0.25
+    assert loaded.model == "claude-haiku-4-5"
+
+
+def test_invalid_threshold_falls_back_to_default(tmp_path: Path) -> None:
+    """A non-numeric threshold is discarded in favour of the 0.95 default."""
+    toml_body = """
+        [tool.attune-author.fact-check.faithfulness]
+        threshold = "not a number"
+        """
+    _write_pyproject(tmp_path, toml_body)
+
+    loaded = load_config(tmp_path)
+    assert loaded.threshold == 0.95
+
+
+def test_block_polish_on_unavailable_toggle(tmp_path: Path) -> None:
+    """``block_polish_on_unavailable = true`` is picked up from pyproject.toml."""
+    toml_body = """
+        [tool.attune-author.fact-check.faithfulness]
+        block_polish_on_unavailable = true
+        """
+    _write_pyproject(tmp_path, toml_body)
+
+    loaded = load_config(tmp_path)
+    assert loaded.block_polish_on_unavailable is True
diff --git a/tests/unit/faithfulness/test_judge.py b/tests/unit/faithfulness/test_judge.py
new file mode 100644
index 0000000..77b26e2
--- /dev/null
+++ b/tests/unit/faithfulness/test_judge.py
@@ -0,0 +1,332 @@
+"""Tests for the faithfulness judge wrapper."""
+
+from __future__ import annotations
+
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+from attune_author.faithfulness import (
+    FaithfulnessConfig,
+    JudgeOutcome,
+    apply_review_block,
+    estimate_cost_usd,
+    format_review_block,
+    judge_polished_file,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class _FakeJudgeResult:
+    """Test double mirroring ``attune_rag.eval.faithfulness.FaithfulnessResult``."""
+
+    def __init__(
+        self,
+        score: float,
+        supported: list[str] | None = None,
+        unsupported: list[str] | None = None,
+        reasoning: str = "",
+    ) -> None:
+        # Missing (or empty) claim lists fall back to a fresh empty list.
+        self.score, self.reasoning = score, reasoning
+        self.supported_claims = supported or []
+        self.unsupported_claims = unsupported or []
+
+
+def _install_fake_attune_rag(monkeypatch, judge_score: float, **kwargs):
+    """Register stub ``attune_rag`` modules so the judge import resolves.
+
+    The stub ``FaithfulnessJudge.score`` coroutine returns a
+    ``_FakeJudgeResult`` built from ``judge_score`` and ``kwargs``; a
+    placeholder ``ANTHROPIC_API_KEY`` is set as well.
+    """
+
+    class _FakeJudge:
+        def __init__(self, model: str = "x", **_):
+            self.model = model
+
+        async def score(self, query, answer, passages):  # noqa: D401
+            return _FakeJudgeResult(score=judge_score, **kwargs)
+
+    root_pkg = types.ModuleType("attune_rag")
+    eval_mod = types.ModuleType("attune_rag.eval")
+    judge_mod = types.ModuleType("attune_rag.eval.faithfulness")
+    judge_mod.FaithfulnessJudge = _FakeJudge
+    eval_mod.faithfulness = judge_mod
+    root_pkg.eval = eval_mod
+
+    # Register every level of the dotted path, parents included.
+    for stub in (root_pkg, eval_mod, judge_mod):
+        monkeypatch.setitem(sys.modules, stub.__name__, stub)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")  # pragma: allowlist secret
+
+
+def _block_attune_rag(monkeypatch) -> None:
+    """Make importing ``attune_rag.eval.faithfulness`` raise ``ImportError``."""
+    monkeypatch.setitem(sys.modules, name="attune_rag.eval.faithfulness", value=None)
+
+
+# ---------------------------------------------------------------------------
+# estimate_cost_usd
+# ---------------------------------------------------------------------------
+
+
+def test_estimate_cost_haiku_cheaper_than_sonnet() -> None:
+    """For the same input size, Haiku must be estimated cheaper than Sonnet."""
+    haiku_usd = estimate_cost_usd(10_000, model="claude-haiku-4-5")
+    assert haiku_usd < estimate_cost_usd(10_000, model="claude-sonnet-4-6")
+
+
+def test_estimate_cost_scales_with_input_size() -> None:
+    """A tenfold larger input must produce a strictly larger estimate."""
+    small_usd = estimate_cost_usd(1_000, model="claude-sonnet-4-6")
+    assert small_usd < estimate_cost_usd(10_000, model="claude-sonnet-4-6")
+
+
+def test_estimate_cost_unknown_model_falls_back_to_sonnet() -> None:
+    """An unrecognised model name is priced the same as Sonnet."""
+    fallback_usd = estimate_cost_usd(10_000, model="unknown-future-model")
+    assert fallback_usd == pytest.approx(estimate_cost_usd(10_000, model="claude-sonnet-4-6"))
+
+
+# ---------------------------------------------------------------------------
+# judge_polished_file — skip paths
+# ---------------------------------------------------------------------------
+
+
+def test_judge_skipped_when_disabled(tmp_path: Path) -> None:
+    """A disabled config yields an unscored outcome that still counts as passing."""
+    doc = tmp_path / "doc.md"
+    doc.write_text("# Hi", encoding="utf-8")
+    result = judge_polished_file(doc, [], config=FaithfulnessConfig(enabled=False))
+    assert result.score is None and result.skipped_reason == "disabled"
+    assert result.threshold_met is True
+
+
+def test_judge_skipped_when_polished_file_unreadable(tmp_path: Path) -> None:
+    """A polished path that does not exist is reported as a skip."""
+    cfg = FaithfulnessConfig(enabled=True)
+    result = judge_polished_file(tmp_path / "does-not-exist.md", [], config=cfg)
+    assert result.score is None and result.skipped_reason == "polished file unreadable"
+
+
+def test_judge_skipped_when_no_readable_sources(tmp_path: Path) -> None:
+    """When the only source path is missing, the judge is skipped."""
+    doc = tmp_path / "doc.md"
+    doc.write_text("# Hi", encoding="utf-8")
+    cfg = FaithfulnessConfig(enabled=True)
+    result = judge_polished_file(doc, [tmp_path / "ghost.py"], config=cfg)
+    assert result.score is None and result.skipped_reason == "no readable source passages"
+
+
+def test_judge_skipped_when_over_budget(tmp_path: Path) -> None:
+    """Large inputs combined with a tiny per-file budget are skipped as over-budget."""
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("x" * 200_000, encoding="utf-8")
+    source.write_text("y" * 200_000, encoding="utf-8")
+    tight = FaithfulnessConfig(enabled=True, budget_per_file_usd=0.001)
+    result = judge_polished_file(doc, [source], config=tight)
+    assert result.score is None
+    assert result.skipped_reason == "over-budget"
+
+
+def test_judge_skipped_when_attune_rag_missing(tmp_path: Path, monkeypatch) -> None:
+    """With the judge module unimportable, the outcome is a skip naming attune-rag."""
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("# Hi", encoding="utf-8")
+    source.write_text("def x(): pass\n", encoding="utf-8")
+    _block_attune_rag(monkeypatch)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")  # pragma: allowlist secret
+
+    result = judge_polished_file(doc, [source], config=FaithfulnessConfig(enabled=True))
+
+    assert result.score is None
+    reason = result.skipped_reason
+    assert reason is not None and "attune-rag" in reason
+
+
+def test_judge_raises_when_block_polish_on_unavailable_and_extra_missing(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    """Strict mode turns an unimportable judge module into a hard failure.
+
+    ``block_polish_on_unavailable=True`` must raise rather than skip.
+    """
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("# Hi", encoding="utf-8")
+    source.write_text("def x(): pass\n", encoding="utf-8")
+    _block_attune_rag(monkeypatch)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")  # pragma: allowlist secret
+    strict = FaithfulnessConfig(enabled=True, block_polish_on_unavailable=True)
+
+    with pytest.raises(RuntimeError, match="attune-rag"):
+        judge_polished_file(doc, [source], config=strict)
+
+
+def test_judge_skipped_when_api_key_missing(tmp_path: Path, monkeypatch) -> None:
+    """Without ``ANTHROPIC_API_KEY`` the judge is skipped even if importable."""
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("# Hi", encoding="utf-8")
+    source.write_text("def x(): pass\n", encoding="utf-8")
+    _install_fake_attune_rag(monkeypatch, judge_score=1.0)
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)  # drop the key the helper just set
+
+    result = judge_polished_file(doc, [source], config=FaithfulnessConfig(enabled=True))
+
+    assert result.score is None
+    assert result.skipped_reason == "ANTHROPIC_API_KEY missing"
+
+
+# ---------------------------------------------------------------------------
+# judge_polished_file — happy path
+# ---------------------------------------------------------------------------
+
+
+def test_judge_runs_and_records_supported_claims(tmp_path: Path, monkeypatch) -> None:
+    """A score above the threshold is recorded as passing.
+
+    The stub's supported claims are carried through to the outcome.
+    """
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("Polished content.", encoding="utf-8")
+    source.write_text("def real(): pass\n", encoding="utf-8")
+
+    verdict = {
+        "judge_score": 0.97,
+        "supported": ["claim a"],
+        "unsupported": [],
+        "reasoning": "all good",
+    }
+    _install_fake_attune_rag(monkeypatch, **verdict)
+
+    cfg = FaithfulnessConfig(enabled=True, threshold=0.95)
+    result = judge_polished_file(doc, [source], config=cfg)
+
+    assert result.score == pytest.approx(0.97)
+    assert result.threshold_met is True
+    assert result.supported_claims == ["claim a"]
+
+
+def test_judge_below_threshold_flags_threshold_not_met(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    """A score under the threshold is flagged as not met.
+
+    The stub's unsupported claims are carried through to the outcome.
+    """
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("Polished content.", encoding="utf-8")
+    source.write_text("def real(): pass\n", encoding="utf-8")
+
+    verdict = {
+        "judge_score": 0.6,
+        "supported": ["a"],
+        "unsupported": ["b", "c"],
+        "reasoning": "some drift",
+    }
+    _install_fake_attune_rag(monkeypatch, **verdict)
+
+    cfg = FaithfulnessConfig(enabled=True, threshold=0.95)
+    result = judge_polished_file(doc, [source], config=cfg)
+
+    assert result.score == pytest.approx(0.6)
+    assert result.threshold_met is False
+    assert result.unsupported_claims == ["b", "c"]
+
+
+def test_judge_handles_transient_call_failure(tmp_path: Path, monkeypatch) -> None:
+    """An exception raised inside the judge call is reported as a skip."""
+    doc, source = tmp_path / "doc.md", tmp_path / "src.py"
+    doc.write_text("Polished content.", encoding="utf-8")
+    source.write_text("def real(): pass\n", encoding="utf-8")
+
+    class _BadJudge:
+        def __init__(self, **_): ...
+
+        async def score(self, query, answer, passages):
+            raise RuntimeError("transient API failure")
+
+    root_pkg = types.ModuleType("attune_rag")
+    eval_mod = types.ModuleType("attune_rag.eval")
+    judge_mod = types.ModuleType("attune_rag.eval.faithfulness")
+    judge_mod.FaithfulnessJudge = _BadJudge
+    eval_mod.faithfulness = judge_mod
+    root_pkg.eval = eval_mod
+
+    # Register every level of the dotted path so ``import`` finds the
+    # stubs rather than a real (or absent) attune_rag install.
+    for stub in (root_pkg, eval_mod, judge_mod):
+        monkeypatch.setitem(sys.modules, stub.__name__, stub)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")  # pragma: allowlist secret
+
+    result = judge_polished_file(doc, [source], config=FaithfulnessConfig(enabled=True))
+
+    assert result.score is None
+    assert result.skipped_reason is not None
+    assert "judge call failed" in result.skipped_reason
+
+
+# ---------------------------------------------------------------------------
+# format_review_block + apply_review_block
+# ---------------------------------------------------------------------------
+
+
+def test_format_review_block_returns_empty_when_score_is_none() -> None:
+    """An unscored outcome (``score=None``) must render as the empty string."""
+    empty_text = {"supported_claims": [], "unsupported_claims": [], "reasoning": ""}
+    unscored = JudgeOutcome(
+        score=None,
+        threshold_met=True,
+        cost_estimate_usd=0.0,
+        **empty_text,
+    )
+    assert format_review_block(unscored, threshold=0.95) == ""
+
+
+def test_format_review_block_lists_unsupported_claims() -> None:
+    """The rendered block carries the heading, score, claims and reasoning."""
+    failing = JudgeOutcome(
+        score=0.5,
+        threshold_met=False,
+        cost_estimate_usd=0.01,
+        supported_claims=["good"],
+        unsupported_claims=["bad-1", "bad-2"],
+        reasoning="explanation",
+    )
+    rendered = format_review_block(failing, threshold=0.95)
+    assert "## Faithfulness review" in rendered
+    assert any(label in rendered for label in ("Score 0.50", "Score** 0.50"))
+    for expected in ("bad-1", "bad-2", "explanation"):
+        assert expected in rendered
+
+
+def test_apply_review_block_appends_block(tmp_path: Path) -> None:
+    """Applying a non-empty block returns True and keeps the original text."""
+    doc = tmp_path / "doc.md"
+    doc.write_text("# Original\n\nBody.", encoding="utf-8")
+    failing = JudgeOutcome(
+        score=0.5,
+        threshold_met=False,
+        cost_estimate_usd=0.01,
+        supported_claims=[],
+        unsupported_claims=["x"],
+        reasoning="r",
+    )
+    rendered = format_review_block(failing, threshold=0.95)
+    assert apply_review_block(doc, rendered) is True
+    updated = doc.read_text(encoding="utf-8")
+    assert "# Original" in updated
+    assert "## Faithfulness review" in updated
+
+
+def test_apply_review_block_returns_false_on_empty(tmp_path: Path) -> None:
+    doc = tmp_path / "doc.md"
+    doc.write_text("# Original", encoding="utf-8")
+    assert apply_review_block(doc, "") is False  # an empty block must report False
diff --git a/tests/unit/faithfulness/test_pipeline_wiring.py b/tests/unit/faithfulness/test_pipeline_wiring.py
new file mode 100644
index 0000000..3821c8f
--- /dev/null
+++ b/tests/unit/faithfulness/test_pipeline_wiring.py
@@ -0,0 +1,134 @@
+"""Tests for the generator <-> faithfulness wiring + telemetry."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch
+
+from attune_author.faithfulness import JudgeOutcome
+from attune_author.generator import (
+    _faithfulness_telemetry,
+    _run_faithfulness_judge,
+    reset_faithfulness_telemetry,
+)
+
+
+def _outcome(
+    score: float | None, *, threshold_met: bool, skipped: str | None = None
+) -> JudgeOutcome:
+    return JudgeOutcome(
+        score=score,
+        threshold_met=threshold_met,
+        cost_estimate_usd=0.0123,  # fixed so telemetry sums are predictable
+        supported_claims=[],
+        unsupported_claims=[] if score is None or threshold_met else ["bad"],
+        reasoning="r",
+        skipped_reason=skipped,
+    )
+
+
+def setup_function() -> None:
+    reset_faithfulness_telemetry()  # pytest calls this before each test function here
+
+
+def test_run_faithfulness_judge_disabled_via_env(tmp_path: Path, monkeypatch) -> None:
+    """The ``off`` kill switch returns before the judge is ever invoked."""
+    doc = tmp_path / "x.md"
+    doc.write_text("# x\n", encoding="utf-8")
+    monkeypatch.setenv("ATTUNE_AUTHOR_FAITHFULNESS", "off")
+
+    with patch("attune_author.faithfulness.judge_polished_file") as judge_spy:
+        _run_faithfulness_judge(doc, [], tmp_path)
+
+    judge_spy.assert_not_called()
+
+
+def test_run_faithfulness_judge_disabled_via_config(tmp_path: Path) -> None:
+    """No pyproject.toml -> config.enabled defaults to False -> no judge call."""
+    doc = tmp_path / "x.md"
+    doc.write_text("# x\n", encoding="utf-8")
+    env = patch.dict("os.environ", {"ATTUNE_AUTHOR_FAITHFULNESS": ""})  # ignore ambient "off"
+    with env, patch("attune_author.faithfulness.judge_polished_file") as judge_spy:
+        _run_faithfulness_judge(doc, [], tmp_path)
+
+    judge_spy.assert_not_called()
+
+
+def _enable_via_pyproject(tmp_path: Path) -> None:
+    """Write a minimal pyproject.toml whose faithfulness table sets ``enabled``."""
+    toml_body = "[tool.attune-author.fact-check.faithfulness]\nenabled = true\n"
+    target = tmp_path / "pyproject.toml"
+    target.write_text(toml_body, encoding="utf-8")
+
+
+def test_run_faithfulness_judge_telemetry_on_success(tmp_path: Path) -> None:
+    """A scored outcome counts as one call and adds its cost estimate."""
+    _enable_via_pyproject(tmp_path)
+    doc = tmp_path / "x.md"
+    doc.write_text("# x\n", encoding="utf-8")
+    scored = _outcome(0.97, threshold_met=True)
+    env = patch.dict("os.environ", {"ATTUNE_AUTHOR_FAITHFULNESS": ""})  # ignore ambient "off"
+    with env, patch("attune_author.faithfulness.judge_polished_file", return_value=scored):
+        _run_faithfulness_judge(doc, [], tmp_path)
+    counters = _faithfulness_telemetry()
+    assert counters["calls"] == 1
+    assert counters["skipped"] == 0
+    assert counters["cost_usd"] == 0.0123
+
+
+def test_run_faithfulness_judge_telemetry_on_skip(tmp_path: Path) -> None:
+    """An unscored outcome bumps ``skipped`` only; calls and cost stay at zero."""
+    _enable_via_pyproject(tmp_path)
+    doc = tmp_path / "x.md"
+    doc.write_text("# x\n", encoding="utf-8")
+    unscored = _outcome(None, threshold_met=True, skipped="over-budget")
+    env = patch.dict("os.environ", {"ATTUNE_AUTHOR_FAITHFULNESS": ""})  # ignore ambient "off"
+    with env, patch("attune_author.faithfulness.judge_polished_file", return_value=unscored):
+        _run_faithfulness_judge(doc, [], tmp_path)
+    counters = _faithfulness_telemetry()
+    assert counters["calls"] == 0
+    assert counters["skipped"] == 1
+    assert counters["cost_usd"] == 0
+
+
+def test_run_faithfulness_judge_appends_review_block_when_below_threshold(
+    tmp_path: Path,
+) -> None:
+    """A below-threshold outcome appends the review block to the polished file."""
+    _enable_via_pyproject(tmp_path)
+    doc = tmp_path / "x.md"
+    doc.write_text("# Original\n", encoding="utf-8")
+    failing = _outcome(0.5, threshold_met=False)
+    env = patch.dict("os.environ", {"ATTUNE_AUTHOR_FAITHFULNESS": ""})  # ignore ambient "off"
+    with env, patch("attune_author.faithfulness.judge_polished_file", return_value=failing):
+        _run_faithfulness_judge(doc, [], tmp_path)
+    updated = doc.read_text(encoding="utf-8")
+    assert "## Faithfulness review" in updated
+    assert "bad" in updated
+
+
+def test_run_faithfulness_judge_swallows_unexpected_exceptions(tmp_path: Path) -> None:
+    """A buggy judge layer must never break the polish pipeline."""
+    _enable_via_pyproject(tmp_path)
+    doc = tmp_path / "x.md"
+    doc.write_text("# Original\n", encoding="utf-8")
+    env = patch.dict("os.environ", {"ATTUNE_AUTHOR_FAITHFULNESS": ""})  # ignore ambient "off"
+    judge = "attune_author.faithfulness.judge_polished_file"
+    with env, patch(judge, side_effect=RuntimeError("kaboom")):
+        # Should not raise.
+        _run_faithfulness_judge(doc, [], tmp_path)
+
+    assert doc.read_text(encoding="utf-8") == "# Original\n"  # file left untouched
+
+
+def test_reset_faithfulness_telemetry_zeros_counters(tmp_path: Path) -> None:
+    """After a judge run, resetting returns every counter to zero."""
+    _enable_via_pyproject(tmp_path)
+    doc = tmp_path / "x.md"
+    doc.write_text("# x\n", encoding="utf-8")
+    scored = _outcome(0.97, threshold_met=True)
+    with patch("attune_author.faithfulness.judge_polished_file", return_value=scored):
+        _run_faithfulness_judge(doc, [], tmp_path)
+
+    reset_faithfulness_telemetry()
+    assert _faithfulness_telemetry() == {"calls": 0.0, "skipped": 0.0, "cost_usd": 0.0}