Smart-AI-Memory
diff --git a/‎CHANGELOG.md‎
Lines changed: 63 additions & 17 deletions b/‎CHANGELOG.md‎
Lines changed: 63 additions & 17 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/benchmark_all_fixtures.py‎
Lines changed: 131 additions & 0 deletions b/‎scripts/benchmark_all_fixtures.py‎
Lines changed: 131 additions & 0 deletions
@@ -2,36 +2,82 @@
 
 All notable changes to `attune-help` are documented here.
 
-## 0.6.0 — Unreleased
+## 0.7.0 — Unreleased
 
 ### Added
 
-- **User-facing CLI** — new `attune-help` console script
+- **Path-keyed summary sidecar** for RAG consumers.
+  `src/attune_help/templates/summaries_by_path.json`
+  maps template paths (`concepts/tool-bug-predict.md`)
+  to keyword-rich, declarative summaries. attune-rag's
+  `DirectoryCorpus` reads this schema directly — the
+  existing feature-keyed `summaries.json` was silently
+  ignored by path-keyed consumers.
+- **Per-feature query fixtures** under
+  `src/attune_help/templates/fixtures/{feature}.yaml`.
+  Each fixture lists 25 natural-language queries a
+  user would ask for that feature. Three jobs: polish
+  pipeline input (`target_keywords`), per-feature
+  regression benchmark, and contrastive training data
+  if embeddings ship later.
+- **Dev-only polish + benchmark scripts** under
+  `scripts/`:
+  - `generate_fixtures.py` — LLM-generates the 25-query
+    fixture per feature via Claude Haiku 4.5.
+  - `polish_summaries.py` — LLM-polishes each template
+    into a length-bounded, keyword-rich,
+    differentiation-aware summary.
+  - `benchmark_all_fixtures.py` — runs every feature's
+    fixtures through attune-rag and reports per-feature
+    + overall Precision@1 / Recall@3.
+  - `differentiation_hints.yaml` — per-feature USP
+    statements that prevent cross-routing between
+    overlapping features.
+- **User-facing CLI** — `attune-help` console script
   exposes `lookup`, `list`, `search`, and `simpler`
   subcommands over the same `HelpEngine` API the MCP
-  server uses. Terminal users no longer need an MCP
-  client to access the help content. `python -m
-  attune_help` also works.
+  server uses. `python -m attune_help` also works.
+
+### Retrieval quality (26 features × 25 queries = 650)
+
+| Metric | Before (0.5.1) | After (0.7.0) |
+|---|---|---|
+| Precision@1 | ~0% effective (summaries ignored) | **71.7%** |
+| Recall@3 | ~0% effective | **81.5%** |
+
+Clears the 70% P@1 gate pre-committed in
+[attune-ai/docs/rag/embeddings-decision-2026-04-17.md](https://github.com/Smart-AI-Memory/attune-ai/blob/main/docs/rag/embeddings-decision-2026-04-17.md).
+Moves the fastembed v0.2.0 embeddings track from
+"committed next milestone" to "deferred / optional".
+
+Known quality variance: 6 features below the 60% P@1
+gate (spec, code-quality, planning, refactor-plan,
+workflow-orchestration, security-audit) demonstrate
+the mutual-competition effect — once every feature has
+polished summaries, overlapping features steal each
+other's queries. Scheduled for 0.7.1 follow-up with
+targeted differentiation tuning.
 
 ### Changed
 
 - **Development Status promoted to Beta** (was Alpha).
   attune-help is now a core dependency of attune-ai
-  (Production/Stable), so the Alpha classifier understated
-  the package's actual maturity. Version jumps to `0.6.0`
-  rather than `0.5.2` to mark the shift and give
-  downstream consumers a deliberate upgrade point.
-- **PyPI project URLs point to the extracted repo**
-  (`Smart-AI-Memory/attune-help`) instead of the parent
-  `attune-ai` monorepo. Also added `Changelog` and
-  `Issues` URLs.
+  (Production/Stable).
+- **PyPI project URLs** point to the extracted repo
+  (`Smart-AI-Memory/attune-help`). Added `Changelog`
+  and `Issues` URLs.
 
 ### Consumer impact
 
-- attune-ai and attune-author both now pin
-  `attune-help>=0.5.1,<0.6`. Those caps will need to be
-  bumped to `<0.7` at release time, coordinated across
-  the two consumer repos.
+- attune-ai and attune-author both currently pin
+  `attune-help>=0.5.1,<0.6`. Those caps need to bump to
+  `<0.8` and attune-rag's `DirectoryCorpus` should be
+  pointed at `summaries_by_path.json` (new schema) so
+  the ~72-point P@1 lift actually reaches users. Tracked as
+  attune-rag 0.1.2.
+- The originally-planned 0.6.0 release (CLI + Beta
+  classifier only) was never published; its scope is
+  rolled forward into this 0.7.0 release.
 
 ## 0.5.1 — 2026-04-12
 
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "attune-help"
-version = "0.5.1"
+version = "0.7.0"
 description = "Lightweight help runtime with progressive depth and audience adaptation."
 readme = {file = "README.md", content-type = "text/markdown"}
 requires-python = ">=3.10"
 
@@ -0,0 +1,131 @@
+"""Benchmark every feature's fixture against the polished corpus.
+
+Runs the full fixture suite and reports:
+
+- Overall P@1 and R@3 across all features
+- Per-feature P@1 and R@3 with hit/miss counts
+- Features falling below a quality gate (default 60% P@1)
+
+Uses the ``summaries_by_path.json`` sidecar (not the legacy
+feature-keyed file) because that's the new 0.7.0 content.
+
+Usage::
+
+    uv run python scripts/benchmark_all_fixtures.py
+    uv run python scripts/benchmark_all_fixtures.py --gate 0.7
+
+Requires ``attune_rag`` + ``pyyaml`` installed.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+
+import yaml
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+_TEMPLATES_DIR = _REPO_ROOT / "src" / "attune_help" / "templates"
+_FIXTURES_DIR = _TEMPLATES_DIR / "fixtures"
+
+
+def _load_fixtures() -> list[dict]:
+    fixtures = []
+    for path in sorted(_FIXTURES_DIR.glob("*.yaml")):
+        data = yaml.safe_load(path.read_text(encoding="utf-8"))
+        data["_path"] = path
+        fixtures.append(data)
+    return fixtures
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--gate", type=float, default=0.60)
+    parser.add_argument(
+        "--summaries",
+        default="summaries_by_path.json",
+        help="Sidecar filename (default: summaries_by_path.json)",
+    )
+    args = parser.parse_args(argv)
+
+    try:
+        from attune_rag import DirectoryCorpus, KeywordRetriever, RagPipeline
+    except ImportError:
+        print("attune_rag not installed; run `uv pip install attune-rag`", file=sys.stderr)
+        return 2
+
+    corpus = DirectoryCorpus(
+        root=_TEMPLATES_DIR,
+        summaries_file=args.summaries,
+        cross_links_file="cross_links.json",
+    )
+    pipeline = RagPipeline(corpus=corpus, retriever=KeywordRetriever())
+
+    fixtures = _load_fixtures()
+    total_queries = 0
+    total_top1 = 0
+    total_top3 = 0
+    rows = []
+
+    for fix in fixtures:
+        feature = fix["feature"]
+        expected = set(fix["expected_in_top_3"])
+        queries = fix["queries"]
+        top1 = 0
+        top3 = 0
+        misses = []
+        for q in queries:
+            result = pipeline.run(q, k=3)
+            paths = [h.template_path for h in result.citation.hits]
+            if paths and paths[0] in expected:
+                top1 += 1
+            if set(paths) & expected:
+                top3 += 1
+            else:
+                misses.append((q, paths))
+        total_queries += len(queries)
+        total_top1 += top1
+        total_top3 += top3
+        rows.append(
+            {
+                "feature": feature,
+                "total": len(queries),
+                "top1": top1,
+                "top3": top3,
+                "p1": top1 / len(queries) if queries else 0.0,
+                "r3": top3 / len(queries) if queries else 0.0,
+                "misses": misses,
+            }
+        )
+
+    print(f"Corpus:     {args.summaries}")
+    print(
+        f"Entries with summary: "
+        f"{sum(1 for e in corpus.entries() if e.summary)}/"
+        f"{sum(1 for _ in corpus.entries())}"
+    )
+    print(f"\nOverall P@1: {total_top1}/{total_queries} ({total_top1/total_queries:.1%})")
+    print(f"Overall R@3: {total_top3}/{total_queries} ({total_top3/total_queries:.1%})")
+
+    print("\nPer-feature breakdown:")
+    print(f"  {'feature':<26} {'P@1':>8} {'R@3':>8}   misses")
+    rows.sort(key=lambda r: r["p1"])
+    below_gate = 0
+    for r in rows:
+        marker = " ✖" if r["p1"] < args.gate else "  "
+        if r["p1"] < args.gate:
+            below_gate += 1
+        print(
+            f"{marker}{r['feature']:<26} "
+            f"{r['p1']:>7.1%} {r['r3']:>7.1%}   "
+            f"{r['total']-r['top3']}/{r['total']}"
+        )
+
+    print(f"\nFeatures below {args.gate:.0%} P@1 gate: " f"{below_gate}/{len(rows)}")
+
+    return 1 if below_gate > 0 else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())