Skip to content

Commit 2a98175

Browse files
cdeustclaude
andcommitted
feat(wiki): plain-language top-level README generator
User directive: "readable by non tech while having all information needed for tech people." Adds a layered documentation surface: 1. /wiki/README.md — plain-language entry point (non-tech). 2. /wiki/.generated/INDEX.md — structured TOC (tech). 3. /wiki/<kind>/<domain>/<slug>.md — templated detail pages (tech). Non-tech readers land on README.md and see: * One-sentence description of what this wiki IS. * Page counts + number of categories + domains. * "Last groomed: <timestamp>" — builds trust that it's current. * Per-category sections with plain-language "read this when…" hints (e.g., "Architecture Decisions — Why we chose X over Y. Read these to understand WHY the system looks the way it does."). * Domains list sorted by page count. * Link down to INDEX.md for the full structured TOC. * A "For contributors" section explaining how the grooming works and how to opt a page out via `grooming: manual`. Design invariants (tests cover all): * Empty kinds are NOT rendered — no wall of empty sections on a fresh wiki. * Kind order is deterministic (follows PAGE_KINDS canonical order). * Same input produces same output (stable against reindex churn). * Pluralization correct: 0 pages, 1 page, 2 pages. * Domains section only appears when domain-scoped pages exist. * Custom project_name supported (for white-labelled deployments). Wiring in wiki_store._try_reindex: * Writes INDEX.md (existing behavior). * Writes README.md IF it doesn't exist OR contains the auto-generated marker. Hand-written READMEs are NEVER overwritten — users who customize stay customized. Tests: 15 for README generation + integration with the existing 32 wiki template/groomer tests = 47 wiki-layer tests total passing. Complements the cortex-wiki-groomer agent shipped in 2b6318e. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ae6f280 commit 2a98175

3 files changed

Lines changed: 369 additions & 1 deletion

File tree

mcp_server/core/wiki_readme.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
"""Wiki README generation — plain-language top-level entry point.
2+
3+
The wiki's technical content lives in `<kind>/<domain>/<slug>.md` files
4+
with templated front-matter + section structure — tech-ready, but dense.
5+
This module generates a top-level ``README.md`` that is readable by
6+
non-technical stakeholders:
7+
8+
* What the wiki IS (one paragraph, plain language).
9+
* What lives WHERE (kind-labelled sections with a 1-line "what it's
10+
for" summary, not "architecture decision record" jargon).
11+
* How to NAVIGATE (auto-generated table of contents + link to the
12+
detailed technical INDEX.md).
13+
* When it was last GROOMED (builds trust: "this is current").
14+
15+
Design principle: non-tech readers see plain language at the top;
16+
tech readers follow links down to the structured INDEX + per-page
17+
templates. No information is hidden from either audience — just
18+
presented at the right depth for each click.
19+
20+
Source: user directive "wiki generation, folder and file management,
21+
keep this tidy, in order, readable by non tech while having all
22+
information needed for tech people".
23+
"""
24+
25+
from __future__ import annotations
26+
27+
from collections import defaultdict
28+
from datetime import datetime, timezone
29+
from pathlib import PurePosixPath
30+
31+
from mcp_server.core.wiki_layout import PAGE_KINDS
32+
33+
# Non-tech label + one-line description per kind. The description is
34+
# what a first-time reader needs to know to decide "do I click here?".
35+
_KIND_PLAIN: dict[str, tuple[str, str]] = {
36+
"adr": (
37+
"Architecture Decisions",
38+
"Why we chose X over Y — the reasoning behind major design "
39+
"choices. Read these to understand WHY the system looks the "
40+
"way it does.",
41+
),
42+
"specs": (
43+
"Specifications & Designs",
44+
"What we plan to build before we build it — feature specs, "
45+
"design docs, PRDs. Read these to understand WHAT is coming.",
46+
),
47+
"guides": (
48+
"Guides & How-To",
49+
"Step-by-step instructions for common tasks. Read these when "
50+
"you want to DO something.",
51+
),
52+
"reference": (
53+
"Reference",
54+
"API signatures, configuration keys, protocol formats. Read "
55+
"these to LOOK UP an exact detail.",
56+
),
57+
"conventions": (
58+
"Conventions",
59+
"The rules the team follows (naming, style, contribution "
60+
"process). Read these before proposing changes.",
61+
),
62+
"lessons": (
63+
"Lessons Learned",
64+
"Mistakes, root causes, and rules we now follow to avoid "
65+
"repeating them. Read these to learn from past incidents.",
66+
),
67+
"notes": (
68+
"Notes & Investigations",
69+
"Work-in-progress thinking, exploratory analyses. Read these "
70+
"for context on ongoing work.",
71+
),
72+
"journal": (
73+
"Journal",
74+
"Time-stamped entries of events, experiments, and sessions. "
75+
"Read these for what happened and when.",
76+
),
77+
"files": (
78+
"File Documentation",
79+
"Per-source-file documentation (auto-generated from code "
80+
"analysis). Read these for a map of the codebase.",
81+
),
82+
}
83+
84+
85+
def _count_pages(page_paths: list[str]) -> dict[str, int]:
    """Tally pages by their top-level kind folder.

    The first path component of each wiki-relative path is treated as the
    page kind. Paths whose first component is not listed in ``PAGE_KINDS``
    (and empty strings) are ignored, so the result only ever contains
    kinds that own at least one page.
    """
    tally: dict[str, int] = {}
    for path in page_paths:
        kind = PurePosixPath(path).parts[0] if path else ""
        if kind not in PAGE_KINDS:
            continue
        tally[kind] = tally.get(kind, 0) + 1
    return tally
93+
94+
95+
def _count_by_domain(page_paths: list[str]) -> dict[str, int]:
96+
"""Count pages per domain, excluding the catch-all ``_general``."""
97+
counts: dict[str, int] = defaultdict(int)
98+
for p in page_paths:
99+
parts = PurePosixPath(p).parts
100+
if len(parts) < 3:
101+
continue # root-level or missing domain
102+
if parts[0] not in PAGE_KINDS:
103+
continue
104+
counts[parts[1]] += 1
105+
return dict(counts)
106+
107+
108+
def _pluralize(count: int, singular: str, plural: str | None = None) -> str:
    """Return ``"<count> <noun>"`` with the noun inflected for *count*.

    *plural* defaults to *singular* + ``"s"``; pass it explicitly for
    irregular nouns (``category`` → ``categories``).
    """
    if count == 1:
        return f"{count} {singular}"
    return f"{count} {plural if plural is not None else singular + 's'}"


def build_plain_readme(
    page_paths: list[str],
    *,
    project_name: str = "Cortex",
    generated_at: datetime | None = None,
) -> str:
    """Generate the top-level plain-language README.md for the wiki.

    Pure function — takes a list of wiki-relative page paths and returns
    Markdown. The caller is responsible for writing the result to
    ``<wiki_root>/README.md``.

    The output is stable (same input → same output, modulo the
    ``generated_at`` timestamp).

    Args:
        page_paths: Wiki-relative POSIX paths of every page. Empty strings
            and anything under ``.generated/`` are left out of the total.
        project_name: Name used in the top heading (``# <name> Wiki``).
        generated_at: Moment shown as "Last groomed". Defaults to the
            current UTC time. A timezone-aware value is converted to UTC
            before formatting; a naive value is assumed to already be UTC.

    Returns:
        The README body as a single newline-terminated Markdown string.
    """
    if generated_at is None:
        generated_at = datetime.now(timezone.utc)
    elif generated_at.utcoffset() is not None:
        # The rendered stamp carries a literal "UTC" suffix, so an aware
        # datetime in another zone must be normalised first.
        generated_at = generated_at.astimezone(timezone.utc)

    total = sum(1 for p in page_paths if p and not p.startswith(".generated/"))
    kind_counts = _count_pages(page_paths)
    domain_counts = _count_by_domain(page_paths)

    # Build the one-line summary with every noun inflected, so a wiki with
    # a single category or domain does not read "1 categories".
    summary = (
        f"There are currently **{_pluralize(total, 'page')}** "
        f"across **{_pluralize(len(kind_counts), 'category', 'categories')}**"
    )
    if domain_counts:
        summary += f" and **{_pluralize(len(domain_counts), 'domain')}**"
    summary += "."

    lines: list[str] = [
        f"# {project_name} Wiki",
        "",
        "This is your project's **living knowledge base** — "
        "decisions, plans, how-tos, and lessons, kept tidy automatically "
        "as the work happens.",
        "",
        summary,
        "",
        f"_Last groomed: {generated_at.strftime('%Y-%m-%d %H:%M UTC')}._",
        "",
    ]

    # --- What's here, by category ---
    lines.append("## What's here")
    lines.append("")
    # Iterate PAGE_KINDS (not the counts dict) so section order is canonical
    # and independent of the order pages were listed in.
    for kind in PAGE_KINDS:
        count = kind_counts.get(kind, 0)
        if count == 0:
            continue  # empty categories are not rendered
        # Fall back to a title-cased folder name when a kind has no
        # plain-language entry yet, instead of raising KeyError and losing
        # the whole README.
        label, description = _KIND_PLAIN.get(
            kind, (kind.replace("-", " ").replace("_", " ").title(), "")
        )
        lines.append(f"### {label} ({_pluralize(count, 'page')})")
        lines.append("")
        if description:
            lines.append(description)
            lines.append("")
        lines.append(f"→ Folder: [`{kind}/`](./{kind}/)")
        lines.append("")

    # --- Domains ---
    if domain_counts:
        lines.append("## Covered domains")
        lines.append("")
        lines.append(
            "Each domain is a distinct area of the project. Pages are "
            "filed under `<category>/<domain>/<page>.md`."
        )
        lines.append("")
        # Largest domains first; ties broken alphabetically for stable output.
        for domain, count in sorted(domain_counts.items(), key=lambda x: (-x[1], x[0])):
            lines.append(f"- **{domain}** — {_pluralize(count, 'page')}")
        lines.append("")

    # --- Navigation ---
    lines.append("## Go deeper")
    lines.append("")
    lines.append(
        "The full table of contents (grouped by domain and category) "
        "lives at [`.generated/INDEX.md`](./.generated/INDEX.md). "
        "It's rebuilt automatically on every wiki write."
    )
    lines.append("")
    lines.append(
        "Every page follows a consistent template per category — see "
        "the [conventions folder](./conventions/) (if present) for the rules."
    )
    lines.append("")

    # --- For tech readers ---
    lines.append("## For contributors")
    lines.append("")
    lines.append(
        "Pages are groomed by `cortex-wiki-groomer` (runs asynchronously "
        "during `cortex:consolidate`). The groomer:"
    )
    lines.append("")
    lines.append("- Preserves every paragraph you wrote (no information loss).")
    lines.append("- Fills missing front-matter from context (or marks `unknown`).")
    lines.append("- Enforces naming conventions (kebab-slugs, 4-digit ADR IDs).")
    lines.append("- Skips any page whose front-matter declares `grooming: manual`.")
    lines.append("")
    lines.append(
        "To opt a page out of grooming, add `grooming: manual` to its "
        "front-matter. Nothing you write by hand will ever be rewritten "
        "without your consent."
    )
    lines.append("")

    return "\n".join(lines) + "\n"

mcp_server/infrastructure/wiki_store.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,15 +248,43 @@ def list_pages(root: Path | str, *, kind: str | None = None) -> list[str]:
248248

249249

250250
def _try_reindex(root: Path) -> None:
    """Best-effort index rebuild after wiki write.

    Produces two artefacts:
      * ``<root>/.generated/INDEX.md`` — structured TOC grouped by
        domain then kind (for tech readers).
      * ``<root>/README.md`` — plain-language top-level entry point
        (for non-tech readers). Written only when no README exists yet
        or the existing one carries the auto-generated marker; a
        hand-written README is never overwritten.

    Afterwards ``cleanup_id_prefixed_pages`` is run on the wiki root.

    Any failure is swallowed: reindexing must never make the wiki write
    that triggered it fail.
    """
    try:
        from mcp_server.core.wiki_pages import build_index
        from mcp_server.core.wiki_readme import build_plain_readme

        page_paths = list_pages(root)
        index_md = build_index(page_paths)
        gen_dir = root / ".generated"
        gen_dir.mkdir(exist_ok=True)
        # Explicit UTF-8: the generated Markdown contains non-ASCII
        # characters, which the platform default encoding may not be able
        # to represent.
        (gen_dir / "INDEX.md").write_text(index_md, encoding="utf-8")

        # README: only overwrite if it doesn't exist OR it's
        # auto-generated (marker line in the body). Never clobber a
        # hand-written README.
        readme_path = root / "README.md"
        auto_marker = "<!-- cortex-wiki-readme: auto-generated -->"
        try:
            # errors="replace" keeps the ASCII marker detectable even if
            # the file is not valid UTF-8.
            existing = readme_path.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            should_write = True  # nothing there yet, safe to create
        except OSError:
            # Exists but cannot be read: we cannot prove it is ours, so
            # treat it as hand-written and leave it alone.
            should_write = False
        else:
            should_write = auto_marker in existing

        if should_write:
            readme_md = build_plain_readme(page_paths)
            readme_md += f"\n{auto_marker}\n"
            readme_path.write_text(readme_md, encoding="utf-8")

        cleanup_id_prefixed_pages(root)
    except Exception:
        # Deliberate best-effort: a failed reindex is not worth failing
        # the caller's write.
        pass

tests_py/core/test_wiki_readme.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Tests for wiki_readme — plain-language top-level README generation.
2+
3+
Source: user directive "readable by non tech while having all
4+
information needed for tech people".
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from datetime import datetime, timezone
10+
11+
import pytest
12+
13+
from mcp_server.core.wiki_readme import (
14+
_count_by_domain,
15+
_count_pages,
16+
build_plain_readme,
17+
)
18+
19+
20+
_FIXED_TIME = datetime(2026, 4, 17, 14, 30, tzinfo=timezone.utc)
21+
22+
23+
class TestCounts:
    """Checks for the per-kind and per-domain tally helpers."""

    def test_count_pages_by_kind(self):
        """Pages are bucketed by top-level folder; unknown folders drop out."""
        by_kind = _count_pages(
            [
                "adr/0001-foo.md",
                "adr/0002-bar.md",
                "specs/phase5.md",
                "notes/x.md",
                "alien/wrong.md",  # not a recognised kind, must be ignored
            ]
        )
        expected = {"adr": 2, "specs": 1, "notes": 1}
        assert by_kind == expected

    def test_count_by_domain_skips_root_level(self):
        """Only ``<kind>/<domain>/<page>`` paths contribute to a domain."""
        by_domain = _count_by_domain(
            [
                "adr/cortex/0001.md",
                "adr/cortex/0002.md",
                "specs/cortex/phase5.md",
                "adr/flat.md",  # sits directly under the kind folder
                "notes/other-domain/x.md",
            ]
        )
        assert by_domain["cortex"] == 3
        assert by_domain["other-domain"] == 1
        assert "flat.md" not in by_domain
46+
47+
48+
class TestReadmeStructure:
    """Structure and wording checks for the generated README body."""

    def test_readme_has_top_heading(self):
        out = build_plain_readme(["adr/0001-foo.md"], generated_at=_FIXED_TIME)
        assert "# Cortex Wiki" in out

    def test_custom_project_name(self):
        out = build_plain_readme([], project_name="MyProject", generated_at=_FIXED_TIME)
        assert "# MyProject Wiki" in out

    def test_plain_language_intro(self):
        """Non-tech readers see a living knowledge base pitch, not jargon."""
        out = build_plain_readme(["adr/0001-foo.md"], generated_at=_FIXED_TIME)
        assert "living knowledge base" in out
        # No jargon gate-keepers
        assert "architecture decision record" not in out.lower()
        assert "immutable" not in out.lower()

    def test_page_count_displayed(self):
        paths = ["adr/0001-foo.md", "adr/0002-bar.md", "specs/x.md"]
        out = build_plain_readme(paths, generated_at=_FIXED_TIME)
        assert "**3 pages**" in out

    def test_zero_pages_plural_grammar(self):
        """Zero takes the plural noun: "0 pages", never "0 page"."""
        out = build_plain_readme([], generated_at=_FIXED_TIME)
        assert "**0 pages**" in out

    def test_one_page_singular_grammar(self):
        out = build_plain_readme(["adr/0001-foo.md"], generated_at=_FIXED_TIME)
        # Anchor on the closing bold marker: a bare "1 page" substring
        # would also match the incorrect "1 pages".
        assert "**1 page**" in out
        assert "1 pages" not in out

    def test_timestamp_included(self):
        out = build_plain_readme([], generated_at=_FIXED_TIME)
        assert "2026-04-17 14:30 UTC" in out

    def test_links_to_detailed_index(self):
        out = build_plain_readme(["adr/0001-foo.md"], generated_at=_FIXED_TIME)
        assert ".generated/INDEX.md" in out

    def test_only_populated_kinds_rendered(self):
        """A category with 0 pages does NOT appear in the README — we
        don't want a wall of empty sections for a fresh wiki."""
        out = build_plain_readme(["adr/0001-foo.md"], generated_at=_FIXED_TIME)
        assert "Architecture Decisions" in out
        assert "Lessons Learned" not in out  # no lessons pages
        assert "File Documentation" not in out

    def test_domains_section_only_when_domains_present(self):
        # Flat paths (no domain) → no domains section
        out = build_plain_readme(["adr/flat.md"], generated_at=_FIXED_TIME)
        assert "Covered domains" not in out

        # Domain-scoped paths → section appears
        out = build_plain_readme(["adr/cortex/0001.md"], generated_at=_FIXED_TIME)
        assert "Covered domains" in out
        # Match the rendered bullet itself: the bare word "cortex" always
        # occurs in the contributor section ("cortex-wiki-groomer"), so a
        # plain substring check could never fail.
        assert "- **cortex** — 1 page" in out
104+
105+
106+
class TestStability:
    """The README must not churn when the inputs have not changed."""

    def test_same_input_same_output(self):
        """Two renders of identical input at a fixed time are byte-equal."""
        paths = ["adr/0001-foo.md", "specs/x.md"]
        first = build_plain_readme(paths, generated_at=_FIXED_TIME)
        second = build_plain_readme(paths, generated_at=_FIXED_TIME)
        assert first == second

    def test_kind_order_deterministic(self):
        """Kinds appear in PAGE_KINDS order, not dict-iteration order."""
        # Input deliberately lists specs first; adr must still lead.
        out = build_plain_readme(
            ["specs/x.md", "adr/0001-foo.md", "notes/y.md"],
            generated_at=_FIXED_TIME,
        )
        headings = (
            "Architecture Decisions",
            "Specifications & Designs",
            "Notes & Investigations",
        )
        adr_at, specs_at, notes_at = (out.index(h) for h in headings)
        assert adr_at < specs_at < notes_at
122+
123+
124+
class TestGroomerMention:
    """The contributor section must explain the grooming opt-out."""

    def test_readme_tells_users_about_manual_override(self):
        """Tech readers need to know how to opt a page out of grooming."""
        rendered = build_plain_readme([], generated_at=_FIXED_TIME)
        for phrase in ("grooming: manual", "without your consent"):
            assert phrase in rendered

0 commit comments

Comments
 (0)