|
| 1 | +"""MkDocs hooks for the HyperCache site. |
| 2 | +
|
| 3 | +Rewrites repo-relative links to source files (`../pkg/foo.go`, |
| 4 | +`../../hypercache.go`, etc.) into canonical GitHub URLs, so the same |
| 5 | +markdown source renders correctly both on github.com and on the |
| 6 | +GitHub Pages MkDocs build. |
| 7 | +
|
| 8 | +Without this, the operations runbook and the RFCs reference dozens |
| 9 | +of source files via paths like `../pkg/backend/dist_memory.go`. |
| 10 | +GitHub renders those as in-repo links; MkDocs's strict mode flags |
| 11 | +them as broken because `pkg/` is not part of the documentation |
| 12 | +tree. Rewriting them at build time keeps the source markdown |
| 13 | +GitHub-friendly while letting strict mode actually enforce |
| 14 | +docs-internal correctness. |
| 15 | +""" |
| 16 | + |
import os
import posixpath
import re
from typing import Any
| 20 | + |
# Base URL prepended to every rewritten link target.
GITHUB_REPO_BASE = "https://github.com/hyp3rd/hypercache/blob/main"

# Link targets ending in one of these strings are considered
# repository source rather than documentation, and are rewritten to
# GitHub URLs. ".md" is deliberately absent: doc-to-doc links must
# stay intra-site so MkDocs can validate them.
SOURCE_EXTENSIONS = (
    # Matched as file-name suffixes.
    ".go",
    ".yaml",
    ".yml",
    ".sh",
    ".rb",
    ".txt",
    ".dockerignore",
    ".gitignore",
    ".env",
    # Extension-less files, matched by the end of the path.
    "Dockerfile",
    "Makefile",
)

# Top-level repository directories that the docs point at for
# context (e.g. "see internal/cluster/"). A link whose repo-rooted
# path starts with one of these is rewritten to GitHub as well; the
# intent is that clicking takes the reader to that directory there.
SOURCE_DIR_PREFIXES = (
    "pkg/",
    "internal/",
    "cmd/",
    "chart/",
    "scripts/",
    "tests/",
    "__examples/",
    ".github/",
    "docker/",
    "_mkdocs/",
)

# Inline markdown link `[text](target)`: group 1 is the link text,
# group 2 is the raw target between the parentheses.
LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
| 58 | + |
| 59 | + |
def _is_source_link(target: str) -> bool:
    """Return True when the link target looks like a repo source ref
    rather than an in-tree docs link.

    A target qualifies when, after its `#fragment` and `?query` are
    removed, it either ends with one of `SOURCE_EXTENSIONS` or, once
    leading `..` segments are dropped, lives under one of
    `SOURCE_DIR_PREFIXES`.
    """
    # Fragment and query never change which file is being referenced,
    # so strip both before the extension/prefix checks.
    clean = target.split("#", 1)[0].split("?", 1)[0]

    # Source files by extension or basename.
    if clean.endswith(SOURCE_EXTENSIONS):
        return True

    # Directory references: discard empty and `.` segments, then the
    # leading `..` run, which collapses to the repo root for our
    # purposes.
    parts = [p for p in clean.split("/") if p and p != "."]
    while parts and parts[0] == "..":
        parts.pop(0)

    if not parts:
        return False

    # Splitting on "/" loses any trailing slash, while every prefix
    # ends in one. Re-append it so a reference to the directory
    # itself (`../pkg/` or `../pkg`) matches, not only paths beneath.
    repo_path = "/".join(parts) + "/"
    return repo_path.startswith(SOURCE_DIR_PREFIXES)
| 88 | + |
| 89 | + |
| 90 | +def _resolve_to_repo_root(page_src_path: str, target: str) -> str: |
| 91 | + """Translate a relative target into a repo-rooted path. |
| 92 | +
|
| 93 | + Page src_path is relative to docs/ (e.g. `rfcs/0001-foo.md`). |
| 94 | + Target is relative to the page (e.g. `../../pkg/foo.go`). The |
| 95 | + returned path is relative to the repo root. |
| 96 | + """ |
| 97 | + # `os.path.normpath` collapses `..` correctly; we anchor at |
| 98 | + # `docs/<page_dir>` and resolve from there. |
| 99 | + page_dir = os.path.dirname(page_src_path) |
| 100 | + docs_anchored = os.path.normpath(os.path.join("docs", page_dir, target)) |
| 101 | + |
| 102 | + # The result may still start with `../` if the relative target |
| 103 | + # walked above the repo root (it shouldn't in practice). Trim |
| 104 | + # any leading `../` defensively. |
| 105 | + while docs_anchored.startswith("../"): |
| 106 | + docs_anchored = docs_anchored[3:] |
| 107 | + |
| 108 | + return docs_anchored |
| 109 | + |
| 110 | + |
def on_page_markdown(markdown: str, page: Any, **kwargs: Any) -> str:
    """Rewrite source-code links on every page before MkDocs renders it.

    Args:
        markdown: Raw markdown text of the page.
        page: Page object; only `page.file.src_path` is read.
        **kwargs: Additional keyword arguments from the caller; unused.

    Returns:
        The markdown with every inline link that `_is_source_link`
        accepts replaced by an absolute URL under `GITHUB_REPO_BASE`.
        All other links are returned unchanged.
    """
    page_src = page.file.src_path

    # Any `scheme:` prefix (http, https, mailto, ftp, tel, ...) marks a
    # target that is already absolute and must not be resolved as a path.
    has_scheme = re.compile(r"[A-Za-z][A-Za-z0-9+.\-]*:").match

    def replace(match: re.Match[str]) -> str:
        link_text = match.group(1)
        link_target = match.group(2)

        # Absolute URLs, protocol-relative URLs, and pure anchors
        # stay as-is.
        if link_target.startswith(("#", "//")) or has_scheme(link_target):
            return match.group(0)

        if not _is_source_link(link_target):
            return match.group(0)

        # Resolve only the path portion, then re-attach the anchor
        # verbatim (e.g. line ranges like `pkg/foo.go#L34-L58`), so the
        # anchor never passes through path normalisation.
        path_part, hash_sep, fragment = link_target.partition("#")
        repo_path = _resolve_to_repo_root(page_src, path_part)

        return f"[{link_text}]({GITHUB_REPO_BASE}/{repo_path}{hash_sep}{fragment})"

    return LINK_RE.sub(replace, markdown)
0 commit comments