|
| 1 | +"""Check for unlinked type annotations in built documentation. |
| 2 | +
|
| 3 | +mkdocstrings renders resolved types as <a href="..."> links and unresolved |
| 4 | +types as <span title="fully.qualified.Name">Name</span> without an anchor. |
| 5 | +This script finds all such unlinked types in the built HTML and reports them. |
| 6 | +
|
| 7 | +Usage: |
| 8 | + python ci/check_unlinked_types.py [site_dir] |
| 9 | +
|
| 10 | +Raises ValueError if unlinked types are found. |
| 11 | +""" |
| 12 | + |
| 13 | +from __future__ import annotations |
| 14 | + |
| 15 | +import re |
| 16 | +import sys |
| 17 | +from pathlib import Path |
| 18 | + |
# A cross-reference that was not resolved shows up in the built HTML as a
# name token wrapping a titled span, with no <a> element around it:
#   <span class="n"><span title="fully.qualified.Name">Name</span></span>
# The adjacent literals below concatenate into a single pattern. The
# "qualname" group captures the title attribute and "name" the visible text.
UNLINKED_PATTERN = re.compile(
    r'<span class="n">'
    r'<span title="(?P<qualname>[^"]+)">'
    r"(?P<name>[^<]+)"
    r"</span></span>"
)
| 24 | + |
# Qualified names matching any of these patterns are left out of the report.
# Each source string is compiled once at import time; order is not significant
# because a single match is enough to exclude a name.
EXCLUDE_PATTERNS = [
    re.compile(source)
    for source in (
        # Names ending in a subscript, e.g. Foo[T] (TypeVars / type parameters)
        r"\[.+\]$",
        # Dataclass field / namedtuple field references (contain parens)
        r"\(",
        # Private names: any dotted component starting with an underscore
        # (this also matches dunder components such as ``.__init__``)
        r"\._",
        # Dunder attributes at the end of the name (kept explicit for clarity)
        r"\.__\w+__$",
        # Testing utilities
        r"^zarr\.testing\.",
        # Types from outside the project: third-party packages (hypothesis,
        # pytest, typing_extensions) and stdlib modules (builtins, dataclasses)
        r"^(hypothesis|pytest|typing_extensions|builtins|dataclasses)\.",
    )
]
| 40 | + |
| 41 | + |
def should_exclude(qualname: str) -> bool:
    """Return True if ``qualname`` matches at least one exclusion pattern.

    Patterns are applied with ``search``, so a pattern without anchors may
    match anywhere inside the qualified name.
    """
    for pattern in EXCLUDE_PATTERNS:
        if pattern.search(qualname):
            return True
    return False
| 44 | + |
| 45 | + |
def find_unlinked_types(site_dir: Path) -> dict[str, set[str]]:
    """Find all unlinked types in the built HTML files under ``site_dir / "api"``.

    Parameters
    ----------
    site_dir
        Root directory of the built documentation site.

    Returns
    -------
    dict[str, set[str]]
        Maps each qualified type name to the set of pages (paths relative to
        ``site_dir``) where it appears unlinked. Names for which
        ``should_exclude`` returns True are omitted.

    Raises
    ------
    FileNotFoundError
        If ``site_dir / "api"`` does not exist.
    """
    api_dir = site_dir / "api"
    if not api_dir.exists():
        raise FileNotFoundError(f"{api_dir} does not exist. Run 'mkdocs build' first.")

    unlinked: dict[str, set[str]] = {}
    for html_file in api_dir.rglob("*.html"):
        # Decode explicitly as UTF-8 (the encoding the built pages are expected
        # to use) rather than the locale default, so results do not vary by
        # machine. Undecodable bytes are replaced instead of aborting the scan.
        content = html_file.read_text(encoding="utf-8", errors="replace")
        rel_path = str(html_file.relative_to(site_dir))
        for match in UNLINKED_PATTERN.finditer(content):
            qualname = match.group("qualname")
            if not should_exclude(qualname):
                unlinked.setdefault(qualname, set()).add(rel_path)

    return unlinked
| 65 | + |
| 66 | + |
def main() -> None:
    """Scan the built site and fail if any unlinked types remain.

    The site directory is read from the first command-line argument and
    defaults to ``site``. When nothing is found a confirmation is printed;
    otherwise a ``ValueError`` carrying the full report is raised.
    """
    if len(sys.argv) > 1:
        site_dir = Path(sys.argv[1])
    else:
        site_dir = Path("site")

    unlinked = find_unlinked_types(site_dir)
    if not unlinked:
        print("No unlinked types found.")
        return

    count = len(unlinked)
    report_lines = [f"Found {count} unlinked types:\n"]
    affected_pages: set[str] = set()
    # Keys are unique, so sorting the items orders entries by qualified name.
    for qualname, pages in sorted(unlinked.items()):
        report_lines.append(f" {qualname}")
        report_lines.extend(f" - {page}" for page in sorted(pages))
        affected_pages.update(pages)

    report_lines.append(
        f"\nTotal: {count} unlinked types across {len(affected_pages)} pages"
    )
    raise ValueError("\n".join(report_lines))
| 85 | + |
| 86 | + |
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments