forked from zarr-developers/zarr-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_unlinked_types.py
More file actions
88 lines (67 loc) · 2.83 KB
/
check_unlinked_types.py
File metadata and controls
88 lines (67 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""Check for unlinked type annotations in built documentation.
mkdocstrings renders resolved types as <a href="..."> links and unresolved
types as <span title="fully.qualified.Name">Name</span> without an anchor.
This script finds all such unlinked types in the built HTML and reports them.
Usage:
python ci/check_unlinked_types.py [site_dir]
Raises ValueError if unlinked types are found.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
# Unlinked cross-references, as emitted by griffe/mkdocstrings, look like
#     <span class="n"><span title="fully.qualified.Name">Name</span></span>
# i.e. a titled span that is NOT wrapped in an <a href="..."> anchor.
# The pattern is written as adjacent raw-string pieces (outer span, titled
# inner span, displayed name, closing tags) that concatenate into one regex.
UNLINKED_PATTERN = re.compile(
    r'<span class="n">'
    r'<span title="(?P<qualname>[^"]+)">'
    r"(?P<name>[^<]+)"
    r"</span></span>"
)
# Qualified names matching ANY of these patterns are left out of the report.
# Each entry is searched (not full-matched) against the qualified name.
EXCLUDE_PATTERNS = [
    re.compile(expression)
    for expression in (
        # Names ending in a bracketed part, e.g. TypeVars / type parameters
        # such as Foo[T]
        r"\[.+\]$",
        # Names containing an opening paren (dataclass / namedtuple field
        # references)
        r"\(",
        # Private names: any dotted component starting with an underscore
        r"\._",
        # Dunder attributes at the end of the name
        r"\.__\w+__$",
        # Testing utilities under zarr.testing
        r"^zarr\.testing\.",
        # Types owned by other packages or the standard library
        r"^(hypothesis|pytest|typing_extensions|builtins|dataclasses)\.",
    )
]
def should_exclude(qualname: str) -> bool:
    """Return True when *qualname* matches at least one exclusion pattern.

    Every pattern in EXCLUDE_PATTERNS is searched against the qualified
    name; the first hit short-circuits the check.
    """
    for pattern in EXCLUDE_PATTERNS:
        if pattern.search(qualname) is not None:
            return True
    return False
def find_unlinked_types(site_dir: Path) -> dict[str, set[str]]:
    """Find all unlinked types in built HTML files.

    Recursively scans every ``*.html`` file under ``site_dir / "api"`` for
    matches of UNLINKED_PATTERN and keeps those whose qualified name is not
    rejected by should_exclude().

    Returns a dict mapping qualified type names to the set of pages where they
    appear. Pages are given as paths relative to ``site_dir``.

    Raises FileNotFoundError if ``site_dir / "api"`` does not exist.
    """
    api_dir = site_dir / "api"
    if not api_dir.exists():
        raise FileNotFoundError(f"{api_dir} does not exist. Run 'mkdocs build' first.")

    unlinked: dict[str, set[str]] = {}
    for html_file in api_dir.rglob("*.html"):
        # Decode explicitly as UTF-8: without an encoding argument read_text()
        # falls back to the platform locale encoding, which makes the scan
        # behave differently across machines. Undecodable bytes are still
        # replaced rather than raising, so one bad page cannot abort the run.
        content = html_file.read_text(encoding="utf-8", errors="replace")
        rel_path = str(html_file.relative_to(site_dir))
        for match in UNLINKED_PATTERN.finditer(content):
            qualname = match.group("qualname")
            if not should_exclude(qualname):
                unlinked.setdefault(qualname, set()).add(rel_path)
    return unlinked
def main() -> None:
    """Scan the built site and report any unlinked types.

    The site directory is the first command-line argument, falling back to
    "site" when none is given. Prints a confirmation when nothing is found;
    otherwise raises ValueError whose message is the full report: a header,
    each qualified name in sorted order followed by its sorted pages, and a
    closing total line.
    """
    cli_args = sys.argv[1:]
    site_dir = Path(cli_args[0] if cli_args else "site")
    found = find_unlinked_types(site_dir)

    if found:
        report = [f"Found {len(found)} unlinked types:\n"]
        for type_name in sorted(found):
            report.append(f" {type_name}")
            for page in sorted(found[type_name]):
                report.append(f" - {page}")
        # A page may host several unlinked types; count each page once.
        distinct_pages = set().union(*found.values())
        report.append(
            f"\nTotal: {len(found)} unlinked types across {len(distinct_pages)} pages"
        )
        raise ValueError("\n".join(report))

    print("No unlinked types found.")
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()