# ---------------------------------------------------------------------------
# Reconstructed from a git diff that adds two new files (both mode 100644).
#
# File 1 of 2: .github/workflows/docs.yml (index 0000000000..6d380e1304).
# It builds the documentation and then runs the script defined below:
#
#     name: Docs
#
#     on:
#       push:
#         branches: [main]
#       pull_request:
#         branches: [main]
#       workflow_dispatch:
#
#     permissions:
#       contents: read
#
#     concurrency:
#       group: ${{ github.workflow }}-${{ github.ref }}
#       cancel-in-progress: true
#
#     jobs:
#       docs:
#         name: Check docs
#         runs-on: ubuntu-latest
#         steps:
#           - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
#             with:
#               persist-credentials: false
#           - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0
#           - run: uv sync --group docs
#           - run: uv run mkdocs build
#             env:
#               DISABLE_MKDOCS_2_WARNING: "true"
#               NO_MKDOCS_2_WARNING: "true"
#           - run: uv run python ci/check_unlinked_types.py
#
# File 2 of 2: ci/check_unlinked_types.py (index 0000000000..3ccfaab397),
# which is the module that follows.
# ---------------------------------------------------------------------------
"""Check for unlinked type annotations in built documentation.

mkdocstrings renders resolved types as ``<a href="...">`` links and unresolved
types as ``<span title="fully.qualified.Name">Name</span>`` without an anchor.
This script finds all such unlinked types in the built HTML and reports them.

Usage:
    python ci/check_unlinked_types.py [site_dir]

``site_dir`` defaults to ``site`` when no argument is given.

Raises ValueError if unlinked types are found.
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Matches the griffe/mkdocstrings pattern for unlinked cross-references:
#   <span title="fully.qualified.Name">Name</span>
# ``qualname`` captures the title attribute (the dotted path that was not
# turned into a link); ``name`` captures the visible text of the span.
UNLINKED_PATTERN = re.compile(
    r'<span title="(?P<qualname>[^"]+)">(?P<name>[^<]+)</span>'
)

# Patterns to exclude from the report
EXCLUDE_PATTERNS = [
    # TypeVars and type parameters (single brackets like Foo[T])
    re.compile(r"\[.+\]$"),
    # Dataclass field / namedtuple field references (contain parens)
    re.compile(r"\("),
    # Private names
    re.compile(r"\._"),
    # Dunder attributes
    re.compile(r"\.__\w+__$"),
    # Testing utilities
    re.compile(r"^zarr\.testing\."),
    # Third-party types (hypothesis, pytest, etc.)
    re.compile(r"^(hypothesis|pytest|typing_extensions|builtins|dataclasses)\."),
]


def should_exclude(qualname: str) -> bool:
    """Return True if *qualname* matches any pattern in ``EXCLUDE_PATTERNS``."""
    return any(p.search(qualname) for p in EXCLUDE_PATTERNS)


def find_unlinked_types(site_dir: Path) -> dict[str, set[str]]:
    """Find all unlinked types in built HTML files.

    Only ``*.html`` files under ``site_dir / "api"`` are scanned (recursively).

    Returns a dict mapping qualified type names to the set of pages where they
    appear. Page paths are relative to *site_dir*. Names for which
    ``should_exclude`` returns True are left out.

    Raises FileNotFoundError if ``site_dir / "api"`` does not exist.
    """
    api_dir = site_dir / "api"
    if not api_dir.exists():
        raise FileNotFoundError(f"{api_dir} does not exist. Run 'mkdocs build' first.")

    unlinked: dict[str, set[str]] = {}
    for html_file in api_dir.rglob("*.html"):
        # Decode explicitly as UTF-8 rather than with the platform's locale
        # encoding, so the extracted names do not depend on where the check
        # runs. Undecodable bytes are replaced instead of aborting the scan.
        content = html_file.read_text(encoding="utf-8", errors="replace")
        rel_path = str(html_file.relative_to(site_dir))
        for match in UNLINKED_PATTERN.finditer(content):
            qualname = match.group("qualname")
            if not should_exclude(qualname):
                unlinked.setdefault(qualname, set()).add(rel_path)

    return unlinked


def _format_report(unlinked: dict[str, set[str]]) -> str:
    """Render the findings as text: one entry per name, then a total line."""
    lines = [f"Found {len(unlinked)} unlinked types:\n"]
    # Sort both names and pages so the report is stable between runs.
    for qualname in sorted(unlinked):
        lines.append(f" {qualname}")
        lines.extend(f" - {page}" for page in sorted(unlinked[qualname]))

    all_pages = {p for ps in unlinked.values() for p in ps}
    lines.append(f"\nTotal: {len(unlinked)} unlinked types across {len(all_pages)} pages")
    return "\n".join(lines)


def main() -> None:
    """Scan the built site and fail if any unlinked type is found.

    The site directory is taken from the first command-line argument and
    defaults to ``site``. Prints a confirmation and returns when nothing is
    found; otherwise raises ValueError carrying the full report.
    """
    site_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("site")
    unlinked = find_unlinked_types(site_dir)

    if not unlinked:
        print("No unlinked types found.")
        return

    raise ValueError(_format_report(unlinked))


if __name__ == "__main__":
    main()