|
| 1 | +""" |
| 2 | +Documentation Link Validation Utility. |
| 3 | +
|
| 4 | +This module provides tools to scan Markdown files in the documentation directory |
| 5 | +and verify that all internal relative links point to valid existing files or |
| 6 | +directories containing index files. |
| 7 | +""" |
| 8 | + |
| 9 | +import os |
| 10 | +import re |
| 11 | +from pathlib import Path |
| 12 | + |
# Inline Markdown link or image; captures everything between the parentheses.
_LINK_RE = re.compile(r'\[.*?\]\((.*?)\)')
# A leading URL scheme ("https:", "mailto:", "tel:", ...) or a
# protocol-relative "//host/..." reference marks a link as external.
_EXTERNAL_RE = re.compile(r'^(?:[A-Za-z][A-Za-z0-9+.-]*:|//)')


def _link_destination(raw):
    """Return the destination of a raw link body, dropping any link title."""
    raw = raw.strip()
    if raw.startswith('<'):
        # Angle-bracket form may contain spaces: [text](<my file.md> "Title")
        return raw[1:].split('>', 1)[0]
    parts = raw.split(None, 1)
    return parts[0] if parts else ''


def check_links(docs_dir='docs'):
    """
    Recursively scan a documentation tree for broken relative Markdown links.

    Every ``*.md`` file below ``docs_dir`` is read as UTF-8 and each inline
    link or image destination is resolved relative to the file containing it.
    Links with a URL scheme (``https:``, ``mailto:``, ...), protocol-relative
    links (``//host/...``) and pure anchors (``#section``) are skipped.

    Identifies and reports (one line per problem, printed to stdout):
    - Files that do not exist (even with '.md' appended to the name)
    - Directories that do not contain an index.md or overview.md file

    Args:
        docs_dir: Root directory to scan. Defaults to 'docs', resolved
            against the current working directory.

    Returns:
        The number of broken links that were reported.
    """
    broken = 0
    # Sorted so the report order is stable from run to run.
    for md_file in sorted(Path(docs_dir).rglob('*.md')):
        content = md_file.read_text(encoding='utf-8')
        for link in _LINK_RE.findall(content):
            destination = _link_destination(link)
            if _EXTERNAL_RE.match(destination):
                continue

            # Clean link (remove anchors and queries)
            clean_link = destination.split('#')[0].split('?')[0]
            if not clean_link:
                continue

            # Resolve relative path
            target_path = (md_file.parent / clean_link).resolve()

            # A directory link is only valid if it has a landing page.
            if target_path.is_dir():
                has_landing_page = any(
                    (target_path / name).exists()
                    for name in ('index.md', 'overview.md')
                )
                if not has_landing_page:
                    print(f"BROKEN DIRECTORY LINK: {md_file}: {link} -> {target_path}")
                    broken += 1
            elif not target_path.exists():
                # Extension-less links may refer to '<name>.md'. Append the
                # suffix rather than replacing one: with_suffix() would turn
                # 'logo.png' into 'logo.md' and 'release-1.2' into
                # 'release-1.md'.
                md_candidate = target_path.with_name(target_path.name + '.md')
                if not md_candidate.exists():
                    print(f"BROKEN FILE LINK: {md_file}: {link}")
                    broken += 1
    return broken
| 42 | + |
if __name__ == "__main__":
    # Script entry point: run the scan only when this file is executed
    # directly, not when it is imported as a module.
    check_links()
0 commit comments