Skip to content

Commit ce39ea0

Browse files
committed
ci: dedupe workflow and harden doc validation
1 parent 33a82db commit ce39ea0

6 files changed

Lines changed: 345 additions & 98 deletions

File tree

.github/workflows/cpp-build.yml

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,6 @@ jobs:
3131
sudo apt update
3232
sudo apt install -y g++
3333
34-
- name: Check markdown links
35-
run: bash ./scripts/check-links.sh
36-
37-
- name: Check README structure
38-
run: bash ./scripts/check-readme-structure.sh
39-
40-
- name: Check module completeness
41-
run: bash ./scripts/check-module-completeness.sh
42-
43-
- name: Compile all C++ files
44-
run: bash ./scripts/build-all.sh
45-
4634
- name: Verify repository
4735
run: bash ./scripts/verify-repo.sh
4836

@@ -88,22 +76,6 @@ jobs:
8876
shell: pwsh
8977
run: g++ --version
9078

91-
- name: Check markdown links
92-
shell: pwsh
93-
run: powershell -ExecutionPolicy Bypass -File scripts\check-links.ps1
94-
95-
- name: Check README structure
96-
shell: pwsh
97-
run: powershell -ExecutionPolicy Bypass -File scripts\check-readme-structure.ps1
98-
99-
- name: Check module completeness
100-
shell: pwsh
101-
run: powershell -ExecutionPolicy Bypass -File scripts\check-module-completeness.ps1
102-
103-
- name: Compile all C++ files
104-
shell: pwsh
105-
run: powershell -ExecutionPolicy Bypass -File scripts\build-all.ps1
106-
10779
- name: Verify repository
10880
shell: pwsh
10981
run: powershell -ExecutionPolicy Bypass -File scripts\verify-repo.ps1

scripts/check-links.ps1

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
# Fail fast on any PowerShell error.
$ErrorActionPreference = "Stop"

# Run from the repository root (this script lives in scripts/).
$root = Split-Path -Parent $PSScriptRoot
Set-Location $root

# Locate a Python interpreter: prefer `python`, fall back to `python3`.
$pythonCmd = Get-Command python -ErrorAction SilentlyContinue
if (-not $pythonCmd) {
    $pythonCmd = Get-Command python3 -ErrorAction SilentlyContinue
}

if (-not $pythonCmd) {
    Write-Host "Python was not found in PATH."
    exit 1
}

# Delegate the actual link checking to the cross-platform Python
# implementation and propagate its exit code to the CI job.
& $pythonCmd.Path "$PSScriptRoot\check-links.py"
if ($LASTEXITCODE -ne 0) {
    exit $LASTEXITCODE
}

scripts/check-links.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#!/usr/bin/env python3
2+
from __future__ import annotations
3+
4+
import re
5+
import sys
6+
from pathlib import Path
7+
from urllib.parse import unquote
8+
9+
ROOT_DIR = Path(__file__).resolve().parents[1]
10+
FENCE_RE = re.compile(r"^\s{0,3}(`{3,}|~{3,})")
11+
REFERENCE_RE = re.compile(r"^\s{0,3}\[([^\]]+)\]:\s*(.+?)\s*$")
12+
URI_SCHEME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9+.-]*:")
13+
14+
15+
def normalize_reference_label(label: str) -> str:
    """Canonicalize a reference-link label.

    Collapses runs of whitespace to single spaces, trims the ends, and
    lowercases, so lookups match how the definitions dict is keyed.
    """
    collapsed = " ".join(label.split())
    return collapsed.lower()
17+
18+
19+
def strip_fenced_code_blocks(text: str) -> str:
    """Remove fenced code blocks (``` or ~~~) from *text*.

    Example links inside code fences should not be validated; the fence
    marker lines themselves are removed along with the fenced content.

    NOTE(review): the closing-fence check accepts any indentation via
    lstrip(), slightly looser than CommonMark's 0-3 spaces — confirm
    this is acceptable.
    """
    kept: list[str] = []
    open_marker: str | None = None  # e.g. "```" while inside a fence

    for line in text.splitlines(keepends=True):
        if open_marker is None:
            fence = FENCE_RE.match(line)
            if fence is None:
                kept.append(line)
            else:
                open_marker = fence.group(1)
            continue
        # Inside a fence: swallow the line; a run of the same fence
        # character at least as long as the opener closes the block.
        if line.lstrip().startswith(open_marker[0] * len(open_marker)):
            open_marker = None

    return "".join(kept)
44+
45+
46+
def parse_reference_destination(raw: str) -> str:
    """Extract the destination from a reference definition's right side.

    Honors <angle-bracket> wrapping (which allows spaces); otherwise
    takes the first whitespace-delimited token, ignoring any title text.
    Returns "" for blank input.
    """
    candidate = raw.strip()
    if not candidate:
        return ""
    if candidate[0] == "<":
        closing = candidate.find(">")
        if closing != -1:
            return candidate[1:closing].strip()
    # No (closed) angle brackets: the destination is the first token.
    return candidate.split()[0].strip()
55+
56+
57+
def parse_reference_definitions(text: str) -> dict[str, str]:
    """Collect `[label]: destination` definitions from *text*.

    Keys are normalized labels; definitions with an empty destination
    are dropped.  A repeated label overwrites the earlier entry.
    """
    definitions: dict[str, str] = {}
    for raw_line in text.splitlines():
        parsed = REFERENCE_RE.match(raw_line)
        if parsed is None:
            continue
        destination = parse_reference_destination(parsed.group(2))
        if not destination:
            continue
        definitions[normalize_reference_label(parsed.group(1))] = destination
    return definitions
68+
69+
70+
def read_bracket_content(text: str, start: int) -> tuple[int | None, str]:
71+
if start >= len(text) or text[start] != "[":
72+
return None, ""
73+
depth = 1
74+
cursor = start + 1
75+
while cursor < len(text):
76+
char = text[cursor]
77+
if char == "\\":
78+
cursor += 2
79+
continue
80+
if char == "[":
81+
depth += 1
82+
elif char == "]":
83+
depth -= 1
84+
if depth == 0:
85+
return cursor, text[start + 1 : cursor]
86+
cursor += 1
87+
return None, ""
88+
89+
90+
def parse_optional_title_and_close(text: str, cursor: int) -> int | None:
91+
while cursor < len(text) and text[cursor].isspace():
92+
cursor += 1
93+
if cursor >= len(text):
94+
return None
95+
if text[cursor] == ")":
96+
return cursor + 1
97+
98+
if text[cursor] in "\"'":
99+
quote = text[cursor]
100+
cursor += 1
101+
while cursor < len(text):
102+
if text[cursor] == "\\":
103+
cursor += 2
104+
continue
105+
if text[cursor] == quote:
106+
cursor += 1
107+
break
108+
cursor += 1
109+
elif text[cursor] == "(":
110+
depth = 1
111+
cursor += 1
112+
while cursor < len(text) and depth > 0:
113+
if text[cursor] == "\\":
114+
cursor += 2
115+
continue
116+
if text[cursor] == "(":
117+
depth += 1
118+
elif text[cursor] == ")":
119+
depth -= 1
120+
cursor += 1
121+
else:
122+
return None
123+
124+
while cursor < len(text) and text[cursor].isspace():
125+
cursor += 1
126+
if cursor < len(text) and text[cursor] == ")":
127+
return cursor + 1
128+
return None
129+
130+
131+
def parse_inline_destination(text: str, cursor: int) -> tuple[str | None, int | None]:
    """Parse the destination of an inline link `(dest "title")`.

    Called with *cursor* just past the opening '('.  Returns
    (destination, index_after_closing_paren); either element is None
    when parsing fails.
    """
    length = len(text)
    while cursor < length and text[cursor].isspace():
        cursor += 1
    if cursor >= length:
        return None, None

    if text[cursor] == "<":
        # <angle-bracketed> destination: may contain spaces.
        closing = text.find(">", cursor + 1)
        if closing == -1:
            return None, None
        dest = text[cursor + 1 : closing].strip()
        return dest, parse_optional_title_and_close(text, closing + 1)

    # Bare destination: runs until an unbalanced ')' or top-level
    # whitespace; backslash escapes and balanced parens are allowed.
    begin = cursor
    nesting = 0
    while cursor < length:
        current = text[cursor]
        if current == "\\":
            cursor += 2
            continue
        if current == "(":
            nesting += 1
        elif current == ")":
            if nesting == 0:
                break
            nesting -= 1
        elif current.isspace() and nesting == 0:
            break
        cursor += 1

    dest = text[begin:cursor].strip()
    return dest, parse_optional_title_and_close(text, cursor)
169+
170+
171+
def extract_links(text: str) -> tuple[list[tuple[str, str]], dict[str, str]]:
    """Find markdown links in *text*.

    Returns (links, definitions): each link is ("inline", destination)
    or ("reference", label); definitions maps normalized reference
    labels to destinations.  Fenced code blocks and inline code spans
    are skipped so example links are not validated.

    NOTE(review): shortcut references (`[label]` with no following
    `(...)` or `[...]`) are not collected — confirm this is intended.
    """
    body = strip_fenced_code_blocks(text)
    definitions = parse_reference_definitions(body)
    links: list[tuple[str, str]] = []

    cursor = 0
    while cursor < len(body):
        char = body[cursor]

        if char == "`":
            # Inline code span: skip to the matching run of backticks.
            tick_count = 1
            while cursor + tick_count < len(body) and body[cursor + tick_count] == "`":
                tick_count += 1
            close = body.find("`" * tick_count, cursor + tick_count)
            if close == -1:
                # NOTE(review): an unmatched backtick run aborts the
                # whole scan, so links after it are ignored — confirm
                # this is the intended behavior.
                break
            cursor = close + tick_count
            continue

        if char == "!" and cursor + 1 < len(body) and body[cursor + 1] == "[":
            # Image syntax ![alt](...) — handled like a normal link.
            cursor += 1
            char = "["

        if char != "[":
            cursor += 1
            continue

        label_end, label = read_bracket_content(body, cursor)
        if label_end is None:
            # Unbalanced '[': treat it as literal text and move on.
            cursor += 1
            continue

        # Look past whitespace for the link's second part.
        next_cursor = label_end + 1
        while next_cursor < len(body) and body[next_cursor].isspace():
            next_cursor += 1

        if next_cursor < len(body) and body[next_cursor] == "(":
            # Inline link: [label](destination "title")
            destination, close_index = parse_inline_destination(body, next_cursor + 1)
            if destination is not None and close_index is not None:
                links.append(("inline", destination))
                cursor = close_index
                continue

        if next_cursor < len(body) and body[next_cursor] == "[":
            # Reference link: [label][ref]; an empty ref is the
            # collapsed form, which reuses the label itself.
            ref_end, ref = read_bracket_content(body, next_cursor)
            if ref_end is not None:
                ref_label = ref if ref.strip() else label
                links.append(("reference", ref_label))
                cursor = ref_end + 1
                continue

        cursor = label_end + 1

    return links, definitions
225+
226+
227+
def should_skip_destination(destination: str) -> bool:
    """Return True for targets that cannot be checked on the filesystem.

    Skipped: empty destinations, in-page anchors (#...),
    protocol-relative URLs (//...), and anything carrying a URI scheme
    (https:, mailto:, ...).
    """
    if not destination:
        return True
    if destination.startswith(("#", "//")):
        return True
    # RFC 3986 scheme: a letter followed by letters/digits/+/./-, then ':'.
    return re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*:", destination) is not None
237+
238+
239+
def resolve_candidate_path(file_path: Path, destination: str) -> Path:
    """Map a markdown destination onto the filesystem path it targets.

    Relative links resolve against the containing file's directory;
    root-relative `/...` links resolve against the repository root.
    """
    target = destination.strip()
    if target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()
    # Drop fragment and query parts; they never affect the file path.
    target = target.split("#", 1)[0].split("?", 1)[0].strip()
    target = unquote(target)
    # Undo the markdown escapes that commonly appear in link paths.
    for escaped, literal in (("\\(", "("), ("\\)", ")"), ("\\ ", " ")):
        target = target.replace(escaped, literal)
    if target.startswith("/"):
        return ROOT_DIR / target.lstrip("/")
    return file_path.parent / target
249+
250+
251+
def check_file(file_path: Path) -> list[str]:
    """Validate every local link in one markdown file.

    Returns human-readable failure strings (repo-relative file plus the
    offending target); an empty list means every link resolved.
    """
    text = file_path.read_text(encoding="utf-8", errors="replace")
    links, definitions = extract_links(text)
    relative_file = file_path.relative_to(ROOT_DIR).as_posix()
    failures: list[str] = []

    for link_type, raw_target in links:
        if link_type == "reference":
            resolved = definitions.get(normalize_reference_label(raw_target))
            if resolved is None:
                failures.append(f"{relative_file}: unresolved reference [{raw_target}]")
                continue
            target = resolved
        else:
            target = raw_target

        target = target.strip()
        if should_skip_destination(target):
            continue
        if not resolve_candidate_path(file_path, target).exists():
            failures.append(f"{relative_file}: {target}")

    return failures
276+
277+
278+
def main() -> int:
    """Check every markdown file under the repository root.

    Prints a summary and returns 0 when all links resolve, 1 otherwise.
    """
    failures: list[str] = []
    for md_path in sorted(ROOT_DIR.rglob("*.md")):
        failures.extend(check_file(md_path))

    if not failures:
        print("No broken markdown links found.")
        return 0

    print("Broken markdown links found:")
    for failure in failures:
        print(f" - {failure}")
    return 1
293+
294+
295+
# Script entry point: exit status mirrors main()'s return value
# (0 when every link resolves, 1 otherwise).
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)