Skip to content
This repository was archived by the owner on Mar 11, 2026. It is now read-only.

Commit 4b0e254

Browse files
committed
fix: validate and preserve wiki sync links
1 parent ee345f1 commit 4b0e254

3 files changed

Lines changed: 167 additions & 15 deletions

File tree

.github/workflows/sync-wiki.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,9 @@ jobs:
4040
- name: Run sync unit tests
4141
run: python -m unittest discover -s tests -p 'test_sync_docs_to_wiki.py' -v
4242

43+
- name: Validate internal doc links
44+
run: python scripts/sync_docs_to_wiki.py --source-root . --check-links-only
45+
4346
- name: Clone AstrBot wiki
4447
env:
4548
WIKI_TOKEN: ${{ secrets.ASTRBOT_WIKI_TOKEN }}

scripts/sync_docs_to_wiki.py

Lines changed: 88 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,11 @@
88

99

1010
TITLE_RE = re.compile(r"^#\s+(.+)$", re.MULTILINE)
11+
FENCED_BLOCK_RE = re.compile(
12+
r"(^```.*?$.*?^```$|^~~~.*?$.*?^~~~$)",
13+
re.MULTILINE | re.DOTALL,
14+
)
15+
INLINE_CODE_RE = re.compile(r"(`[^`]*`)")
1116
MANIFEST_NAME = ".astrbot-wiki-sync-manifest"
1217
SOURCE_ALIASES = {
1318
"zh/config/providers/start.md": "zh/providers/start.md",
@@ -317,27 +322,80 @@ def rewrite_link_target(target: str, source_path: str, resolver: LinkResolver) -
317322
return f"{page_name_for_source(resolved)}{anchor}"
318323

319324

320-
def rewrite_links(
321-
content: str,
325+
def rewrite_links_in_segment(
326+
segment: str,
322327
source_path: str,
323328
resolver: LinkResolver,
324329
) -> str:
325-
links = list(iter_markdown_links(content))
330+
links = list(iter_markdown_links(segment))
326331
if not links:
327-
return content
332+
return segment
328333

329334
result: list[str] = []
330335
previous_end = 0
331336
for link in links:
332-
result.append(content[previous_end : link.start])
337+
result.append(segment[previous_end : link.start])
333338
result.append(
334339
f"{link.prefix}{rewrite_link_target(link.target, source_path, resolver)}{link.suffix}",
335340
)
336341
previous_end = link.end
337-
result.append(content[previous_end:])
342+
result.append(segment[previous_end:])
338343
return "".join(result)
339344

340345

346+
def rewrite_links(
347+
content: str,
348+
source_path: str,
349+
resolver: LinkResolver,
350+
) -> str:
351+
parts: list[tuple[str, str]] = []
352+
last_end = 0
353+
354+
for fenced_match in FENCED_BLOCK_RE.finditer(content):
355+
before = content[last_end : fenced_match.start()]
356+
if before:
357+
parts.append(("text", before))
358+
parts.append(("code", fenced_match.group(0)))
359+
last_end = fenced_match.end()
360+
361+
tail = content[last_end:]
362+
if tail:
363+
parts.append(("text", tail))
364+
365+
output: list[str] = []
366+
for kind, chunk in parts:
367+
if kind == "code":
368+
output.append(chunk)
369+
continue
370+
371+
last_inline_end = 0
372+
for inline_match in INLINE_CODE_RE.finditer(chunk):
373+
before_inline = chunk[last_inline_end : inline_match.start()]
374+
if before_inline:
375+
output.append(
376+
rewrite_links_in_segment(
377+
before_inline,
378+
source_path=source_path,
379+
resolver=resolver,
380+
)
381+
)
382+
383+
output.append(inline_match.group(0))
384+
last_inline_end = inline_match.end()
385+
386+
after_inline = chunk[last_inline_end:]
387+
if after_inline:
388+
output.append(
389+
rewrite_links_in_segment(
390+
after_inline,
391+
source_path=source_path,
392+
resolver=resolver,
393+
)
394+
)
395+
396+
return "".join(output)
397+
398+
341399
def find_unresolved_doc_links(source_root: Path) -> list[str]:
342400
unresolved: list[str] = []
343401
root = Path(source_root)
@@ -362,6 +420,15 @@ def find_unresolved_doc_links(source_root: Path) -> list[str]:
362420
return unresolved
363421

364422

423+
def check_unresolved_doc_links(source_root: Path) -> None:
424+
unresolved = find_unresolved_doc_links(source_root)
425+
if not unresolved:
426+
return
427+
428+
issues = "\n".join(f"- {item}" for item in unresolved)
429+
raise ValueError(f"Unresolved internal doc links found:\n{issues}")
430+
431+
365432
def page_name_for_source(source_path: str) -> str:
366433
if not source_path.endswith(".md"):
367434
raise ValueError(f"Unsupported source path: {source_path}")
@@ -417,12 +484,6 @@ def build_home_page(language: str) -> str:
417484
return normalize_content("\n".join(lines))
418485

419486

420-
def sidebar_group_name(group: str) -> str:
421-
if group == "root":
422-
return "Top Level"
423-
return group.replace("-", " ")
424-
425-
426487
def build_sidebar(page_infos: list[PageInfo]) -> str:
427488
lines: list[str] = []
428489

@@ -449,7 +510,7 @@ def build_sidebar(page_infos: list[PageInfo]) -> str:
449510
grouped.setdefault(info.group, []).append(info)
450511

451512
for group_name in sorted(grouped):
452-
lines.append(f"- {sidebar_group_name(group_name)}")
513+
lines.append(f"- {group_name}")
453514
for info in grouped[group_name]:
454515
lines.append(f" - [{info.title}]({info.page_name})")
455516

@@ -469,7 +530,7 @@ def build_page_info(
469530

470531
relative = PurePosixPath(source_path)
471532
parts = relative.parts
472-
group = "root" if len(parts) <= 2 else parts[1]
533+
group = "Top Level" if len(parts) <= 2 else parts[1].replace("-", " ")
473534

474535
return PageInfo(
475536
source_path=source_path,
@@ -553,11 +614,23 @@ def main() -> int:
553614
)
554615
parser.add_argument(
555616
"--wiki-root",
556-
required=True,
557617
help="Path to the checked out wiki repository.",
558618
)
619+
parser.add_argument(
620+
"--check-links-only",
621+
action="store_true",
622+
help="Validate internal doc links without writing wiki files.",
623+
)
559624
args = parser.parse_args()
560625

626+
if not args.check_links_only and not args.wiki_root:
627+
parser.error("--wiki-root is required unless --check-links-only is set")
628+
629+
check_unresolved_doc_links(Path(args.source_root))
630+
631+
if args.check_links_only:
632+
return 0
633+
561634
sync_docs_to_wiki(
562635
source_root=Path(args.source_root), wiki_root=Path(args.wiki_root)
563636
)

tests/test_sync_docs_to_wiki.py

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,48 @@ def test_rewrite_links_leaves_local_asset_links_unchanged(self):
165165
content,
166166
)
167167

168+
def test_rewrite_links_skips_fenced_code_blocks(self):
169+
module = load_sync_module()
170+
171+
with TemporaryDirectory() as temp_dir:
172+
source_root = Path(temp_dir) / "docs"
173+
(source_root / "zh").mkdir(parents=True)
174+
(source_root / "zh" / "index.md").write_text("# Home\n", encoding="utf-8")
175+
(source_root / "zh" / "guide.md").write_text("# Guide\n", encoding="utf-8")
176+
resolver = module.LinkResolver(source_root)
177+
178+
content = "```md\n[Guide](/guide)\n```\n\nSee [Guide](/guide).\n"
179+
180+
self.assertEqual(
181+
module.rewrite_links(
182+
content,
183+
source_path="zh/index.md",
184+
resolver=resolver,
185+
),
186+
"```md\n[Guide](/guide)\n```\n\nSee [Guide](zh-guide).\n",
187+
)
188+
189+
def test_rewrite_links_skips_inline_code(self):
190+
module = load_sync_module()
191+
192+
with TemporaryDirectory() as temp_dir:
193+
source_root = Path(temp_dir) / "docs"
194+
(source_root / "zh").mkdir(parents=True)
195+
(source_root / "zh" / "index.md").write_text("# Home\n", encoding="utf-8")
196+
(source_root / "zh" / "guide.md").write_text("# Guide\n", encoding="utf-8")
197+
resolver = module.LinkResolver(source_root)
198+
199+
content = "Use `[Guide](/guide)` literally, then See [Guide](/guide).\n"
200+
201+
self.assertEqual(
202+
module.rewrite_links(
203+
content,
204+
source_path="zh/index.md",
205+
resolver=resolver,
206+
),
207+
"Use `[Guide](/guide)` literally, then See [Guide](zh-guide).\n",
208+
)
209+
168210
def test_link_resolver_resolves_source_paths(self):
169211
module = load_sync_module()
170212

@@ -266,6 +308,26 @@ def test_build_page_info_returns_page_info_dataclass(self):
266308
self.assertIsInstance(page_info, module.PageInfo)
267309
self.assertEqual(page_info.page_name, "zh-index")
268310

311+
def test_build_page_info_uses_display_ready_group(self):
312+
module = load_sync_module()
313+
314+
with TemporaryDirectory() as temp_dir:
315+
source_root = Path(temp_dir) / "docs"
316+
(source_root / "zh" / "agent-runners").mkdir(parents=True)
317+
(source_root / "zh" / "agent-runners" / "guide.md").write_text(
318+
"# Guide\n",
319+
encoding="utf-8",
320+
)
321+
322+
resolver = module.LinkResolver(source_root)
323+
page_info = module.build_page_info(
324+
source_root=source_root,
325+
source_path="zh/agent-runners/guide.md",
326+
resolver=resolver,
327+
)
328+
329+
self.assertEqual(page_info.group, "agent runners")
330+
269331
def test_sync_writes_pages_and_sidebar(self):
270332
module = load_sync_module()
271333

@@ -387,6 +449,20 @@ def test_live_docs_have_no_unresolved_internal_doc_links(self):
387449

388450
self.assertEqual(unresolved, [])
389451

452+
def test_check_unresolved_doc_links_raises_for_bad_docs(self):
453+
module = load_sync_module()
454+
455+
with TemporaryDirectory() as temp_dir:
456+
source_root = Path(temp_dir) / "docs"
457+
(source_root / "zh").mkdir(parents=True)
458+
(source_root / "zh" / "index.md").write_text(
459+
"See [Missing](/missing).\n",
460+
encoding="utf-8",
461+
)
462+
463+
with self.assertRaises(ValueError):
464+
module.check_unresolved_doc_links(source_root)
465+
390466

391467
if __name__ == "__main__":
392468
unittest.main()

0 commit comments

Comments
 (0)