Skip to content

Commit bb9dc91

Browse files
louis-pre and claude committed
Extend link and path validators to check HTML links and SUMMARY nesting
- Add HTML <a href> link checking to validate-links (both relative and absolute docs.seam.co URLs)
- Stop absolute URL pattern at quote characters for HTML compatibility
- Extend validate-paths to check that nested SUMMARY.md entries have file paths under their parent's directory, matching how GitBook builds published URLs from the tree structure

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c5e83c9 commit bb9dc91

2 files changed

Lines changed: 90 additions & 2 deletions

File tree

codegen/validate-links.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ function findSiteSection(filePath: string): SiteSection | undefined {
1313
}
1414

1515
const absoluteUrlPattern = new RegExp(
16-
`${baseUrl.replaceAll('.', '\\.')}[^)\\s]+`,
16+
`${baseUrl.replaceAll('.', '\\.')}[^)"\\s]+`,
1717
'g',
1818
)
1919

@@ -23,6 +23,15 @@ const absoluteUrlPattern = new RegExp(
2323
const relativeLinkPattern =
2424
/(?<!!)\]\((?!https?:\/\/|mailto:|#|{%|<|cursor:|file:)((?:[^)\\]|\\.)+)\)/g
2525

26+
// Matches HTML links like <a href="path"> but not external URLs
// (http/https), mailto: links, or same-page anchors (#...).
// Capture group 1 is the raw href value.
//
// Other attributes may appear before href (e.g. <a class="x" href="path">):
// the optional `(?:[^>]*?\s)?` skips them without leaving the opening tag,
// and the required whitespace right before `href` keeps attributes such as
// `data-href` from being mistaken for the link target.
const htmlRelativeLinkPattern =
  /<a\s(?:[^>]*?\s)?href="(?!https?:\/\/|mailto:|#)([^"]+)"/g
28+
29+
// Matches HTML links with absolute docs.seam.co URLs, i.e. <a ... href="...">
// whose href value starts with baseUrl (dots escaped so they match literally).
// Capture group 1 is the full URL up to, but not including, the closing quote.
//
// Other attributes may appear before href: the optional `(?:[^>]*?\s)?`
// skips them without leaving the opening tag, and the required whitespace
// right before `href` prevents matching attributes such as `data-href`.
const htmlAbsoluteUrlPattern = new RegExp(
  `<a\\s(?:[^>]*?\\s)?href="(${baseUrl.replaceAll('.', '\\.')}[^"]+)"`,
  'g',
)
34+
2635
interface BrokenLink {
2736
file: string
2837
line: number
@@ -160,6 +169,20 @@ for (const file of files) {
160169
if (rawLink == null) continue
161170
checkRelativeLink(file, i + 1, rawLink)
162171
}
172+
173+
// Check HTML href links (relative)
174+
for (const match of lineText.matchAll(htmlRelativeLinkPattern)) {
175+
const rawLink = match[1]
176+
if (rawLink == null) continue
177+
checkRelativeLink(file, i + 1, rawLink)
178+
}
179+
180+
// Check HTML href links (absolute docs.seam.co URLs)
181+
for (const match of lineText.matchAll(htmlAbsoluteUrlPattern)) {
182+
const rawUrl = match[1]
183+
if (rawUrl == null) continue
184+
checkAbsoluteUrl(file, i + 1, rawUrl)
185+
}
163186
}
164187
}
165188

codegen/validate-paths.ts

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ function slugify(heading: string): string {
1818
.trim()
1919
}
2020

21+
/**
 * Derives the final URL slug for a SUMMARY.md link path.
 *
 * A trailing `README.md` segment is dropped, so the slug is the name of the
 * containing directory; otherwise the `.md` extension is removed from the
 * last segment.
 *
 * @param linkPath - Link path as written in SUMMARY.md, e.g.
 *   `access_codes/simulate/README.md` or `guides/intro.md`.
 * @returns The last non-empty path segment after stripping, or `''` when
 *   nothing remains (e.g. for a top-level `README.md`).
 */
function slugFromPath(linkPath: string): string {
  return (
    linkPath
      // Only strip README.md when it is the whole final segment; a file such
      // as `NOT_README.md` is a regular page and must keep its full name.
      .replace(/(^|\/)README\.md$/, '$1')
      .replace(/\.md$/, '')
      .replace(/\/$/, '')
      .split('/')
      .filter(Boolean)
      .pop() ?? ''
  )
}
30+
2131
interface PathMismatch {
2232
section: string
2333
line: number
@@ -37,6 +47,11 @@ for (const section of siteSections) {
3747

3848
let currentGroup: string | null = null
3949

50+
// Track nesting: each entry is { level, slug, path }
51+
// GitBook builds URLs by joining ancestor slugs, so child file paths
52+
// should be under their parent's directory.
53+
const parentStack: Array<{ level: number; slug: string; path: string }> = []
54+
4055
for (let i = 0; i < lines.length; i++) {
4156
const lineText = lines[i]
4257
if (lineText == null) continue
@@ -48,7 +63,14 @@ for (const section of siteSections) {
4863
continue
4964
}
5065

51-
for (const match of lineText.matchAll(summaryLinkPattern)) {
66+
// Only process list items
67+
const stripped = lineText.trimStart()
68+
if (!stripped.startsWith('* [')) continue
69+
70+
const indent = lineText.length - stripped.length
71+
const level = Math.floor(indent / 2)
72+
73+
for (const match of stripped.matchAll(summaryLinkPattern)) {
5274
const title = match[1] ?? ''
5375
const linkPath = match[2] ?? ''
5476

@@ -79,6 +101,49 @@ for (const section of siteSections) {
79101
reason: `Path should start with "${currentGroup}/" (listed under "## ${currentGroup}")`,
80102
})
81103
}
104+
105+
// Check 3: nested items should have paths under their parent's directory.
106+
// GitBook builds published URLs from the SUMMARY.md tree, so a child
107+
// nested under a parent gets a URL like /parent-slug/child-slug.
108+
// The file path must match this structure.
109+
// Trim stack to current level
110+
while (parentStack.length > 0) {
111+
const top = parentStack[parentStack.length - 1]
112+
if (top != null && top.level >= level) parentStack.pop()
113+
else break
114+
}
115+
116+
const slug = slugFromPath(linkPath)
117+
const parent =
118+
parentStack.length > 0
119+
? parentStack[parentStack.length - 1]
120+
: undefined
121+
122+
if (parent != null) {
123+
// The parent's path determines the expected directory prefix.
124+
// e.g., parent path "access_codes/simulate/README.md" means
125+
// children should start with "access_codes/simulate/".
126+
const parentDir = parent.path
127+
.replace(/README\.md$/, '')
128+
.replace(/\.md$/, '/')
129+
if (!linkPath.startsWith(parentDir)) {
130+
const publishedUrl =
131+
section.urlPrefix +
132+
'/' +
133+
[...parentStack.map((p) => p.slug), slug].join('/')
134+
mismatches.push({
135+
section: section.name,
136+
line: i + 1,
137+
title,
138+
path: linkPath,
139+
reason: `Path should start with "${parentDir}" to match published URL ${publishedUrl}`,
140+
})
141+
}
142+
}
143+
144+
if (slug !== '') {
145+
parentStack.push({ level, slug, path: linkPath })
146+
}
82147
}
83148
}
84149
}

0 commit comments

Comments
 (0)