Skip to content

Commit 3b66e1a

Browse files
louis-preclaude
andauthored
ci: Add path and link validators (#1070)
* Add cross-site link validator Scans all markdown files for broken links: - Absolute docs.seam.co URLs: checks target exists in correct site section - Relative links: resolves from file location and checks target exists Skips images, anchors, GitBook templates, asset references, and file:// URIs. Groups output by broken target for readability. Run with: npm run validate-links Runs in CI as part of the Generate workflow after codegen. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Add SUMMARY.md path validator Validates that file paths in SUMMARY.md are consistent with their section group heading. For example, a file listed under "## Developer Tools" must have a path starting with "developer-tools/". Usage: npm run validate-paths Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ci: Add validate-paths step to generate workflow Runs the SUMMARY.md path validator after codegen, before link validation. Ensures file paths stay in sync with their section headings. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Centralize site section config for both validators Move site section definitions (name, root, urlPrefix) into config.ts as the single source of truth. Both validate-paths and validate-links now read from the shared config instead of defining their own lists. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix eslint no-non-null-assertion errors in config.ts Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix site section ordering so /api URLs resolve correctly The catch-all Guides section (urlPrefix: '') was listed first, causing all /api/... URLs to match Guides instead of API Reference. Reorder so more-specific prefixes come first. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 50425e6 commit 3b66e1a

5 files changed

Lines changed: 320 additions & 7 deletions

File tree

.github/workflows/generate.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,31 @@ jobs:
6666
commit_user_name: ${{ secrets.GIT_USER_NAME }}
6767
commit_user_email: ${{ secrets.GIT_USER_EMAIL }}
6868
commit_author: ${{ secrets.GIT_USER_NAME }} <${{ secrets.GIT_USER_EMAIL }}>
69+
validate-paths:
70+
name: Validate paths
71+
needs: commit
72+
runs-on: ubuntu-latest
73+
timeout-minutes: 30
74+
steps:
75+
- name: Checkout
76+
uses: actions/checkout@v4
77+
with:
78+
ref: ${{ github.head_ref }}
79+
- name: Setup
80+
uses: ./.github/actions/setup
81+
- name: Validate SUMMARY.md paths
82+
run: npm run validate-paths
83+
validate-links:
84+
name: Validate links
85+
needs: commit
86+
runs-on: ubuntu-latest
87+
timeout-minutes: 30
88+
steps:
89+
- name: Checkout
90+
uses: actions/checkout@v4
91+
with:
92+
ref: ${{ github.head_ref }}
93+
- name: Setup
94+
uses: ./.github/actions/setup
95+
- name: Validate cross-site links
96+
run: npm run validate-links

codegen/lib/config.ts

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,43 @@
11
import { join } from 'node:path'
22

3-
// Site section roots (paths from repo root)
4-
export const apiReferenceRoot = join('docs', 'api-reference')
5-
export const guidesRoot = join('docs', 'guides')
6-
7-
// The published URL prefix for the API Reference site section on docs.seam.co.
8-
export const apiReferenceUrlPrefix = '/api'
9-
103
// The base URL for the published documentation site.
114
export const baseUrl = 'https://docs.seam.co/latest/'
125

136
// The URL prefix to strip when resolving absolute URLs to file paths.
147
export const siteUrlPrefix = '/latest'
158

9+
// Each site section is a GitBook site section with its own SUMMARY.md.
10+
export interface SiteSection {
11+
// Display name for logging/errors.
12+
name: string
13+
// Path from repo root to the section's content directory.
14+
root: string
15+
// Published URL prefix (e.g., '/api'). Empty string for the default section.
16+
urlPrefix: string
17+
}
18+
19+
// Order matters: more-specific prefixes must come first so that URL matching
20+
// picks the longest prefix (e.g., '/api' before the catch-all '').
21+
export const siteSections: SiteSection[] = [
22+
{
23+
name: 'API Reference',
24+
root: join('docs', 'api-reference'),
25+
urlPrefix: '/api',
26+
},
27+
{ name: 'Guides', root: join('docs', 'guides'), urlPrefix: '' },
28+
]
29+
30+
// Convenience accessors (used by codegen)
31+
const guides = siteSections.find((s) => s.name === 'Guides')
32+
const apiReference = siteSections.find((s) => s.name === 'API Reference')
33+
34+
if (guides == null || apiReference == null) {
35+
throw new Error('Missing required site section in config')
36+
}
37+
38+
export const guidesRoot = guides.root
39+
export const apiReferenceRoot = apiReference.root
40+
export const apiReferenceUrlPrefix = apiReference.urlPrefix
41+
1642
// Derived paths
1743
export const apiReferenceSummaryPath = join(apiReferenceRoot, 'SUMMARY.md')

codegen/validate-links.ts

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import { existsSync, readdirSync, readFileSync } from 'node:fs'
2+
import { dirname, join, resolve } from 'node:path'
3+
4+
import { baseUrl, siteSections, siteUrlPrefix } from './lib/config.js'
5+
6+
const absoluteUrlPattern = new RegExp(
7+
`${baseUrl.replaceAll('.', '\\.')}[^)\\s]+`,
8+
'g',
9+
)
10+
11+
// Matches markdown links like [text](path) but not images ![text](path),
12+
// absolute URLs, anchors-only, GitBook template tags, or angle-bracket paths.
13+
const relativeLinkPattern =
14+
/(?<!!)\]\((?!https?:\/\/|mailto:|#|{%|<|cursor:|file:)([^)]+)\)/g
15+
16+
interface BrokenLink {
17+
file: string
18+
line: number
19+
url: string
20+
reason: string
21+
}
22+
23+
const brokenLinks: BrokenLink[] = []
24+
25+
function walkDir(dir: string): string[] {
26+
const files: string[] = []
27+
for (const entry of readdirSync(dir, { withFileTypes: true })) {
28+
const fullPath = join(dir, entry.name)
29+
if (entry.isDirectory()) {
30+
files.push(...walkDir(fullPath))
31+
} else if (entry.name.endsWith('.md')) {
32+
files.push(fullPath)
33+
}
34+
}
35+
return files
36+
}
37+
38+
function checkAbsoluteUrl(file: string, line: number, rawUrl: string): void {
39+
const cleanUrl = rawUrl.replaceAll('\\', '')
40+
const url = new URL(cleanUrl)
41+
url.pathname = url.pathname.replace(siteUrlPrefix, '')
42+
43+
const pathname = url.pathname
44+
45+
const section = siteSections.find(({ urlPrefix }) =>
46+
urlPrefix === ''
47+
? true
48+
: pathname.startsWith(urlPrefix + '/') || pathname === urlPrefix,
49+
)
50+
51+
if (section == null) {
52+
brokenLinks.push({
53+
file,
54+
line,
55+
url: rawUrl,
56+
reason: 'No matching site section',
57+
})
58+
return
59+
}
60+
61+
const pagePath = pathname.replace(section.urlPrefix, '')
62+
const targetRoot = join(section.root, pagePath)
63+
64+
const targetMd = `${targetRoot}.md`
65+
const targetReadme = join(targetRoot, 'README.md')
66+
67+
if (!existsSync(targetMd) && !existsSync(targetReadme)) {
68+
brokenLinks.push({
69+
file,
70+
line,
71+
url: rawUrl,
72+
reason: `File not found: ${targetMd} or ${targetReadme}`,
73+
})
74+
}
75+
}
76+
77+
function checkRelativeLink(file: string, line: number, rawLink: string): void {
78+
// Strip GitBook "mention" hint and anchor
79+
const linkPath = rawLink.replace(/ "mention"$/, '').split('#')[0]
80+
if (linkPath == null || linkPath === '') return
81+
82+
// Skip asset references
83+
if (linkPath.includes('.gitbook/assets')) return
84+
85+
// Strip GitBook markdown escapes and decode URL encoding
86+
const cleanPath = decodeURIComponent(
87+
linkPath.replaceAll('\\(', '(').replaceAll('\\)', ')').replaceAll('\\', ''),
88+
)
89+
90+
const fileDir = dirname(file)
91+
const resolved = resolve(fileDir, cleanPath)
92+
93+
// Check if target exists as file, as README.md in directory, or as directory
94+
if (!existsSync(resolved) && !existsSync(join(resolved, 'README.md'))) {
95+
brokenLinks.push({
96+
file,
97+
line,
98+
url: rawLink,
99+
reason: `File not found: ${resolved}`,
100+
})
101+
}
102+
}
103+
104+
const files = walkDir('docs')
105+
106+
for (const file of files) {
107+
const contents = readFileSync(file, 'utf-8')
108+
const lines = contents.split('\n')
109+
110+
for (let i = 0; i < lines.length; i++) {
111+
const lineText = lines[i]
112+
if (lineText == null) continue
113+
114+
// Check absolute docs.seam.co URLs
115+
for (const match of lineText.matchAll(absoluteUrlPattern)) {
116+
const rawUrl = match[0].replace(/[).,]*$/, '')
117+
checkAbsoluteUrl(file, i + 1, rawUrl)
118+
}
119+
120+
// Check relative links
121+
for (const match of lineText.matchAll(relativeLinkPattern)) {
122+
const rawLink = match[1]
123+
if (rawLink == null) continue
124+
checkRelativeLink(file, i + 1, rawLink)
125+
}
126+
}
127+
}
128+
129+
if (brokenLinks.length > 0) {
130+
// Group by broken target
131+
const groups = new Map<string, Array<{ file: string; line: number }>>()
132+
for (const { file, line, url } of brokenLinks) {
133+
const existing = groups.get(url) ?? []
134+
existing.push({ file, line })
135+
groups.set(url, existing)
136+
}
137+
138+
// eslint-disable-next-line no-console
139+
console.error(
140+
`Found ${brokenLinks.length} broken link(s) across ${groups.size} unique target(s):\n`,
141+
)
142+
for (const [target, sources] of groups) {
143+
// eslint-disable-next-line no-console
144+
console.error(` ${target}`)
145+
for (const { file, line } of sources) {
146+
// eslint-disable-next-line no-console
147+
console.error(` - ${file}:${line}`)
148+
}
149+
// eslint-disable-next-line no-console
150+
console.error('')
151+
}
152+
process.exit(1)
153+
} else {
154+
// eslint-disable-next-line no-console
155+
console.log('All links are valid.')
156+
}

codegen/validate-paths.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import { existsSync, readFileSync } from 'node:fs'
2+
import { join } from 'node:path'
3+
4+
import { siteSections } from './lib/config.js'
5+
6+
// Matches markdown links in SUMMARY.md: * [Title](path/to/file.md)
7+
const summaryLinkPattern = /\[([^\]]*)\]\(([^)]+)\)/g
8+
9+
// Matches ## headings that define groups in SUMMARY.md
10+
const groupHeadingPattern = /^## (.+)$/
11+
12+
function slugify(heading: string): string {
13+
return heading
14+
.toLowerCase()
15+
.replace(/[^a-z0-9\s-]/g, '')
16+
.replace(/\s+/g, '-')
17+
.replace(/-+/g, '-')
18+
.trim()
19+
}
20+
21+
interface PathMismatch {
22+
section: string
23+
line: number
24+
title: string
25+
path: string
26+
reason: string
27+
}
28+
29+
const mismatches: PathMismatch[] = []
30+
31+
for (const section of siteSections) {
32+
const summaryPath = join(section.root, 'SUMMARY.md')
33+
if (!existsSync(summaryPath)) continue
34+
35+
const contents = readFileSync(summaryPath, 'utf-8')
36+
const lines = contents.split('\n')
37+
38+
let currentGroup: string | null = null
39+
40+
for (let i = 0; i < lines.length; i++) {
41+
const lineText = lines[i]
42+
if (lineText == null) continue
43+
44+
// Track current ## group heading
45+
const headingMatch = lineText.match(groupHeadingPattern)
46+
if (headingMatch?.[1] != null) {
47+
currentGroup = slugify(headingMatch[1])
48+
continue
49+
}
50+
51+
for (const match of lineText.matchAll(summaryLinkPattern)) {
52+
const title = match[1] ?? ''
53+
const linkPath = match[2] ?? ''
54+
55+
// Skip external links and anchors
56+
if (linkPath.startsWith('http') || linkPath.startsWith('#')) continue
57+
58+
const fullPath = join(section.root, linkPath)
59+
60+
// Check 1: file exists
61+
if (!existsSync(fullPath)) {
62+
mismatches.push({
63+
section: section.name,
64+
line: i + 1,
65+
title,
66+
path: linkPath,
67+
reason: `File not found: ${fullPath}`,
68+
})
69+
continue
70+
}
71+
72+
// Check 2: file path starts with the group slug
73+
if (currentGroup != null && !linkPath.startsWith(currentGroup + '/')) {
74+
mismatches.push({
75+
section: section.name,
76+
line: i + 1,
77+
title,
78+
path: linkPath,
79+
reason: `Path should start with "${currentGroup}/" (listed under "## ${currentGroup}")`,
80+
})
81+
}
82+
}
83+
}
84+
}
85+
86+
if (mismatches.length > 0) {
87+
// eslint-disable-next-line no-console
88+
console.error(`Found ${mismatches.length} SUMMARY.md path issue(s):\n`)
89+
for (const { section, line, title, path, reason } of mismatches) {
90+
// eslint-disable-next-line no-console
91+
console.error(` [${section}] line ${line}: "${title}"`)
92+
// eslint-disable-next-line no-console
93+
console.error(` ${path}`)
94+
// eslint-disable-next-line no-console
95+
console.error(` ${reason}\n`)
96+
}
97+
process.exit(1)
98+
} else {
99+
// eslint-disable-next-line no-console
100+
console.log('All SUMMARY.md paths are valid and consistent.')
101+
}

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
"scripts": {
1717
"generate": "tsx codegen/smith.ts",
1818
"postgenerate": "npm run format",
19+
"validate-paths": "tsx codegen/validate-paths.ts",
20+
"validate-links": "tsx codegen/validate-links.ts",
1921
"typecheck": "tsc",
2022
"lint": "eslint .",
2123
"postlint": "prettier --check --ignore-path .prettierignore .",

0 commit comments

Comments
 (0)