|
| 1 | +#!/usr/bin/env node |
| 2 | + |
| 3 | +import fs from "node:fs"; |
| 4 | +import path from "node:path"; |
| 5 | +import { execFileSync } from "node:child_process"; |
| 6 | + |
// Directory holding the generated site; PUBLIC_DIR overrides the default "public".
const publicRoot = path.resolve(process.env.PUBLIC_DIR || "public");
// Canonical site URL; absolute links with this origin are treated as internal.
const siteBase = new URL(process.env.SITE_BASE_URL || "https://www2.sigsoft.org/");
const siteBasePath = normalizePath(siteBase.pathname);
// URL schemes that never map to a file on disk and are therefore skipped.
const ignoredProtocols = new Set(["mailto:", "tel:", "javascript:", "data:", "blob:"]);

// Files named on the command line win; otherwise fall back to the git diff.
const changedFiles = process.argv.slice(2).filter((file) => fs.existsSync(file));
const filesToCheck = changedFiles.length > 0 ? changedFiles : gitChangedFiles();
// Human-readable failure messages, appended to by the check* helpers.
const failures = [];

for (const file of filesToCheck) {
  if (!/\.(md|markdown|html|toml|yaml|yml)$/i.test(file)) continue;
  // A git diff also lists paths that were deleted or renamed away; there is
  // nothing to read for those, and readFileSync would throw ENOENT.
  if (!fs.existsSync(file)) continue;

  const source = fs.readFileSync(file, "utf8");
  for (const link of extractLinks(source)) {
    checkLink(file, link);
  }
}

if (failures.length > 0) {
  console.error(`Found ${failures.length} broken link(s) in changed files:`);
  for (const failure of failures) console.error(`- ${failure}`);
  process.exit(1);
}

console.log(`Checked links in ${filesToCheck.length} changed file(s); no new broken internal links found.`);
| 32 | + |
/**
 * Validate a single link found in a source file and record a failure message
 * in `failures` when its target cannot be found.
 *
 * @param {string} sourceFile - Path of the file the link was extracted from.
 * @param {string} rawLink - Link text exactly as extracted (may contain HTML entities).
 * @returns {void}
 */
function checkLink(sourceFile, rawLink) {
  const link = decodeHtml(rawLink.trim());
  // Empty links, same-page anchors and protocol-relative URLs are not checked.
  if (!link || link.startsWith("#") || link.startsWith("//")) return;

  // Must run before the generic shortcode guard below: a complete ref
  // shortcode also contains "{{<" / "{{%" and would otherwise never get here.
  const refTarget = parseHugoRef(link);
  if (refTarget) {
    checkContentPath(sourceFile, link, refTarget);
    return;
  }

  // Any other (possibly truncated) shortcode cannot be resolved statically.
  if (link.includes("{{<") || link.includes("{{%")) return;

  // Only scheme-less links can point at a local content file; an absolute URL
  // such as https://example.com/README.md must not be looked up on disk.
  const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(link);
  if (!hasScheme && /\.md$/i.test(link)) {
    checkContentPath(sourceFile, link, link);
    return;
  }

  const internalPath = toInternalPath(link);
  if (!internalPath) return;

  const target = resolvePublicPath(internalPath);
  if (!fs.existsSync(target)) {
    failures.push(`${sourceFile}: broken internal link ${link} -> ${slash(path.relative(publicRoot, target))}`);
  }
}
| 58 | + |
/**
 * Check that a Hugo ref / Markdown content target exists on disk and record a
 * failure message in `failures` when it does not.
 *
 * Targets starting with "/" are resolved against the "content" directory;
 * all others are resolved relative to the directory of `sourceFile`.
 *
 * @param {string} sourceFile - File the link was found in.
 * @param {string} link - Link text as written, used only in the failure message.
 * @param {string} target - Content path to look up; may carry "#anchor" or "?query".
 * @returns {void}
 */
function checkContentPath(sourceFile, link, target) {
  // The anchor / query part is not part of the file name on disk.
  const filePart = target.split("#")[0].split("?")[0];
  // An anchor-only target refers to the current page, which trivially exists.
  if (!filePart) return;

  const contentPath = filePart.startsWith("/")
    ? path.resolve("content", filePart.replace(/^\/+/, ""))
    : path.resolve(path.dirname(sourceFile), filePart);

  // Hugo refs may omit the file extension, so also accept "<target>.md".
  const candidates = path.extname(contentPath) ? [contentPath] : [contentPath, `${contentPath}.md`];
  if (candidates.some((candidate) => fs.existsSync(candidate))) return;

  failures.push(`${sourceFile}: broken Hugo/content ref ${link} -> ${slash(path.relative(process.cwd(), contentPath))}`);
}
| 68 | + |
/**
 * Collect every link-like target that appears in a file's text.
 *
 * Recognised forms: Markdown inline links, `href` / `src` / `action`
 * attributes, `url = "..."` assignments at the start of a line, and Hugo
 * `ref` shortcodes written with either `{{< >}}` or `{{% %}}` delimiters.
 *
 * @param {string} source - Full text of the file.
 * @returns {Set<string>} De-duplicated captured targets, in discovery order.
 */
function extractLinks(source) {
  const markdownLink = /\[[^\]]*]\(([^)\s]+)(?:\s+["'][^"']*["'])?\)/g;
  const htmlAttribute = /\b(?:href|src|action)=["']([^"']+)["']/gi;
  const urlAssignment = /^\s*url\s*=\s*["']([^"']+)["']/gim;
  const angleRef = /\{\{<\s*ref\s+["']([^"']+)["']\s*>}}/g;
  const percentRef = /\{\{%\s*ref\s+["']([^"']+)["']\s*%}}/g;

  const found = new Set();
  for (const matcher of [markdownLink, htmlAttribute, urlAssignment, angleRef, percentRef]) {
    // Capture group 1 is the link target in every pattern above.
    for (const [, captured] of source.matchAll(matcher)) {
      found.add(captured);
    }
  }
  return found;
}
| 88 | + |
/**
 * Pull the quoted target out of a string that consists solely of a Hugo
 * `ref` shortcode (`{{< ref "..." >}}` or `{{% ref "..." %}}`).
 *
 * @param {string} link - Candidate link text.
 * @returns {string|null} The quoted target, or null when `link` is not a ref shortcode.
 */
function parseHugoRef(link) {
  const shortcode = /^\{\{[<%]\s*ref\s+["']([^"']+)["']\s*[>%]}}$/;
  const captured = shortcode.exec(link);
  if (captured === null) return null;
  return captured[1];
}
| 93 | + |
/**
 * Reduce a link to the site-internal URL path it points at.
 *
 * Absolute URLs qualify only when their scheme is not in `ignoredProtocols`
 * and their origin equals the origin of `siteBase`. Links that do not parse
 * as absolute URLs qualify only when they start with "/". Fragment and query
 * string are dropped from the result.
 *
 * @param {string} link - Link text after entity decoding.
 * @returns {string|null} Internal path ("/" when nothing remains), or null
 *   when the link is external, uses a scheme, or is relative.
 */
function toInternalPath(link) {
  const schemePrefix = /^[a-z][a-z0-9+.-]*:/i;
  let pathPart;

  try {
    const { protocol, origin, pathname } = new URL(link);
    const isForeign = ignoredProtocols.has(protocol) || origin !== siteBase.origin;
    if (isForeign) return null;
    pathPart = stripSiteBasePath(pathname);
  } catch {
    // Not parseable as an absolute URL: accept root-relative paths only.
    const isRootRelative = !schemePrefix.test(link) && link.startsWith("/");
    if (!isRootRelative) return null;
    pathPart = link;
  }

  const [beforeFragment] = pathPart.split("#");
  const [beforeQuery] = beforeFragment.split("?");
  return beforeQuery || "/";
}
| 111 | + |
/**
 * Map an internal URL path to the file expected under `publicRoot`.
 *
 * - A path whose resolved location has a file extension maps to that location.
 * - A path ending in "/" maps to `<dir>/index.html`.
 * - Otherwise `<path>/index.html` is used when it exists on disk, else `<path>.html`.
 *
 * @param {string} urlPath - Internal URL path; may be percent-encoded.
 * @returns {string} Filesystem path of the expected target.
 */
function resolvePublicPath(urlPath) {
  const relative = safeDecode(urlPath).replace(/^\/+/, "");
  const onDisk = path.join(publicRoot, relative);

  const hasExtension = path.extname(onDisk) !== "";
  if (hasExtension) return onDisk;

  const indexFile = path.join(onDisk, "index.html");
  // Short-circuit keeps the filesystem probe for extension-less, slash-less URLs only.
  if (urlPath.endsWith("/") || fs.existsSync(indexFile)) return indexFile;

  return `${onDisk}.html`;
}
| 123 | + |
/**
 * List the files changed according to git.
 *
 * With LINK_CHECK_BASE_REF set, diffs `<base>...HEAD`; otherwise diffs
 * `HEAD^` against `HEAD`. Prints a diagnostic and exits the process with
 * status 1 when git fails or reports no changed files.
 *
 * @returns {string[]} Changed file paths as reported by git.
 */
function gitChangedFiles() {
  const baseRef = process.env.LINK_CHECK_BASE_REF;
  const range = baseRef ? [`${baseRef}...HEAD`] : ["HEAD^", "HEAD"];
  // -z makes git emit NUL-terminated, unquoted paths. Without it, names with
  // non-ASCII or special characters arrive C-quoted ("\303\251.md") and would
  // neither match the extension filter nor exist on disk.
  const args = ["diff", "--name-only", "-z", ...range];

  let output;
  try {
    output = execFileSync("git", args, { encoding: "utf8" });
  } catch {
    console.error(`Unable to determine changed files from: git ${args.join(" ")}`);
    console.error("Pass files explicitly or ensure the checkout has enough git history.");
    process.exit(1);
  }

  const files = output.split("\0").filter(Boolean);
  if (files.length === 0) {
    console.error(`No changed files found from: git ${args.join(" ")}`);
    console.error("Pass files explicitly or ensure the checkout has enough git history.");
    process.exit(1);
  }

  return files;
}
| 146 | + |
/**
 * Remove the site's base path prefix (`siteBasePath`) from a URL path.
 *
 * The path is normalized first, so the result has a leading slash and no
 * trailing slash (except for the root itself).
 *
 * @param {string} urlPath - URL path, typically `URL#pathname`.
 * @returns {string} Path relative to the site root.
 */
function stripSiteBasePath(urlPath) {
  const normalized = normalizePath(urlPath);
  if (siteBasePath === "/") return normalized;

  // A link to the base path itself is the site root, not a page named after it.
  if (normalized === siteBasePath) return "/";

  // Require the separator so "/base" does not swallow "/basement".
  if (normalized.startsWith(`${siteBasePath}/`)) {
    return normalized.slice(siteBasePath.length);
  }
  return normalized;
}
| 154 | + |
/**
 * Normalize a URL path to have exactly one leading slash prefix and no
 * trailing slashes; the root is always returned as "/".
 *
 * @param {string} value - URL path, with or without a leading slash.
 * @returns {string} Normalized path, never empty.
 */
function normalizePath(value) {
  const rooted = value.startsWith("/") ? value : `/${value}`;
  // Stripping trailing slashes empties an all-slash path such as "//";
  // fall back to "/" so callers never receive an empty string.
  return rooted.replace(/\/+$/, "") || "/";
}
| 159 | + |
/**
 * Percent-decode a string, falling back to the input unchanged when it
 * contains a malformed escape sequence.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} Decoded text, or `value` itself if decoding throws.
 */
function safeDecode(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape: keep the raw input.
  }
  return decoded;
}
| 167 | + |
/**
 * Decode the handful of HTML character references that commonly appear in
 * link attributes: the named references amp, quot, apos, lt and gt, plus the
 * numeric references #34, #39 and #x27.
 *
 * Decoding happens in a single pass, so an escaped ampersand followed by
 * "lt;" yields the literal text of the lt reference rather than "<".
 *
 * @param {string} value - Text that may contain HTML character references.
 * @returns {string} Text with the supported references replaced.
 */
function decodeHtml(value) {
  // Kept inside the function: it is called from top-level code that runs
  // before any module-level constant declared down here would be initialized.
  const replacements = {
    amp: "&",
    quot: '"',
    "#34": '"',
    apos: "'",
    "#39": "'",
    "#x27": "'",
    lt: "<",
    gt: ">",
  };
  return value.replace(/&(amp|quot|apos|lt|gt|#34|#39|#x27);/g, (_, name) => replacements[name]);
}
| 178 | + |
/**
 * Rewrite a filesystem path to use forward slashes, so messages look the
 * same on every platform.
 *
 * @param {string} value - Path using the platform separator (`path.sep`).
 * @returns {string} The same path with every separator replaced by "/".
 */
function slash(value) {
  const { sep } = path;
  return value.replaceAll(sep, "/");
}
0 commit comments