|
| 1 | +import url from "node:url"; |
| 2 | +import path from "node:path"; |
| 3 | +import fs from "node:fs/promises"; |
| 4 | +import type { AstroIntegrationLogger } from "astro"; |
| 5 | +import { unified } from "unified"; |
| 6 | +import rehypeParse from "rehype-parse"; |
| 7 | +import rehypeRemark from "rehype-remark"; |
| 8 | +import remarkStringify from "remark-stringify"; |
| 9 | +import remarkGfm from "remark-gfm"; |
| 10 | +import { JSDOM } from "jsdom"; |
| 11 | +import { toText } from "hast-util-to-text"; |
| 12 | + |
/**
 * CSS selectors (relative to `<main>`) of page chrome that must not appear in
 * the Markdown output: banner, "Edit page"/"Last updated" meta, giscus
 * comments, and the copyright footer (`<hr>` plus every footer `<div>` except
 * the pagination links).
 */
const REMOVED_SELECTORS = [
  ".sl-banner",
  "footer .meta",
  "giscus-comments",
  "footer > hr",
  "footer > div:not(.pagination-links)",
];

/**
 * Prefixes a root-relative reference ("/foo") with `siteUrl`.
 *
 * Protocol-relative references ("//cdn.example.com/foo") also start with "/"
 * but already name a host, so they are returned unchanged, as is every other
 * kind of reference (absolute URLs, fragments, relative paths).
 */
function toAbsoluteUrl(siteUrl: string, ref: string): string {
  const isRootRelative = ref.startsWith("/") && !ref.startsWith("//");
  return isRootRelative ? `${siteUrl}${ref}` : ref;
}

/**
 * Builds the YAML frontmatter block placed at the top of each Markdown file.
 *
 * The title is emitted as a double-quoted scalar (via `JSON.stringify`) so
 * that titles containing ":", "#", quotes or line breaks stay valid YAML.
 */
function buildFrontmatter(title: string, source: string): string {
  return `---\ntitle: ${JSON.stringify(title)}\nsource: ${source}\n---\n\n`;
}

/** Returns true when `err` is an error object carrying `code === "ENOENT"`. */
function isMissingFileError(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    (err as { code?: unknown }).code === "ENOENT"
  );
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Converts a hast `<pre>` element into an mdast `code` node, taking the fence
 * language from `data-language` and dropping a single trailing newline.
 */
// The hast/mdast node types are not imported by this file, so the handler
// arguments stay loosely typed.
function codeNodeFromPre(state: any, pre: any) {
  const codeNode = {
    type: "code" as const,
    lang: pre.properties?.dataLanguage || null,
    meta: null,
    value: toText(pre).replace(/\n$/, ""),
  };
  state.patch(pre, codeNode);
  return codeNode;
}

/** Builds the unified pipeline: HTML fragment -> hast -> mdast -> GFM Markdown. */
function createHtmlToMarkdownProcessor() {
  return unified()
    .use(rehypeParse, { fragment: true })
    .use(rehypeRemark, {
      handlers: {
        // Preserve language hints on code fences from <pre data-language="...">
        pre(state: any, node: any) {
          return codeNodeFromPre(state, node);
        },
        // Handle <figure> with code blocks: extract title from figcaption
        figure(state: any, node: any) {
          const pre = node.children?.find((c: any) => c.tagName === "pre");
          if (!pre) {
            // Not a code figure, fall back to converting the children as-is
            return state.all(node);
          }

          const figcaption = node.children?.find(
            (c: any) => c.tagName === "figcaption",
          );
          const titleSpan = figcaption?.children?.find((c: any) =>
            c.properties?.className?.includes("title"),
          );
          const title = titleSpan ? toText(titleSpan).trim() : "";
          const codeNode = codeNodeFromPre(state, pre);

          if (!title) return codeNode;

          // Render the title as "`title`:" on its own line above the fence
          const titleNode = {
            type: "paragraph" as const,
            children: [
              { type: "inlineCode" as const, value: title },
              { type: "text" as const, value: ":" },
            ],
          };
          return [titleNode, codeNode];
        },
      },
    })
    .use(remarkGfm)
    .use(remarkStringify, {
      bullet: "-",
      emphasis: "*",
      strong: "*",
      rule: "-",
    });
}

/**
 * Astro integration that generates a Markdown file (index.md) next to every
 * rendered HTML file (index.html) in the build output.
 *
 * The Markdown is derived from the rendered HTML, so all links, includes,
 * components, etc. are already resolved. Only the content of the page's
 * `<main>` element is converted; pages without one are skipped, as are pages
 * that have no index.html in the output directory.
 *
 * @returns An integration object with `astro:config:done` and
 *   `astro:build:done` hooks.
 */
export default function markdownOutput() {
  // Origin of the configured `site`; empty when no site is configured, in
  // which case root-relative URLs are left as they are.
  let siteUrl = "";

  return {
    name: "markdown-output",
    hooks: {
      "astro:config:done": ({ config }: { config: { site?: string } }) => {
        siteUrl = config.site ? new URL(config.site).origin : "";
      },
      "astro:build:done": async ({
        dir,
        pages,
        logger,
      }: {
        dir: URL;
        pages: Array<{ pathname: string }>;
        logger: AstroIntegrationLogger;
      }) => {
        const outDir = url.fileURLToPath(dir);
        const processor = createHtmlToMarkdownProcessor();

        let count = 0;
        let errors = 0;

        await Promise.all(
          pages.map(async ({ pathname }) => {
            const htmlPath = path.join(outDir, pathname, "index.html");
            const mdPath = path.join(outDir, pathname, "index.md");

            try {
              const html = await fs.readFile(htmlPath, "utf-8");
              const dom = new JSDOM(html);

              try {
                const doc = dom.window.document;
                const main = doc.querySelector("main");
                if (!main) return;

                // Restore mermaid diagrams as code fences
                main.querySelectorAll("div.mermaid").forEach((el) => {
                  const content = el.getAttribute("data-content");
                  if (content) {
                    const pre = doc.createElement("pre");
                    pre.setAttribute("data-language", "mermaid");
                    pre.textContent = content;
                    el.replaceWith(pre);
                  }
                });

                // Strip page chrome that has no place in the Markdown output
                for (const selector of REMOVED_SELECTORS) {
                  main.querySelectorAll(selector).forEach((el) => el.remove());
                }

                // Remove "Section titled" anchor links in headings
                main.querySelectorAll("a").forEach((el) => {
                  if (el.textContent?.trim().startsWith("Section titled")) el.remove();
                });

                // Resolve image paths: /_astro/... URLs are build artifacts;
                // rewrite them to absolute URLs so they resolve outside the build output.
                main.querySelectorAll("img").forEach((img) => {
                  const src = img.getAttribute("src");
                  if (src) img.setAttribute("src", toAbsoluteUrl(siteUrl, src));
                });

                // Resolve link hrefs to absolute URLs
                main.querySelectorAll("a").forEach((a) => {
                  const href = a.getAttribute("href");
                  if (href) a.setAttribute("href", toAbsoluteUrl(siteUrl, href));
                });

                // Flatten pagination links so Previous/Next text is on one line
                // Structure: <a> <svg/> <span> Previous <br> <span class="link-title">Title</span> </span> </a>
                main.querySelectorAll(".pagination-links a").forEach((a) => {
                  a.querySelectorAll("svg").forEach((svg) => svg.remove());
                  a.querySelectorAll("br").forEach((br) => br.remove());
                  const label = a.querySelector("span")?.childNodes[0]?.textContent?.trim(); // "Previous" or "Next"
                  const title = a.querySelector(".link-title")?.textContent?.trim();
                  if (label && title) {
                    a.textContent = `${label}: ${title}`;
                  }
                });

                const result = await processor.process(main.innerHTML);

                // Add page title and source URL as YAML frontmatter
                const pageTitle = doc.querySelector("title")?.textContent?.trim() || "";
                const pageSource = siteUrl ? `${siteUrl}/${pathname}` : `/${pathname}`;

                await fs.writeFile(
                  mdPath,
                  buildFrontmatter(pageTitle, pageSource) + String(result),
                );
                count++;
              } finally {
                // Release the DOM for this page whether or not conversion succeeded
                dom.window.close();
              }
            } catch (e: unknown) {
              if (isMissingFileError(e)) {
                // No index.html for this page (e.g. redirects, API routes)
                return;
              }
              errors++;
              logger.warn(
                `Failed to generate Markdown for ${pathname}: ${describeError(e)}`,
              );
            }
          }),
        );

        logger.info(
          `Generated ${count} Markdown files${errors > 0 ? ` (${errors} errors)` : ""}`,
        );
      },
    },
  };
}
0 commit comments