|
| 1 | +const fs = require('fs'); |
| 2 | +const path = require('path'); |
| 3 | + |
// Generated artifacts, expressed relative to the site directory.
const OUT_SUBDIR = path.join('static', 'llms-md');
const LLMS_TXT_PATH = path.join('static', 'llms.txt');

// Matches a leading frontmatter block together with the blank lines that
// follow it. Compared with a plain `---\n...\n---\n+` pattern this also:
//   - tolerates a UTF-8 BOM before the opening fence (readFileSync keeps it),
//   - consumes CRLF blank lines after the closing fence, not just LF ones,
//   - accepts a closing fence that is the very last line of the file.
const FRONTMATTER_RE = /^\uFEFF?---\r?\n[\s\S]*?\r?\n---(?:(?:\r?\n)+|$)/;
// Same opening rules; capture group 1 is the raw text between the fences.
const FRONTMATTER_BLOCK_RE = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/;

// Top-level doc folders listed first in llms.txt, in this order. `_root` is
// the pseudo-section for files that sit directly in the docs directory.
const SECTION_ORDER = ['_root', 'guides', 'integrations', 'api'];
// Heading text for the known sections.
const SECTION_NAMES = {
  _root: 'Overview',
  guides: 'Guides',
  integrations: 'Integrations',
  api: 'API Reference',
};
| 16 | + |
/**
 * Removes the leading frontmatter block from a Markdown document.
 *
 * @param {string} md - Raw Markdown/MDX source.
 * @returns {string} The source with the span matched by FRONTMATTER_RE
 *   removed; input without such a span is returned unchanged.
 */
function stripFrontmatter(md) {
  const body = md.replace(FRONTMATTER_RE, '');
  return body;
}
| 20 | + |
/**
 * Reads the flat `key: value` pairs from a document's frontmatter block.
 *
 * This is a deliberately small subset of YAML: only top-level keys made of
 * word characters are recognised, and every value is returned as a string.
 * Matching single or double quotes around a value are removed. Block scalars
 * (`key: |` or `key: >`, optionally with `+`/`-`) are supported by collecting
 * the indented lines that follow, so a folded description does not end up as
 * the bare indicator character.
 *
 * @param {string} content - Raw Markdown/MDX source.
 * @returns {Object<string, string>} Parsed pairs, or `{}` when the document
 *   has no frontmatter block.
 */
function readFrontmatter(content) {
  const m = content.match(FRONTMATTER_BLOCK_RE);
  if (!m) return {};
  const fm = {};
  const lines = m[1].split(/\r?\n/);
  for (let i = 0; i < lines.length; i += 1) {
    const kv = lines[i].match(/^(\w+):\s*(.*)$/);
    if (!kv) continue;
    let val = kv[2].trim();
    if (/^[|>][+-]?$/.test(val)) {
      // Block scalar: the value lives on the following indented (or blank)
      // lines. Literal style (`|`) keeps line breaks, folded style (`>`)
      // joins the lines with spaces.
      const joiner = val.startsWith('|') ? '\n' : ' ';
      const parts = [];
      while (i + 1 < lines.length && /^(?:\s+\S|\s*$)/.test(lines[i + 1])) {
        i += 1;
        parts.push(lines[i].trim());
      }
      val = parts.join(joiner).trim();
    } else if (
      // Require two characters so a lone quote is not treated as a quoted
      // empty string.
      val.length >= 2 &&
      ((val.startsWith('"') && val.endsWith('"')) ||
        (val.startsWith("'") && val.endsWith("'")))
    ) {
      val = val.slice(1, -1);
    }
    fm[kv[1]] = val;
  }
  return fm;
}
| 39 | + |
/**
 * Builds the text of an llms.txt index for every Markdown/MDX file found
 * under `sourceDir`.
 *
 * Files are grouped by their first path segment (files directly in
 * `sourceDir` go to the `_root` group). Groups listed in SECTION_ORDER come
 * first in that order; all remaining groups follow alphabetically. Inside a
 * group, entries are sorted by relative path.
 *
 * Grouping uses a Map and the heading lookup checks own properties only, so
 * a folder whose name collides with an Object.prototype member (for example
 * `constructor` or `__proto__`) is handled like any other folder instead of
 * throwing.
 *
 * @param {object} options
 * @param {object} options.siteConfig - Site config; `title` and `tagline` are read.
 * @param {string} options.sourceDir - Directory that is scanned for docs.
 * @param {string} options.mdUrlBase - URL prefix prepended to each relative `.md` path.
 * @returns {string} The complete llms.txt content.
 */
function buildLlmsTxt({ siteConfig, sourceDir, mdUrlBase }) {
  const entries = walkMarkdown(sourceDir).map((file) => {
    const content = fs.readFileSync(file, 'utf8');
    const fm = readFrontmatter(content);
    const rel = path.relative(sourceDir, file).replace(/\\/g, '/');
    const relMd = rel.replace(/\.mdx?$/, '.md');
    const title = fm.title || fm.sidebar_label || relMd.replace(/\.md$/, '');
    // Collapse whitespace so every entry stays on a single line.
    const description = (fm.description || '').replace(/\s+/g, ' ').trim();
    return { title, description, url: mdUrlBase + relMd, rel };
  });

  // Keys come from directory names, so use a Map rather than a plain object.
  const grouped = new Map();
  for (const e of entries) {
    const top = e.rel.includes('/') ? e.rel.split('/')[0] : '_root';
    if (!grouped.has(top)) grouped.set(top, []);
    grouped.get(top).push(e);
  }

  const orderedKeys = [
    ...SECTION_ORDER.filter((k) => grouped.has(k)),
    ...[...grouped.keys()].filter((k) => !SECTION_ORDER.includes(k)).sort(),
  ];

  const lines = [];
  lines.push(`# ${siteConfig.title || 'Documentation'}`);
  lines.push('');
  if (siteConfig.tagline) {
    lines.push(`> ${siteConfig.tagline}`);
    lines.push('');
  }

  for (const key of orderedKeys) {
    const items = grouped.get(key).sort((a, b) => a.rel.localeCompare(b.rel));
    const knownName = Object.prototype.hasOwnProperty.call(SECTION_NAMES, key)
      ? SECTION_NAMES[key]
      : '';
    // Unknown folders fall back to the folder name with a capital first letter.
    const name = knownName || (key.charAt(0).toUpperCase() + key.slice(1));
    lines.push(`## ${name}`);
    lines.push('');
    for (const e of items) {
      lines.push(
        e.description
          ? `- [${e.title}](${e.url}): ${e.description}`
          : `- [${e.title}](${e.url})`,
      );
    }
    lines.push('');
  }

  return lines.join('\n');
}
| 87 | + |
/**
 * Recursively collects the paths of all `.md` and `.mdx` files below a
 * directory.
 *
 * @param {string} rootDir - Directory to scan.
 * @param {string[]} [acc] - Array the results are appended to; used by the
 *   recursive calls so that a single array is shared.
 * @returns {string[]} `acc`, holding one path (rootDir joined with the
 *   relative location) per Markdown file found.
 */
function walkMarkdown(rootDir, acc = []) {
  const children = fs.readdirSync(rootDir, { withFileTypes: true });
  children.forEach((child) => {
    const childPath = path.join(rootDir, child.name);
    if (child.isDirectory()) {
      walkMarkdown(childPath, acc);
      return;
    }
    const isMarkdownFile = child.isFile() && /\.mdx?$/.test(child.name);
    if (isMarkdownFile) {
      acc.push(childPath);
    }
  });
  return acc;
}
| 99 | + |
/**
 * Resolves the directory that holds the docs sources for a locale.
 *
 * @param {string} siteDir - Root directory of the site.
 * @param {string} locale - Locale whose sources are wanted.
 * @param {string} defaultLocale - The site's default locale.
 * @returns {string} `<siteDir>/docs` for the default locale, otherwise the
 *   locale's `i18n/<locale>/docusaurus-plugin-content-docs/current` folder.
 */
function localeSourceDir(siteDir, locale, defaultLocale) {
  if (locale === defaultLocale) {
    return path.join(siteDir, 'docs');
  }
  const translatedDocs = ['i18n', locale, 'docusaurus-plugin-content-docs', 'current'];
  return path.join(siteDir, ...translatedDocs);
}
| 105 | + |
| 106 | +module.exports = function dhxLlmsPlugin(context) { |
| 107 | + const { siteDir, siteConfig } = context; |
| 108 | + const { locales, defaultLocale } = siteConfig.i18n; |
| 109 | + |
| 110 | + return { |
| 111 | + name: 'dhx-llms-plugin', |
| 112 | + |
| 113 | + // Make the dev server send the same headers as the production Nginx |
| 114 | + // location ~* \.md$ block in docker/nginx.conf, so "View as Markdown" |
| 115 | + // opens inline in dev too. |
| 116 | + configureWebpack() { |
| 117 | + return { |
| 118 | + devServer: { |
| 119 | + headers: (req) => { |
| 120 | + if (req && req.url && /\.md(\?|$)/.test(req.url)) { |
| 121 | + return [ |
| 122 | + { key: 'Content-Type', value: 'text/markdown; charset=utf-8' }, |
| 123 | + { key: 'Content-Disposition', value: 'inline' }, |
| 124 | + ]; |
| 125 | + } |
| 126 | + return []; |
| 127 | + }, |
| 128 | + }, |
| 129 | + }; |
| 130 | + }, |
| 131 | + |
| 132 | + async loadContent() { |
| 133 | + const outRoot = path.join(siteDir, OUT_SUBDIR); |
| 134 | + if (fs.existsSync(outRoot)) { |
| 135 | + try { |
| 136 | + fs.rmSync(outRoot, { |
| 137 | + recursive: true, |
| 138 | + force: true, |
| 139 | + maxRetries: 5, |
| 140 | + retryDelay: 100, |
| 141 | + }); |
| 142 | + } catch (err) { |
| 143 | + // On Windows, rmSync can race with file watchers / AV scanners and |
| 144 | + // throw ENOTEMPTY/EPERM. Falling through is fine — we overwrite |
| 145 | + // existing files below; only stale files would linger, and full |
| 146 | + // builds always start from a clean outDir anyway. |
| 147 | + console.warn(`[dhx-llms-plugin] could not clear ${outRoot}: ${err.code || err.message}`); |
| 148 | + } |
| 149 | + } |
| 150 | + fs.mkdirSync(outRoot, { recursive: true }); |
| 151 | + |
| 152 | + const defaultDir = localeSourceDir(siteDir, defaultLocale, defaultLocale); |
| 153 | + const defaultFiles = fs.existsSync(defaultDir) ? walkMarkdown(defaultDir) : []; |
| 154 | + |
| 155 | + for (const locale of locales) { |
| 156 | + const localeDir = path.join(outRoot, locale); |
| 157 | + const sourceDir = localeSourceDir(siteDir, locale, defaultLocale); |
| 158 | + |
| 159 | + // Seed every locale with the default-locale content so untranslated |
| 160 | + // pages still resolve. Docusaurus falls back to the default locale's |
| 161 | + // source when an i18n translation is missing — the .md mirror needs |
| 162 | + // to mirror that fallback or the button will 404 on those pages. |
| 163 | + for (const file of defaultFiles) { |
| 164 | + const rel = path.relative(defaultDir, file).replace(/\\/g, '/'); |
| 165 | + const destPath = path.join(localeDir, rel.replace(/\.mdx?$/, '.md')); |
| 166 | + fs.mkdirSync(path.dirname(destPath), { recursive: true }); |
| 167 | + fs.writeFileSync(destPath, stripFrontmatter(fs.readFileSync(file, 'utf8'))); |
| 168 | + } |
| 169 | + |
| 170 | + if (locale === defaultLocale || !fs.existsSync(sourceDir)) continue; |
| 171 | + |
| 172 | + // Overlay locale-specific translations on top of the default seed. |
| 173 | + for (const file of walkMarkdown(sourceDir)) { |
| 174 | + const rel = path.relative(sourceDir, file).replace(/\\/g, '/'); |
| 175 | + const destPath = path.join(localeDir, rel.replace(/\.mdx?$/, '.md')); |
| 176 | + fs.mkdirSync(path.dirname(destPath), { recursive: true }); |
| 177 | + fs.writeFileSync(destPath, stripFrontmatter(fs.readFileSync(file, 'utf8'))); |
| 178 | + } |
| 179 | + } |
| 180 | + |
| 181 | + // Generate /llms.txt (llmstxt.org convention) for the default locale. |
| 182 | + // LLM crawlers expect a single canonical index at the site root; we |
| 183 | + // skip per-locale variants intentionally — they're rarely consumed and |
| 184 | + // would split crawler weight across translations. |
| 185 | + if (defaultFiles.length > 0) { |
| 186 | + const baseUrl = siteConfig.baseUrl.endsWith('/') |
| 187 | + ? siteConfig.baseUrl |
| 188 | + : `${siteConfig.baseUrl}/`; |
| 189 | + const siteOrigin = (siteConfig.url || '').replace(/\/+$/, ''); |
| 190 | + const mdUrlBase = `${siteOrigin}${baseUrl}llms-md/${defaultLocale}/`; |
| 191 | + const llmsTxt = buildLlmsTxt({ |
| 192 | + siteConfig, |
| 193 | + sourceDir: defaultDir, |
| 194 | + mdUrlBase, |
| 195 | + }); |
| 196 | + fs.writeFileSync(path.join(siteDir, LLMS_TXT_PATH), llmsTxt); |
| 197 | + } |
| 198 | + }, |
| 199 | + }; |
| 200 | +}; |
0 commit comments