Skip to content

Commit 2b6ff0f

Browse files
Merge pull request #92 from AlexKlimenkov/copy-btn
rename plugin, add llms.txt and link rel alternate
2 parents 4e9ea8a + b47d54b commit 2b6ff0f

6 files changed

Lines changed: 219 additions & 88 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
# Production
55
/build
66

7-
# Generated by dhx-copy-page-plugin
7+
# Generated by dhx-llms-plugin
88
/static/llms-md
9+
/static/llms.txt
910

1011
# Generated files
1112
.docusaurus

docker/nginx.conf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ http {
4141
return 301 $new_uri;
4242
}
4343

44+
# Serve raw markdown inline so the docs "View as Markdown" link opens
45+
# in a browser tab instead of triggering a download.
46+
location ~* \.md$ {  # "~*" = case-insensitive regex match: any URI ending in ".md"
47+
root /usr/share/nginx/html;
48+
types { }  # empty extension->MIME map for this location, so default_type below applies to every file served here
49+
default_type "text/markdown; charset=utf-8";  # explicit charset so browsers decode the raw Markdown as UTF-8
50+
add_header Content-Disposition "inline";  # ask the browser to render in the tab rather than download
51+
add_header X-Content-Type-Options "nosniff";  # forbid MIME sniffing of the Markdown body
52+
}
53+
4454
location / {
4555
root /usr/share/nginx/html;
4656
index index.html index.htm;

docusaurus.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ const config = {
226226
onAfterDataTransformation
227227
}
228228
],
229-
path.resolve(__dirname, './plugins/dhx-copy-page-plugin'),
229+
path.resolve(__dirname, './plugins/dhx-llms-plugin'),
230230
[
231231
require.resolve('docusaurus-gtm-plugin'),
232232
{

plugins/dhx-copy-page-plugin/index.js

Lines changed: 0 additions & 86 deletions
This file was deleted.

plugins/dhx-llms-plugin/index.js

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
const fs = require('fs');
2+
const path = require('path');
3+
4+
// Output locations, relative to the site directory.
const OUT_SUBDIR = path.join('static', 'llms-md');
const LLMS_TXT_PATH = path.join('static', 'llms.txt');

// Matches a leading frontmatter block plus the blank lines that follow it.
// - An optional UTF-8 BOM is tolerated (editors on Windows commonly add one).
// - `(?:\r?\n)+` consumes CRLF blank lines as well as LF ones.
// - `|$` accepts a file whose closing `---` is the very last thing in it.
const FRONTMATTER_RE = /^\uFEFF?---\r?\n[\s\S]*?\r?\n---(?:(?:\r?\n)+|$)/;
// Captures the text between the opening and closing `---` fences.
const FRONTMATTER_BLOCK_RE = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/;

// Top-level docs folders in the order they should appear in llms.txt;
// `_root` stands for files that sit directly in the docs directory.
const SECTION_ORDER = ['_root', 'guides', 'integrations', 'api'];
// Human-readable heading for each known section key.
const SECTION_NAMES = {
  _root: 'Overview',
  guides: 'Guides',
  integrations: 'Integrations',
  api: 'API Reference',
};

/**
 * Removes the leading frontmatter block (and the blank lines after it)
 * from a Markdown document.
 *
 * @param {string} md - Raw Markdown/MDX source.
 * @returns {string} The document without frontmatter; returned unchanged
 *   when it does not start with a `---` fenced block.
 */
function stripFrontmatter(md) {
  return md.replace(FRONTMATTER_RE, '');
}

/**
 * Extracts top-level scalar keys from a document's frontmatter.
 *
 * This is a deliberately small subset of YAML, not a full parser:
 * - only unindented `key: value` lines are read (nested maps/lists are skipped);
 * - a value wrapped in matching single or double quotes is unquoted;
 * - block scalars (`>` folded, `|` literal, with optional chomping/indent
 *   indicators) are collected from the indented lines that follow. Each line
 *   is trimmed, so relative indentation inside a literal block is not kept.
 *
 * @param {string} content - Raw Markdown/MDX source.
 * @returns {Object<string, string>} Map of frontmatter key to string value;
 *   empty when the document has no frontmatter block.
 */
function readFrontmatter(content) {
  const m = content.match(FRONTMATTER_BLOCK_RE);
  if (!m) return {};

  const fm = {};
  const lines = m[1].split(/\r?\n/);
  for (let i = 0; i < lines.length; i += 1) {
    const kv = lines[i].match(/^(\w+):\s*(.*)$/);
    if (!kv) continue;

    let val = kv[2].trim();
    if (/^[>|][+\-\d]{0,2}$/.test(val)) {
      // Block scalar: the real value is the run of indented (or blank)
      // lines below the key, not the indicator itself.
      const folded = val.startsWith('>');
      const block = [];
      while (i + 1 < lines.length && /^(?:\s+\S|\s*$)/.test(lines[i + 1])) {
        i += 1;
        block.push(lines[i].trim());
      }
      val = folded
        ? block.filter(Boolean).join(' ')
        : block.join('\n').trim();
    } else if (
      val.length >= 2 &&
      ((val.startsWith('"') && val.endsWith('"')) ||
        (val.startsWith("'") && val.endsWith("'")))
    ) {
      val = val.slice(1, -1);
    }
    fm[kv[1]] = val;
  }
  return fm;
}
39+
40+
/**
 * Builds the text of an llms.txt index for the docs under `sourceDir`.
 *
 * Output shape: an `# <site title>` heading, an optional `> <tagline>`
 * blockquote, then one `## <section>` per top-level docs folder, each
 * holding a bullet list of `- [title](url)` links (with `: description`
 * appended when the doc's frontmatter has one).
 *
 * @param {object} args
 * @param {object} args.siteConfig - Site config; `title` and `tagline` are read.
 * @param {string} args.sourceDir - Directory scanned for Markdown/MDX files.
 * @param {string} args.mdUrlBase - URL prefix that the doc's relative `.md`
 *   path is appended to.
 * @returns {string} The llms.txt content.
 */
function buildLlmsTxt({ siteConfig, sourceDir, mdUrlBase }) {
  const entries = walkMarkdown(sourceDir).map((file) => {
    const fm = readFrontmatter(fs.readFileSync(file, 'utf8'));
    const rel = path.relative(sourceDir, file).replace(/\\/g, '/');
    const relMd = rel.replace(/\.mdx?$/, '.md');
    const rawTitle = fm.title || fm.sidebar_label || relMd.replace(/\.md$/, '');
    // Keep the title on one line and escape brackets so it cannot terminate
    // the Markdown link text early.
    const title = rawTitle.replace(/\s+/g, ' ').trim().replace(/[[\]]/g, '\\$&');
    const description = (fm.description || '').replace(/\s+/g, ' ').trim();
    // Percent-encode the path (spaces, non-ASCII, and the parentheses that
    // encodeURI leaves alone) so the Markdown link target stays intact.
    const urlPath = encodeURI(relMd).replace(/[()]/g, (c) => (c === '(' ? '%28' : '%29'));
    return { title, description, url: mdUrlBase + urlPath, rel };
  });

  // A Map (rather than a plain object) so folder names such as
  // `constructor` cannot collide with Object.prototype members.
  const grouped = new Map();
  for (const entry of entries) {
    const slash = entry.rel.indexOf('/');
    const top = slash === -1 ? '_root' : entry.rel.slice(0, slash);
    if (!grouped.has(top)) grouped.set(top, []);
    grouped.get(top).push(entry);
  }

  // Known sections first, in their configured order; everything else
  // follows alphabetically.
  const orderedKeys = [
    ...SECTION_ORDER.filter((k) => grouped.has(k)),
    ...[...grouped.keys()].filter((k) => !SECTION_ORDER.includes(k)).sort(),
  ];

  const lines = [];
  lines.push(`# ${siteConfig.title || 'Documentation'}`);
  lines.push('');
  if (siteConfig.tagline) {
    lines.push(`> ${siteConfig.tagline}`);
    lines.push('');
  }

  for (const key of orderedKeys) {
    const items = [...grouped.get(key)].sort((a, b) => a.rel.localeCompare(b.rel));
    // Own-property check: only explicitly configured names count.
    const name = Object.prototype.hasOwnProperty.call(SECTION_NAMES, key)
      ? SECTION_NAMES[key]
      : key.charAt(0).toUpperCase() + key.slice(1);
    lines.push(`## ${name}`);
    lines.push('');
    for (const e of items) {
      lines.push(
        e.description
          ? `- [${e.title}](${e.url}): ${e.description}`
          : `- [${e.title}](${e.url})`,
      );
    }
    lines.push('');
  }

  return lines.join('\n');
}
87+
88+
/**
 * Recursively collects the `.md` / `.mdx` files under `rootDir`.
 *
 * Entries whose name starts with `_` are skipped, files and directories
 * alike. This mirrors the default `exclude` globs of the Docusaurus docs
 * plugin, which treats such files as partials and never renders them as
 * pages. NOTE(review): assumes the site keeps the default `exclude`
 * setting - confirm against docusaurus.config.js.
 *
 * Entries are visited in name order so the result does not depend on the
 * order the filesystem happens to return them in.
 *
 * @param {string} rootDir - Directory to scan.
 * @param {string[]} [acc=[]] - Accumulator that found paths are appended to.
 * @returns {string[]} `acc`, holding the path of every matching file.
 */
function walkMarkdown(rootDir, acc = []) {
  const entries = fs
    .readdirSync(rootDir, { withFileTypes: true })
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    if (entry.name.startsWith('_')) continue;

    const full = path.join(rootDir, entry.name);
    if (entry.isDirectory()) {
      walkMarkdown(full, acc);
    } else if (entry.isFile() && /\.mdx?$/.test(entry.name)) {
      acc.push(full);
    }
  }
  return acc;
}
99+
100+
/**
 * Resolves the directory that holds the docs sources for a locale.
 *
 * @param {string} siteDir - Site root directory.
 * @param {string} locale - Locale to resolve.
 * @param {string} defaultLocale - The site's default locale.
 * @returns {string} `<siteDir>/docs` for the default locale, otherwise
 *   `<siteDir>/i18n/<locale>/docusaurus-plugin-content-docs/current`.
 */
function localeSourceDir(siteDir, locale, defaultLocale) {
  if (locale === defaultLocale) {
    return path.join(siteDir, 'docs');
  }
  const translatedSegments = ['i18n', locale, 'docusaurus-plugin-content-docs', 'current'];
  return path.join(siteDir, ...translatedSegments);
}
105+
106+
module.exports = function dhxLlmsPlugin(context) {
107+
const { siteDir, siteConfig } = context;
108+
const { locales, defaultLocale } = siteConfig.i18n;
109+
110+
return {
111+
name: 'dhx-llms-plugin',
112+
113+
// Make the dev server send the same headers as the production Nginx
114+
// location ~* \.md$ block in docker/nginx.conf, so "View as Markdown"
115+
// opens inline in dev too.
116+
configureWebpack() {
117+
return {
118+
devServer: {
119+
headers: (req) => {
120+
if (req && req.url && /\.md(\?|$)/.test(req.url)) {
121+
return [
122+
{ key: 'Content-Type', value: 'text/markdown; charset=utf-8' },
123+
{ key: 'Content-Disposition', value: 'inline' },
124+
];
125+
}
126+
return [];
127+
},
128+
},
129+
};
130+
},
131+
132+
async loadContent() {
133+
const outRoot = path.join(siteDir, OUT_SUBDIR);
134+
if (fs.existsSync(outRoot)) {
135+
try {
136+
fs.rmSync(outRoot, {
137+
recursive: true,
138+
force: true,
139+
maxRetries: 5,
140+
retryDelay: 100,
141+
});
142+
} catch (err) {
143+
// On Windows, rmSync can race with file watchers / AV scanners and
144+
// throw ENOTEMPTY/EPERM. Falling through is fine — we overwrite
145+
// existing files below; only stale files would linger, and full
146+
// builds always start from a clean outDir anyway.
147+
console.warn(`[dhx-llms-plugin] could not clear ${outRoot}: ${err.code || err.message}`);
148+
}
149+
}
150+
fs.mkdirSync(outRoot, { recursive: true });
151+
152+
const defaultDir = localeSourceDir(siteDir, defaultLocale, defaultLocale);
153+
const defaultFiles = fs.existsSync(defaultDir) ? walkMarkdown(defaultDir) : [];
154+
155+
for (const locale of locales) {
156+
const localeDir = path.join(outRoot, locale);
157+
const sourceDir = localeSourceDir(siteDir, locale, defaultLocale);
158+
159+
// Seed every locale with the default-locale content so untranslated
160+
// pages still resolve. Docusaurus falls back to the default locale's
161+
// source when an i18n translation is missing — the .md mirror needs
162+
// to mirror that fallback or the button will 404 on those pages.
163+
for (const file of defaultFiles) {
164+
const rel = path.relative(defaultDir, file).replace(/\\/g, '/');
165+
const destPath = path.join(localeDir, rel.replace(/\.mdx?$/, '.md'));
166+
fs.mkdirSync(path.dirname(destPath), { recursive: true });
167+
fs.writeFileSync(destPath, stripFrontmatter(fs.readFileSync(file, 'utf8')));
168+
}
169+
170+
if (locale === defaultLocale || !fs.existsSync(sourceDir)) continue;
171+
172+
// Overlay locale-specific translations on top of the default seed.
173+
for (const file of walkMarkdown(sourceDir)) {
174+
const rel = path.relative(sourceDir, file).replace(/\\/g, '/');
175+
const destPath = path.join(localeDir, rel.replace(/\.mdx?$/, '.md'));
176+
fs.mkdirSync(path.dirname(destPath), { recursive: true });
177+
fs.writeFileSync(destPath, stripFrontmatter(fs.readFileSync(file, 'utf8')));
178+
}
179+
}
180+
181+
// Generate /llms.txt (llmstxt.org convention) for the default locale.
182+
// LLM crawlers expect a single canonical index at the site root; we
183+
// skip per-locale variants intentionally — they're rarely consumed and
184+
// would split crawler weight across translations.
185+
if (defaultFiles.length > 0) {
186+
const baseUrl = siteConfig.baseUrl.endsWith('/')
187+
? siteConfig.baseUrl
188+
: `${siteConfig.baseUrl}/`;
189+
const siteOrigin = (siteConfig.url || '').replace(/\/+$/, '');
190+
const mdUrlBase = `${siteOrigin}${baseUrl}llms-md/${defaultLocale}/`;
191+
const llmsTxt = buildLlmsTxt({
192+
siteConfig,
193+
sourceDir: defaultDir,
194+
mdUrlBase,
195+
});
196+
fs.writeFileSync(path.join(siteDir, LLMS_TXT_PATH), llmsTxt);
197+
}
198+
},
199+
};
200+
};

src/theme/DocItem/Content/index.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import React, { useEffect, useLayoutEffect, useRef, useState } from 'react';
22
import { createPortal } from 'react-dom';
3+
import Head from '@docusaurus/Head';
34
import OriginalContent from '@theme-original/DocItem/Content';
45
import { useDoc } from '@docusaurus/plugin-content-docs/client';
56
import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
@@ -64,6 +65,11 @@ export default function ContentWrapper(props) {
6465

6566
return (
6667
<>
68+
{mdUrl && (
69+
<Head>
70+
<link rel="alternate" type="text/markdown" href={mdUrl} />
71+
</Head>
72+
)}
6773
<OriginalContent {...props} />
6874
{mdUrl && <CopyPageButtonPortal mdUrl={mdUrl} pageTitle={metadata.title} />}
6975
</>

0 commit comments

Comments
 (0)