Skip to content

Commit bb0b6e4

Browse files
site: generate sitemap.xml, llms.txt, and llms-full.txt at build (Vite plugin)
Our web framework has no route-aware SSG yet, so these discoverability files were hand-maintained and kept drifting (llms.txt missed new pages; llms-full.txt had fallen behind docs/API.md, e.g. the Error Diagnostics section).

Now generated:
- src/site-meta.js — single source for the base URL, per-route llms.txt descriptions, and the llms-full.txt sources (README + docs/API.md).
- scripts/discoverability.mjs — discovers routes from app/**/page.* (the same files the router globs), renders sitemap.xml (always complete), llms.txt (routes + curated descriptions), and llms-full.txt (the repo docs inlined); runnable standalone.
- vite-discoverability.mjs — Vite plugin that regenerates all three into public/ on every build and dev start, warning if a route lacks an llms.txt description or site-meta lists a route that no longer exists.

The three generated files (public/sitemap.xml, public/llms.txt, public/llms-full.txt) are now gitignored — adding a page updates the sitemap automatically, and editing README/docs/API.md re-inlines llms-full.txt.
1 parent 10780ba commit bb0b6e4

8 files changed

Lines changed: 255 additions & 595 deletions

File tree

site/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,8 @@ npm-debug.log*
1818
!.dev.vars.example
1919
.env*
2020
!.env.example
21+
22+
# Generated by vite-discoverability.mjs (do not edit/commit)
23+
public/sitemap.xml
24+
public/llms.txt
25+
public/llms-full.txt

site/public/llms-full.txt

Lines changed: 0 additions & 437 deletions
This file was deleted.

site/public/llms.txt

Lines changed: 0 additions & 48 deletions
This file was deleted.

site/public/sitemap.xml

Lines changed: 0 additions & 110 deletions
This file was deleted.

site/scripts/discoverability.mjs

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Generates sitemap.xml, llms.txt, and llms-full.txt from the routes on disk, src/site-meta.js, and the repo docs.
2+
//
3+
// Routes are discovered from app/**/page.{jsx,tsx,js,ts} (the same files the app
4+
// router globs at runtime), so the sitemap is always complete. llms.txt pairs
5+
// each route with its curated description from site-meta; a route with no
6+
// description is reported (not silently dropped).
7+
//
8+
// Used by vite-discoverability.mjs at build/dev start, and runnable directly:
9+
// node scripts/discoverability.mjs # writes into public/
10+
import { globSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
11+
import path from "node:path";
12+
import { fileURLToPath } from "node:url";
13+
import {
14+
SITE_URL,
15+
SITE_NAME,
16+
TAGLINE,
17+
SUMMARY,
18+
SECTION_ORDER,
19+
PAGES,
20+
FOOTER,
21+
FULL_DOC,
22+
SITEMAP_EXTRA,
23+
SITEMAP_EXCLUDE,
24+
} from "../src/site-meta.js";
25+
26+
// Absolute paths resolved relative to this script, so they do not depend on the cwd.
const APP_DIR = fileURLToPath(new URL("../app", import.meta.url)); // page files are discovered here
const PUBLIC_DIR = fileURLToPath(new URL("../public", import.meta.url)); // generated files are written here
const REPO_ROOT = fileURLToPath(new URL("../../", import.meta.url)); // FULL_DOC.sources are resolved against this
29+
30+
// All routes, derived from page files: app/docs/glob/page.jsx -> /docs/glob,
31+
// app/page.jsx -> /.
32+
export function discoverRoutes(appDir = APP_DIR) {
33+
const files = globSync("**/page.*", { cwd: appDir }).filter((f) =>
34+
/(^|\/)page\.(jsx|tsx|js|ts)$/.test(f),
35+
);
36+
const routes = files.map((f) => {
37+
const trimmed = f.replace(/(^|\/)page\.(jsx|tsx|js|ts)$/, "");
38+
return trimmed === "" ? "/" : "/" + trimmed;
39+
});
40+
return [...new Set(routes)].filter((r) => !SITEMAP_EXCLUDE.has(r)).sort();
41+
}
42+
43+
function priorityFor(route) {
44+
if (route === "/") return "1.0";
45+
const depth = route.split("/").filter(Boolean).length;
46+
return depth <= 1 ? "0.9" : depth === 2 ? "0.7" : "0.6";
47+
}
48+
49+
/**
 * Escapes the five XML special characters so a value can be placed in element text.
 *
 * @param {string} text Raw value.
 * @returns {string} Entity-escaped value.
 */
function escapeXml(text) {
  return String(text)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&apos;");
}

/**
 * Renders sitemap.xml for the given routes.
 *
 * The root URL is always emitted first; the remaining routes follow in the
 * order given, then the SITEMAP_EXTRA entries. Only `/`, `/docs`, `/api`, and
 * extras flagged `weekly` get a `<changefreq>`.
 *
 * @param {string[]} routes Route paths (a `/` entry is ignored; the root is added here).
 * @returns {string} The complete XML document, ending with a newline.
 */
export function renderSitemap(routes) {
  const entries = [
    { path: "/", priority: "1.0", weekly: true },
    ...routes
      .filter((route) => route !== "/")
      .map((route) => ({
        path: route,
        priority: priorityFor(route),
        weekly: /^\/(docs|api)$/.test(route),
      })),
    ...SITEMAP_EXTRA,
  ];
  const urls = entries.map((entry) => {
    // The sitemap protocol requires entity-escaped URLs; an unescaped `&`
    // makes the document invalid XML.
    // NOTE(review): assumes SITE_URL and SITEMAP_EXTRA paths are raw, not pre-escaped — confirm.
    const loc = escapeXml(`${SITE_URL}${entry.path}`);
    const changefreq = entry.weekly ? " <changefreq>weekly</changefreq>\n" : "";
    return ` <url>\n <loc>${loc}</loc>\n${changefreq} <priority>${entry.priority || "0.6"}</priority>\n </url>`;
  });
  return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n${urls.join("\n")}\n</urlset>\n`;
}
64+
65+
export function renderLlmsTxt(routes) {
66+
const present = new Set(routes);
67+
// A discovered route with no metadata, and metadata for a route that's gone.
68+
const missing = routes.filter((r) => r !== "/" && !PAGES[r]);
69+
const stale = Object.keys(PAGES).filter((r) => !present.has(r));
70+
71+
let out = `# ${SITE_NAME}\n\n> ${TAGLINE}\n\n${SUMMARY}\n`;
72+
for (const section of SECTION_ORDER) {
73+
// PAGES insertion order = curated reading order; include only live routes.
74+
const routesInSection = Object.keys(PAGES).filter(
75+
(r) => PAGES[r].section === section && present.has(r),
76+
);
77+
if (routesInSection.length === 0) continue;
78+
out += `\n## ${section}\n\n`;
79+
for (const r of routesInSection) {
80+
const { title, description } = PAGES[r];
81+
out += `- [${title}](${SITE_URL}${r}): ${description}\n`;
82+
}
83+
}
84+
for (const f of FOOTER) out += `\n## ${f.section}\n\n${f.body}\n`;
85+
return { content: out, missing, stale };
86+
}
87+
88+
// Inlines the canonical repo docs (README + docs/API.md) as one markdown file.
89+
export function renderLlmsFull(repoRoot = REPO_ROOT) {
90+
const header = `# ${SITE_NAME} — full documentation\n\n> ${FULL_DOC.tagline}\n\n_${FULL_DOC.note}_\n\n---\n`;
91+
const parts = FULL_DOC.sources.map((rel) =>
92+
readFileSync(path.join(repoRoot, rel), "utf8").trim(),
93+
);
94+
return `${header}\n${parts.join("\n\n---\n\n")}\n`;
95+
}
96+
97+
/**
 * Builds the contents of all three discoverability files without writing them.
 *
 * @param {string} [appDir=APP_DIR] Directory whose page files define the routes.
 * @param {string} [repoRoot=REPO_ROOT] Directory the llms-full.txt sources are relative to.
 * @returns {{routes: string[], files: Object<string, string>, missing: string[], stale: string[]}}
 *   `files` maps each output file name to its content; `missing` and `stale`
 *   are the warnings reported by renderLlmsTxt.
 */
export function buildDiscoverability(appDir = APP_DIR, repoRoot = REPO_ROOT) {
  const routes = discoverRoutes(appDir);
  const { content: llms, missing, stale } = renderLlmsTxt(routes);
  return {
    routes,
    files: {
      "sitemap.xml": renderSitemap(routes),
      "llms.txt": llms,
      "llms-full.txt": renderLlmsFull(repoRoot),
    },
    missing,
    stale,
  };
}
113+
114+
// Run directly (`node scripts/discoverability.mjs`): write the generated files
// into public/ and report. The guard compares the invoked script path with this
// module's path, so importing the module does not write anything.
if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
  const { routes, files, missing, stale } = buildDiscoverability();
  mkdirSync(PUBLIC_DIR, { recursive: true });
  for (const [name, content] of Object.entries(files)) {
    writeFileSync(path.join(PUBLIC_DIR, name), content);
  }
  // Warnings do not fail the run; the files are still written.
  if (missing.length) {
    console.warn(`⚠ no llms.txt metadata (add to src/site-meta.js): ${missing.join(", ")}`);
  }
  if (stale.length) {
    console.warn(`⚠ site-meta.js entries for missing routes: ${stale.join(", ")}`);
  }
  console.log(`Wrote ${Object.keys(files).join(", ")} to public/ (${routes.length} routes).`);
}

0 commit comments

Comments
 (0)