Skip to content

Commit a2b628f

Browse files
committed
feat(docs): split v3 llms.txt into spec-compliant index and full dump
1 parent 9c4cd00 commit a2b628f

2 files changed

Lines changed: 220 additions & 54 deletions

File tree

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { render } from "svelte/server";
2+
import TurndownService from "turndown";
3+
4+
// Full content dump used by tools that need to chunk page bodies
5+
// (e.g. stacks-mcp-server). The spec-compliant index lives at /llms.txt.
6+
7+
// Shared HTML -> Markdown converter. Headings are emitted in ATX ("#") form
// and code blocks as fenced blocks.
const turndownOptions = { headingStyle: "atx", codeBlockStyle: "fenced" };
const turndownService = new TurndownService(turndownOptions);
11+
12+
// Builds the full-content dump: every markdown page under the public docs
// tree is loaded, rendered to HTML, converted back to Markdown and appended
// under a "Collection" heading derived from its path.
// Returns a UTF-8 plain-text Response containing the whole dump.
export async function GET() {
    const baseUrl = "https://stackoverflow.design/";
    const mdFiles = import.meta.glob("$docs/public/**/**/*.md");

    // One timestamp for the whole dump so the header and every page agree.
    const generatedAt = new Date().toISOString();

    // A Map rather than a plain object: section names come from folder names,
    // and a folder called e.g. "constructor" would otherwise resolve to an
    // inherited Object.prototype member instead of an empty slot.
    const groupedDocs = new Map();

    for (const [path, doc] of Object.entries(mdFiles)) {
        const parts = path.split("/");
        const publicIndex = parts.indexOf("public");

        // Derive the section name from the path. Files under system/<group>/
        // use the sub-group name (e.g. components, base). Files outside system
        // (brand, copy, resources, changelog) use the first segment under public.
        const group =
            parts[publicIndex + 1] === "system"
                ? (parts[publicIndex + 2] ?? "system")
                : (parts[publicIndex + 1] ?? "general");

        if (!groupedDocs.has(group)) {
            groupedDocs.set(group, []);
        }
        groupedDocs.get(group).push({ path, doc });
    }

    // Blocks are separated by one blank line when joined at the end.
    const blocks = [
        [
            "# Site Content for LLMs",
            `# Generated: ${generatedAt}`,
            `# Site URL: ${baseUrl}`,
        ].join("\n"),
    ];

    for (const [group, docs] of groupedDocs) {
        blocks.push(`## Collection: ${group}`);

        // Page modules are independent of each other, so load them
        // concurrently; Promise.all keeps the results in input order.
        const pages = await Promise.all(docs.map(({ doc }) => doc()));

        pages.forEach((page, index) => {
            const { path } = docs[index];
            blocks.push(
                [
                    `### Page: ${page.metadata?.title ?? path}`,
                    `URL: ${baseUrl}${getSlug(path)}`,
                    `Date: ${generatedAt}`,
                    `description: ${page.metadata?.description ?? ""}`,
                    "",
                    "Content:",
                    turndownService.turndown(render(page.default).body),
                    "",
                    "---",
                ].join("\n")
            );
        });
    }

    return new Response(blocks.join("\n\n"), {
        headers: {
            // Page bodies contain non-ASCII text; without an explicit charset
            // clients are free to guess a legacy encoding.
            "Content-Type": "text/plain; charset=utf-8",
        },
    });
}
76+
77+
// Converts a glob key such as "/src/docs/public/brand/motion.md" into the
// site-relative slug ("brand/motion"). Folder index pages keep their trailing
// slash ("system/components/index.md" -> "system/components/").
function getSlug(filePath) {
    return (
        filePath
            .replace("/src/docs/public/", "")
            // Both patterns are anchored to the end of the path so that names
            // which merely contain "index.md" or ".md" (e.g. "reindex.md")
            // are not truncated.
            .replace(/(^|\/)index\.md$/, "$1")
            .replace(/\.md$/, "")
    );
}
Lines changed: 134 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,155 @@
1-
import { render } from "svelte/server";
2-
import TurndownService from "turndown";
1+
import YAML from "yaml";
2+
import structureRaw from "$src/structure.yaml?raw";
33

4-
const turndownService = new TurndownService({
5-
headingStyle: "atx",
6-
codeBlockStyle: "fenced",
7-
});
4+
// Spec-compliant llms.txt index (see https://llmstxt.org). The full content
5+
// dump that tools like stacks-mcp-server chunk is served at /llms-full.txt.
86

9-
export async function GET() {
10-
const baseUrl = "https://stackoverflow.design/";
11-
const mdFiles = import.meta.glob("$docs/public/**/**/*.md");
12-
13-
let groupedDocs = {};
7+
// Site origin without a trailing slash: emitLink appends page paths that
// already start with "/".
const BASE_URL = "https://stackoverflow.design";
148

15-
for (const [path, doc] of Object.entries(mdFiles)) {
16-
const parts = path.split("/");
9+
// Rendered by renderOutput as the H1 heading of the index.
const SITE_TITLE = "Stacks";

// Rendered by renderOutput as the blockquote summary directly under the H1.
const SITE_DESCRIPTION = "Stacks provides everything you need to quickly design, build, and ship coherent experiences across all of Stack Overflow—from the brand and product itself, down to how we send emails and write copy.";
1712

18-
const systemIndex = parts.indexOf("system");
19-
const group = parts[systemIndex + 1];
20-
21-
if (!groupedDocs[group]) {
22-
groupedDocs[group] = [];
23-
}
13+
// Top-level navigation entries whose direct children should each become their
14+
// own H2 section. Everything else collapses to a single H2 per top-level.
15+
// Membership is tested against each top-level entry's `slug` in collectSections.
const FLATTENED_TOP_LEVELS = new Set(["system"]);
2416

25-
groupedDocs[group].push({ path, doc });
17+
// Serves the llms.txt index: parses the navigation structure, looks up each
// page's frontmatter description, and renders one H2 section per navigation
// group. Returns a UTF-8 plain-text Response.
export async function GET() {
    let structure = { navigation: [] };
    try {
        structure = YAML.parse(structureRaw) ?? { navigation: [] };
    } catch (err) {
        // Best effort: a broken structure.yaml yields an index with no
        // sections rather than a failed request.
        console.error("Failed to parse structure.yaml:", err);
    }

    // Only iterate `navigation` when it really is a list; a scalar or mapping
    // in its place would otherwise be walked character-by-character or throw.
    const navigation = Array.isArray(structure.navigation)
        ? structure.navigation
        : [];

    const descriptions = await loadPageDescriptions();
    const sections = collectSections(navigation, descriptions);
    const output = renderOutput(sections);

    return new Response(output, {
        // The site description contains an em dash, so state the encoding
        // explicitly instead of leaving clients to guess.
        headers: { "Content-Type": "text/plain; charset=utf-8" },
    });
}
4233

43-
output += `
34+
// Returns an object mapping each page URL (as produced by pathToUrl) to the
// description found in that page's frontmatter ("" when there is none).
async function loadPageDescriptions() {
    // The markdown is imported as raw text instead of as a compiled Svelte
    // module: only the frontmatter is needed, and skipping the module load
    // keeps this endpoint independent of every page component's imports.
    const rawLoaders = import.meta.glob("$docs/public/**/*.md", {
        query: "?raw",
        import: "default",
    });

    const toEntry = async ([filePath, loadRaw]) => {
        const source = await loadRaw();
        return [pathToUrl(filePath), parseDescription(source)];
    };

    const pairs = await Promise.all(Object.entries(rawLoaders).map(toEntry));
    return Object.fromEntries(pairs);
}
4450

45-
### Page: ${page.metadata?.title ?? path}
46-
URL: ${baseUrl}${getSlug(path)}
47-
Date: ${new Date().toISOString()}
48-
description: ${page.metadata?.description ?? ""}
51+
// Extracts the `description` field from a markdown file's leading YAML
// frontmatter. Returns "" when there is no frontmatter, when it fails to
// parse, or when `description` is not a string.
function parseDescription(raw) {
    const frontmatter = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
    if (!frontmatter) {
        return "";
    }

    let parsed;
    try {
        parsed = YAML.parse(frontmatter[1]);
    } catch {
        // Unparseable frontmatter is treated the same as a missing description.
        return "";
    }

    const description = parsed?.description;
    return typeof description === "string" ? description : "";
}
4961

50-
Content:
51-
${turndownService.turndown(render(page.default).body)}
62+
// Turns the navigation tree into a flat list of { title, links } sections.
// Private entries are skipped. Top-level entries listed in
// FLATTENED_TOP_LEVELS contribute one section per direct child; every other
// top-level entry contributes a single section.
function collectSections(navigation, descriptions) {
    const collected = [];

    for (const entry of navigation) {
        if (entry.private) {
            continue;
        }

        if (!FLATTENED_TOP_LEVELS.has(entry.slug)) {
            pushSection(collected, entry, [entry.slug], descriptions);
            continue;
        }

        for (const child of entry.items ?? []) {
            if (!child.private) {
                pushSection(
                    collected,
                    child,
                    [entry.slug, child.slug],
                    descriptions
                );
            }
        }
    }

    return collected;
}
5288

53-
---
89+
// Gathers every link reachable from `item` and, if there is at least one,
// appends a section for it to `sections`. The heading is the item's title,
// falling back to the slug path joined with " / ".
function pushSection(sections, item, basePath, descriptions) {
    const gathered = [];
    walkItems(item, basePath, descriptions, gathered);

    // A section without links would render as an empty heading; drop it.
    if (gathered.length === 0) {
        return;
    }

    const heading = item.title ?? basePath.join(" / ");
    sections.push({ title: heading, links: gathered });
}
5496

55-
`.trimEnd();
56-
}
97+
// Depth-first walk of a navigation item. Leaves (items without a non-empty
// `items` array) are handed to emitLink; for branches, each child is visited
// with its slug appended to the path, skipping private and external entries.
function walkItems(item, basePath, descriptions, links) {
    const isBranch = Array.isArray(item.items) && item.items.length > 0;
    if (!isBranch) {
        emitLink(item, basePath, descriptions, links);
        return;
    }

    for (const child of item.items) {
        // External links aren't site pages, so they never reach the index.
        const isSitePage = !child.private && !child.externalUrl;
        if (isSitePage) {
            walkItems(
                child,
                basePath.concat([child.slug]),
                descriptions,
                links
            );
        }
    }
}
58109

59-
return new Response(output, {
60-
headers: {
61-
"Content-Type": "text/plain",
62-
},
63-
});
110+
// Appends one markdown list entry for `item` to `links`, provided a page
// file exists for its path. The entry is "- [title](url)" followed by
// ": description" when the page has a non-empty description.
function emitLink(item, basePath, descriptions, links) {
    const joined = basePath.join("/");

    // Folder-index pages are keyed with a trailing slash
    // (brand/color/index.md → /brand/color/), plain .md pages without one
    // (brand/motion.md → /brand/motion). The slash form is tried first.
    const withSlash = `/${joined}/`;
    const withoutSlash = `/${joined}`;

    let pagePath;
    if (descriptions[withSlash] != null) {
        pagePath = withSlash;
    } else if (descriptions[withoutSlash] != null) {
        pagePath = withoutSlash;
    } else {
        // No page file matches this entry; skip it instead of emitting a
        // dead link.
        return;
    }

    const summary = cleanDescription(descriptions[pagePath]);
    const label = item.title ?? basePath[basePath.length - 1];
    const entry = `- [${label}](${BASE_URL}${pagePath})`;

    links.push(summary ? `${entry}: ${summary}` : entry);
}
65126

66-
function getSlug(filePath) {
67-
let slug = filePath;
127+
// Descriptions may contain inline HTML for the rendered page; strip it for
128+
// llms.txt. Loop until stable so unclosed tags can't reintroduce the pattern
129+
// (CodeQL js/incomplete-multi-character-sanitization).
130+
// Returns `input` with every "<...>" run removed and all whitespace runs
// collapsed to single spaces, trimmed at both ends.
function cleanDescription(input) {
    let current = input;
    let stripped = current.replace(/<[^>]+>/g, "");

    // Keep stripping until a pass changes nothing, so removing one tag can
    // never leave a newly formed tag behind.
    while (stripped !== current) {
        current = stripped;
        stripped = current.replace(/<[^>]+>/g, "");
    }

    return stripped.replace(/\s+/g, " ").trim();
}
68139

69-
slug = slug
140+
// Converts a glob key such as "/src/docs/public/brand/motion.md" into the
// site-relative URL used as a lookup key ("/brand/motion"). Folder index
// pages map to the folder URL with a trailing slash
// ("/src/docs/public/brand/color/index.md" -> "/brand/color/"), and the
// root index.md maps to "/".
function pathToUrl(path) {
    const slug = path
        .replace("/src/docs/public/", "")
        .replace(/\.md$/, "");

    // Drop a final "index" segment but keep the slash in front of it. The
    // (^|\/) alternative also covers the root index page, which has no
    // leading folder, and leaves names such as "reindex" untouched.
    return `/${slug.replace(/(^|\/)index$/, "$1")}`;
}
73149

74-
return slug;
150+
// Renders the final llms.txt text: an H1 with the site title, a blockquote
// with the site description, then one H2 block per section containing its
// links, one per line. Blocks are separated by a single blank line and the
// text ends with exactly one newline, including when `sections` is empty.
function renderOutput(sections) {
    const blocks = [
        `# ${SITE_TITLE}`,
        `> ${SITE_DESCRIPTION}`,
        ...sections.map((s) => `## ${s.title}\n\n${s.links.join("\n")}`),
    ];
    return `${blocks.join("\n\n")}\n`;
}

0 commit comments

Comments
 (0)