-
-
Notifications
You must be signed in to change notification settings - Fork 96
Expand file tree
/
Copy pathgenerateLlmsTxt.ts
More file actions
257 lines (215 loc) · 7.78 KB
/
Copy pathgenerateLlmsTxt.ts
File metadata and controls
257 lines (215 loc) · 7.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env node
/**
* Script to generate multiple llms.txt files following the llms.txt specification.
* See https://llmstxt.org/ for format details.
*/
import * as fs from "fs/promises";
import * as path from "path";
/**
 * Walk a directory tree and collect every Markdown document in it.
 *
 * Sub-directories are descended into concurrently. A file counts as Markdown
 * when its name ends in `.md` or `.mdx`. Results keep the order in which the
 * directory entries were listed, with each sub-directory's files placed where
 * that sub-directory appeared.
 *
 * @param dir The directory to search
 * @returns Array of file paths (each one is `dir` joined with the entry names)
 */
async function findMarkdownFiles(dir: string): Promise<string[]> {
  const markdownExtensions = [".md", ".mdx"];
  const isMarkdownName = (fileName: string): boolean =>
    markdownExtensions.some((extension) => fileName.endsWith(extension));

  const dirents = await fs.readdir(dir, { withFileTypes: true });

  const perEntryResults = await Promise.all(
    dirents.map((dirent): Promise<string[]> | string[] => {
      const fullPath = path.join(dir, dirent.name);

      if (dirent.isDirectory()) {
        return findMarkdownFiles(fullPath);
      }

      // Anything that is not a regular Markdown file (other file types,
      // sockets, symlinks, ...) contributes nothing.
      return dirent.isFile() && isMarkdownName(dirent.name) ? [fullPath] : [];
    }),
  );

  return perEntryResults.flat();
}
/**
 * Get the title from a markdown file.
 *
 * Resolution order:
 * 1. A top-level `title:` key inside the leading frontmatter block
 *    (`---` ... `---`). Surrounding single or double quotes are removed.
 * 2. The first H1 heading (`# ...`) found after the frontmatter.
 * 3. The file name without its extension, with dashes turned into spaces
 *    (`README` becomes "Overview").
 *
 * A file that cannot be read is treated like a file without a title, so the
 * file name fallback is used instead of throwing.
 *
 * @param filePath Path to the markdown file
 * @returns The title or a fallback based on filename
 */
async function getMarkdownTitle(filePath: string): Promise<string> {
  const titleFromFilename = (): string => {
    const basename = path.basename(filePath, path.extname(filePath));
    return basename === "README" ? "Overview" : basename.replace(/-/g, " ");
  };

  let content: string;
  try {
    content = await fs.readFile(filePath, "utf8");
  } catch {
    // Best effort: an unreadable file still gets a usable link label.
    return titleFromFilename();
  }

  // Only the block delimited by the first two `---` lines is frontmatter.
  // Capturing it separately keeps the title lookup from wandering into the
  // document body (e.g. up to a later `---` horizontal rule).
  const frontmatterMatch = content.match(
    /^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/,
  );

  let body = content;
  if (frontmatterMatch) {
    body = content.slice(frontmatterMatch[0].length);

    // Anchor on the start of a line so keys such as `subtitle:` or an
    // indented (nested) `title:` are not mistaken for the page title.
    const titleLine = frontmatterMatch[1].match(/^title:[ \t]*(.*)$/m);
    if (titleLine) {
      const rawValue = titleLine[1].trim();
      // Strip one pair of matching quotes, tolerating a trailing YAML
      // comment. Quotes or apostrophes inside the value are kept.
      const quoted = rawValue.match(/^(["'])(.*?)\1[ \t]*(?:#.*)?$/);
      const title = (quoted ? quoted[2] : rawValue).trim();
      if (title) {
        return title;
      }
    }
  }

  // Look for the first H1 heading, skipping the frontmatter so that a YAML
  // `# comment` line is not reported as a heading.
  const h1Match = body.match(/^#\s+(.+)$/m);
  if (h1Match) {
    return h1Match[1].trim();
  }

  return titleFromFilename();
}
/**
 * Generate a single markdown file for a docs page.
 *
 * The page is copied into `<outputDir>/llms/` under a flattened name: the
 * path relative to `userDocsDir` with directory separators replaced by `-`
 * and the extension normalised to `.md`. A nested `README` becomes
 * `<dir>-overview.md`. A `<!-- Source: ... -->` comment naming the original
 * file (relative to `repoRoot`) is prepended to the content.
 *
 * @param filePath Path to the original markdown file
 * @param outputDir Directory to write the output file
 * @param repoRoot Repository root path
 * @param userDocsDir Root of the user docs; output names are derived from the
 * path relative to this directory
 * @returns The output file name, as a `/`-separated path relative to
 * `outputDir` (`llms/<name>.md`)
 */
async function generateIndividualFile(
  filePath: string,
  outputDir: string,
  repoRoot: string,
  userDocsDir: string
): Promise<string> {
  const content = await fs.readFile(filePath, "utf8");

  // Use forward slashes in the source comment regardless of platform so the
  // generated files are identical on every OS.
  const relativePath = path
    .relative(repoRoot, filePath)
    .split(path.sep)
    .join("/");

  // Flatten the path relative to the user docs dir. Splitting on `path.sep`
  // (instead of replacing only "/") also flattens Windows-style paths, which
  // would otherwise point into sub-directories that are never created.
  let outputName = path
    .relative(userDocsDir, filePath)
    .split(path.sep)
    .join("-")
    .replace(/\.mdx?$/, ".md");

  // Special handling for nested README files: rewrite the *suffix* that was
  // tested, not the first occurrence of the text somewhere in the name.
  const readmeSuffix = "-README.md";
  if (outputName.endsWith(readmeSuffix)) {
    outputName = `${outputName.slice(0, -readmeSuffix.length)}-overview.md`;
  }

  // Create llms subdirectory
  const llmsDir = path.join(outputDir, "llms");
  await fs.mkdir(llmsDir, { recursive: true });

  // Add source comment at the top
  const fileContent = `<!-- Source: ${relativePath} -->\n\n${content}`;
  await fs.writeFile(path.join(llmsDir, outputName), fileContent);

  return `llms/${outputName}`;
}
/**
 * Write an index ("directory") page in the llms.txt layout.
 *
 * The page consists of an H1 title, a blockquote description, and one H2
 * section per entry in `sections`, each listing its files as Markdown links.
 * A file's description, when non-empty, is appended after the link as
 * `: <description>`. Surrounding whitespace is trimmed before writing.
 *
 * @param title Page title
 * @param description Page description
 * @param sections Sections with files
 * @param outputPath Output file path
 */
async function createDirectoryPage(
  title: string,
  description: string,
  sections: Array<{ name: string; files: Array<{ name: string; filename: string; description?: string }> }>,
  outputPath: string
): Promise<void> {
  const renderLink = (file: { name: string; filename: string; description?: string }): string => {
    const link = `- [${file.name}](${file.filename})`;
    return file.description ? `${link}: ${file.description}` : link;
  };

  const renderedSections = sections.map(({ name, files }) => {
    const linkLines = files.map((file) => `${renderLink(file)}\n`).join("");
    // Each section ends with a blank line separating it from the next one.
    return `## ${name}\n\n${linkLines}\n`;
  });

  const page = [`# ${title}\n\n> ${description}\n\n`, ...renderedSections].join("");
  await fs.writeFile(outputPath, page.trim());
}
/**
 * Concatenate a list of documents into one output file.
 *
 * The output starts with an H1 title and a fixed "auto-generated" notice.
 * Each input file follows in the given order, introduced by a
 * `<!-- File: ... -->` comment holding its path relative to `repoRoot`.
 * Files are read one after another; surrounding whitespace of the final
 * text is trimmed before writing.
 *
 * @param files Array of file paths
 * @param outputPath Output file path
 * @param repoRoot Repository root path
 * @param title Title for the file
 */
async function generateFullFile(
  files: string[],
  outputPath: string,
  repoRoot: string,
  title: string
): Promise<void> {
  const pieces: string[] = [
    `# ${title}\n\nThis file is auto-generated from all relevant Markdown files in the Cursorless documentation.\n`,
  ];

  for (const sourcePath of files) {
    const marker = `\n\n<!-- File: ${path.relative(repoRoot, sourcePath)} -->\n\n`;
    const markdown = await fs.readFile(sourcePath, "utf8");
    pieces.push(marker, markdown);
  }

  await fs.writeFile(outputPath, pieces.join("").trim());
}
/**
 * Main function to generate all llms.txt files.
 *
 * Reads the user documentation under
 * `<repo>/packages/cursorless-org-docs/src/docs/user` and writes into
 * `<repo>/packages/cursorless-org/out`:
 * - `llms/<page>.md`: one file per documentation page,
 * - `llms.txt`: the directory page linking to those files,
 * - `llms-full.txt`: all pages concatenated.
 *
 * The repository root is taken from the `CURSORLESS_REPO_ROOT` environment
 * variable. Any failure during generation is logged and terminates the
 * process with exit code 1.
 *
 * @throws Error if `CURSORLESS_REPO_ROOT` is not set
 */
export async function generateLlmsTxt(): Promise<void> {
  type IndexEntry = { name: string; filename: string; description?: string };
  type IndexSection = { name: string; files: IndexEntry[] };

  // Get repo root from environment variable
  const repoRoot = process.env.CURSORLESS_REPO_ROOT;
  if (!repoRoot) {
    throw new Error("CURSORLESS_REPO_ROOT environment variable must be set");
  }

  const docsDir = path.resolve(repoRoot, "packages/cursorless-org-docs/src/docs");
  const outputDir = path.resolve(repoRoot, "packages/cursorless-org/out");

  console.log("Generating llms.txt files...");

  try {
    // Create the output directory if it doesn't exist
    await fs.mkdir(outputDir, { recursive: true });

    // Find user docs only
    const userDocsDir = path.join(docsDir, "user");
    const userFiles = await findMarkdownFiles(userDocsDir);
    console.log(`Found ${userFiles.length} user docs`);

    // Generate individual files for user docs
    const userIndividualFiles: Array<IndexEntry & { isRootReadme: boolean }> = [];

    for (const filePath of userFiles) {
      const outputName = await generateIndividualFile(filePath, outputDir, repoRoot, userDocsDir);
      const title = await getMarkdownTitle(filePath);

      // The root README is the one sitting directly in the user docs dir.
      // Comparing the path relative to that dir avoids false positives for
      // nested directories whose name merely ends in "user" (for example
      // "power-user/README.md") and works with any path separator.
      const isRootReadme = path.relative(userDocsDir, filePath) === "README.md";

      userIndividualFiles.push({
        name: title,
        filename: outputName,
        isRootReadme,
      });
    }

    // Sort files by name
    userIndividualFiles.sort((a, b) => a.name.localeCompare(b.name));

    // Separate root README from other files
    const rootReadme = userIndividualFiles.find((f) => f.isRootReadme);
    const otherFiles = userIndividualFiles.filter((f) => !f.isRootReadme);

    // 1. Generate main llms.txt (directory page)
    const sections: IndexSection[] = [];

    if (rootReadme) {
      sections.push({
        name: "Core Documentation",
        files: [{ name: rootReadme.name, filename: rootReadme.filename }],
      });
    }

    sections.push({
      name: "Optional",
      files: [
        ...otherFiles.map(({ name, filename }) => ({ name, filename })),
        { name: "Complete Documentation", filename: "llms-full.txt", description: "Full concatenated documentation" },
      ],
    });

    await createDirectoryPage(
      "Cursorless",
      "Cursorless is a spoken language for structural navigation and editing. Use voice commands to edit code faster than with a keyboard.",
      sections,
      path.join(outputDir, "llms.txt")
    );

    // 2. Generate llms-full.txt (complete user documentation)
    await generateFullFile(
      userFiles,
      path.join(outputDir, "llms-full.txt"),
      repoRoot,
      "Cursorless Documentation"
    );

    console.log("Successfully generated llms.txt files:");
    console.log(" - llms.txt (main directory page)");
    console.log(" - llms-full.txt (complete documentation)");
    console.log(` - ${userIndividualFiles.length} individual documentation files`);
  } catch (error) {
    console.error("Error generating llms.txt files:", error);
    process.exit(1);
  }
}
// Run the main function directly when this module is loaded. `void` marks the
// returned promise as intentionally not awaited.
void generateLlmsTxt();