Skip to content

Commit 83b1506

Browse files
committed
adjusted doc building gh action to produce similar llms.txt's as kotlinlang.org
1 parent 3f6d6de commit 83b1506

5 files changed

Lines changed: 113 additions & 0 deletions

File tree

.github/workflows/main.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ jobs:
6464
needs: build-job
6565
runs-on: ubuntu-latest
6666
steps:
67+
- name: Checkout repository
68+
uses: actions/checkout@v4
6769
- name: Download artifact
6870
uses: actions/download-artifact@v4
6971
with:
@@ -72,6 +74,8 @@ jobs:
7274
uses: montudor/action-zip@v1
7375
with:
7476
args: unzip -qq ${{ env.ARTIFACT }} -d dir
77+
- name: Generate llms.txt index
78+
run: node docs/scripts/llms/generate-llms-index.mjs dir
7579
- name: Setup Pages
7680
uses: actions/configure-pages@v5
7781
- name: Upload artifact

docs/StardustDocs/cfg/buildprofiles.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,13 @@
1919
<include-in-head>include-head.html</include-in-head>
2020
</variables>
2121
<sitemap change-frequency="monthly"/>
22+
23+
<!-- This generates a txt file for each topic in the Writerside `_llms` folder.
24+
It also generates an `llms.txt` file containing everything in one file.
25+
However, our GitHub action (using docs/scripts/llms) renames that combined file to `llms-full.txt`
26+
and generates a new `llms.txt` serving as a table-of-contents for all generated llm files. -->
2227
<llms-txt/>
28+
2329
<build-profile instance="d">
2430
<variables>
2531
<noindex-content>false</noindex-content>
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
3+
4+
// Absolute URL prefix prepended to every link written into the generated llms.txt index.
// NOTE(review): presumably the public root under which the built docs are served - confirm.
const BASE_URL = 'https://kotlin.github.io/dataframe';
5+
6+
/**
 * Derives a human-readable title for a generated topic text file.
 *
 * The first non-blank line is used when it is a Markdown heading (`# Title`).
 * If there is no such heading, or the heading text is empty, the title is
 * built from the file name instead: `add-column.txt` becomes `Add Column`.
 *
 * @param {string} filePath - Path to a `.txt` topic file.
 * @returns {string} The heading text, a title-cased file name, or (when the
 *   file cannot be read) the bare file name without its `.txt` extension.
 */
function extractTitle(filePath) {
  const baseName = path.basename(filePath, '.txt');

  let content;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch (error) {
    console.warn(` Warning: Could not read title from ${filePath}:`, error.message);
    return baseName;
  }

  // Skip leading blank lines so a heading that is not on the very first line
  // is still found; trim() also drops a trailing '\r' from CRLF files.
  const firstLine = content
    .split('\n')
    .map((line) => line.trim())
    .find((line) => line !== '') ?? '';

  if (firstLine.startsWith('#')) {
    const heading = firstLine.replace(/^#+\s*/, '').trim();
    // A bare "#" would otherwise produce a link with empty text.
    if (heading !== '') {
      return heading;
    }
  }

  // Fallback: title-case the file name, ignoring empty segments that
  // consecutive hyphens would create.
  return baseName
    .split('-')
    .filter((word) => word !== '')
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ');
}
23+
24+
/**
 * Reads an intro text file that lives next to this script.
 *
 * @param {string} fileName - File name, resolved relative to this script's directory.
 * @param {string} fallbackText - Text returned when the file cannot be read.
 * @returns {string} The trimmed file content, or `fallbackText` on any read error.
 */
function readIntroFile(fileName, fallbackText) {
  // fileURLToPath decodes percent-escapes (e.g. spaces) and yields a valid
  // platform path; URL#pathname does neither and is malformed on Windows.
  const scriptDir = path.dirname(fileURLToPath(import.meta.url));
  const introPath = path.join(scriptDir, fileName);
  try {
    return fs.readFileSync(introPath, 'utf-8').trim();
  } catch (error) {
    // A missing intro file is the expected reason to use the fallback;
    // anything else is still tolerated but reported so it is not hidden.
    if (error.code !== 'ENOENT') {
      console.warn(` Warning: Could not read ${introPath}:`, error.message);
    }
    return fallbackText;
  }
}
34+
35+
/**
 * Writes an `llms.txt` table of contents into a built documentation directory.
 *
 * Every `.txt` file found in `<docsDir>/_llms` becomes one Markdown link in
 * the index, preceded by an intro text and a link to `llms-full.txt`.
 * If `<docsDir>/llms.txt` already exists and `llms-full.txt` does not, the
 * existing file is renamed to `llms-full.txt` before the index is written.
 * When the `_llms` folder is missing the function logs a message and returns
 * without writing anything.
 *
 * @param {string} docsDir - Directory containing the built documentation.
 * @returns {void}
 */
function generateLlmsIndex(docsDir) {
  console.log('Starting llms.txt index generation...');

  const llmsFolder = path.join(docsDir, '_llms');
  if (!fs.existsSync(llmsFolder)) {
    console.log(` Folder does not exist: ${llmsFolder} - skipping`);
    return;
  }

  const files = fs.readdirSync(llmsFolder)
    .filter((file) => file.endsWith('.txt'))
    .sort();
  console.log(` Found ${files.length} files in _llms`);

  const intro = readIntroFile('llms-intro.txt', '# Kotlin DataFrame documentation\n\nKotlin DataFrame is a typesafe DSL for structured data processing in Kotlin.');

  const topicLinks = files.map((fileName) => {
    const title = extractTitle(path.join(llmsFolder, fileName));
    // Percent-encode the file name so spaces or reserved characters cannot
    // produce a broken URL; typical slug-style names pass through unchanged.
    const absoluteUrl = `${BASE_URL}/_llms/${encodeURIComponent(fileName)}`;
    return `- [${title}](${absoluteUrl})\n`;
  });

  // Intro, then the link to the combined file, then one link per topic.
  const content = `${intro}\n\n- [Full Content](${BASE_URL}/llms-full.txt)\n\n${topicLinks.join('')}`;

  const outputPath = path.join(docsDir, 'llms.txt');
  const fullPath = path.join(docsDir, 'llms-full.txt');

  // Move existing llms.txt (full content) to llms-full.txt, but never
  // clobber an llms-full.txt that is already there (e.g. on a re-run).
  if (fs.existsSync(outputPath) && !fs.existsSync(fullPath)) {
    fs.renameSync(outputPath, fullPath);
    console.log(` Moved existing llms.txt to llms-full.txt`);
  }

  try {
    fs.writeFileSync(outputPath, content, 'utf-8');
    console.log(` Created: llms.txt (${files.length} files indexed)`);
  } catch (error) {
    console.error(' Error writing llms.txt:', error);
    process.exit(1);
  }
}
79+
80+
// Command-line entry point: expects the built documentation directory as the
// first argument and exits with status 1 when it is missing or generation fails.
const args = process.argv.slice(2);
const [docsDir] = args;

if (!docsDir) {
  console.error('Please provide the documentation directory as an argument.');
  process.exit(1);
}

const startTime = Date.now();
try {
  const resolvedDocsDir = path.resolve(docsDir);
  generateLlmsIndex(resolvedDocsDir);

  // Report wall-clock time in seconds with two decimals.
  const elapsedSeconds = (Date.now() - startTime) / 1000;
  console.log(`\nComplete in ${elapsedSeconds.toFixed(2)}s`);
} catch (error) {
  console.error('\nError during llms.txt generation:', error);
  process.exit(1);
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Kotlin DataFrame Documentation - Full Content
2+
3+
This file contains the combined content of all documentation topics, optimized for LLMs.

docs/scripts/llms/llms-intro.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Kotlin DataFrame Documentation
2+
3+
Kotlin DataFrame is a typesafe DSL for structured data processing in Kotlin.
4+
This index provides links to individual topics in a format optimized for LLMs.

0 commit comments

Comments
 (0)