|
/**
 * Converts built part JSONs (HTML desc) to Markdown using turndown,
 * and generates llms.txt + individual .md files.
 *
 * Mechanically converts documents/*-parts/*.json to llms-documents/ (.md files).
 * Root files (e.g. option.md) are placed at llms-documents/, while part files
 * (e.g. option.title.md) are placed at llms-documents/*-parts/.
 * Type information is extracted from documents/*.json (full schema) via traverse.
 *
 * Prerequisites: JSON must be built first (node build.js --env dev)
 * Usage: node build/build-llms.js --env dev
 */
| 13 | +const fs = require('fs'); |
| 14 | +const fse = require('fs-extra'); |
| 15 | +const path = require('path'); |
| 16 | +const globby = require('globby'); |
| 17 | +const TurndownService = require('turndown'); |
| 18 | +const {gfm} = require('turndown-plugin-gfm'); |
| 19 | +const {traverse} = require('../tool/schemaHelper'); |
| 20 | +const {readConfigEnvFile} = require('./helper'); |
| 21 | + |
| 22 | +// --- Constants --- |
| 23 | + |
/** Languages processed by the build; each is a sub-directory of the release directory. */
const LANGUAGES = ['en', 'zh'];
/** Name of the per-language directory that receives the generated .md files. */
const OUTPUT_DIR_NAME = 'llms-documents';
/** Deepest heading level emitted for a property; deeper keys are clamped to it. */
const MAX_HEADING_DEPTH = 6;

/** Section titles used in llms.txt, keyed by language and then by *-parts directory name. */
const SECTION_LABELS = {
    en: {
        'option-parts': 'Option',
        'option-gl-parts': 'Option GL',
        'api-parts': 'API',
        'tutorial-parts': 'Tutorial'
    },
    zh: {
        'option-parts': '配置项 (Option)',
        'option-gl-parts': 'GL配置项 (Option GL)',
        'api-parts': 'API',
        'tutorial-parts': '教程 (Tutorial)'
    }
};

/** Fixed preamble of every llms.txt: title, blank line, blockquote summary, trailing newline. */
const LLMS_TXT_HEADER = [
    '# Apache ECharts Documentation',
    '',
    '> Apache ECharts is a free, powerful charting and visualization library offering easy ways to add intuitive, interactive, and highly customizable charts to your commercial products.',
    ''
].join('\n');
| 39 | + |
| 40 | +// --- Config --- |
| 41 | + |
// Resolve the build environment from the CLI. `--dev` and `--debug` both force
// the 'dev' environment; otherwise the value of `--env` is used as-is.
const argv = require('yargs').argv;
const isDevRun = argv.dev != null || argv.debug != null || argv.env === 'dev';
const envType = isDevRun ? 'dev' : argv.env;
if (!envType) {
    throw new Error('--env MUST be specified');
}
// `config.releaseDestDir` is read further down to locate input and output directories.
const config = readConfigEnvFile(envType);
| 46 | + |
| 47 | +// --- Turndown --- |
| 48 | + |
// Shared HTML -> Markdown converter: ATX ("#") headings, fenced code blocks,
// with the gfm plugin enabled.
const td = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced'
});
td.use(gfm);
// <iframe> elements are replaced by an empty string, i.e. dropped from the output.
td.addRule('iframe', {
    filter: 'iframe',
    replacement: () => ''
});
| 52 | + |
/**
 * Convert an HTML fragment to Markdown with the shared turndown instance.
 * Runs of three or more newlines are collapsed to one blank line and the
 * result is trimmed on both ends.
 *
 * @param {string} [html] - HTML source; any falsy value yields ''
 * @returns {string} Markdown text
 */
function htmlToMd(html) {
    if (!html) {
        return '';
    }
    return td.turndown(html).replace(/\n{3,}/g, '\n\n').trim();
}
| 56 | + |
| 57 | +// --- Extract type info from full schema JSON --- |
| 58 | + |
/**
 * Extract type and default value info from a full schema JSON by traversing
 * the nested schema tree.
 *
 * Only nodes that carry a truthy `type` or a non-null `default` are recorded.
 * Array types are joined with "|"; object defaults are JSON-stringified and
 * all other defaults are converted with String().
 *
 * @param {string} schemaJsonPath - path to schema JSON (e.g. "documents/option.json")
 * @param {string} docName - e.g. "option", "api"
 * @returns {Object<string, {type: string|null, default: string|null}>}
 *     e.g. { "option.title.show": {type: "boolean", default: "true"} }
 *     Empty when docName is "tutorial" or the schema file does not exist.
 */
function buildTypeMap(schemaJsonPath, docName) {
    // The tutorial doc is skipped outright; a missing schema yields no type info.
    if (docName === 'tutorial' || !fs.existsSync(schemaJsonPath)) {
        return {};
    }

    const schema = JSON.parse(fs.readFileSync(schemaJsonPath, 'utf-8'));
    const typeMap = {};

    traverse(schema, docName, (schemaPath, node) => {
        const hasDefault = node.default != null;
        if (!node.type && !hasDefault) {
            return;
        }

        let type = null;
        if (node.type) {
            type = Array.isArray(node.type) ? node.type.join('|') : node.type;
        }

        let defaultText = null;
        if (hasDefault) {
            defaultText = typeof node.default === 'object'
                ? JSON.stringify(node.default)
                : String(node.default);
        }

        typeMap[schemaPath] = {type, default: defaultText};
    });

    return typeMap;
}
| 84 | + |
// --- Resolve links in HTML ---
// Best-effort rewriting of <a href="#path"> and <a href="api.html#path"> in HTML
// so that turndown produces markdown links pointing to the correct .md files.
// Some source links have non-standard formats (e.g. missing "#", no dot separator)
// that cannot be resolved; these are left as-is or linked to the root file.
| 90 | + |
/**
 * Split linkPath into a part key (first dot-separated segment) and fragment
 * (everything after the first dot), matching the key against partKeys.
 *
 * Match priority: exact match, then case-insensitive match, then a
 * case-insensitive singular/plural match (one side equals the other + "s").
 * Within a fallback tier the first key in the Set's iteration order wins.
 *
 * @param {string} linkPath - e.g. "title.show", "echarts.init"
 * @param {Set<string>} partKeys - e.g. Set{'title','series-bar','geo',...}
 * @returns {{key: string, frag: string|null}|null}
 *     e.g. "title.show" -> {key: "title", frag: "show"}
 *          "angleAxis.axisLabel.interval" -> {key: "angleAxis", frag: "axisLabel.interval"}
 *          "geo" -> {key: "geo", frag: null}
 *          "unknown" -> null
 */
function tryResolvePartKey(linkPath, partKeys) {
    const dotIndex = linkPath.indexOf('.');
    const seg = dotIndex === -1 ? linkPath : linkPath.slice(0, dotIndex);
    const frag = dotIndex === -1 ? null : linkPath.slice(dotIndex + 1);

    // An empty first segment (e.g. ".show") names no part. Without this guard
    // the plural fallback would match it against a part key named "s".
    if (seg === '') {
        return null;
    }

    if (partKeys.has(seg)) {
        return {key: seg, frag};
    }

    // Single pass over the keys: a case-insensitive hit returns immediately,
    // while the first plural/singular hit is remembered as a lower-priority fallback.
    const segLower = seg.toLowerCase();
    let pluralMatch = null;
    for (const candidate of partKeys) {
        const candidateLower = candidate.toLowerCase();
        if (candidateLower === segLower) {
            return {key: candidate, frag};
        }
        const differsByTrailingS = candidateLower === segLower + 's'
            || candidateLower + 's' === segLower;
        if (pluralMatch === null && differsByTrailingS) {
            pluralMatch = candidate;
        }
    }

    return pluralMatch === null ? null : {key: pluralMatch, frag};
}
| 120 | + |
/**
 * Resolve a link path to an href attribute pointing to the correct .md file.
 * If partKeys contains a match for the first segment, link to the individual
 * part file (with the remaining segments as the anchor, when present);
 * otherwise fall back to the root file with the whole link path as the anchor.
 *
 * @param {string} linkPath - e.g. "title.show", "visualMap"
 * @param {Set<string>} partKeys - keys of individual part files
 * @param {string} pathPrefix - path prefix for part files
 *     from part: "option" -> "option.title.md"
 *     from root: "option-parts/option" -> "option-parts/option.title.md"
 *     cross-doc: "../api-parts/api" -> "../api-parts/api.echarts.md"
 * @param {string} rootPath - path prefix for root file fallback
 *     from part: "../option" -> "../option.md#visualMap"
 *     from root: "option" -> "option.md#visualMap"
 *     cross-doc: "../api" -> "../api.md#events"
 * @returns {string} complete `href="..."` attribute string
 */
function resolveLink(linkPath, partKeys, pathPrefix, rootPath) {
    const resolved = tryResolvePartKey(linkPath, partKeys);
    if (resolved) {
        // A null or empty fragment produces a link without an anchor.
        const anchor = resolved.frag ? `#${resolved.frag}` : '';
        return `href="${pathPrefix}.${resolved.key}.md${anchor}"`;
    }
    return `href="${rootPath}.md#${linkPath}"`;
}
| 145 | + |
/**
 * Rewrite internal links and image paths in HTML before turndown conversion.
 * Handles three patterns (double-quoted attributes only):
 *   1. Same-doc:  href="#title.show" -> href="option.title.md#show"
 *   2. Cross-doc: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init"
 *      (targets limited to option-gl, option, api, tutorial)
 *   3. Images:    src="documents/asset/img/..." -> src="../../documents/asset/img/..."
 * Links whose first segment matches no part file fall back to the root file.
 * Links are left untouched when partKeysByDoc has no entry for the relevant doc.
 *
 * @param {string} html - HTML string containing <a href="..."> links
 * @param {Object<string, Set<string>>} partKeysByDoc - part keys for all docs
 * @param {string} docName - current doc name (e.g. "option")
 * @param {boolean} isRoot - whether the current file is a root file
 * @returns {string} HTML with rewritten href attributes and image paths
 */
function tryResolveHtmlLinks(html, partKeysByDoc, docName, isRoot) {
    const sameDocKeys = partKeysByDoc[docName];

    // Root files live in llms-documents/, part files one level deeper in
    // llms-documents/<doc>-parts/, so part files need one extra "../".
    const upDir = isRoot ? '' : '../';
    // root -> part: "option-parts/option"; part -> sibling part: "option"
    const sameDocPartPrefix = isRoot ? `${docName}-parts/${docName}` : docName;
    // root -> root: "option"; part -> root: "../option"
    const sameDocRootPath = `${upDir}${docName}`;
    // Image sources are relative to the directory that contains llms-documents/.
    const imgPrefix = `../${upDir}`;

    return html
        // 1. Same-doc links.
        .replace(/href="#([^"]+)"/g, (match, linkPath) => {
            if (!sameDocKeys) {
                return match;
            }
            return resolveLink(linkPath, sameDocKeys, sameDocPartPrefix, sameDocRootPath);
        })
        // 2. Cross-doc links ("option-gl" is listed before "option" so it wins the alternation).
        .replace(
            /href="(option-gl|option|api|tutorial)\.html#([^"]+)"/g,
            (match, targetDoc, fragment) => {
                const targetKeys = partKeysByDoc[targetDoc];
                if (!targetKeys) {
                    return match;
                }
                return resolveLink(
                    fragment,
                    targetKeys,
                    `${upDir}${targetDoc}-parts/${targetDoc}`,
                    `${upDir}${targetDoc}`
                );
            }
        )
        // 3. Image paths.
        .replace(
            /src="(documents\/asset\/[^"]*)"/g,
            (_, src) => `src="${imgPrefix}${src}"`
        );
}
| 191 | + |
| 192 | +// --- Convert part JSON to Markdown --- |
| 193 | + |
/**
 * Render one property of a part JSON as a list of Markdown lines.
 *
 * The heading level is the number of dot-separated segments in `key` plus one
 * (the file title occupies level 1), clamped to MAX_HEADING_DEPTH. Type and
 * default bullets are emitted only when present, followed by the description
 * converted from HTML, and always a trailing empty line.
 *
 * @param {string} key - property key relative to the file (e.g. "textStyle.color")
 * @param {{desc: (string|undefined)}|null|undefined} entry - part JSON entry;
 *     a missing entry is rendered as a heading without a body
 * @param {{type: (string|null), default: (string|null)}|null|undefined} typeInfo
 * @param {function(string): string} linkResolver - rewrites links in the HTML
 *     description before it is converted to Markdown
 * @returns {string[]} Markdown lines (not yet joined)
 */
function formatPropertyEntry(key, entry, typeInfo, linkResolver) {
    const depth = Math.min(key.split('.').length + 1, MAX_HEADING_DEPTH);
    const lines = [`${'#'.repeat(depth)} ${key}`];

    if (typeInfo && typeInfo.type) {
        lines.push(`- **Type**: \`${typeInfo.type}\``);
    }
    if (typeInfo && typeInfo.default != null) {
        lines.push(`- **Default**: \`${typeInfo.default}\``);
    }

    // Guard against a null/undefined entry instead of throwing on `.desc`.
    const body = entry && entry.desc ? htmlToMd(linkResolver(entry.desc)) : '';
    if (body) {
        lines.push('', body);
    }

    lines.push('');
    return lines;
}
| 203 | + |
/**
 * Convert the entries of one part JSON to a Markdown document body.
 *
 * Each entry is rendered with formatPropertyEntry; type info is looked up in
 * typeMap under "<baseName>.<key>" (or just "<key>" when baseName is falsy).
 * Runs of three or more newlines are collapsed to one blank line and the
 * result ends with exactly one newline.
 *
 * @param {Object<string, Object>} data - parsed part JSON (key -> entry)
 * @param {Object<string, {type: (string|null), default: (string|null)}>} typeMap
 * @param {string} baseName - file base name used as key prefix (e.g. "option.title")
 * @param {function(string): string} linkResolver - passed through to formatPropertyEntry
 * @returns {string} Markdown text
 */
function jsonToMd(data, typeMap, baseName, linkResolver) {
    const lines = [];
    for (const [key, entry] of Object.entries(data)) {
        const fullKey = baseName ? `${baseName}.${key}` : key;
        lines.push(...formatPropertyEntry(key, entry, typeMap[fullKey], linkResolver));
    }
    const markdown = lines.join('\n').replace(/\n{3,}/g, '\n\n').trimEnd();
    return `${markdown}\n`;
}
| 211 | + |
| 212 | +// --- Collect part JSON files --- |
| 213 | + |
/**
 * Collect part JSON files for each *-parts/ directory, excluding outline files
 * (any file whose base name contains "-outline").
 *
 * @param {string[]} partsDirs - paths to *-parts/ directories
 * @returns {Object<string, string[]>} dir path -> absolute JSON file paths
 */
function collectPartJsonFiles(partsDirs) {
    const isOutlineFile = filePath => path.basename(filePath).includes('-outline');

    return partsDirs.reduce((jsonFilesByDir, dir) => {
        const jsonFiles = globby.sync('*.json', {cwd: dir, absolute: true});
        jsonFilesByDir[dir] = jsonFiles.filter(filePath => !isOutlineFile(filePath));
        return jsonFilesByDir;
    }, {});
}
| 228 | + |
| 229 | +// --- Collect file keys for link resolution across docs --- |
| 230 | + |
/**
 * Build a map of doc name -> Set of part keys for all *-parts/ directories.
 * The doc name is the directory base name without its "-parts" suffix.
 * Part keys are file base names with the "<docName>." prefix stripped
 * (e.g. "option.title" -> "title"); names without that prefix are kept as-is.
 * Root files (e.g. "option.json") are excluded since they are not individual part files.
 *
 * @param {string[]} partsDirs - paths to *-parts/ directories
 * @param {Object<string, string[]>} jsonFilesByDir - pre-collected JSON file paths,
 *     must contain an entry for every directory in partsDirs
 * @returns {Object<string, Set<string>>} partKeysByDoc
 *     e.g. { option: Set{'title','geo',...}, api: Set{'echarts',...} }
 */
function buildPartKeysByDoc(partsDirs, jsonFilesByDir) {
    const partKeysByDoc = {};

    for (const dir of partsDirs) {
        const docName = path.basename(dir).replace(/-parts$/, '');
        const docPrefix = `${docName}.`;
        const partKeys = new Set();

        for (const filePath of jsonFilesByDir[dir]) {
            const stem = path.basename(filePath, '.json');
            if (stem === docName) {
                continue;
            }
            partKeys.add(stem.startsWith(docPrefix) ? stem.slice(docPrefix.length) : stem);
        }

        partKeysByDoc[docName] = partKeys;
    }

    return partKeysByDoc;
}
| 252 | + |
| 253 | +// --- Process a single *-parts/ directory --- |
| 254 | + |
/**
 * Convert part JSON files in a single *-parts/ directory to Markdown.
 * Each JSON file becomes a .md file that starts with a "# <base name>" title,
 * followed by the converted entries with resolved links and type info.
 * The root file (base name equal to the doc name, e.g. option.json) is written
 * to outDir itself; every other file goes to outDir/<parts dir name>/.
 *
 * @param {string} partsDir - path to a *-parts/ directory (e.g. "documents/option-parts")
 * @param {string} outDir - output base directory (e.g. "llms-documents")
 * @param {Object} typeMap - property path -> {type, default} map
 * @param {Object<string, Set<string>>} partKeysByDoc - part keys for all docs
 * @param {string[]} jsonFiles - pre-collected JSON file paths for this directory
 * @returns {{name: string, path: string, section: string}[]} output file descriptors:
 *     `name` is relative to outDir, `path` is absolute, `section` is the parts dir name
 */
function processPartsDir(partsDir, outDir, typeMap, partKeysByDoc, jsonFiles) {
    const dirName = path.basename(partsDir);
    const docName = dirName.replace(/-parts$/, '');
    const written = [];

    for (const filePath of jsonFiles) {
        const baseName = path.basename(filePath, '.json');
        const isRoot = baseName === docName;
        const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));

        // Relative link targets depend on where this particular file ends up.
        const linkResolver = html => tryResolveHtmlLinks(html, partKeysByDoc, docName, isRoot);
        const markdown = `# ${baseName}\n\n${jsonToMd(data, typeMap, baseName, linkResolver)}`;

        const fileName = isRoot ? `${baseName}.md` : `${dirName}/${baseName}.md`;
        const fullPath = path.resolve(outDir, fileName);
        fse.ensureDirSync(path.dirname(fullPath));
        fs.writeFileSync(fullPath, markdown, 'utf-8');

        written.push({name: fileName, path: fullPath, section: dirName});
    }

    return written;
}
| 284 | + |
| 285 | +// --- Generate docs for a single language --- |
| 286 | + |
/**
 * Generate all .md files for one language.
 *
 * Reads from <releaseDestDir>/<lang>/documents and writes to
 * <releaseDestDir>/<lang>/llms-documents (created if missing).
 *
 * @param {string} lang - language directory name (e.g. "en")
 * @returns {{name: string, path: string, section: string}[]} descriptors of the
 *     written files, sorted by `name` with localeCompare
 */
function generateDocsForLang(lang) {
    const langDir = path.resolve(config.releaseDestDir, lang);
    const docsDir = path.resolve(langDir, 'documents');
    const outDir = path.resolve(langDir, OUTPUT_DIR_NAME);
    fse.ensureDirSync(outDir);

    // Step 1: Merge the type maps of all full schema JSONs (option.json, api.json, ...)
    // into one lookup: property path -> {type, default},
    // e.g. "option.title.show" -> {type: "boolean", default: "true"}.
    const typeMap = {};
    for (const schemaPath of globby.sync('*.json', {cwd: docsDir, absolute: true})) {
        const docName = path.basename(schemaPath, '.json');
        Object.assign(typeMap, buildTypeMap(schemaPath, docName));
    }

    // Step 2: Collect the part JSON files and part keys of every *-parts/ directory
    // upfront, so that cross-doc links can be resolved against files that actually exist.
    const partsDirs = globby.sync('*-parts', {
        cwd: docsDir,
        absolute: true,
        onlyDirectories: true
    });
    const jsonFilesByDir = collectPartJsonFiles(partsDirs);
    const partKeysByDoc = buildPartKeysByDoc(partsDirs, jsonFilesByDir);

    // Step 3: Convert every part JSON (e.g. option.title.json) to a .md file.
    const files = [];
    for (const dir of partsDirs) {
        files.push(...processPartsDir(dir, outDir, typeMap, partKeysByDoc, jsonFilesByDir[dir]));
    }
    files.sort((a, b) => a.name.localeCompare(b.name));

    console.log(`Generated ${files.length} docs for ${lang}`);
    return files;
}
| 321 | + |
| 322 | +// --- llms.txt --- |
| 323 | + |
/**
 * Write <releaseDestDir>/<lang>/llms.txt, an index of all generated .md files.
 *
 * Files are grouped by `section`; sections are emitted in sorted key order
 * under a "## <label>" heading (the raw section key is used when no label is
 * defined, and the English labels when the language has none). Within a
 * section, root files (names without "/") come first, then the rest by name.
 * Note: the order of `files` itself is not modified, only the per-section copies.
 *
 * @param {string} lang - language directory name (e.g. "en")
 * @param {{name: string, section: string}[]} files - descriptors with `name`
 *     relative to the llms-documents directory
 */
function writeLlmsTxt(lang, files) {
    const langDir = path.resolve(config.releaseDestDir, lang);
    fse.ensureDirSync(langDir);
    const labels = SECTION_LABELS[lang] || SECTION_LABELS.en;

    const groups = {};
    for (const file of files) {
        if (!groups[file.section]) {
            groups[file.section] = [];
        }
        groups[file.section].push(file);
    }

    const isRootFile = file => !file.name.includes('/');
    const rootFirstThenByName = (a, b) => {
        const aIsRoot = isRootFile(a);
        const bIsRoot = isRootFile(b);
        if (aIsRoot !== bIsRoot) {
            return aIsRoot ? -1 : 1;
        }
        return a.name.localeCompare(b.name);
    };

    const lines = [LLMS_TXT_HEADER];
    for (const sectionKey of Object.keys(groups).sort()) {
        lines.push(`## ${labels[sectionKey] || sectionKey}`, '');
        for (const file of groups[sectionKey].sort(rootFirstThenByName)) {
            lines.push(`- [${path.basename(file.name, '.md')}](${OUTPUT_DIR_NAME}/${file.name})`);
        }
        lines.push('');
    }

    const content = `${lines.join('\n').trimEnd()}\n`;
    fs.writeFileSync(path.join(langDir, 'llms.txt'), content, 'utf-8');
    console.log(`Generated ${lang}/llms.txt`);
}
| 355 | + |
| 356 | +// --- Main --- |
| 357 | + |
/**
 * Entry point: for every configured language, generate the individual .md
 * files and then the llms.txt index that lists them. The index is skipped
 * for a language when no .md files were generated.
 */
function buildLlms() {
    console.log('Building llms documents ...');
    for (const lang of LANGUAGES) {
        // Generate the individual .md files from the part JSONs.
        const files = generateDocsForLang(lang);
        // Generate the llms.txt index listing all .md files (only if there are any).
        if (files.length > 0) {
            writeLlmsTxt(lang, files);
        }
    }
    console.log('Build llms documents done.');
}
| 368 | + |
// Export the entry point for programmatic use, and run it immediately when
// this file is executed directly (node build/build-llms.js --env dev).
module.exports = buildLlms;
if (require.main === module) {
    buildLlms();
}
0 commit comments