diff --git a/package.json b/package.json index 23bdd9c0..7f993e7d 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,15 @@ "scripts": { "ci:deps": "pnpm i --frozen-lockfile", "ci:demo:build": "nx sb:build @markdown-editor/demo", - "ci:docs:build": "node scripts/generate-docs.mjs && npx -y @diplodoc/cli -i ./docs-src -o ./docs-dist", + "ci:docs:build": "node scripts/docs/index.mjs build && npx -y @diplodoc/cli -i ./docs-src -o ./docs-dist", + "docs:extract": "node scripts/docs/index.mjs extract", + "docs:enrich:prompts": "node scripts/docs/index.mjs enrich --mode prompts", + "docs:enrich": "node scripts/docs/index.mjs enrich --mode enrich", + "docs:enrich:apply": "node scripts/docs/index.mjs enrich --mode apply", + "docs:enrich:agent": "echo 'Give the agent: scripts/docs/enrich-agent.md' && echo 'Raw docs: docs-gen/raw/' && echo 'Output: docs-gen/enriched/'", + "docs:assemble": "node scripts/docs/index.mjs generate && node scripts/docs/index.mjs assemble", + "docs:build": "node scripts/docs/index.mjs build && npx -y @diplodoc/cli -i ./docs-src -o ./docs-dist", + "docs:generate": "node scripts/docs/index.mjs extract && node scripts/docs/index.mjs enrich --mode prompts", "ci:test:visual": "nx playwright @markdown-editor/demo", "ci:test:unit": "nx run-many -t test --verbose", "ci:test:esbuild": "nx run-many -t test:esbuild --verbose", diff --git a/scripts/docs/assembler.mjs b/scripts/docs/assembler.mjs new file mode 100644 index 00000000..217f88bf --- /dev/null +++ b/scripts/docs/assembler.mjs @@ -0,0 +1,237 @@ +import {existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync} from 'node:fs'; +import {basename, join} from 'node:path'; +import process from 'node:process'; + +import {config} from './config.mjs'; +import {logger} from './logger.mjs'; +import {parseFrontmatter, slugify, stripFrontmatter, yamlQuote} from './utils.mjs'; + +const {order: CATEGORY_ORDER, labels: CATEGORY_LABELS} = config.categories; + +/** + * Assembles enriched/raw extension 
docs into the docs-src/ output directory + */ +export class Assembler { + constructor(docsGenDir, outDir) { + this.docsGenDir = docsGenDir; + this.rawDir = join(docsGenDir, 'raw'); + this.enrichedDir = join(docsGenDir, 'enriched'); + this.irPath = join(docsGenDir, 'extensions.json'); + this.outDir = outDir; + this.extensionsOutDir = join(outDir, 'extensions'); + } + + /** + * Runs the full assembly pipeline + */ + run() { + if (!existsSync(this.rawDir)) { + logger.error(`${this.rawDir} not found. Run extract first.`); + process.exit(1); + } + if (!existsSync(this.outDir)) { + logger.error(`${this.outDir} not found. Run generate first.`); + process.exit(1); + } + + const extensions = existsSync(this.irPath) + ? JSON.parse(readFileSync(this.irPath, 'utf-8')) + : []; + + const version = this.resolveVersion(extensions); + logger.info(`Assembling extension docs for v${version}...`); + + const docs = this.collectDocs(); + logger.info( + `Found ${docs.size} extension docs (enriched: ${[...docs.values()].filter((d) => d.source === 'enriched').length})`, + ); + + const pages = this.writePages(docs, extensions); + this.writeIndex(pages, extensions, version); + + const tocItems = this.generateTocItems(pages); + this.patchTocYaml(tocItems); + this.patchIndexMd(version); + + logger.success(`Assembled ${pages.length} extension pages in ${this.extensionsOutDir}/`); + logger.info('Updated toc.yaml and index.md'); + } + + /** + * Collects docs preferring enriched over raw + */ + collectDocs() { + const docs = new Map(); + + if (existsSync(this.rawDir)) { + for (const file of readdirSync(this.rawDir).filter((f) => f.endsWith('.md'))) { + const name = basename(file, '.md'); + docs.set(name, { + name, + source: 'raw', + content: readFileSync(join(this.rawDir, file), 'utf-8'), + }); + } + } + + if (existsSync(this.enrichedDir)) { + for (const file of readdirSync(this.enrichedDir).filter((f) => f.endsWith('.md'))) { + const name = basename(file, '.md'); + docs.set(name, { + name, + 
source: 'enriched', + content: readFileSync(join(this.enrichedDir, file), 'utf-8'), + }); + } + } + + return docs; + } + + /** + * Writes individual extension pages to docs-src/extensions/ + */ + writePages(docs, extensions) { + mkdirSync(this.extensionsOutDir, {recursive: true}); + const pages = []; + + for (const [name, doc] of docs) { + const extInfo = extensions.find((e) => e.name === name); + const category = extInfo?.category || parseFrontmatter(doc.content).category || 'other'; + + const slug = slugify(name); + writeFileSync(join(this.extensionsOutDir, `${slug}.md`), stripFrontmatter(doc.content)); + + pages.push({ + name, + slug, + category, + relativePath: `extensions/${slug}.md`, + hasNodes: extInfo?.nodes?.length > 0, + hasMarks: extInfo?.marks?.length > 0, + hasActions: extInfo?.actions?.length > 0, + source: doc.source, + }); + } + + return pages; + } + + /** + * Writes the extensions index page with categorized tables + */ + writeIndex(pages, extensions, version) { + const lines = [ + '# Extensions Reference', + '', + `Documentation generated for \`@gravity-ui/markdown-editor@${version}\`.`, + '', + ]; + + for (const category of CATEGORY_ORDER) { + const categoryPages = pages + .filter((p) => p.category === category) + .sort((a, b) => a.name.localeCompare(b.name)); + if (categoryPages.length === 0) continue; + + lines.push(`## ${CATEGORY_LABELS[category]} Extensions`, ''); + lines.push('| Extension | Nodes | Marks | Actions |'); + lines.push('|-----------|-------|-------|---------|'); + + for (const page of categoryPages) { + const ext = extensions.find((e) => e.name === page.name); + const nodes = ext?.nodes?.join(', ') || '-'; + const marks = ext?.marks?.join(', ') || '-'; + const actions = ext?.actions?.length || 0; + lines.push( + `| [${page.name}](${page.relativePath}) | ${nodes} | ${marks} | ${actions} |`, + ); + } + lines.push(''); + } + + writeFileSync(join(this.outDir, 'extensions-index.md'), lines.join('\n')); + } + + /** + * Generates YAML 
toc entries for the extensions section + */ + generateTocItems(pages) { + const lines = []; + lines.push(' - name: Extensions'); + lines.push(' href: extensions-index.md'); + lines.push(' items:'); + + for (const category of CATEGORY_ORDER) { + const categoryPages = pages + .filter((p) => p.category === category) + .sort((a, b) => a.name.localeCompare(b.name)); + if (categoryPages.length === 0) continue; + + lines.push(` - name: ${yamlQuote(CATEGORY_LABELS[category])}`); + lines.push(' items:'); + for (const page of categoryPages) { + lines.push(` - name: ${yamlQuote(page.name)}`); + lines.push(` href: ${page.relativePath}`); + } + } + + return lines.join('\n'); + } + + /** + * Patches toc.yaml to include the extensions section + */ + patchTocYaml(extensionsTocItems) { + const tocPath = join(this.outDir, 'toc.yaml'); + + if (!existsSync(tocPath)) { + logger.warn('toc.yaml not found, creating minimal version'); + const content = + [ + 'title: Markdown Editor', + 'href: index.md', + 'items:', + ' - name: Overview', + ' href: index.md', + extensionsTocItems, + ].join('\n') + '\n'; + writeFileSync(tocPath, content); + return; + } + + let content = readFileSync(tocPath, 'utf-8'); + // Remove previous Extensions section before appending fresh one + const extSectionRe = /\n {2}- name: Extensions\n[\s\S]*?(?=\n {2}- name:|\n?$)/; + content = content.replace(extSectionRe, ''); + content = content.trimEnd() + '\n' + extensionsTocItems + '\n'; + writeFileSync(tocPath, content); + } + + /** + * Patches index.md to add a link to the extensions reference + */ + patchIndexMd(version) { + const indexPath = join(this.outDir, 'index.md'); + if (!existsSync(indexPath)) return; + + let content = readFileSync(indexPath, 'utf-8'); + content = content.replace(/\n## Extensions[\s\S]*?(?=\n## |\n?$)/, ''); + content = content.trimEnd() + '\n\n## Extensions\n\n'; + content += `- [Extensions Reference](extensions-index.md) (v${version})\n`; + writeFileSync(indexPath, content); + } + + /** + 
* Reads version from the first extension's raw doc frontmatter + */ + resolveVersion(extensions) { + if (extensions[0]) { + const raw = join(this.rawDir, `${extensions[0].name}.md`); + if (existsSync(raw)) { + return parseFrontmatter(readFileSync(raw, 'utf-8')).version || 'unknown'; + } + } + return 'unknown'; + } +} diff --git a/scripts/docs/config.mjs b/scripts/docs/config.mjs new file mode 100644 index 00000000..b64a244c --- /dev/null +++ b/scripts/docs/config.mjs @@ -0,0 +1,120 @@ +/** + * Configuration for the extension documentation generation pipeline + */ +export const config = { + ai: { + provider: 'openai', + model: 'gpt-4o-mini', + temperature: 0.3, + maxTokens: 1000, + }, + + prompts: { + description: { + system: `You are a technical writer for the @gravity-ui/markdown-editor library — a ProseMirror-based WYSIWYG and markup editor. Write concise, accurate documentation in English.`, + user: `Write a description of the "{name}" extension (2-4 sentences). +Focus on what this extension adds to the editor from a user's perspective. +Do not repeat the extension name as the first word. + +Category: {category} +ProseMirror nodes: {nodes} +ProseMirror marks: {marks} +Actions: {actions} +Included in presets: {presets} + +Source code: +{sourceCode} + +Write ONLY the description text, no markdown headers.`, + }, + + syntaxGuide: { + system: `You are a technical writer for a markdown editor library. Write clear syntax guides.`, + user: `Write a syntax guide for the "{name}" extension. + +Explain the markdown/markup syntax this extension handles: +- Show the syntax patterns with inline code +- Explain how they render +- Note any variations or edge cases + +If this is a behavior extension with no markdown syntax, write: "This extension does not define custom markdown syntax." 
+ +Metadata: +- Category: {category} +- Input rules: {inputRules} +- Serializer hints: {serializerHints} + +Test examples: +{markupExamples} + +Source code: +{sourceCode} + +Write markdown content without the section header.`, + }, + + serialization: { + system: `You are a technical writer for a markdown editor library.`, + user: `Describe how the "{name}" extension serializes its content back to markdown. + +What markdown output does it produce? Include code examples where helpful. + +If the extension doesn't produce markdown output, write: "This extension does not produce markdown output." + +Serializer hints from code: {serializerHints} +Nodes: {nodes} +Marks: {marks} + +Source code: +{sourceCode} + +Write markdown content without the section header.`, + }, + + useCases: { + system: `You are a technical writer for the @gravity-ui/markdown-editor library.`, + user: `Write 2-4 bullet points describing typical use cases for the "{name}" extension. +When would a developer include this extension in their editor setup? + +Category: {category} +Nodes: {nodes} +Marks: {marks} +Presets: {presets} + +Write ONLY bullet points in markdown. Each should be one concise sentence.`, + }, + + examples: { + system: `You are a technical writer creating markdown documentation examples.`, + user: `Provide 2-3 clear markdown examples for the "{name}" extension. + +Each example should: +1. Have a brief one-line description +2. Show the markdown syntax in a code block +3. Be practical and realistic + +Existing test examples: +{markupExamples} + +Serializer hints: {serializerHints} +Input rules: {inputRules} + +If this extension has no markdown syntax, write: "This extension does not have markdown syntax examples." 
+ +Write in markdown format.`, + }, + }, + + skipEnrichment: ['BaseInputRules', 'BaseKeymap', 'BaseStyles', 'ReactRenderer', 'SharedState'], + + categories: { + order: ['markdown', 'yfm', 'additional', 'behavior', 'base'], + labels: { + markdown: 'Markdown', + yfm: 'YFM', + additional: 'Additional', + behavior: 'Behavior', + base: 'Base', + }, + }, +}; diff --git a/scripts/docs/enrich-agent.md b/scripts/docs/enrich-agent.md new file mode 100644 index 00000000..4a4bdaf3 --- /dev/null +++ b/scripts/docs/enrich-agent.md @@ -0,0 +1,70 @@ +# Enrich Extension Docs — Agent Instructions + +You are enriching documentation for the `@gravity-ui/markdown-editor` library. + +## What to do + +1. Read `docs-gen/extensions.json` to get the list of all extensions and their metadata (IR). +2. For each extension that has a raw doc in `docs-gen/raw/{Name}.md`: + - Read the raw doc file. + - Find all `` markers. + - For each marker, read the extension source code at the path from `dirPath` in the IR, then write a replacement text (see section templates below). + - Write the result to `docs-gen/enriched/{Name}.md` — same content as raw but with markers replaced by your text. +3. Skip these extensions (infrastructure, no user-facing docs needed): + - BaseInputRules, BaseKeymap, BaseStyles, ReactRenderer, SharedState + +## Section templates + +### `description` + +Write 2-4 sentences describing what this extension does from a user's perspective. Do not start with the extension name. Write in English. + +### `serialization` + +Describe what markdown output this extension produces when serializing. Show syntax patterns with inline code. If the extension has no markdown output (behavior extensions), write: "This extension does not produce markdown output." + +### `syntaxGuide` + +Explain the markdown syntax this extension handles. Show patterns, explain how they render, note variations. If no markdown syntax, write: "This extension does not define custom markdown syntax." 
+ +### `useCases` + +Write 2-4 bullet points: when would a developer include this extension? One concise sentence per bullet. + +## How to find source code + +Each extension IR entry has `dirPath` — relative path to the extension directory. Read the key files there: +- `index.ts` — main extension wiring (actions, keymaps, plugins) +- `*Specs/index.ts` — schema, parser, serializer definitions +- `*Specs/const.ts` — node/mark names, attribute enums +- `*Specs/serializer.ts` — how content is serialized to markdown + +## How to write the enriched file + +Take the raw file content, replace each `` with your text, write to `docs-gen/enriched/{Name}.md`. Keep everything else unchanged (frontmatter, deterministic sections, etc.). + +## Scope control + +- `--all` — enrich all extensions (default) +- `--only Bold,Heading,YfmNote` — enrich only listed extensions +- `--category markdown` — enrich only extensions in a category + +When the user gives you this file, they may add a scope line at the bottom. If no scope is specified, do all. + +## Quality guidelines + +- Be accurate — base descriptions on the actual source code, not guesses. +- Be concise — 2-4 sentences for description, not a wall of text. +- Be specific — mention actual syntax like `**text**`, `{% note %}`, `$formula$`. +- Don't invent features that aren't in the code. +- For behavior extensions (Clipboard, History, Search, etc.) the syntaxGuide and serialization sections should say "does not define/produce" rather than being left empty. 
+ +--- + +## Scope + + + + + +--all diff --git a/scripts/docs/enricher.mjs b/scripts/docs/enricher.mjs new file mode 100644 index 00000000..ea42c09e --- /dev/null +++ b/scripts/docs/enricher.mjs @@ -0,0 +1,259 @@ +import {existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync} from 'node:fs'; +import {basename, join} from 'node:path'; +import process from 'node:process'; + +import {config} from './config.mjs'; +import {logger} from './logger.mjs'; + +const AI_MARKER_RE = //g; + +/** + * Enriches raw extension docs with AI-generated content + */ +export class Enricher { + constructor(docsGenDir) { + this.docsGenDir = docsGenDir; + this.rawDir = join(docsGenDir, 'raw'); + this.enrichedDir = join(docsGenDir, 'enriched'); + this.promptsDir = join(docsGenDir, 'prompts'); + this.irPath = join(docsGenDir, 'extensions.json'); + } + + /** + * Loads extensions IR and raw doc file list + */ + load() { + if (!existsSync(this.rawDir)) { + logger.error(`${this.rawDir} not found. Run extract first.`); + process.exit(1); + } + if (!existsSync(this.irPath)) { + logger.error(`${this.irPath} not found. 
Run extract first.`); + process.exit(1); + } + + this.extensions = JSON.parse(readFileSync(this.irPath, 'utf-8')); + this.rawFiles = readdirSync(this.rawDir).filter((f) => f.endsWith('.md')); + logger.info(`Found ${this.rawFiles.length} raw docs, ${this.extensions.length} extensions`); + } + + /** + * Generates prompt JSON files for manual AI processing + */ + generatePrompts(opts) { + mkdirSync(this.promptsDir, {recursive: true}); + + let count = 0; + for (const file of this.rawFiles) { + const extName = basename(file, '.md'); + if (opts.only && !opts.only.includes(extName)) continue; + if (config.skipEnrichment?.includes(extName)) continue; + + const rawContent = readFileSync(join(this.rawDir, file), 'utf-8'); + const extInfo = this.extensions.find((e) => e.name === extName); + if (!extInfo) continue; + + const markers = [...rawContent.matchAll(new RegExp(AI_MARKER_RE.source, 'g'))].map( + (m) => m[1], + ); + if (markers.length === 0) continue; + + const sourceCode = this.readExtensionSource(extInfo); + const prompts = {}; + for (const section of markers) { + prompts[section] = this.buildPrompt( + section, + extName, + rawContent, + sourceCode, + extInfo, + ); + } + + writeFileSync( + join(this.promptsDir, `${extName}.json`), + JSON.stringify({extension: extName, prompts}, null, 2), + ); + count++; + } + + return count; + } + + /** + * Enriches raw docs by calling the OpenAI API + */ + async enrichWithAI(opts) { + mkdirSync(this.enrichedDir, {recursive: true}); + + let count = 0; + for (const file of this.rawFiles) { + const extName = basename(file, '.md'); + if (opts.only && !opts.only.includes(extName)) continue; + + const rawContent = readFileSync(join(this.rawDir, file), 'utf-8'); + const extInfo = this.extensions.find((e) => e.name === extName); + if (!extInfo) continue; + + const sourceCode = this.readExtensionSource(extInfo); + let enrichedContent = rawContent; + let enriched = false; + + const replacements = []; + for (const match of 
rawContent.matchAll(new RegExp(AI_MARKER_RE.source, 'g'))) { + const section = match[1]; + const marker = match[0]; + const prompt = this.buildPrompt(section, extName, rawContent, sourceCode, extInfo); + + logger.info(` Enriching ${extName}.${section}...`); + try { + const result = await this.callOpenAI(prompt, opts.model); + replacements.push({marker, result}); + enriched = true; + } catch (err) { + logger.warn(`failed to enrich ${extName}.${section}: ${err.message}`); + replacements.push({marker, result: ``}); + } + } + + for (const {marker, result} of replacements) { + enrichedContent = enrichedContent.replace(marker, result); + } + + if (enriched) { + writeFileSync(join(this.enrichedDir, `${extName}.md`), enrichedContent); + count++; + } + } + + return count; + } + + /** + * Applies manually prepared AI responses from docs-gen/responses/ directory + */ + applyResponses() { + mkdirSync(this.enrichedDir, {recursive: true}); + const responsesDir = join(this.docsGenDir, 'responses'); + + if (!existsSync(responsesDir)) { + logger.info(`No responses directory found at ${responsesDir}`); + logger.info('To use manual enrichment:'); + logger.info(' 1. Run: node scripts/docs/index.mjs enrich --mode prompts'); + logger.info(` 2. Process prompts from ${this.promptsDir}/`); + logger.info(` 3. Save responses in ${responsesDir}/ExtName.json`); + logger.info(' 4. 
Run: node scripts/docs/index.mjs enrich --mode apply'); + return 0; + } + + let count = 0; + for (const file of this.rawFiles) { + const extName = basename(file, '.md'); + const responsePath = join(responsesDir, `${extName}.json`); + if (!existsSync(responsePath)) continue; + + const rawContent = readFileSync(join(this.rawDir, file), 'utf-8'); + const responses = JSON.parse(readFileSync(responsePath, 'utf-8')); + + let enrichedContent = rawContent; + for (const [section, text] of Object.entries(responses)) { + enrichedContent = enrichedContent.replace(``, text); + } + + writeFileSync(join(this.enrichedDir, `${extName}.md`), enrichedContent); + count++; + } + + return count; + } + + /** + * Reads relevant source files from an extension directory for AI context + */ + readExtensionSource(extInfo) { + const dir = extInfo.dirPath; + if (!existsSync(dir)) return ''; + + const files = []; + const walk = (d) => { + for (const entry of readdirSync(d, {withFileTypes: true})) { + const full = join(d, entry.name); + if (entry.isDirectory()) { + if (!entry.name.includes('NodeView') && entry.name !== 'node_modules') { + walk(full); + } + } else if (/\.(ts|tsx)$/.test(entry.name) && !entry.name.endsWith('.test.ts')) { + files.push(full); + } + } + }; + walk(dir); + + return files + .slice(0, 8) + .map((f) => { + const content = readFileSync(f, 'utf-8'); + const truncated = + content.length > 3000 ? content.slice(0, 3000) + '\n// ... 
truncated' : content; + return `--- ${f} ---\n${truncated}`; + }) + .join('\n\n'); + } + + /** + * Builds a prompt string for a given section using config templates + */ + buildPrompt(section, extName, rawContent, sourceCode, extInfo) { + const templateDef = config.prompts[section]; + if (!templateDef) { + return `Describe the "${section}" aspect of the ${extName} extension.`; + } + + const vars = { + name: extName, + category: extInfo.category || 'unknown', + nodes: extInfo.nodes?.join(', ') || 'none', + marks: extInfo.marks?.join(', ') || 'none', + actions: extInfo.actions?.join(', ') || 'none', + presets: extInfo.presets?.join(', ') || 'not in standard presets', + inputRules: extInfo.inputRules?.join(', ') || 'none', + serializerHints: extInfo.serializerHints?.join(', ') || 'none', + markupExamples: extInfo.markupExamples?.map((e) => `- \`${e}\``).join('\n') || 'none', + sourceCode, + rawContent, + }; + + const interpolate = (tpl) => tpl.replace(/\{(\w+)\}/g, (_, key) => vars[key] ?? 
''); + return `${interpolate(templateDef.system)}\n\n${interpolate(templateDef.user)}`; + } + + /** + * Calls the OpenAI chat completions API + */ + async callOpenAI(prompt, model) { + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) throw new Error('OPENAI_API_KEY environment variable is required'); + + const response = await fetch('https://api.openai.com/v1/chat/completions', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify({ + model: model || config.ai.model, + messages: [{role: 'user', content: prompt}], + temperature: config.ai.temperature, + max_tokens: config.ai.maxTokens, + }), + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`OpenAI API error ${response.status}: ${text}`); + } + + const data = await response.json(); + return data.choices[0].message.content.trim(); + } +} diff --git a/scripts/docs/extractor/constants.mjs b/scripts/docs/extractor/constants.mjs new file mode 100644 index 00000000..c2a637af --- /dev/null +++ b/scripts/docs/extractor/constants.mjs @@ -0,0 +1,126 @@ +/** + * Extracts constant declarations, enums, and object literals from TypeScript source. + * Returns a Map of name -> resolved string value. 
+ */ +export function extractConstants(content) { + const names = new Map(); + let m; + + // Simple const with string literal: const FOO = 'bar' + const constRe = /(?:export\s+)?const\s+(\w+)\s*=\s*['"]([^'"]+)['"]/g; + while ((m = constRe.exec(content))) { + names.set(m[1], m[2]); + } + + // Enum members: enum Foo { Bar = 'baz' } + const enumRe = /(?:export\s+)?enum\s+(\w+)\s*\{([^}]+)\}/g; + while ((m = enumRe.exec(content))) { + const enumName = m[1]; + const entries = m[2].matchAll(/(\w+)\s*=\s*['"]([^'"]+)['"]/g); + for (const e of entries) { + names.set(`${enumName}.${e[1]}`, e[2]); + } + } + + // Object literal properties: const Obj = { Prop: 'val' | varRef } + const objRe = /(?:export\s+)?const\s+(\w+)\s*=\s*\{([^}]+)\}/gs; + while ((m = objRe.exec(content))) { + const objName = m[1]; + const propRe = /(\w+)\s*:\s*(?:['"]([^'"]+)['"]|(\w+))/g; + let pm; + while ((pm = propRe.exec(m[2]))) { + names.set(`${objName}.${pm[1]}`, pm[2] || pm[3]); + } + } + + // Const-to-const references: const A = B + const refs = []; + const refRe = /(?:export\s+)?const\s+(\w+)\s*=\s*(\w+)\s*;/g; + while ((m = refRe.exec(content))) { + refs.push([m[1], m[2]]); + } + + // Multi-pass resolution for chained references (A -> B -> 'value') + for (let pass = 0; pass < 3; pass++) { + for (const [target, source] of refs) { + if (!names.has(target) && names.has(source)) { + names.set(target, names.get(source)); + } + } + for (const [key, val] of names) { + if (typeof val === 'string' && names.has(val) && key !== val) { + names.set(key, names.get(val)); + } + } + } + + return names; +} + +/** + * Resolves a single raw name against the constants map. 
+ */ +export function resolveConstant(raw, constants) { + if (!raw) return raw; + if (raw.startsWith("'") || raw.startsWith('"')) return raw.slice(1, -1); + + if (constants.has(raw)) { + const val = constants.get(raw); + if (constants.has(val)) return constants.get(val); + return val; + } + + // Try matching as Enum.Member suffix + for (const [key, val] of constants) { + if (key.endsWith(`.${raw}`) || key === raw) return val; + } + return raw; +} + +/** + * Resolves a list of raw names, expanding enum/object prefixes when needed. + */ +export function resolveAllConstants(rawList, constants) { + const resolved = []; + + for (const raw of rawList) { + let val = resolveConstant(raw, constants); + + // Try dotted reference directly + if (val === raw && raw.includes('.') && constants.has(raw)) { + val = constants.get(raw); + } + + // Chase reference chains up to 5 levels deep + let depth = 0; + while (constants.has(val) && depth < 5) { + val = constants.get(val); + depth++; + } + + // If still unresolved, try expanding all members of the prefix + if (val === raw && constants.size > 0) { + const prefix = raw + '.'; + const members = []; + for (const [key, v] of constants) { + if (key.startsWith(prefix)) { + let memberVal = v; + let md = 0; + while (constants.has(memberVal) && md < 5) { + memberVal = constants.get(memberVal); + md++; + } + members.push(memberVal); + } + } + if (members.length > 0) { + resolved.push(...members); + continue; + } + } + + resolved.push(val); + } + + return [...new Set(resolved)]; +} diff --git a/scripts/docs/extractor/index.mjs b/scripts/docs/extractor/index.mjs new file mode 100644 index 00000000..f24ad4a2 --- /dev/null +++ b/scripts/docs/extractor/index.mjs @@ -0,0 +1,187 @@ +import {existsSync, mkdirSync, rmSync, writeFileSync} from 'node:fs'; +import {basename, dirname, join, relative} from 'node:path'; + +import {logger} from '../logger.mjs'; +import {listDirs, readAllTsFiles, readText} from '../utils.mjs'; + +import {extractConstants, 
resolveAllConstants} from './constants.mjs'; +import {generateRawMd} from './markdown-gen.mjs'; +import {getPresetsForExtension, parsePresets} from './presets.mjs'; +import { + extractActions, + extractAddMark, + extractAddNode, + extractInputRules, + extractKeymaps, + extractMarkSpecs, + extractMdPlugins, + extractNodeSpecs, + extractOptionsType, + extractPlugins, + extractSerializerSyntax, + extractTestExamples, +} from './regex.mjs'; + +const CATEGORIES = ['base', 'behavior', 'markdown', 'yfm', 'additional']; + +/** + * Scans extension directories, builds IR, and generates raw markdown docs + */ +export class ExtensionExtractor { + constructor(editorPkg, outDir) { + this.editorPkg = editorPkg; + this.extensionsDir = join(editorPkg, 'src/extensions'); + this.presetsDir = join(editorPkg, 'src/presets'); + this.outDir = outDir; + this.rawDir = join(outDir, 'raw'); + } + + /** + * Scans a single extension directory and returns its metadata. + */ + scan(extDir, category) { + const name = basename(extDir); + const allFiles = readAllTsFiles(extDir); + const nonTestFiles = allFiles.filter((f) => !f.path.endsWith('.test.ts')); + const allContent = nonTestFiles.map((f) => f.content).join('\n'); + + const constants = extractConstants(allContent); + + const specsFiles = nonTestFiles.filter( + (f) => + f.path.includes('Specs') || + f.path.includes('const') || + f.path.includes('schema') || + f.path.includes('parser') || + (f.path.endsWith('/index.ts') && dirname(f.path) === extDir), + ); + const specsContent = specsFiles.map((f) => f.content).join('\n'); + + const rawNodes = [...extractAddNode(specsContent), ...extractNodeSpecs(specsContent)]; + const rawMarks = [...extractAddMark(specsContent), ...extractMarkSpecs(specsContent)]; + + const nodes = resolveAllConstants(rawNodes, constants); + const marks = resolveAllConstants(rawMarks, constants); + const actions = resolveAllConstants(extractActions(allContent), constants); + const keymaps = extractKeymaps(allContent); + 
const inputRules = extractInputRules(allContent); + const plugins = [...new Set(extractPlugins(allContent))]; + const mdPlugins = [...new Set(extractMdPlugins(allContent))]; + + const serializerContent = nonTestFiles + .filter((f) => f.path.includes('serializer') || f.path.includes('Specs')) + .map((f) => f.content) + .join('\n'); + const serializerHints = extractSerializerSyntax(serializerContent); + + const indexFile = nonTestFiles.find( + (f) => f.path.endsWith('/index.ts') && dirname(f.path) === extDir, + ); + const options = indexFile ? extractOptionsType(indexFile.content) : []; + const specsIndexFile = nonTestFiles.find( + (f) => f.path.includes('Specs') && f.path.endsWith('/index.ts'), + ); + if (specsIndexFile && options.length === 0) { + options.push(...extractOptionsType(specsIndexFile.content)); + } + + const testFiles = allFiles.filter((f) => f.path.endsWith('.test.ts')); + const markupExamples = testFiles.flatMap((f) => extractTestExamples(f.content)); + + return { + name, + dirPath: relative('.', extDir), + category, + nodes, + marks, + actions, + keymaps, + inputRules, + plugins, + mdPlugins, + serializerHints, + options, + markupExamples: [...new Set(markupExamples)], + presets: [], + }; + } + + /** + * Scans all extension categories and returns full IR array + */ + scanAll() { + const extensions = []; + for (const category of CATEGORIES) { + const catDir = join(this.extensionsDir, category); + for (const dir of listDirs(catDir)) { + const extDir = join(catDir, dir); + try { + extensions.push(this.scan(extDir, category)); + } catch (err) { + logger.warn(`failed to scan ${extDir}: ${err.message}`); + } + } + } + return extensions; + } + + /** + * Runs the full extraction pipeline: scan, resolve presets, write IR + raw docs + */ + run() { + logger.info('Extracting extension documentation...'); + + if (existsSync(this.outDir)) { + rmSync(this.rawDir, {recursive: true, force: true}); + } + mkdirSync(this.rawDir, {recursive: true}); + + const version = 
this.getEditorVersion(); + logger.info(`Editor version: ${version}`); + + const presetMap = parsePresets(this.presetsDir); + const extensions = this.scanAll(); + + for (const ext of extensions) { + ext.presets = getPresetsForExtension(presetMap, ext.name); + } + + logger.info(`Found ${extensions.length} extensions`); + + writeFileSync(join(this.outDir, 'extensions.json'), JSON.stringify(extensions, null, 2)); + + for (const ext of extensions) { + const rawMd = generateRawMd(ext, presetMap, version); + writeFileSync(join(this.rawDir, `${ext.name}.md`), rawMd); + } + + this.printSummary(extensions); + } + + /** + * Reads the editor package version + */ + getEditorVersion() { + const pkg = JSON.parse(readText(join(this.editorPkg, 'package.json'))); + return pkg.version; + } + + /** + * Prints a summary table of extracted extensions. + */ + printSummary(extensions) { + const summary = extensions.map((e) => { + const parts = [e.name]; + if (e.nodes.length) parts.push(`nodes:${e.nodes.join(',')}`); + if (e.marks.length) parts.push(`marks:${e.marks.join(',')}`); + if (e.actions.length) parts.push(`actions:${e.actions.length}`); + if (e.plugins.length) parts.push(`plugins:${e.plugins.length}`); + return ` ${parts.join(' | ')}`; + }); + + logger.info('\nExtracted extensions:'); + logger.info(summary.join('\n')); + logger.success(`Raw docs written to ${this.rawDir}/`); + logger.success(`IR written to ${join(this.outDir, 'extensions.json')}`); + } +} diff --git a/scripts/docs/extractor/markdown-gen.mjs b/scripts/docs/extractor/markdown-gen.mjs new file mode 100644 index 00000000..29bc05ea --- /dev/null +++ b/scripts/docs/extractor/markdown-gen.mjs @@ -0,0 +1,157 @@ +import {getPresetsForExtension} from './presets.mjs'; + +/** + * Generates a raw markdown doc page for a single extension. 
+ */ +export function generateRawMd(ext, presetMap, version) { + const presets = getPresetsForExtension(presetMap, ext.name); + const lines = []; + + // Frontmatter + lines.push('---'); + lines.push(`extension: ${ext.name}`); + lines.push(`version: ${version}`); + lines.push(`category: ${ext.category}`); + lines.push(`generated: ${new Date().toISOString()}`); + lines.push('---'); + lines.push(''); + lines.push(`# ${ext.name}`); + lines.push(''); + lines.push(''); + lines.push(''); + + // Presets + lines.push('## Presets'); + lines.push(''); + if (presets.length > 0) { + for (const p of presets) lines.push(`- ${p}`); + } else { + lines.push('Not included in any standard preset (use directly).'); + } + lines.push(''); + + // Schema + if (ext.nodes.length > 0 || ext.marks.length > 0) { + lines.push('## Schema'); + lines.push(''); + for (const n of ext.nodes) { + lines.push(`### Node: \`${n}\``); + lines.push(''); + } + for (const m of ext.marks) { + lines.push(`### Mark: \`${m}\``); + lines.push(''); + } + } + + // Actions + if (ext.actions.length > 0) { + lines.push('## Actions'); + lines.push(''); + lines.push('| Action ID |'); + lines.push('|-----------|'); + for (const a of ext.actions) { + lines.push(`| \`${a}\` |`); + } + lines.push(''); + } + + // Keymaps + if (ext.keymaps.length > 0) { + lines.push('## Keymaps'); + lines.push(''); + lines.push('| Key |'); + lines.push('|-----|'); + for (const k of ext.keymaps) { + lines.push(`| \`${k}\` |`); + } + lines.push(''); + } + + // Input Rules + if (ext.inputRules.length > 0) { + lines.push('## Input Rules'); + lines.push(''); + lines.push('| Pattern |'); + lines.push('|---------|'); + for (const r of ext.inputRules) { + lines.push(`| \`${r}\` |`); + } + lines.push(''); + } + + // Markdown Parsing + lines.push('## Markdown Parsing'); + lines.push(''); + if (ext.mdPlugins.length > 0) { + lines.push('Uses markdown-it plugins:'); + lines.push(''); + for (const p of ext.mdPlugins) lines.push(`- \`${p}\``); + } else if 
(ext.nodes.length > 0 || ext.marks.length > 0) { + lines.push('Uses built-in markdown-it tokens (CommonMark).'); + } else { + lines.push('No markdown parsing.'); + } + lines.push(''); + + // Markdown Serialization + lines.push('## Markdown Serialization'); + lines.push(''); + if (ext.serializerHints.length > 0) { + lines.push('Serializer patterns:'); + lines.push(''); + for (const s of ext.serializerHints) { + const escaped = s.replace(/\|/g, '\\|'); + lines.push(`- \`${escaped}\``); + } + } else { + lines.push(''); + } + lines.push(''); + + // Plugins + if (ext.plugins.length > 0) { + lines.push('## Plugins'); + lines.push(''); + for (const p of ext.plugins) lines.push(`- \`${p}\``); + lines.push(''); + } + + // Options + if (ext.options.length > 0) { + lines.push('## Options'); + lines.push(''); + lines.push('| Option | Type |'); + lines.push('|--------|------|'); + for (const o of ext.options) { + lines.push(`| \`${o.name}\` | \`${o.type}\` |`); + } + lines.push(''); + } + + // Examples from tests + if (ext.markupExamples.length > 0) { + lines.push('## Markup Examples'); + lines.push(''); + lines.push('Extracted from tests:'); + lines.push(''); + for (const ex of ext.markupExamples.slice(0, 10)) { + lines.push('```markdown'); + lines.push(ex); + lines.push('```'); + lines.push(''); + } + } + + // AI placeholder sections + lines.push('## Syntax Guide'); + lines.push(''); + lines.push(''); + lines.push(''); + lines.push('## Use Cases'); + lines.push(''); + lines.push(''); + lines.push(''); + + return lines.join('\n'); +} diff --git a/scripts/docs/extractor/presets.mjs b/scripts/docs/extractor/presets.mjs new file mode 100644 index 00000000..45219935 --- /dev/null +++ b/scripts/docs/extractor/presets.mjs @@ -0,0 +1,53 @@ +import {existsSync} from 'node:fs'; +import {join} from 'node:path'; + +import {readText} from '../utils.mjs'; + +const PRESET_DEFS = [ + {name: 'ZeroPreset', file: 'zero.ts', parent: null}, + {name: 'CommonMarkPreset', file: 'commonmark.ts', 
parent: 'ZeroPreset'}, + {name: 'DefaultPreset', file: 'default.ts', parent: 'CommonMarkPreset'}, + {name: 'YfmPreset', file: 'yfm.ts', parent: 'DefaultPreset'}, + {name: 'FullPreset', file: 'full.ts', parent: 'YfmPreset'}, +]; + +/** + * Builds a Map of preset name -> list of extension names (with inheritance) + */ +export function parsePresets(presetsDir) { + const presetMap = new Map(); + + for (const def of PRESET_DEFS) { + const filePath = join(presetsDir, def.file); + if (!existsSync(filePath)) continue; + + const content = readText(filePath); + const directUses = []; + const useRe = /\.use\(\s*(\w+)/g; + let m; + while ((m = useRe.exec(content))) { + if (!m[1].endsWith('Preset') && !m[1].endsWith('Specs')) { + directUses.push(m[1]); + } + } + + // Inherit parent preset's extensions + const inherited = def.parent ? presetMap.get(def.parent) || [] : []; + presetMap.set(def.name, [...new Set([...inherited, ...directUses])]); + } + + return presetMap; +} + +/** + * Returns the list of preset names that include a given extension + */ +export function getPresetsForExtension(presetMap, extName) { + const presets = []; + for (const [presetName, extensions] of presetMap) { + if (extensions.includes(extName)) { + presets.push(presetName); + } + } + return presets; +} diff --git a/scripts/docs/extractor/regex.mjs b/scripts/docs/extractor/regex.mjs new file mode 100644 index 00000000..a54c9fb1 --- /dev/null +++ b/scripts/docs/extractor/regex.mjs @@ -0,0 +1,196 @@ +/** + * Extracts ProseMirror node registrations from builder.addNode() calls + */ +export function extractAddNode(content) { + const nodes = []; + // builder.addNode(name, callback) — second arg starts with `(` or line-end + const re = /builder\s*\.addNode\(\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])\s*,\s*(?:\(|$)/gm; + let m; + while ((m = re.exec(content))) { + nodes.push(m[3] || m[1] || m[2]); + } + // Chained: ).addNode(name, ...) 
+ const re2 = /\)\s*\.addNode\(\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])\s*,\s*(?:\(|$)/gm; + while ((m = re2.exec(content))) { + nodes.push(m[3] || m[1] || m[2]); + } + return nodes; +} + +/** + * Extracts ProseMirror mark registrations from builder.addMark() calls + */ +export function extractAddMark(content) { + const marks = []; + // builder.addMark(name, ...) — require builder prefix to avoid matching tr.addMark + const re = /builder\s*\.addMark\(\s*\n?\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])\s*,/g; + let m; + while ((m = re.exec(content))) { + marks.push(m[3] || m[1] || m[2]); + } + // Chained: newline + indent + .addMark — excludes inline tr.addMark + const re2 = /\)\s*\n\s*\.addMark\(\s*\n?\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])\s*,/g; + while ((m = re2.exec(content))) { + marks.push(m[3] || m[1] || m[2]); + } + return marks; +} + +/** + * Extracts node specs from .addNodeSpec({ name: ... }) calls + */ +export function extractNodeSpecs(content) { + const nodes = []; + const re = /\.addNodeSpec\(\s*\{\s*name:\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])/g; + let m; + while ((m = re.exec(content))) { + nodes.push(m[3] || m[1] || m[2]); + } + return nodes; +} + +/** + * Extracts mark specs from .addMarkSpec({ name: ... 
}) calls + */ +export function extractMarkSpecs(content) { + const marks = []; + const re = /\.addMarkSpec\(\s*\{\s*name:\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])/g; + let m; + while ((m = re.exec(content))) { + marks.push(m[3] || m[1] || m[2]); + } + return marks; +} + +/** + * Extracts action IDs from .addAction() calls + */ +export function extractActions(content) { + const actions = []; + const re = /\.addAction\(\s*(?:(\w+\.\w+)|(\w+)|['"]([^'"]+)['"])/g; + let m; + while ((m = re.exec(content))) { + actions.push(m[3] || m[1] || m[2]); + } + return actions; +} + +/** + * Extracts plugin function names from .addPlugin() calls + */ +export function extractPlugins(content) { + const plugins = []; + const re = /\.addPlugin\(\s*(\w+)/g; + let m; + while ((m = re.exec(content))) { + plugins.push(m[1]); + } + return plugins; +} + +/** + * Extracts keymap bindings from .addKeymap() callbacks + */ +export function extractKeymaps(content) { + const keymaps = []; + const re = /\.addKeymap\(\s*\([^)]*\)\s*=>\s*\(\{([^}]*(?:\{[^}]*\}[^}]*)*)\}\)/gs; + let m; + while ((m = re.exec(content))) { + const block = m[1]; + const keyRe = /['"]?([^'",:]+)['"]?\s*:/g; + let km; + while ((km = keyRe.exec(block))) { + const key = km[1].trim(); + if (key && !key.startsWith('//') && !key.startsWith('...')) { + keymaps.push(key); + } + } + } + return [...new Set(keymaps)]; +} + +/** + * Extracts input rule patterns (markInputRule, wrappingInputRule, etc.) 
+ */ +export function extractInputRules(content) { + const rules = []; + const re = + /(?:markInputRule|textblockTypeInputRule|nodeInputRule|wrappingInputRule|inlineNodeInputRule)\s*\(\s*(?:\/([^/]+)\/|{[^}]*open:\s*'([^']*)'[^}]*close:\s*'([^']*)'[^}]*})/g; + let m; + while ((m = re.exec(content))) { + if (m[1]) { + rules.push(`/${m[1]}/`); + } else if (m[2] && m[3]) { + rules.push(`${m[2]}...${m[3]}`); + } + } + return rules; +} + +/** + * Extracts markdown-it plugin registrations from md.use() calls + */ +export function extractMdPlugins(content) { + const plugins = []; + const re = /md\.use\(\s*(\w+)/g; + let m; + while ((m = re.exec(content))) { + plugins.push(m[1]); + } + return plugins; +} + +/** + * Extracts the Options type fields from `export type FooOptions = { ... }` + */ +export function extractOptionsType(content) { + const fields = []; + const re = /export\s+type\s+\w+Options\s*(?:=\s*(?:\w+\s*&\s*)?)?(?:\{([^}]*)\}|([^;]*))/gs; + const m = re.exec(content); + if (!m) return fields; + const block = m[1] || m[2] || ''; + const fieldRe = /(\w+)\??\s*:\s*([^;]+)/g; + let fm; + while ((fm = fieldRe.exec(block))) { + const name = fm[1].trim(); + const type = fm[2].trim().replace(/\s+/g, ' '); + if (name && !name.startsWith('//')) { + fields.push({name, type}); + } + } + return fields; +} + +/** + * Extracts markup examples from same() assertions in test files + */ +export function extractTestExamples(content) { + const examples = []; + const re = /same\(\s*'([^']+)'/g; + let m; + while ((m = re.exec(content))) { + examples.push(m[1]); + } + const re2 = /same\(\s*`([^`]+)`/g; + while ((m = re2.exec(content))) { + examples.push(m[1]); + } + return examples; +} + +/** + * Extracts serializer syntax patterns from state.write() and state.text() calls + */ +export function extractSerializerSyntax(content) { + const snippets = []; + const writeRe = /state\.write\(\s*[`'"]([^`'"]*)[`'"]/g; + let m; + while ((m = writeRe.exec(content))) { + if (m[1].trim()) 
snippets.push(m[1]); + } + const textRe = /state\.text\(\s*[`'"]([^`'"]*)[`'"]/g; + while ((m = textRe.exec(content))) { + if (m[1].trim()) snippets.push(m[1]); + } + return snippets; +} diff --git a/scripts/docs/generator.mjs b/scripts/docs/generator.mjs new file mode 100644 index 00000000..c0a6ceeb --- /dev/null +++ b/scripts/docs/generator.mjs @@ -0,0 +1,244 @@ +import { + cpSync, + existsSync, + mkdirSync, + readFileSync, + readdirSync, + rmSync, + writeFileSync, +} from 'node:fs'; +import {dirname, join} from 'node:path'; +import process from 'node:process'; + +import {logger} from './logger.mjs'; +import {slugify, yamlQuote} from './utils.mjs'; + +// Source docs use ##### as a metadata header: "##### Category / Title" +const HEADER_RE = /^#{5}\s+(.+)$/; + +const GITHUB_RAW_RE = + /https:\/\/raw\.githubusercontent\.com\/gravity-ui\/markdown-editor\/(?:refs\/heads\/[^/]+|[^/]+)\/docs\//g; + +/** + * Generates the Diplodoc docs-src/ site from hand-written docs/ markdown files + */ +export class Generator { + constructor(docsDir, outDir) { + this.docsDir = docsDir; + this.outDir = outDir; + } + + /** + * Runs the full generation pipeline + */ + run() { + this.clean(); + + const docs = this.collectDocs(); + const {categories, topLevel} = this.groupByCategory(docs); + + this.writeYfmConfig(); + this.writeDocFiles(docs); + this.writeTocYaml(categories, topLevel); + this.writeIndexMd(categories, topLevel); + this.copyAssets(); + + logger.success( + `Generated docs-src/: ${docs.length} pages in ${categories.size} categories + ${topLevel.length} top-level`, + ); + } + + /** + * Removes and recreates the output directory + */ + clean() { + if (existsSync(this.outDir)) { + rmSync(this.outDir, {recursive: true, force: true}); + } + mkdirSync(this.outDir, {recursive: true}); + } + + /** + * Reads all markdown files and parses their ##### headers + */ + collectDocs() { + if (!existsSync(this.docsDir)) { + logger.error(`source directory "${this.docsDir}" does not exist`); + 
process.exit(1);
+        }
+
+        const files = readdirSync(this.docsDir)
+            .filter((f) => f.endsWith('.md'))
+            .sort();
+        const docs = [];
+
+        for (const file of files) {
+            const content = readFileSync(join(this.docsDir, file), 'utf-8');
+            const lines = content.split('\n');
+            const parsed = this.parseHeader(lines[0]);
+
+            if (!parsed) {
+                logger.warn(`Skipping ${file}: no ##### header found`);
+                continue;
+            }
+
+            docs.push({
+                sourceFile: file,
+                category: parsed.category,
+                title: parsed.title,
+                content: lines.slice(1).join('\n').replace(/^\n+/, ''),
+            });
+        }
+
+        return docs;
+    }
+
+    /**
+     * Extracts category and title from a ##### header line.
+     * "##### Category / Title" -> {category, title}; a header without "/"
+     * is top-level ({category: null}). Any additional "/" separators are
+     * kept as part of the title instead of being silently dropped.
+     */
+    parseHeader(firstLine) {
+        const match = firstLine.match(HEADER_RE);
+        if (!match) return null;
+
+        const raw = match[1].trim();
+        const parts = raw.split('/').map((s) => s.trim());
+        if (parts.length === 1) {
+            return {category: null, title: parts[0]};
+        }
+        return {category: parts[0], title: parts.slice(1).join(' / ')};
+    }
+
+    /**
+     * Splits docs into categorized and top-level groups
+     */
+    groupByCategory(docs) {
+        const categories = new Map();
+        const topLevel = [];
+
+        for (const doc of docs) {
+            if (doc.category) {
+                if (!categories.has(doc.category)) categories.set(doc.category, []);
+                categories.get(doc.category).push(doc);
+            } else {
+                topLevel.push(doc);
+            }
+        }
+
+        return {categories, topLevel};
+    }
+
+    /**
+     * Computes relative output path from doc category and title slugs
+     */
+    computeOutputPath(doc) {
+        if (doc.category) {
+            return join(slugify(doc.category), slugify(doc.title) + '.md');
+        }
+        return slugify(doc.title) + '.md';
+    }
+
+    /**
+     * Rewrites absolute GitHub raw URLs to relative paths
+     */
+    rewriteAssetUrls(content, doc) {
+        const prefix = doc.category ?
'../' : './'; + return content.replace(GITHUB_RAW_RE, prefix); + } + + /** + * Writes all doc pages, checking for duplicate output paths + */ + writeDocFiles(docs) { + const seen = new Map(); + for (const doc of docs) { + const outPath = this.computeOutputPath(doc); + if (seen.has(outPath)) { + logger.error( + `duplicate output path "${outPath}" from "${doc.sourceFile}" and "${seen.get(outPath)}"`, + ); + process.exit(1); + } + seen.set(outPath, doc.sourceFile); + } + + for (const doc of docs) { + const outPath = join(this.outDir, this.computeOutputPath(doc)); + mkdirSync(dirname(outPath), {recursive: true}); + writeFileSync(outPath, this.rewriteAssetUrls(doc.content, doc)); + } + } + + /** + * Generates toc.yaml for the Diplodoc site + */ + writeTocYaml(categories, topLevel) { + const lines = [ + 'title: Markdown Editor', + 'href: index.md', + 'items:', + ' - name: Overview', + ' href: index.md', + ]; + + for (const [category, docs] of categories) { + lines.push(` - name: ${yamlQuote(category)}`); + lines.push(' items:'); + for (const doc of docs) { + lines.push(` - name: ${yamlQuote(doc.title)}`); + lines.push(` href: ${this.computeOutputPath(doc)}`); + } + } + + for (const doc of topLevel) { + lines.push(` - name: ${yamlQuote(doc.title)}`); + lines.push(` href: ${this.computeOutputPath(doc)}`); + } + + writeFileSync(join(this.outDir, 'toc.yaml'), lines.join('\n') + '\n'); + } + + /** + * Generates the index.md landing page + */ + writeIndexMd(categories, topLevel) { + const lines = [ + '# Markdown Editor', + '', + 'Documentation for the Gravity UI Markdown Editor.', + '', + ]; + + for (const [category, docs] of categories) { + lines.push(`## ${category}`, ''); + for (const doc of docs) { + lines.push(`- [${doc.title}](${this.computeOutputPath(doc)})`); + } + lines.push(''); + } + + if (topLevel.length > 0) { + for (const doc of topLevel) { + lines.push(`- [${doc.title}](${this.computeOutputPath(doc)})`); + } + lines.push(''); + } + + 
writeFileSync(join(this.outDir, 'index.md'), lines.join('\n')); + } + + /** + * Copies the assets/ directory to the output + */ + copyAssets() { + const assetsDir = join(this.docsDir, 'assets'); + if (existsSync(assetsDir)) { + cpSync(assetsDir, join(this.outDir, 'assets'), {recursive: true}); + } + } + + /** + * Writes the .yfm Diplodoc config file + */ + writeYfmConfig() { + writeFileSync(join(this.outDir, '.yfm'), 'allowHTML: true\n'); + } +} diff --git a/scripts/docs/index.mjs b/scripts/docs/index.mjs new file mode 100644 index 00000000..00eee390 --- /dev/null +++ b/scripts/docs/index.mjs @@ -0,0 +1,116 @@ +import process from 'node:process'; + +import {Assembler} from './assembler.mjs'; +import {Enricher} from './enricher.mjs'; +import {ExtensionExtractor} from './extractor/index.mjs'; +import {Generator} from './generator.mjs'; +import {logger} from './logger.mjs'; + +const EDITOR_PKG = 'packages/editor'; +const DOCS_DIR = 'docs'; +const DOCS_SRC_DIR = 'docs-src'; +const DOCS_GEN_DIR = 'docs-gen'; + +/** + * Parses CLI arguments into a command and options object + */ +function parseArgs() { + const args = process.argv.slice(2); + const command = args[0]; + const opts = {mode: 'prompts', only: null, model: null}; + + for (let i = 1; i < args.length; i++) { + switch (args[i]) { + case '--mode': + opts.mode = args[++i]; + break; + case '--only': + opts.only = args[++i]?.split(','); + break; + case '--model': + opts.model = args[++i]; + break; + } + } + + return {command, opts}; +} + +function runGenerate() { + new Generator(DOCS_DIR, DOCS_SRC_DIR).run(); +} + +function runExtract() { + new ExtensionExtractor(EDITOR_PKG, DOCS_GEN_DIR).run(); +} + +async function runEnrich(opts) { + const enricher = new Enricher(DOCS_GEN_DIR); + enricher.load(); + + switch (opts.mode) { + case 'prompts': { + const count = enricher.generatePrompts(opts); + logger.success(`Generated ${count} prompt files in ${DOCS_GEN_DIR}/prompts/`); + logger.info('\nNext steps:'); + logger.info(' 
- Process prompts through your AI tool'); + logger.info(` - Save responses in ${DOCS_GEN_DIR}/responses/ExtName.json`); + logger.info(' - Run: node scripts/docs/index.mjs enrich --mode apply'); + logger.info('\nOr with OpenAI API:'); + logger.info(' OPENAI_API_KEY=sk-... node scripts/docs/index.mjs enrich --mode enrich'); + break; + } + case 'enrich': { + const count = await enricher.enrichWithAI(opts); + logger.success(`Enriched ${count} docs in ${DOCS_GEN_DIR}/enriched/`); + break; + } + case 'apply': { + const count = enricher.applyResponses(); + logger.success(`Applied responses to ${count} docs in ${DOCS_GEN_DIR}/enriched/`); + break; + } + default: + logger.error(`Unknown enrich mode: ${opts.mode}. Use --mode prompts|enrich|apply`); + process.exit(1); + } +} + +function runAssemble() { + new Assembler(DOCS_GEN_DIR, DOCS_SRC_DIR).run(); +} + +/** + * Full pipeline: generate -> extract -> assemble + */ +function runBuild() { + runGenerate(); + runExtract(); + runAssemble(); +} + +async function main() { + const {command, opts} = parseArgs(); + + const commands = { + generate: runGenerate, + extract: runExtract, + enrich: () => runEnrich(opts), + assemble: runAssemble, + build: runBuild, + }; + + const handler = commands[command]; + if (!handler) { + logger.error(`Unknown command: ${command}`); + logger.info('Available commands: generate, extract, enrich, assemble, build'); + process.exit(1); + } + + await handler(); +} + +main().catch((err) => { + logger.error(err); + process.exit(1); +}); diff --git a/scripts/docs/logger.mjs b/scripts/docs/logger.mjs new file mode 100644 index 00000000..ddb71c62 --- /dev/null +++ b/scripts/docs/logger.mjs @@ -0,0 +1,25 @@ +/* eslint-disable no-console */ + +/** + * Logging utility for doc-generation scripts + */ +export class Logger { + info(...args) { + console.log(...args); + } + + warn(...args) { + console.warn('Warning:', ...args); + } + + error(...args) { + console.error('Error:', ...args); + } + + success(...args) { + 
console.log('✓', ...args);
+    }
+}
+
+/** Shared logger instance. */
+export const logger = new Logger();
diff --git a/scripts/docs/utils.mjs b/scripts/docs/utils.mjs
new file mode 100644
index 00000000..64fd8e23
--- /dev/null
+++ b/scripts/docs/utils.mjs
+import {existsSync, readFileSync, readdirSync, statSync} from 'node:fs';
+import {join} from 'node:path';
+
+/**
+ * Converts a string to a URL-friendly slug
+ */
+export function slugify(str) {
+    return str
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, '-')
+        .replace(/(^-|-$)/g, '');
+}
+
+/**
+ * Wraps a string in double quotes if it contains YAML special characters
+ */
+export function yamlQuote(str) {
+    if (/[:#"'{}[\],&*?|>!%@`]/.test(str)) {
+        return `"${str.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
+    }
+    return str;
+}
+
+/**
+ * Reads a file as UTF-8 text
+ */
+export function readText(filePath) {
+    return readFileSync(filePath, 'utf-8');
+}
+
+/**
+ * Lists subdirectories starting with an uppercase letter
+ */
+export function listDirs(dir) {
+    if (!existsSync(dir)) return [];
+    return readdirSync(dir).filter((name) => {
+        const full = join(dir, name);
+        return statSync(full).isDirectory() && /^[A-Z]/.test(name);
+    });
+}
+
+/**
+ * Recursively finds regular files matching a regex pattern.
+ * Directories whose names happen to match (e.g. a folder named "Foo.ts")
+ * are skipped so callers can safely read every returned path as a file.
+ */
+export function findFiles(dir, pattern) {
+    const results = [];
+    if (!existsSync(dir)) return results;
+    for (const entry of readdirSync(dir, {recursive: true})) {
+        const full = join(dir, entry);
+        if (pattern.test(entry) && statSync(full).isFile()) {
+            results.push(full);
+        }
+    }
+    return results;
+}
+
+/**
+ * Reads all .ts/.tsx files in a directory (recursive)
+ */
+export function readAllTsFiles(dir) {
+    const files = findFiles(dir, /\.tsx?$/);
+    return files.map((f) => ({path: f, content: readText(f)}));
+}
+
+/**
+ * Strips YAML frontmatter from markdown content
+ */
+export function stripFrontmatter(content) {
+    if (content.startsWith('---')) {
+        const end = content.indexOf('---', 3);
+        if (end !== -1) {
+            return content.slice(end +
3).replace(/^\n+/, '');
+        }
+    }
+    return content;
+}
+
+/**
+ * Parses simple key-value YAML frontmatter into an object.
+ * The closing fence is matched at a line start ('\n---') so a value that
+ * merely contains "---" does not terminate the frontmatter block early.
+ */
+export function parseFrontmatter(content) {
+    if (!content.startsWith('---')) return {};
+    const end = content.indexOf('\n---', 3);
+    if (end === -1) return {};
+    const yaml = content.slice(3, end).trim();
+    const result = {};
+    for (const line of yaml.split('\n')) {
+        const match = line.match(/^(\w+):\s*(.+)$/);
+        if (match) result[match[1]] = match[2].trim();
+    }
+    return result;
+}
diff --git a/scripts/generate-docs.mjs b/scripts/generate-docs.mjs
deleted file mode 100644
index b41cf440..00000000
--- a/scripts/generate-docs.mjs
+++ /dev/null
-import {
-    cpSync,
-    existsSync,
-    mkdirSync,
-    readFileSync,
-    readdirSync,
-    rmSync,
-    writeFileSync,
-} from 'node:fs';
-import {dirname, join} from 'node:path';
-import process from 'node:process';
-
-const DOCS_DIR = 'docs';
-const OUT_DIR = 'docs-src';
-const GITHUB_RAW_RE =
-    /https:\/\/raw\.githubusercontent\.com\/gravity-ui\/markdown-editor\/(?:refs\/heads\/[^/]+|[^/]+)\/docs\//g;
-
-// Source docs use ##### as a metadata header (not rendered).
-// Format: "##### Category / Title" or "##### Title" (no category).
-// This line is stripped from the output; the rest becomes the page content.
-const HEADER_RE = /^#{5}\s+(.+)$/;
-
-/**
- * Converts a string to a URL-friendly slug (lowercase, alphanumeric, hyphens).
- * @param str
- */
-function slugify(str) {
-    return str
-        .toLowerCase()
-        .replace(/[^a-z0-9]+/g, '-')
-        .replace(/(^-|-$)/g, '');
-}
-
-/**
- * Extracts category and title from a `##### Category / Title` header line.
- * @param firstLine - */ -function parseHeader(firstLine) { - const match = firstLine.match(HEADER_RE); - if (!match) return null; - - const raw = match[1].trim(); - const parts = raw.split('/').map((s) => s.trim()); - - if (parts.length === 2) { - return {category: parts[0], title: parts[1]}; - } - return {category: null, title: parts[0]}; -} - -/** Removes all generated content from the output directory. */ -function cleanOutDir() { - if (existsSync(OUT_DIR)) { - rmSync(OUT_DIR, {recursive: true, force: true}); - } - mkdirSync(OUT_DIR, {recursive: true}); -} - -/** Reads all markdown files from the source directory and parses their headers. */ -function collectDocs() { - if (!existsSync(DOCS_DIR)) { - console.error(`Error: source directory "${DOCS_DIR}" does not exist`); - process.exit(1); - } - - const files = readdirSync(DOCS_DIR) - .filter((f) => f.endsWith('.md')) - .sort(); - const docs = []; - - for (const file of files) { - const content = readFileSync(join(DOCS_DIR, file), 'utf-8'); - const lines = content.split('\n'); - const parsed = parseHeader(lines[0]); - - if (!parsed) { - console.warn(`Skipping ${file}: no ##### header found`); - continue; - } - - const strippedContent = lines.slice(1).join('\n').replace(/^\n+/, ''); - - docs.push({ - sourceFile: file, - category: parsed.category, - title: parsed.title, - content: strippedContent, - }); - } - - return docs; -} - -/** - * Splits docs into a category map and a top-level (uncategorized) list. - * @param docs - */ -function groupByCategory(docs) { - const categories = new Map(); - const topLevel = []; - - for (const doc of docs) { - if (doc.category) { - if (!categories.has(doc.category)) { - categories.set(doc.category, []); - } - categories.get(doc.category).push(doc); - } else { - topLevel.push(doc); - } - } - - return {categories, topLevel}; -} - -/** - * Builds a relative output file path from the doc's category and title slugs. 
- * @param doc - */ -function computeOutputPath(doc) { - if (doc.category) { - return join(slugify(doc.category), slugify(doc.title) + '.md'); - } - return slugify(doc.title) + '.md'; -} - -/** - * Ensures no two docs resolve to the same output path; exits on collision. - * @param docs - */ -function checkDuplicatePaths(docs) { - const seen = new Map(); - for (const doc of docs) { - const outPath = computeOutputPath(doc); - if (seen.has(outPath)) { - console.error( - `Error: duplicate output path "${outPath}" from "${doc.sourceFile}" and "${seen.get(outPath)}"`, - ); - process.exit(1); - } - seen.set(outPath, doc.sourceFile); - } -} - -/** - * Rewrites absolute GitHub raw URLs to relative paths based on doc nesting depth. - * @param content - * @param doc - */ -function rewriteAssetUrls(content, doc) { - const prefix = doc.category ? '../' : './'; - return content.replace(GITHUB_RAW_RE, prefix); -} - -/** - * Writes stripped markdown content to categorized output paths. - * @param docs - */ -function writeDocFiles(docs) { - checkDuplicatePaths(docs); - for (const doc of docs) { - const outPath = join(OUT_DIR, computeOutputPath(doc)); - mkdirSync(dirname(outPath), {recursive: true}); - writeFileSync(outPath, rewriteAssetUrls(doc.content, doc)); - } -} - -/** - * Wraps a string in double quotes if it contains YAML special characters. - * @param str - */ -function yamlQuote(str) { - if (/[:#"'{}[\],&*?|>!%@`]/.test(str)) { - return `"${str.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`; - } - return str; -} - -/** - * Generates the `toc.yaml` table of contents for the YFM documentation site. 
- * @param categories - * @param topLevel - */ -function generateTocYaml(categories, topLevel) { - const lines = [ - 'title: Markdown Editor', - 'href: index.md', - 'items:', - ' - name: Overview', - ' href: index.md', - ]; - - for (const [category, docs] of categories) { - lines.push(` - name: ${yamlQuote(category)}`); - lines.push(' items:'); - for (const doc of docs) { - lines.push(` - name: ${yamlQuote(doc.title)}`); - lines.push(` href: ${computeOutputPath(doc)}`); - } - } - - for (const doc of topLevel) { - lines.push(` - name: ${yamlQuote(doc.title)}`); - lines.push(` href: ${computeOutputPath(doc)}`); - } - - writeFileSync(join(OUT_DIR, 'toc.yaml'), lines.join('\n') + '\n'); -} - -/** - * Generates the `index.md` landing page with links to all doc pages. - * @param categories - * @param topLevel - */ -function generateIndexMd(categories, topLevel) { - const lines = [ - '# Markdown Editor', - '', - 'Documentation for the Gravity UI Markdown Editor.', - '', - ]; - - for (const [category, docs] of categories) { - lines.push(`## ${category}`, ''); - for (const doc of docs) { - lines.push(`- [${doc.title}](${computeOutputPath(doc)})`); - } - lines.push(''); - } - - if (topLevel.length > 0) { - for (const doc of topLevel) { - lines.push(`- [${doc.title}](${computeOutputPath(doc)})`); - } - lines.push(''); - } - - writeFileSync(join(OUT_DIR, 'index.md'), lines.join('\n')); -} - -/** Copies the `assets/` directory from source docs to the output directory. */ -function copyAssets() { - const assetsDir = join(DOCS_DIR, 'assets'); - if (existsSync(assetsDir)) { - cpSync(assetsDir, join(OUT_DIR, 'assets'), {recursive: true}); - } -} - -/** Writes the `.yfm` Diplodoc config into the output directory. */ -function writeYfmConfig() { - writeFileSync(join(OUT_DIR, '.yfm'), 'allowHTML: true\n'); -} - -/** Entry point: cleans output, collects docs, and generates the documentation site. 
*/ -function main() { - cleanOutDir(); - - const docs = collectDocs(); - const {categories, topLevel} = groupByCategory(docs); - - writeYfmConfig(); - writeDocFiles(docs); - generateTocYaml(categories, topLevel); - generateIndexMd(categories, topLevel); - copyAssets(); - - const totalFiles = docs.length; - const totalCategories = categories.size; - // eslint-disable-next-line no-console - console.log( - `Generated docs-src/: ${totalFiles} pages in ${totalCategories} categories + ${topLevel.length} top-level`, - ); -} - -main();