|
| 1 | +import { readFileSync, readdirSync, existsSync } from 'fs' |
| 2 | +import { join, relative, dirname } from 'path' |
| 3 | +import { fileURLToPath } from 'url' |
| 4 | +import arc from '@architect/functions' |
| 5 | + |
// Directory containing this module (ES modules do not define __dirname themselves)
const __dirname = dirname(fileURLToPath(import.meta.url))
// Site origin used when building absolute links
const BASE_URL = 'https://arc.codes'

// Crawl settings
const config = {
  // Directory entries whose name exactly equals one of these are skipped
  excludes: [
    '.DS_Store',
    'node_modules',
    'table-of-contents.mjs',
  ],
}
| 14 | + |
/**
 * Cleans markdown content for LLM consumption.
 *
 * Strips a leading frontmatter block, unwraps the custom `<arc-viewer>` and
 * `<arc-tab>` components (each tab label becomes a bold `**label:**` line),
 * removes slot `<div>` wrappers and `<h5>` tags while keeping their inner
 * text, collapses runs of three or more newlines, and trims the result.
 *
 * @param {string} content - Raw markdown content
 * @returns {string} Cleaned content
 */
function cleanMarkdownContent (content) {
  return content
    // Remove frontmatter. The pattern is anchored to the very start of the
    // document (no `m` flag) and both delimiters must be whole `---` lines, so
    // horizontal rules in the body and `---` inside a frontmatter value are
    // never mistaken for frontmatter. CRLF line endings are accepted.
    .replace(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)(?:\r?\n)*/, '')
    // Remove custom HTML components but keep content
    .replace(/<arc-viewer[^>]*>/g, '')
    .replace(/<\/arc-viewer>/g, '')
    .replace(/<arc-tab[^>]*label="([^"]*)"[^>]*>/g, '**$1:**\n')
    .replace(/<\/arc-tab>/g, '')
    .replace(/<div[^>]*slot[^>]*>/g, '')
    .replace(/<\/div>/g, '')
    .replace(/<h5>/g, '')
    .replace(/<\/h5>/g, '')
    // Collapse runs of blank lines down to a single blank line
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}
| 37 | + |
/**
 * Extracts frontmatter from markdown content.
 *
 * Only a flat list of `key: value` lines is understood; this is not a YAML
 * parser. Everything after the first colon on a line is the value, so values
 * may themselves contain colons. Lines without a colon, or with nothing before
 * the colon, are ignored. One pair of matching surrounding quotes is removed
 * from a value. Both LF and CRLF line endings are accepted.
 *
 * @param {string} content - Raw markdown content
 * @returns {Object} Frontmatter data (empty object when there is no frontmatter)
 */
function extractFrontmatter (content) {
  const match = content.match(/^---[ \t]*\r?\n([\s\S]*?)\r?\n---/)
  if (!match) return {}

  const frontmatter = {}
  for (const line of match[1].split(/\r?\n/)) {
    const colonAt = line.indexOf(':')
    // No colon, or nothing before it: not a key/value line
    if (colonAt < 1) continue

    const key = line.slice(0, colonAt).trim()
    const value = line.slice(colonAt + 1).trim()
    // `title: "Foo"` should yield Foo, not "Foo"
    const quoted = value.match(/^(["'])(.*)\1$/)
    frontmatter[key] = quoted ? quoted[2] : value
  }
  return frontmatter
}
| 57 | + |
/**
 * Generates a URL from a relative file path.
 *
 * The path is expected to be relative to the docs root. Its `.md` extension
 * is dropped, colons are removed, and the remainder is appended to
 * `${BASE_URL}/docs/en/`.
 *
 * @param {string} relativePath - Relative path to the markdown file
 * @returns {string} Full URL
 */
function filePathToUrl (relativePath) {
  const urlPath = relativePath
    // path.relative() returns backslash-separated paths on Windows; URLs always use "/"
    .replace(/\\/g, '/')
    .replace(/\.md$/, '')
    // Remove colons from path (e.g., :tutorials)
    .replace(/:/g, '')
  return `${BASE_URL}/docs/en/${urlPath}`
}
| 69 | + |
/**
 * Reads one markdown file and renders it as a metadata header followed by
 * the cleaned document body.
 *
 * The header always contains a `Source:` line. The `# title`,
 * `Description:` and `Category:` lines appear only when the corresponding
 * frontmatter value is non-empty.
 *
 * @param {string} filePath - Path to the markdown file
 * @param {string} docsDir - Base docs directory
 * @returns {string} Processed content with metadata
 */
function processMarkdownFile (filePath, docsDir) {
  const raw = readFileSync(filePath, 'utf-8')
  const { title, description, category } = extractFrontmatter(raw)
  const sourceUrl = filePathToUrl(relative(docsDir, filePath))

  const metadataLines = []
  if (title) metadataLines.push(`# ${title}`)
  metadataLines.push(`Source: ${sourceUrl}`)
  if (description) metadataLines.push(`Description: ${description}`)
  if (category) metadataLines.push(`Category: ${category}`)

  const body = cleanMarkdownContent(raw)
  return `${metadataLines.join('\n')}\n\n${body}\n`
}
| 93 | + |
/**
 * Walks a directory tree and renders every markdown file found in it.
 *
 * Entries named in `config.excludes` are skipped, sub-directories are
 * descended into, and only files ending in `.md` are processed. A missing or
 * unreadable directory, or a file that fails to process, is reported with
 * `console.error` and otherwise ignored so the rest of the walk continues.
 *
 * @param {string} dir - Directory to process
 * @param {string} docsDir - Base docs directory for relative paths
 * @returns {string[]} Array of processed file contents
 */
function processDirectory (dir, docsDir) {
  if (!existsSync(dir)) {
    console.error(`Directory does not exist: ${dir}`)
    return []
  }

  let entries
  try {
    entries = readdirSync(dir, { withFileTypes: true })
  }
  catch (err) {
    console.error(`Error reading directory ${dir}:`, err.message)
    return []
  }

  const collected = []
  const wanted = entries.filter(({ name }) => !config.excludes.includes(name))

  for (const entry of wanted) {
    const entryPath = join(dir, entry.name)

    if (entry.isDirectory()) {
      collected.push(...processDirectory(entryPath, docsDir))
      continue
    }
    if (!entry.name.endsWith('.md')) continue

    // One bad file must not abort the whole walk
    try {
      collected.push(processMarkdownFile(entryPath, docsDir))
    }
    catch (err) {
      console.error(`Error processing ${entryPath}:`, err.message)
    }
  }

  return collected
}
| 137 | + |
/**
 * Builds the complete documentation dump as one plain-text response.
 *
 * Every markdown file under the docs directory is rendered, the results are
 * joined with `---` separators and prefixed with a fixed header. The response
 * is sent with caching disabled.
 *
 * @returns {Promise<Object>} Response with `statusCode`, `headers` and `body`
 */
async function _handler () {
  // Prefer the local dev path (src/views); otherwise use the production symlink (node_modules/@architect/views)
  const localDocsDir = join(__dirname, '..', '..', 'views', 'docs', 'en')
  const bundledDocsDir = join(__dirname, 'node_modules', '@architect', 'views', 'docs', 'en')
  const docsDir = existsSync(localDocsDir) ? localDocsDir : bundledDocsDir

  console.log('Attempting to read docs from:', docsDir)

  // The two trailing empty entries leave a blank line after the rule
  const header = [
    '# Architect (arc.codes) - Complete Documentation',
    '',
    '> This is the complete documentation for Architect, a simple framework for building and delivering powerful Functional Web Apps (FWAs) on AWS.',
    '',
    `> For a high-level overview, see: ${BASE_URL}/llms.txt`,
    '',
    '---',
    '',
    '',
  ].join('\n')

  const sections = processDirectory(docsDir, docsDir)
  const body = header + sections.join('\n\n---\n\n')

  const headers = {
    'content-type': 'text/plain; charset=utf-8',
    'cache-control': 'no-cache, no-store, must-revalidate',
  }

  return { statusCode: 200, headers, body }
}

export const handler = arc.http.async(_handler)
0 commit comments