feat(public): .md mirror for every route + /llms-full.txt aggregate (#17)

mastermanas805 · claude · web-flow · commit f47e0676c644 · 2026-05-11T17:37:05.000+05:30
LLMs and crawlers no longer have to parse HTML to consume the
marketing site. Every public HTML route now has a parallel .md
mirror at the same path with a .md suffix:

  /                          →  /index.md
  /pricing                   →  /pricing.md
  /for-agents                →  /for-agents.md
  /status                    →  /status.md
  /docs                      →  /docs.md       (all 8 sections concatenated)
  /blog                      →  /blog.md       (post index)
  /blog/<slug>               →  /blog/<slug>.md
  /use-cases                 →  /use-cases.md  (catalogue grouped by category)
  /use-cases/<slug>          →  /use-cases/<slug>.md

A single aggregate /llms-full.txt (~361 KB) concatenates every
markdown page with URL-keyed section separators, so an LLM that
wants the entire site's text can fetch one file instead of 115.

Sources:
  - Blog posts: copy .content/blog/<slug>.md verbatim
  - Use cases: copy .content/use-cases/<slug>.md verbatim
  - Docs: concatenate .content/docs/*.md (ordered by frontmatter
    'order') into a single /docs.md
  - React-only marketing pages (home, pricing, for-agents,
    status): copy authored .content/pages/<name>.md
  - Index pages (/blog, /use-cases): generated at build time from
    the corresponding directory listings + frontmatter

Markdown link convention follows GFM — any URL in the rendered
text is in [label](url) form so an LLM can follow them. Internal
links use /path.md (the mirror), not /path (the HTML route), so
an LLM following links stays in markdown.

Total prerender output is now:
  - 115 HTML files (per-route SPA-pre-render)
  - 115 .md files (mirror routes)
  - 1 llms.txt (manifest pointing at the .md routes)
  - 1 llms-full.txt (361 KB, 115 sections, full text dump)

GitHub repo descriptions for InstaNode-dev/content and
InstaNode-dev/instanode-web were updated in the same change to
advertise the LLM-friendly URLs.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/scripts/prerender.mjs b/scripts/prerender.mjs
@@ -135,25 +135,256 @@ async function main() {
 
   // Step 5: copy /llms.txt from the content repo to dist root. The
   // llms.txt convention (https://llmstxt.org) expects the file at the
-  // domain root. Source of truth is .content/llms.txt — that lets the
-  // content repo author it like any other content file, and the
-  // dashboard build inlines it into dist/ for static hosting to serve.
+  // domain root.
   const llmsSource = resolve(ROOT, '.content/llms.txt')
+  let llmsBaseContent = ''
   if (existsSync(llmsSource)) {
-    const llmsContent = await readFile(llmsSource, 'utf-8')
-    await writeFile(resolve(DIST, 'llms.txt'), llmsContent, 'utf-8')
+    llmsBaseContent = await readFile(llmsSource, 'utf-8')
+    await writeFile(resolve(DIST, 'llms.txt'), llmsBaseContent, 'utf-8')
     console.log('prerender: copied llms.txt to dist root')
   } else {
     console.warn('prerender: no .content/llms.txt found, skipping')
   }
 
-  // Step 6: clean up the SSR bundle — it's only needed during this script.
+  // Step 6: emit .md mirror routes for every HTML page so LLMs and
+  // crawlers can consume plain text without parsing HTML. URL convention:
+  // /foo → /foo.md, /blog/foo → /blog/foo.md, / → /index.md.
+  //
+  // Sources:
+  //   - Blog posts: copy .content/blog/<slug>.md verbatim
+  //   - Use cases: copy .content/use-cases/<slug>.md verbatim
+  //   - Docs page: concatenate all .content/docs/*.md (one page in HTML,
+  //     so one combined markdown file at /docs.md)
+  //   - Index pages (/blog.md, /use-cases.md): generated from filenames
+  //   - React-only pages (/, /pricing, /for-agents, /status): copy
+  //     authored .content/pages/<name>.md
+  //
+  // All emitted .md files are also concatenated into /llms-full.txt for
+  // one-shot LLM consumption. Section separators use "---" + the path.
+  console.log('prerender: emitting .md mirror routes…')
+  const mdRoutes = await emitMarkdownRoutes()
+  console.log(`prerender: wrote ${mdRoutes.length} .md files`)
+
+  // Step 7: aggregate every .md into /llms-full.txt — a single file an
+  // LLM can fetch once and have the entire site's content.
+  await writeAggregate(mdRoutes)
+  console.log('prerender: wrote llms-full.txt aggregate')
+
+  // Step 8: clean up the SSR bundle — it's only needed during this script.
   // Leaving it in dist-ssr would inflate the GH Pages upload by ~400 KB.
   await rm(SSR_DIST, { recursive: true, force: true })
 
   console.log(`prerender: ${written} files written. SEO-ready.`)
 }
 
+/* emitMarkdownRoutes — writes a .md mirror under DIST for each content source found (pages, blog, use cases, docs).
+ * A missing page source logs a warning and is skipped; a missing or empty content directory emits nothing.
+ * Returns an array of {route, content} in write order, where route is the emitted .md path, for the aggregate step. */
+async function emitMarkdownRoutes() {
+  const out = []
+
+  // Helper: write a .md file at a given route path (creating parent dirs) and record it in `out`.
+  // route '/foo'   → dist/foo.md
+  // route '/foo/bar' → dist/foo/bar.md
+  // route '/'      → dist/index.md
+  async function writeRouteMd(route, content) {
+    const fileSubpath = route === '/' ? 'index.md' : route.replace(/^\//, '') + '.md'
+    const outPath = resolve(DIST, fileSubpath)
+    await mkdir(dirname(outPath), { recursive: true })
+    await writeFile(outPath, content, 'utf-8')
+    out.push({ route: route === '/' ? '/index.md' : route + '.md', content })
+  }
+
+  // 1. React-only pages — read from .content/pages/<name>.md (map is HTML route → source filename)
+  const reactPageMap = {
+    '/': 'home.md',
+    '/pricing': 'pricing.md',
+    '/for-agents': 'for-agents.md',
+    '/status': 'status.md',
+  }
+  for (const [route, filename] of Object.entries(reactPageMap)) {
+    const src = resolve(ROOT, `.content/pages/${filename}`)
+    if (!existsSync(src)) {
+      console.warn(`  skip ${route}: no ${filename}`)
+      continue
+    }
+    const text = await readFile(src, 'utf-8')
+    await writeRouteMd(route, text)
+  }
+
+  // 2. Blog posts — copy verbatim (frontmatter included); slug is the filename without .md
+  const blogDir = resolve(ROOT, '.content/blog')
+  const blogFiles = existsSync(blogDir)
+    ? readdirSync(blogDir).filter((f) => f.endsWith('.md'))
+    : []
+  for (const f of blogFiles) {
+    const slug = f.replace(/\.md$/, '')
+    const text = await readFile(resolve(blogDir, f), 'utf-8')
+    await writeRouteMd(`/blog/${slug}`, text)
+  }
+
+  // 3. /blog index — generated from blog post filenames + frontmatter; not emitted when there are no posts
+  if (blogFiles.length > 0) {
+    const blogIndex = await buildBlogIndex(blogDir, blogFiles)
+    await writeRouteMd('/blog', blogIndex)
+  }
+
+  // 4. Use cases — copy verbatim per file (frontmatter included); slug is the filename without .md
+  const useCaseDir = resolve(ROOT, '.content/use-cases')
+  const useCaseFiles = existsSync(useCaseDir)
+    ? readdirSync(useCaseDir).filter((f) => f.endsWith('.md'))
+    : []
+  for (const f of useCaseFiles) {
+    const slug = f.replace(/\.md$/, '')
+    const text = await readFile(resolve(useCaseDir, f), 'utf-8')
+    await writeRouteMd(`/use-cases/${slug}`, text)
+  }
+
+  // 5. /use-cases index — generated, grouped by category; not emitted when there are no use-case files
+  if (useCaseFiles.length > 0) {
+    const useCaseIndex = await buildUseCasesIndex(useCaseDir, useCaseFiles)
+    await writeRouteMd('/use-cases', useCaseIndex)
+  }
+
+  // 6. /docs — concatenate all docs sections into one markdown page; not emitted when there are no docs files
+  const docsDir = resolve(ROOT, '.content/docs')
+  const docsFiles = existsSync(docsDir)
+    ? readdirSync(docsDir).filter((f) => f.endsWith('.md'))
+    : []
+  if (docsFiles.length > 0) {
+    const docsPage = await buildDocsPage(docsDir, docsFiles)
+    await writeRouteMd('/docs', docsPage)
+  }
+
+  return out
+}
+
+/* writeAggregate — bundle every .md mirror into one llms-full.txt at
+ * dist root. mdRoutes is an array of {route, content}; each section is
+ * prefixed with a "URL: <route>" separator and its content is trimmed. */
+async function writeAggregate(mdRoutes) {
+  const header = `# instanode.dev — full text dump\n\n` +
+    `This file is the concatenation of every .md route on instanode.dev.\n` +
+    `For the per-route URLs and an LLM-oriented index, see\n` +
+    `https://instanode.dev/llms.txt — that's the manifest pointing here.\n\n` +
+    `Each section below is delimited by an HTTP-style header line\n` +
+    `(\`URL: <path>\`) and a horizontal rule. There are ${mdRoutes.length} sections\n` +
+    `in this file.\n\n`
+
+  const sections = mdRoutes.map(({ route, content }) =>
+    `\n\n---\nURL: ${route}\n---\n\n${content.trim()}\n`,
+  )
+
+  await writeFile(resolve(DIST, 'llms-full.txt'), header + sections.join(''), 'utf-8')
+}
+
+/* buildBlogIndex — returns a markdown index of the blog posts (title, date, excerpt, link to the .md detail),
+ * ordered by descending date string. Files whose frontmatter lacks a title or a date are left out of the index. */
+async function buildBlogIndex(dir, files) {
+  const posts = []
+  for (const f of files) {
+    const src = await readFile(resolve(dir, f), 'utf-8')
+    const meta = parseFrontmatter(src)
+    if (!meta.title || !meta.date) continue // not listed without both fields
+    posts.push({
+      slug: f.replace(/\.md$/, ''),
+      title: meta.title,
+      date: meta.date,
+      excerpt: meta.excerpt || '',
+    })
+  }
+  posts.sort((a, b) => b.date.localeCompare(a.date)) // newest first, assuming ISO-style date strings — TODO confirm
+
+  let out = `# Blog — instanode.dev\n\n`
+  out += `> Build notes, retrospectives, and the occasional rant on what "frictionless for AI agents" actually means.\n\n`
+  out += `## Posts\n\n`
+  for (const p of posts) {
+    out += `### [${p.title}](/blog/${p.slug}.md)\n\n`
+    out += `*${p.date}*\n\n`
+    if (p.excerpt) out += `${p.excerpt}\n\n`
+  }
+  return out
+}
+
+/* buildUseCasesIndex — returns a markdown catalogue of the use cases grouped by category (categories in default
+ * sort order, titles via localeCompare), each linking to its .md detail page. Files lacking a title or category are left out. */
+async function buildUseCasesIndex(dir, files) {
+  const cases = []
+  for (const f of files) {
+    const src = await readFile(resolve(dir, f), 'utf-8')
+    const meta = parseFrontmatter(src)
+    if (!meta.title || !meta.category) continue // not listed without both fields
+    cases.push({
+      slug: f.replace(/\.md$/, ''),
+      title: meta.title,
+      category: meta.category,
+      scenario: meta.scenario || '',
+    })
+  }
+
+  const grouped = new Map()
+  for (const c of cases) {
+    if (!grouped.has(c.category)) grouped.set(c.category, [])
+    grouped.get(c.category).push(c)
+  }
+  const cats = Array.from(grouped.keys()).sort()
+
+  let out = `# Use cases — instanode.dev\n\n`
+  out += `> ${cases.length} unique scenarios across ${cats.length} archetypes. Each detail page includes a paste-ready prompt that any vanilla LLM (ChatGPT, Claude, Gemini) can act on with no MCP and no installation — point the LLM at https://instanode.dev/llms.txt for the API contract and it generates a runnable script.\n\n`
+  for (const cat of cats) {
+    out += `## ${cat}\n\n`
+    const list = grouped.get(cat).sort((a, b) => a.title.localeCompare(b.title))
+    for (const c of list) {
+      out += `- [${c.title}](/use-cases/${c.slug}.md)`
+      if (c.scenario) out += ` — ${c.scenario}`
+      out += `\n`
+    }
+    out += `\n`
+  }
+  return out
+}
+
+/* buildDocsPage — concatenate all docs sections (ascending frontmatter 'order'; a missing or non-numeric order
+ * counts as 0) into one markdown page mirroring the HTML /docs page. The frontmatter block is stripped from each body. */
+async function buildDocsPage(dir, files) {
+  const sections = []
+  for (const f of files) {
+    const src = await readFile(resolve(dir, f), 'utf-8')
+    const meta = parseFrontmatter(src)
+    const body = src.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, '')
+    sections.push({
+      id: f.replace(/\.md$/, ''), // not read anywhere below
+      title: meta.title || f, // falls back to the filename
+      order: Number(meta.order) || 0, // NaN and 0 both end up as 0
+      body: body.trim(),
+    })
+  }
+  sections.sort((a, b) => a.order - b.order)
+
+  let out = `# Documentation — instanode.dev\n\n`
+  out += `> Everything you need to provision, deploy, and claim. Every curl below works against \`https://api.instanode.dev\` as-is.\n\n`
+  for (const s of sections) {
+    out += `## ${s.title}\n\n${s.body}\n\n`
+  }
+  return out
+}
+
+/* parseFrontmatter — tiny YAML subset for blog/use-case/docs headers: one "key: value" per line, split at the first
+ * colon, values kept as trimmed strings (quotes are not removed). Mirrors the runtime parsers in src/content/*.ts. */
+function parseFrontmatter(src) {
+  const m = src.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?/)
+  if (!m) return {} // no leading --- block
+  const meta = {}
+  for (const line of m[1].split(/\r?\n/)) {
+    const sep = line.indexOf(':')
+    if (sep < 0) continue // line has no colon, so no key/value pair
+    const key = line.slice(0, sep).trim()
+    const value = line.slice(sep + 1).trim()
+    if (key) meta[key] = value // a later duplicate key overwrites an earlier one
+  }
+  return meta
+}
+
 main().catch((err) => {
   console.error('prerender failed:', err)
   process.exit(1)