LLM-Coding · rdmueller · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/scripts/generate-sitemap.js b/scripts/generate-sitemap.js
@@ -3,69 +3,101 @@
 /**
  * generate-sitemap.js
  *
- * Generates sitemap.xml for the Semantic Anchors website
+ * Generates sitemap.xml for the Semantic Anchors website.
+ *
+ * Produces clean (non-hash) URLs that match the History API router in
+ * website/src/utils/router.js. Hash-based URLs (#/about) are not crawlable
+ * by search engines — every hash URL looks like the homepage to a crawler,
+ * and claude.ai / LLM fetchers cannot reach them either.
+ *
+ * Keep the PAGES list in sync with router.js `ROUTE_TITLES` when adding
+ * new routes.
  */
 
 const fs = require('fs')
 const path = require('path')
 
-// Paths
 const ANCHORS_DATA = path.join(__dirname, '..', 'website', 'public', 'data', 'anchors.json')
 const OUTPUT_FILE = path.join(__dirname, '..', 'website', 'public', 'sitemap.xml')
 const BASE_URL = 'https://llm-coding.github.io/Semantic-Anchors'
 
-// Read anchors data
-const anchorsData = JSON.parse(fs.readFileSync(ANCHORS_DATA, 'utf-8'))
+// Static pages served by the SPA router. Keep in sync with
+// website/src/utils/router.js -> ROUTE_TITLES AND with the ROUTES list in
+// scripts/prerender-routes.js.
+//
+// Only routes that can be pre-rendered to static HTML are listed here —
+// otherwise the sitemap would advertise URLs that return an empty SPA
+// shell to non-JS crawlers and claude.ai fetchers.
+//
+// Excluded on purpose:
+// - /contracts     — interactive JS page (localStorage, client-side data
+//                    fetching); no static content worth serving
+// - /anchor/:id    — not in PAGES; one entry per anchor is emitted by the loop below
+//
+// priority: 1.0 homepage, 0.8 top-level content, 0.7 contributing/meta, 0.6 anchors, 0.5 rejected proposals
+const PAGES = [
+  { path: '/', priority: '1.0', changefreq: 'weekly' },
+  { path: '/about', priority: '0.8', changefreq: 'monthly' },
+  { path: '/workflow', priority: '0.8', changefreq: 'monthly' },
+  { path: '/brownfield', priority: '0.8', changefreq: 'monthly' },
+  { path: '/evaluations', priority: '0.8', changefreq: 'monthly' },
+  { path: '/all-anchors', priority: '0.8', changefreq: 'weekly' },
+  { path: '/agentskill', priority: '0.7', changefreq: 'monthly' },
+  { path: '/changelog', priority: '0.7', changefreq: 'weekly' },
+  { path: '/contributing', priority: '0.7', changefreq: 'monthly' },
+  { path: '/rejected-proposals', priority: '0.5', changefreq: 'monthly' },
+]
 
-// Generate sitemap
+const anchorsData = JSON.parse(fs.readFileSync(ANCHORS_DATA, 'utf-8'))
 const today = new Date().toISOString().split('T')[0]
 
-let sitemap = `<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-  <!-- Homepage -->
-  <url>
-    <loc>${BASE_URL}/</loc>
-    <lastmod>${today}</lastmod>
-    <changefreq>weekly</changefreq>
-    <priority>1.0</priority>
-  </url>
-
-  <!-- About Page -->
-  <url>
-    <loc>${BASE_URL}/#/about</loc>
-    <lastmod>${today}</lastmod>
-    <changefreq>monthly</changefreq>
-    <priority>0.8</priority>
+/**
+ * Render one <url> entry for sitemap.xml.
+ * @param {string} loc - Fully-qualified URL of the page.
+ * @param {string} lastmod - ISO date string (YYYY-MM-DD).
+ * @param {string} changefreq - Sitemap changefreq value (weekly, monthly, ...).
+ * @param {string} priority - Sitemap priority value ("0.0"–"1.0").
+ * @param {string} [comment] - Optional XML comment placed above the entry; omitted when falsy.
+ * @returns {string} One <url>...</url> block with a trailing blank line; arguments are inserted verbatim (no XML escaping).
+ */
+function urlEntry(loc, lastmod, changefreq, priority, comment) {
+  return `  ${comment ? `<!-- ${comment} -->\n  ` : ''}<url>
+    <loc>${loc}</loc>
+    <lastmod>${lastmod}</lastmod>
+    <changefreq>${changefreq}</changefreq>
+    <priority>${priority}</priority>
   </url>
 
-  <!-- Contributing Page -->
-  <url>
-    <loc>${BASE_URL}/#/contributing</loc>
-    <lastmod>${today}</lastmod>
-    <changefreq>monthly</changefreq>
-    <priority>0.7</priority>
-  </url>
+`
+}
 
+let sitemap = `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 `
 
-// Add all anchors
-anchorsData.forEach((anchor) => {
-  sitemap += `  <!-- Anchor: ${anchor.title} -->
-  <url>
-    <loc>${BASE_URL}/#/anchor/${anchor.id}</loc>
-    <lastmod>${today}</lastmod>
-    <changefreq>monthly</changefreq>
-    <priority>0.6</priority>
-  </url>
+// Static pages
+for (const page of PAGES) {
+  const loc = page.path === '/' ? `${BASE_URL}/` : `${BASE_URL}${page.path}`
+  sitemap += urlEntry(loc, today, page.changefreq, page.priority)
+}
 
-`
+// Individual anchor pages. NOTE(review): ROUTES in scripts/prerender-routes.js has no /anchor/:id entry — confirm these URLs return real HTML to non-JS crawlers
+anchorsData.forEach((anchor) => {
+  sitemap += urlEntry(
+    `${BASE_URL}/anchor/${anchor.id}`,
+    today,
+    'monthly',
+    '0.6',
+    `Anchor: ${anchor.title}`
+  )
 })
 
 sitemap += `</urlset>
 `
 
-// Write sitemap
 fs.writeFileSync(OUTPUT_FILE, sitemap, 'utf-8')
 
 console.log(`✓ Sitemap generated: ${OUTPUT_FILE}`)
-console.log(`✓ Total URLs: ${anchorsData.length + 3} (3 pages + ${anchorsData.length} anchors)`)
+console.log(
+  `✓ Total URLs: ${PAGES.length + anchorsData.length} (${PAGES.length} pages + ${anchorsData.length} anchors)`
+)
diff --git a/scripts/prerender-routes.js b/scripts/prerender-routes.js
@@ -0,0 +1,216 @@
+#!/usr/bin/env node
+
+/**
+ * prerender-routes.js
+ *
+ * Post-build step: generate per-route static HTML so crawlers and non-JS
+ * fetchers (claude.ai, curl, search engine bots that skip JS execution) can
+ * access doc-style pages directly at their clean URLs.
+ *
+ * How it works:
+ *   1. Reads the built Vite shell at website/dist/index.html
+ *   2. For each route that has a pre-rendered content fragment in
+ *      website/dist/docs/<fragment>.html, generates
+ *      website/dist/<route>/index.html that injects the fragment into
+ *      the #app div's initial markup and updates the <title> + meta
+ *      description.
+ *   3. When a user-agent with JS loads the page, the SPA boots, clears
+ *      #app, and re-renders as usual — so users get the normal interactive
+ *      experience. Crawlers and no-JS fetchers see real content immediately.
+ *
+ * GitHub Pages serves <route>/index.html automatically when the clean URL
+ * (e.g. /workflow) is requested.
+ *
+ * Keep ROUTES in sync with website/src/utils/router.js, scripts/render-docs.js, and the PAGES list in scripts/generate-sitemap.js.
+ */
+
+const fs = require('fs')
+const path = require('path')
+
+const DIST = path.join(__dirname, '..', 'website', 'dist')
+const SHELL = path.join(DIST, 'index.html')
+
+// Each entry maps a clean-URL route to the doc fragment rendered by scripts/render-docs.js
+// (`fragment` is resolved relative to DIST), plus SEO metadata for the per-route <head>.
+const ROUTES = [
+  {
+    path: '/about',
+    fragment: 'docs/about.html',
+    title: 'About — Semantic Anchors',
+    description:
+      'Learn what semantic anchors are, why they matter for LLM communication, and how the catalog is curated.',
+  },
+  {
+    path: '/workflow',
+    fragment: 'docs/spec-driven-workflow.html',
+    title: 'Development Workflow — Semantic Anchors',
+    description:
+      'The Semantic Anchors spec-driven development workflow — from requirements to specification to implementation, powered by semantic anchors.',
+  },
+  {
+    path: '/brownfield',
+    fragment: 'docs/brownfield-workflow.html',
+    title: 'Brownfield Workflow — Semantic Anchors',
+    description:
+      'Applying semantic anchors to brownfield codebases using a bounded-context approach.',
+  },
+  {
+    path: '/changelog',
+    fragment: 'docs/changelog.html',
+    title: 'Changelog — Semantic Anchors',
+    description: 'Chronological record of all semantic anchors added to the catalog.',
+  },
+  {
+    path: '/contributing',
+    fragment: 'CONTRIBUTING.html',
+    title: 'Contributing — Semantic Anchors',
+    description:
+      'How to propose new semantic anchors, quality criteria, and the contribution workflow.',
+  },
+  {
+    path: '/agentskill',
+    fragment: 'docs/agentskill.html',
+    title: 'AgentSkill — Semantic Anchors',
+    description:
+      'The semantic-anchor-translator AgentSkill — install semantic anchors into Claude Code, Codex, Cursor, and other coding agents.',
+  },
+  {
+    path: '/rejected-proposals',
+    fragment: 'docs/rejected-proposals.html',
+    title: 'Rejected Proposals — Semantic Anchors',
+    description:
+      'Anchor proposals that did not meet the quality criteria, with reasoning — useful for understanding the curation bar.',
+  },
+  {
+    path: '/all-anchors',
+    fragment: 'docs/all-anchors.html',
+    title: 'Full Reference — Semantic Anchors',
+    description:
+      'Full reference of all semantic anchors in one long document — readable offline, linkable, easy to Ctrl-F.',
+  },
+  {
+    path: '/evaluations',
+    fragment: 'docs/anchor-evaluations.html',
+    title: 'Evaluations — Semantic Anchors',
+    description: 'Multiple-choice evaluations of semantic anchor recognition across 10 LLMs.',
+  },
+]
+
+/**
+ * Read the Vite-built HTML shell (website/dist/index.html).
+ * Exits with an error if the shell is missing — indicates that the caller
+ * forgot to run `vite build` before this post-build step.
+ * @returns {string} Raw HTML contents of the shell.
+ */
+function readShell() {
+  if (!fs.existsSync(SHELL)) {
+    console.error(`ERROR: ${SHELL} does not exist. Run 'vite build' first.`)
+    process.exit(1)
+  }
+  return fs.readFileSync(SHELL, 'utf-8')
+}
+
+/**
+ * Escape a string for safe insertion into an HTML attribute or text node.
+ * Converts &, <, >, ", and ' to their HTML entity equivalents. Used for
+ * route titles and descriptions that end up inside <title> and meta tags.
+ * @param {string} str - Input string to escape.
+ * @returns {string} HTML-safe string.
+ */
+function escapeHtml(str) {
+  return String(str).replace(
+    /[&<>"']/g,
+    (c) =>
+      ({
+        '&': '&amp;',
+        '<': '&lt;',
+        '>': '&gt;',
+        '"': '&quot;',
+        "'": '&#39;',
+      })[c]
+  )
+}
+
+/**
+ * Build the pre-populated markup that goes inside <div id="app">.
+ * Mirrors the layout produced at runtime by renderHeader() + renderDocPage()
+ * + renderFooter() in website/src/main.js, but statically — so crawlers see
+ * real content in the initial HTML response.
+ */
+function buildAppMarkup(fragmentHtml) {
+  return `
+    <main class="flex-1">
+      <article class="mx-auto max-w-4xl px-4 py-8 sm:px-6 lg:px-8">
+        <div id="doc-content" class="asciidoc-content">${fragmentHtml}</div>
+      </article>
+    </main>
+  `
+}
+
+/**
+ * Pre-render a single route to website/dist/<route>/index.html.
+ * Reads the AsciiDoc fragment produced by scripts/render-docs.js, injects
+ * it into a copy of the Vite shell, and updates the <title>, meta
+ * description, and canonical URL to match the route. Throws if the
+ * fragment is missing so the build fails fast instead of shipping an
+ * incomplete set of pre-rendered pages.
+ * @param {string} shell - Raw HTML of the Vite build shell.
+ * @param {{path: string, fragment: string, title: string, description: string}} route
+ *   Route descriptor from the ROUTES list.
+ * @throws {Error} When the configured fragment file does not exist.
+ */
+function prerenderRoute(shell, route) {
+  const fragmentPath = path.join(DIST, route.fragment)
+  if (!fs.existsSync(fragmentPath)) {
+    throw new Error(
+      `Missing fragment for ${route.path}: ${route.fragment} (expected at ${fragmentPath}). ` +
+        `Make sure scripts/render-docs.js runs before prerender-routes.js and writes the fragment to website/public/docs/.`
+    )
+  }
+  const fragment = fs.readFileSync(fragmentPath, 'utf-8')
+
+  let html = shell
+
+  // Replace <title>
+  html = html.replace(/<title>[\s\S]*?<\/title>/, `<title>${escapeHtml(route.title)}</title>`)
+
+  // Replace meta description if present
+  html = html.replace(
+    /<meta\s+name="description"\s+content="[^"]*"\s*\/?>/,
+    `<meta name="description" content="${escapeHtml(route.description)}" />`
+  )
+
+  // Update canonical URL so each pre-rendered page points to itself
+  const canonicalUrl = `https://llm-coding.github.io/Semantic-Anchors${route.path}`
+  html = html.replace(
+    /<link\s+rel="canonical"\s+href="[^"]*"\s*\/?>/,
+    `<link rel="canonical" href="${canonicalUrl}" />`
+  )
+
+  // Inject pre-rendered content into #app
+  html = html.replace(
+    /<div\s+id="app"\s*>\s*<\/div>/,
+    `<div id="app">${buildAppMarkup(fragment)}</div>`
+  )
+
+  const outDir = path.join(DIST, route.path)
+  const outFile = path.join(outDir, 'index.html')
+  fs.mkdirSync(outDir, { recursive: true })
+  fs.writeFileSync(outFile, html, 'utf-8')
+}
+
+/**
+ * Entry point: read the shell once, then pre-render every route in ROUTES.
+ * Throws (via prerenderRoute) if any fragment is missing, so the build
+ * fails non-zero instead of shipping an incomplete set of static pages.
+ */
+function main() {
+  const shell = readShell()
+  for (const route of ROUTES) {
+    prerenderRoute(shell, route)
+    console.log(`  ✓ pre-rendered ${route.path}`)
+  }
+  console.log(`\n✓ Pre-rendered ${ROUTES.length} routes to dist/<route>/index.html`)
+}
+
+main()
diff --git a/website/package.json b/website/package.json
@@ -8,7 +8,7 @@
     "predev": "node ../scripts/sync-anchors.js",
     "dev": "vite",
     "prebuild": "node ../scripts/sync-anchors.js && node ../scripts/render-docs.js",
-    "build": "vite build",
+    "build": "vite build && node ../scripts/prerender-routes.js",
     "preview": "vite preview",
     "test": "vitest run",
     "test:watch": "vitest",