diff --git a/next.config.js b/next.config.js index 4b0f77f896..490cfdb7fe 100644 --- a/next.config.js +++ b/next.config.js @@ -30,6 +30,18 @@ const defaultConfig = { ], }, async headers() { + // Serve top-level index URLs (e.g. /faqs.md, /programs.md, /docs.md) as inline markdown. + // Only non-nested routes have an index file; /docs.md is aliased to /docs/llms.txt via rewrite. + const mdIndexHeaders = Object.keys(CONTENT_ROUTES) + .filter((route) => !route.includes('/')) + .map((route) => ({ + source: `/${route}.md`, + headers: [ + { key: 'Content-Disposition', value: 'inline' }, + { key: 'Content-Type', value: 'text/markdown; charset=utf-8' }, + ], + })); + return [ { source: '/', @@ -136,7 +148,9 @@ const defaultConfig = { ], }, { - source: '/(docs|postgresql|guides|branching|programs|use-cases|faqs)/:path*.md', + source: `/(${Object.keys(CONTENT_ROUTES) + .filter((r) => !r.includes('/')) + .join('|')})/:path*.md`, headers: [ { key: 'Content-Disposition', @@ -148,6 +162,7 @@ const defaultConfig = { }, ], }, + ...mdIndexHeaders, ]; }, async redirects() { @@ -2053,11 +2068,10 @@ const defaultConfig = { destination: '/docs/connect/connection-errors', permanent: true, }, - { - source: '/docs', - destination: '/docs/introduction', - permanent: true, - }, + // NOTE: bare `/docs` is intentionally NOT redirected here. The middleware + // (src/proxy.js) owns it: agents / Accept: markdown get /docs/llms.txt, and + // browsers are redirected to /docs/introduction. A next.config redirect would + // run before middleware and intercept the markdown case. { source: '/docs/postgres', destination: '/docs/postgres/index', @@ -2472,8 +2486,10 @@ const defaultConfig = { // /:path*.md above requires at least one segment after the route name, // so /branching.md (no separator) doesn't match. Add explicit index rewrites. + // /docs.md is excluded here: it's aliased to the canonical /docs/llms.txt index + // below rather than a generated page-listing (see process-md-for-llms.js). const indexRewrites = Object.keys(CONTENT_ROUTES) - .filter((route) => !route.includes('/')) + .filter((route) => !route.includes('/') && route !== 'docs') .map((route) => ({ source: `/${route}.md`, destination: `/md/${route}.md`, @@ -2498,6 +2514,12 @@ const defaultConfig = { source: '/docs/.well-known/skills/:name/SKILL.md', destination: '/docs/ai/skills/:name/SKILL.md', }, + // /docs.md serves the canonical, curated docs index (llms.txt) instead of a + // generated page-listing. beforeFiles so the [slug] catch-all doesn't intercept it. + { source: '/docs.md', destination: '/docs/llms.txt' }, + // Index .md files (e.g. /faqs.md, /programs.md) must be beforeFiles so the + // top-level [slug] catch-all doesn't intercept them before the rewrite fires. + ...indexRewrites, ], // afterFiles: runs after checking pages/public files but before dynamic routes // This ensures physical .md files are served first, with fallback to public/md/ @@ -2518,7 +2540,6 @@ const defaultConfig = { }, { source: '/skill.md', destination: '/docs/ai/skills/neon-postgres/SKILL.md' }, { source: '/docs/changelog/:path*.md', destination: '/md/changelog/:path*.md' }, - ...indexRewrites, ...contentRewrites, ], // fallback: existing rewrites for external services diff --git a/scripts/test-markdown-urls.js b/scripts/test-markdown-urls.js index 80e94c33be..8d0721ecd8 100755 --- a/scripts/test-markdown-urls.js +++ b/scripts/test-markdown-urls.js @@ -375,20 +375,20 @@ function buildTests() { ); if (hasDotMd) { - // No index file at public/md/guides.md etc.; expect same contract as other .md 404s - // (requires top-level .md routing — see PR #4735). + // /{route}.md serves the generated index file from public/md/{route}.md — + // a listing of all pages in that route. Returns 200 markdown even though + // /{route} (without .md) is excluded from middleware markdown serving. add( 'Excluded route', path, 'dot-md', [ - (r) => expectStatus(r.status, 404), + (r) => expectStatus(r.status, 200), (r) => expectContentType(r.contentType, 'text/markdown'), - (r) => expectBodyContains(r.body, 'Page Not Found'), + (r) => expectMarkdownBody(r.body), (r) => expectBodyContains(r.body, '/docs/llms.txt'), - (r) => expectHeader(r.headers, 'x-content-source', 'md-404'), ], - { note: 'hub index .md → markdown 404 (not Vercel HTML 404)' } + { note: 'hub index .md → generated index listing (200 markdown)' } ); } } @@ -639,29 +639,26 @@ function buildTests() { ); } - // ── 9b. Blog post (not a docs content route; stays HTML for agents / Accept: markdown) - // Example: https://neon.com/blog/prewarming + // ── 9b. Non-docs marketing page (stays HTML for agents / Accept: markdown) + // Tests that middleware does not serve markdown for pages outside CONTENT_ROUTES. - const blogPostPath = '/blog/prewarming'; + const nonDocsPath = '/about-us'; add( - 'Blog post', - blogPostPath, + 'Non-docs page', + nonDocsPath, 'browser', [ (r) => expectStatus(r.status, 200), (r) => expectContentType(r.contentType, 'text/html'), (r) => expectHtmlBody(r.body), ], - { - spotCheck: (r) => expectBodyContains(r.body, 'Prewarming', true), - note: 'engineering blog article', - } + { note: 'marketing page, not a content route' } ); add( - 'Blog post', - blogPostPath, + 'Non-docs page', + nonDocsPath, 'accept-md', [ (r) => expectStatus(r.status, 200), @@ -669,19 +666,16 @@ function buildTests() { (r) => expectHtmlBody(r.body), (r) => { const src = r.headers.get('x-content-source'); - if (src === 'markdown') return 'blog post returned x-content-source: markdown'; + if (src === 'markdown') return 'non-docs page returned x-content-source: markdown'; return null; }, ], - { - spotCheck: (r) => expectBodyContains(r.body, 'Prewarming', true), - note: 'Accept: markdown must not serve docs markdown', - } + { note: 'Accept: markdown must not serve docs markdown' } ); add( - 'Blog post', - blogPostPath, + 'Non-docs page', + nonDocsPath, 'agent-ua', [ (r) => expectStatus(r.status, 200), @@ -689,16 +683,133 @@ function buildTests() { (r) => expectHtmlBody(r.body), (r) => { const src = r.headers.get('x-content-source'); - if (src === 'markdown') return 'blog post returned x-content-source: markdown'; + if (src === 'markdown') return 'non-docs page returned x-content-source: markdown'; return null; }, ], + { note: 'AI UA must not get docs markdown' } + ); + + // ── 9c. Hub index .md files — generated by generateRouteIndex at postbuild ── + // These serve a listing of all pages in that route (requires --generate or postbuild). + + const hubIndexRoutes = [ + { path: '/faqs.md', spotWord: 'FAQs' }, + { path: '/programs.md', spotWord: 'Programs' }, + { path: '/branching.md', spotWord: 'Branching' }, + { path: '/guides.md', spotWord: 'Guides' }, + { path: '/postgresql.md', spotWord: 'PostgreSQL' }, + ]; + + for (const { path, spotWord } of hubIndexRoutes) { + // dot-md (browser UA): served as static file via beforeFiles rewrite + add( + 'Hub index .md', + path, + 'browser', + [ + (r) => expectStatus(r.status, 200), + (r) => expectContentType(r.contentType, 'text/markdown'), + (r) => expectMarkdownBody(r.body), + (r) => expectBodyContains(r.body, '/docs/llms.txt'), + ], + { spotCheck: (r) => expectBodyContains(r.body, spotWord, true), note: 'static index listing' } + ); + + // agent-ua: middleware fetches /md/{route}.md and serves it + add( + 'Hub index .md', + path, + 'agent-ua', + [ + (r) => expectStatus(r.status, 200), + (r) => expectContentType(r.contentType, 'text/markdown'), + (r) => expectMarkdownBody(r.body), + ], + { + spotCheck: (r) => expectBodyContains(r.body, spotWord, true), + note: 'agent gets index listing', + } + ); + } + + // ── 9d. /docs.md — aliased to the canonical curated index (llms.txt), not a + // generated page-listing. Both the static rewrite (beforeFiles) and middleware + // (CUSTOM_MARKDOWN_PATHS) resolve it to /docs/llms.txt. + + add( + 'Docs index .md', + '/docs.md', + 'browser', + [ + (r) => expectStatus(r.status, 200), + (r) => expectContentType(r.contentType, 'text/markdown'), + (r) => expectMarkdownBody(r.body), + ], { - spotCheck: (r) => expectBodyContains(r.body, 'Prewarming', true), - note: 'AI UA must not get docs markdown', + spotCheck: (r) => expectBodyContains(r.body, '# Neon Postgres', true), + note: 'aliased to /docs/llms.txt', } ); + add( + 'Docs index .md', + '/docs.md', + 'agent-ua', + [ + (r) => expectStatus(r.status, 200), + (r) => expectContentType(r.contentType, 'text/markdown'), + (r) => expectMarkdownBody(r.body), + ], + { + spotCheck: (r) => expectBodyContains(r.body, '# Neon Postgres', true), + note: 'agent gets llms.txt content', + } + ); + + // ── 9e. Bare /docs — content-negotiated by the middleware (src/proxy.js). The + // /docs→/docs/introduction redirect lives in middleware (not next.config) so it runs + // AFTER the markdown check: agents / non-HTML Accept get llms.txt; browsers redirect. + + // Browser (Accept: text/html, normal UA): redirected to the introduction page. + add( + 'Bare /docs', + '/docs', + 'browser', + [ + (r) => expectStatus(r.status, 308), + (r) => expectHeader(r.headers, 'location', '/docs/introduction'), + (r) => expectHeader(r.headers, 'vary', 'Accept'), + ], + { note: 'browser → redirect to /docs/introduction (Vary: Accept)' } + ); + + // Markdown-negotiated requests all resolve to /docs/llms.txt with the doc headers. + // Each mode exercises a different branch of isAIAgentRequest(): + // accept-md → Accept: text/markdown + // accept-plain→ prefersNonHtml (no text/html in Accept) + // agent-ua → known agent User-Agent (Claude) + // agent-axios → HTTP-client User-Agent (axios) with Accept: text/html + for (const mode of ['accept-md', 'accept-plain', 'agent-ua', 'agent-axios']) { + add( + 'Bare /docs', + '/docs', + mode, + [ + (r) => expectStatus(r.status, 200), + (r) => expectContentType(r.contentType, 'text/markdown'), + (r) => expectMarkdownBody(r.body), + (r) => expectHeader(r.headers, 'x-content-source', 'markdown'), + (r) => expectHeader(r.headers, 'x-robots-tag', 'noindex'), + (r) => expectHeader(r.headers, 'vary', 'Accept'), + ], + { + spotCheck: (r) => expectBodyContains(r.body, '# Neon Postgres', true), + note: 'markdown client → /docs/llms.txt content', + } + ); + } + // ── 9. Individual changelog entry (file must exist under public/md/changelog/ — run --generate) // --------------------------------------------------------------------------- diff --git a/src/app/(docs)/docs/[...slug]/page.jsx b/src/app/(docs)/docs/[...slug]/page.jsx index 7f297a83d7..ddc51d727b 100644 --- a/src/app/(docs)/docs/[...slug]/page.jsx +++ b/src/app/(docs)/docs/[...slug]/page.jsx @@ -3,7 +3,7 @@ import { notFound } from 'next/navigation'; import Post from 'components/pages/doc/post'; import VERCEL_URL from 'constants/base'; -import { DOCS_DIR_PATH, CHANGELOG_DIR_PATH } from 'constants/content'; +import { DOCS_DIR_PATH, CHANGELOG_DIR_PATH, isUnusedOrSharedContent } from 'constants/content'; import LINKS from 'constants/links'; import { getPostBySlug } from 'utils/api-content'; import { getAllPosts, getAllChangelogs, getNavigationLinks, getNavigation } from 'utils/api-docs'; @@ -12,12 +12,6 @@ import { getFlatSidebar } from 'utils/get-flat-sidebar'; import getMetadata from 'utils/get-metadata'; import getTableOfContents from 'utils/get-table-of-contents'; -const isUnusedOrSharedContent = (slug) => - slug.includes('unused/') || - slug.includes('shared-content/') || - slug.includes('README') || - slug.includes('GUIDE_TEMPLATE'); - export async function generateStaticParams() { const posts = await getAllPosts(); diff --git a/src/app/postgresql/[...slug]/page.jsx b/src/app/postgresql/[...slug]/page.jsx index 84ca25b4f3..96ae3e7395 100644 --- a/src/app/postgresql/[...slug]/page.jsx +++ b/src/app/postgresql/[...slug]/page.jsx @@ -3,7 +3,7 @@ import { notFound } from 'next/navigation'; import Post from 'components/pages/doc/post'; import VERCEL_URL from 'constants/base'; -import { POSTGRESQL_DIR_PATH } from 'constants/content'; +import { POSTGRESQL_DIR_PATH, isUnusedOrSharedContent } from 'constants/content'; import { POSTGRESQL_BASE_PATH } from 'constants/docs'; import { getPostBySlug } from 'utils/api-content'; import { getNavigation, getAllPostgresTutorials, getNavigationLinks } from 'utils/api-postgresql'; @@ -12,12 +12,6 @@ import { getFlatSidebar } from 'utils/get-flat-sidebar'; import getMetadata from 'utils/get-metadata'; import getTableOfContents from 'utils/get-table-of-contents'; -const isUnusedOrSharedContent = (slug) => - slug.includes('unused/') || - slug.includes('shared-content/') || - slug.includes('README') || - slug.includes('GUIDE_TEMPLATE'); - export async function generateStaticParams() { const posts = await getAllPostgresTutorials(); diff --git a/src/constants/content.js b/src/constants/content.js index 9eb0224c93..4ef351ad56 100644 --- a/src/constants/content.js +++ b/src/constants/content.js @@ -29,11 +29,21 @@ const EXCLUDED_ROUTES = [ 'use-cases/serverless-apps', ]; +const EXCLUDED_DIRS = ['shared-content', 'unused']; + const EXCLUDED_FILES = ['rss.xml', 'context7.json']; +const isUnusedOrSharedContent = (slug) => + slug.includes('unused/') || + slug.includes('shared-content/') || + slug.includes('README') || + slug.includes('GUIDE_TEMPLATE'); + module.exports = { CONTENT_ROUTES, + isUnusedOrSharedContent, EXCLUDED_ROUTES, + EXCLUDED_DIRS, EXCLUDED_FILES, DOCS_DIR_PATH, BRANCHING_DIR_PATH, diff --git a/src/middleware.test.js b/src/middleware.test.js index 7e8bcb4433..d74765c96f 100644 --- a/src/middleware.test.js +++ b/src/middleware.test.js @@ -13,7 +13,7 @@ vi.mock('next/server', () => ({ } static redirect(url) { - return { type: 'redirect', url }; + return { type: 'redirect', url, headers: new Headers() }; } }, })); @@ -122,6 +122,46 @@ describe('Middleware - AI Agent Integration Tests', () => { }); }); + describe('Bare /docs root', () => { + it('serves llms.txt markdown for AI User-Agent', async () => { + const req = createMockRequest('/docs', 'Claude/1.0', 'text/html'); + mockMarkdownFetch('# Neon Postgres'); + + const response = await middleware(req); + + expect(global.fetch).toHaveBeenCalledWith('https://neon.com/docs/llms.txt'); + const text = await response.text(); + expect(text).toContain('# Neon Postgres'); + expect(response.headers.get('X-Content-Source')).toBe('markdown'); + }); + + it('serves llms.txt markdown for Accept: text/markdown', async () => { + const req = createMockRequest('/docs', 'Mozilla/5.0', 'text/markdown'); + mockMarkdownFetch('# Neon Postgres'); + + const response = await middleware(req); + + expect(global.fetch).toHaveBeenCalledWith('https://neon.com/docs/llms.txt'); + const text = await response.text(); + expect(text).toContain('# Neon Postgres'); + }); + + it('redirects browsers to /docs/introduction without fetching markdown', async () => { + const req = createMockRequest( + '/docs', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)', + 'text/html' + ); + + const response = await middleware(req); + + expect(response.type).toBe('redirect'); + expect(response.url.toString()).toContain('/docs/introduction'); + expect(response.headers.get('Vary')).toBe('Accept'); + expect(global.fetch).not.toHaveBeenCalled(); + }); + }); + describe('Excluded routes - no index markdown available, return HTML', () => { const excludedCases = [ { name: 'Index /guides', path: '/guides', reason: 'index page without markdown' }, diff --git a/src/proxy.js b/src/proxy.js index 1d3fb58352..55acc6d2e7 100644 --- a/src/proxy.js +++ b/src/proxy.js @@ -119,6 +119,17 @@ export async function proxy(req) { } } + // Bare /docs has no page of its own. Agents / Accept: markdown are handled above + // (served /docs/llms.txt); everyone else (browsers) is redirected to the intro. + // This redirect lives here, not in next.config, so it runs after the markdown check. + if (pathname === '/docs') { + const res = NextResponse.redirect(new URL('/docs/introduction', req.url), 308); + // The /docs response is content-negotiated (agents/markdown get llms.txt above), + // so the redirect must vary on Accept to stay correct in shared caches. + res.headers.set('Vary', 'Accept'); + return res; + } + // Apply doc headers to all content route responses (.md URLs and HTML pages). // Vary: Accept is only set on markdown-negotiated responses (applyDocHeaders above). if (isContentRoute(pathname)) { @@ -214,6 +225,7 @@ export const config = { '/', // Check if the user is logged in '/home', // Check if the user is logged in '/pricing', // Agent-friendly pricing page + '/docs', // Bare docs root: serve llms.txt for agents; browsers fall through to the /docs→/docs/introduction redirect '/(docs|postgresql|guides|branching|programs|use-cases|faqs)/:path*', // All markdown routes '/:path(docs|postgresql|guides|branching|programs|use-cases).md', // Top-level .md index URLs ], diff --git a/src/scripts/compare-md-conversion.js b/src/scripts/compare-md-conversion.js index d734d201f9..2fbc3bf426 100644 --- a/src/scripts/compare-md-conversion.js +++ b/src/scripts/compare-md-conversion.js @@ -52,7 +52,7 @@ Examples: buildNavigationMap, addNavigationContext, } = require('./process-md-for-llms.js'); - let converted = await processFile(inputPath, pageUrl); + let converted = (await processFile(inputPath, pageUrl)).content; // Add breadcrumb header and navigation footer (same as processDirectory does) const navMap = buildNavigationMap(projectRoot); diff --git a/src/scripts/generate-llms-full.js b/src/scripts/generate-llms-full.js index cf1730b579..ee2a5d1b93 100644 --- a/src/scripts/generate-llms-full.js +++ b/src/scripts/generate-llms-full.js @@ -22,15 +22,13 @@ const path = require('path'); const matter = require('gray-matter'); -const { CONTENT_ROUTES } = require('../constants/content'); +const { CONTENT_ROUTES, EXCLUDED_DIRS } = require('../constants/content'); const config = require('./llms-index-config'); const { stripNavigationContext } = require('./process-md-for-llms'); const BASE_URL = 'https://neon.com'; const OUTPUT_PATH = 'public/docs/llms-full.txt'; - -const EXCLUDED_DIRS = ['shared-content', 'unused']; const EXCLUDED_FILES = ['README.md', 'index.md', '_index.md']; const FULL_TEXT_CONFIG = config.fullText || {}; diff --git a/src/scripts/generate-llms-index.js b/src/scripts/generate-llms-index.js index 354b0c72c6..27b0990b60 100644 --- a/src/scripts/generate-llms-index.js +++ b/src/scripts/generate-llms-index.js @@ -16,14 +16,12 @@ const path = require('path'); const matter = require('gray-matter'); -const { CONTENT_ROUTES } = require('../constants/content'); +const { CONTENT_ROUTES, EXCLUDED_DIRS } = require('../constants/content'); const config = require('./llms-index-config'); const BASE_URL = 'https://neon.com'; const OUTPUT_PATH = 'public/docs/llms.txt'; - -const EXCLUDED_DIRS = ['shared-content', 'unused']; const EXCLUDED_FILES = ['README.md', 'index.md', '_index.md']; const COLLAPSED_ROUTES = config.collapsedRoutes || {}; diff --git a/src/scripts/process-md-for-llms.js b/src/scripts/process-md-for-llms.js index 7875b1f1d6..ba30ab4178 100644 --- a/src/scripts/process-md-for-llms.js +++ b/src/scripts/process-md-for-llms.js @@ -35,6 +35,8 @@ const path = require('path'); const matter = require('gray-matter'); const jsYaml = require('js-yaml'); +const { isUnusedOrSharedContent } = require('../constants/content'); + // Project root for shared content loading (set during processing) let projectRoot = null; @@ -1941,7 +1943,7 @@ async function processFile(inputPath, pageUrl, rootDir) { // Normalize smart quotes to straight quotes (matches Python behavior) output = normalizeQuotes(output); - return output; + return { content: output, title: frontmatter.title || null }; } /** @@ -2219,34 +2221,81 @@ function stripNavigationContext(content) { } /** - * Process a directory recursively + * Process a directory recursively. + * Returns an array of { title, url } for every page written (used to build route indexes). */ async function processDirectory(inputDir, outputDir, baseContentDir, rootDir) { const entries = await fs.readdir(inputDir, { withFileTypes: true }); + const pages = []; for (const entry of entries) { const inputPath = path.join(inputDir, entry.name); if (entry.isDirectory()) { - await processDirectory(inputPath, outputDir, baseContentDir, rootDir); + const dirRelPath = path.relative(baseContentDir, inputPath) + '/'; + if (isUnusedOrSharedContent(dirRelPath)) continue; + const subPages = await processDirectory(inputPath, outputDir, baseContentDir, rootDir); + pages.push(...subPages); } else if (entry.name.endsWith('.md')) { const relativePath = path.relative(baseContentDir, inputPath); + if (isUnusedOrSharedContent(relativePath)) continue; const outputPath = path.join(outputDir, relativePath); const pageUrl = getPageUrl(inputPath, baseContentDir); await fs.mkdir(path.dirname(outputPath), { recursive: true }); try { - let result = await processFile(inputPath, pageUrl, rootDir); - result = addNavigationContext(result, relativePath, navigationMap); - await fs.writeFile(outputPath, result); + const { content, title } = await processFile(inputPath, pageUrl, rootDir); + const withNav = addNavigationContext(content, relativePath, navigationMap); + await fs.writeFile(outputPath, withNav); console.log(`✓ ${relativePath}`); + if (pageUrl) { + pages.push({ title: title || path.basename(entry.name, '.md'), url: `${pageUrl}.md` }); + } } catch (error) { console.error(`✗ ${relativePath}: ${error.message}`); processingErrors.push({ file: inputPath, error: error.message }); } } } + + return pages; +} + +const ROUTE_LABELS = { + faqs: 'FAQs', + postgresql: 'PostgreSQL', + 'use-cases': 'Use Cases', +}; + +// Routes that should NOT get a generated page-listing index. Instead, their +// /${route}.md URL is aliased to the canonical, curated index at /docs/llms.txt +// (see next.config.js beforeFiles rewrite and ai-agent-detection CUSTOM_MARKDOWN_PATHS). +// llms.txt is written later in postbuild (generate-llms-index.js), so we can't copy +// its content here; we alias at the routing layer instead. +const ROUTES_ALIASED_TO_LLMS = new Set(['docs']); + +/** + * Generate a /${route}.md index file listing all pages in that route. + * Called by processAllContent for every non-nested route after its files are written. + */ +async function generateRouteIndex(route, pages, outputDir) { + const label = ROUTE_LABELS[route] || route.charAt(0).toUpperCase() + route.slice(1); + const outputPath = path.join(outputDir, `${route}.md`); + + const lines = [ + `> Full Neon documentation index: ${BASE_URL}/docs/llms.txt`, + '', + `# Neon ${label}`, + '', + '## All pages', + '', + ...pages.map(({ title, url }) => `- [${title}](${url})`), + '', + ]; + + await fs.writeFile(outputPath, lines.join('\n')); + console.log(`✓ ${route}.md (index, ${pages.length} pages)`); } /** @@ -2259,13 +2308,19 @@ async function processAllContent(contentRoutes, rootDir) { navigationMap = buildNavigationMap(rootDir); console.log(`Navigation map: ${navigationMap.size} pages with sibling links\n`); - const baseContentDir = path.join(rootDir, 'content'); const outputDir = path.join(rootDir, 'public/md'); for (const [route, srcPath] of Object.entries(contentRoutes)) { const inputDir = path.join(rootDir, srcPath); + // Use parent of inputDir as base so relative paths start with the dir name, + // not any intermediate segments (e.g. content/pages/programs → programs/...). + const baseContentDir = path.dirname(inputDir); console.log(`Processing ${srcPath} -> public/md/${route}`); - await processDirectory(inputDir, outputDir, baseContentDir, rootDir); + const pages = await processDirectory(inputDir, outputDir, baseContentDir, rootDir); + + if (!route.includes('/') && !ROUTES_ALIASED_TO_LLMS.has(route) && pages.length > 0) { + await generateRouteIndex(route, pages, outputDir); + } } // Build-time verification: fail if no markdown files were generated @@ -2297,6 +2352,7 @@ module.exports = { processFile, processDirectory, processAllContent, + generateRouteIndex, loadDependencies, clearState, printBuildSummary, @@ -2319,8 +2375,8 @@ async function main() { const inputPath = path.resolve(args[1]); const baseContentDir = path.join(rootDir, 'content'); const pageUrl = getPageUrl(inputPath, baseContentDir); - const result = await processFile(inputPath, pageUrl, rootDir); - console.log(result); + const { content } = await processFile(inputPath, pageUrl, rootDir); + console.log(content); } else if (args[0] === '--dir') { // Process specific directory const dir = args[1] || 'docs/guides'; diff --git a/src/scripts/process-md-for-llms.test.js b/src/scripts/process-md-for-llms.test.js index 2b38d8cd53..71e2ebe113 100644 --- a/src/scripts/process-md-for-llms.test.js +++ b/src/scripts/process-md-for-llms.test.js @@ -17,7 +17,7 @@ describe('MDX to Markdown Conversion', () => { const inputPath = 'content/docs/guides/prisma.md'; const pageUrl = 'https://neon.com/docs/guides/prisma'; - const result = await processFile(inputPath, pageUrl); + const { content: result } = await processFile(inputPath, pageUrl); // Should have title from frontmatter expect(result).toContain('# Connect from Prisma to Neon'); @@ -44,7 +44,7 @@ describe('MDX to Markdown Conversion', () => { const inputPath = 'content/docs/guides/nextjs.md'; const pageUrl = 'https://neon.com/docs/guides/nextjs'; - const result = await processFile(inputPath, pageUrl); + const { content: result } = await processFile(inputPath, pageUrl); // Should have converted CodeTabs to bold labels expect(result).toContain('**node-postgres**'); @@ -61,7 +61,7 @@ describe('MDX to Markdown Conversion', () => { const pageUrl = 'https://neon.com/docs/workflows/data-anonymization'; const projectRoot = process.cwd(); - const result = await processFile(inputPath, pageUrl, projectRoot); + const { content: result } = await processFile(inputPath, pageUrl, projectRoot); // FeatureBeta should be replaced with its content (an Admonition) expect(result).toContain('**Note:**'); @@ -74,7 +74,7 @@ describe('MDX to Markdown Conversion', () => { const pageUrl = 'https://neon.com/docs/introduction/regions'; const projectRoot = process.cwd(); - const result = await processFile(inputPath, pageUrl, projectRoot); + const { content: result } = await processFile(inputPath, pageUrl, projectRoot); expect(result).toContain('Azure regions'); expect(result).toContain('April 7, 2026'); @@ -86,7 +86,7 @@ describe('MDX to Markdown Conversion', () => { const pageUrl = 'https://neon.com/docs/guides/consumption-limits'; const projectRoot = process.cwd(); - const result = await processFile(inputPath, pageUrl, projectRoot); + const { content: result } = await processFile(inputPath, pageUrl, projectRoot); expect(result).toContain('consumption_history/account'); expect(result).toContain('deprecated'); @@ -98,7 +98,7 @@ describe('MDX to Markdown Conversion', () => { const pageUrl = 'https://neon.com/use-cases/dev-test'; const projectRoot = process.cwd(); - const result = await processFile(inputPath, pageUrl, projectRoot); + const { content: result } = await processFile(inputPath, pageUrl, projectRoot); expect(result).not.toContain(''); @@ -112,7 +112,7 @@ describe('MDX to Markdown Conversion', () => { const pageUrl = 'https://neon.com/docs/auth/reference/nextjs-server'; const projectRoot = process.cwd(); - const result = await processFile(inputPath, pageUrl, projectRoot); + const { content: result } = await processFile(inputPath, pageUrl, projectRoot); // TwoColumnLayout.Item should become headings expect(result).toContain('## Installation'); @@ -138,7 +138,7 @@ title: Test ${mdxContent}`; await fs.writeFile(tempPath, fullContent); - return processFile(tempPath, pageUrl, rootDir); + return (await processFile(tempPath, pageUrl, rootDir)).content; } it('should convert Admonition to bold label', async () => { @@ -299,7 +299,7 @@ Install the package using npm. async function processInlineMdx(mdxContent, pageUrl = 'https://neon.com/docs/test') { const tempPath = '/tmp/test-mdx-conversion.md'; await fs.writeFile(tempPath, `---\ntitle: Test\n---\n${mdxContent}`); - return processFile(tempPath, pageUrl); + return (await processFile(tempPath, pageUrl)).content; } it('should convert relative URLs to absolute', async () => { @@ -351,7 +351,7 @@ title: Test ${mdxContent}`; await fs.writeFile(tempPath, fullContent); - return processFile(tempPath, pageUrl); + return (await processFile(tempPath, pageUrl)).content; } it('should convert MegaLink to descriptive link', async () => { @@ -406,7 +406,7 @@ ${mdxContent}`; it('should handle QuoteBlock with object author and link in real file', async () => { const inputPath = 'content/pages/use-cases/dev-test.md'; const pageUrl = 'https://neon.com/use-cases/dev-test'; - const result = await processFile(inputPath, pageUrl, process.cwd()); + const { content: result } = await processFile(inputPath, pageUrl, process.cwd()); expect(result).toContain('— Jonathan Reyes, Principal Engineer at Dispatch'); expect(result).not.toContain("name: 'Jonathan Reyes'"); @@ -579,7 +579,7 @@ Below the line. async function processInlineMdx(mdxContent) { const tempPath = '/tmp/test-mdx-conversion.md'; await fs.writeFile(tempPath, `---\ntitle: Test\n---\n${mdxContent}`); - return processFile(tempPath); + return (await processFile(tempPath)).content; } it('should not escape backticks in text', async () => { @@ -604,7 +604,7 @@ See [CONN_MAX_AGE](https://example.com). it('should not include index pointer in processFile output (moved to page header)', async () => { const tempPath = '/tmp/test-mdx-conversion.md'; await fs.writeFile(tempPath, `---\ntitle: Test Page\n---\nSome content here.`); - const result = await processFile(tempPath); + const { content: result } = await processFile(tempPath); // Index pointer is no longer in processFile -- it's added by addNavigationContext expect(result).not.toContain('llms.txt'); @@ -879,7 +879,7 @@ See [CONN_MAX_AGE](https://example.com). it('should convert every component without raw MDX leaks', async () => { const fixturePath = 'src/scripts/fixtures/mdx-conversion-test.md'; const pageUrl = 'https://neon.com/docs/test/mdx-conversion-test'; - const result = await processFile(fixturePath, pageUrl, process.cwd()); + const { content: result } = await processFile(fixturePath, pageUrl, process.cwd()); // No raw MDX component tags should survive conversion const componentNames = [ diff --git a/src/utils/ai-agent-detection.js b/src/utils/ai-agent-detection.js index 7c68af439c..b4e6ef0125 100644 --- a/src/utils/ai-agent-detection.js +++ b/src/utils/ai-agent-detection.js @@ -44,6 +44,7 @@ export function isAIAgentRequest(request) { // path (or no path at all). Maps directly to the correct static file in public/. const CUSTOM_MARKDOWN_PATHS = { pricing: '/pricing.md', // Hand-written, served from public/pricing.md (no CONTENT_ROUTES entry) + docs: '/docs/llms.txt', // Docs index aliases to the curated llms.txt (not a generated page-listing). Covers both /docs and /docs.md for agents / Accept: markdown (like pricing). 'docs/changelog': '/md/docs/changelog.md', 'docs/skill.md': '/docs/ai/skills/neon-postgres/SKILL.md', // primary skill alias — update alongside next.config.js if primary changes (see config/skills.json) }; @@ -85,7 +86,7 @@ export function getMarkdownPath(pathname) { // Get the content directory path from CONTENT_ROUTES and convert to public path // Example: content/docs -> /md/docs const contentPath = CONTENT_ROUTES[matchedRoute]; - const publicPath = contentPath.replace('content/', '/md/'); + const publicPath = contentPath.replace(/^content(?:\/pages)?\//, '/md/'); // Extract slug after the matched route const slug = normalized === matchedRoute ? '' : path.replace(`${matchedRoute}/`, ''); diff --git a/src/utils/ai-agent-detection.test.js b/src/utils/ai-agent-detection.test.js index e9f834ffe9..ef222c784d 100644 --- a/src/utils/ai-agent-detection.test.js +++ b/src/utils/ai-agent-detection.test.js @@ -162,12 +162,12 @@ describe('getMarkdownPath', () => { it('should convert /programs/agents to markdown path', () => { const result = getMarkdownPath('/programs/agents'); - expect(result).toBe('/md/pages/programs/agents.md'); + expect(result).toBe('/md/programs/agents.md'); }); it('should convert /use-cases/ai-agents to markdown path', () => { const result = getMarkdownPath('/use-cases/ai-agents'); - expect(result).toBe('/md/pages/use-cases/ai-agents.md'); + expect(result).toBe('/md/use-cases/ai-agents.md'); }); it('should handle nested docs paths', () => { @@ -187,6 +187,21 @@ describe('getMarkdownPath', () => { expect(result).toBeNull(); }); + it('should alias /docs.md to the curated llms.txt index', () => { + const result = getMarkdownPath('/docs.md'); + expect(result).toBe('/docs/llms.txt'); + }); + + it('should alias bare /docs to the curated llms.txt index (agent / Accept: markdown)', () => { + const result = getMarkdownPath('/docs'); + expect(result).toBe('/docs/llms.txt'); + }); + + it('should not alias docs subpaths to llms.txt', () => { + const result = getMarkdownPath('/docs/introduction'); + expect(result).toBe('/md/docs/introduction.md'); + }); + it('should resolve /docs/changelog to custom markdown path', () => { const result = getMarkdownPath('/docs/changelog'); expect(result).toBe('/md/docs/changelog.md'); @@ -278,9 +293,9 @@ describe('getMarkdownPath', () => { expect(result).toBe('/md/postgresql.md'); }); - it('should map /programs.md to /md/pages/programs.md (file may not exist)', () => { + it('should map /programs.md to /md/programs.md', () => { const result = getMarkdownPath('/programs.md'); - expect(result).toBe('/md/pages/programs.md'); + expect(result).toBe('/md/programs.md'); }); }); });