|
| 1 | +import type { Config, Context } from "@netlify/edge-functions"; |
| 2 | +import { extname } from "path"; |
| 3 | + |
// HTTP methods the middleware acts on; membership is checked against `request.method`.
const ALLOWED_HTTP_METHODS: ReadonlySet<string> = new Set<string>([
  "GET",
  "HEAD",
]);
// Request paths that are answered with the root Markdown document (/index.md).
const LLMS_REWRITES: ReadonlySet<string> = new Set<string>([
  "/llms.txt",
  "/llms-full.txt",
]);
| 6 | + |
// Routing configuration for this middleware.
// It is registered for every path; the exclusions below keep it from being
// invoked for common static asset types and a handful of specific files,
// which avoids unnecessary middleware execution.
export const config: Config = {
  path: "/*",
  excludedPath: [
    // Static asset types, matched at any directory depth.
    "/**/*.js",
    "/**/*.css",
    "/**/*.png",
    "/**/*.jpg",
    "/**/*.jpeg",
    "/**/*.svg",
    "/**/*.ico",
    "/**/*.xml",
    // Everything below /img.
    "/img/**",
    // Individual files.
    "/robots.txt",
    "/404.html",
    "/_redirects",
    "/.nojekyll",
  ],
};
| 27 | + |
// Middleware that serves Markdown to clients that prefer it (such as LLMs)
// while still serving regular HTML to browsers and other clients.
//
// Decision order:
//   1. Methods other than GET/HEAD, and requests whose User-Agent contains
//      "algolia" (case-insensitive), are left alone (returns undefined).
//   2. /llms.txt and /llms-full.txt return a URL pointing at /index.md
//      (presumably handled by the platform as a rewrite — confirm against the
//      Netlify Edge Functions documentation).
//   3. Direct .html / .md requests return the downstream response with a
//      Link rel="alternate" header added.
//   4. Any other path with a file extension is left alone.
//   5. Extension-less paths: when the Accept header prefers Markdown,
//      <path>/index.md is fetched and returned unless it is a 404; otherwise
//      the downstream response is returned. Either way the response gets
//      "Vary: Accept" and an alternate Link header.
//
// Any error thrown along the way is logged and the request falls back to
// normal handling via context.next().
export default async function handler(
  request: Request,
  context: Context,
): Promise<Response | URL | undefined> {
  try {
    // Only handle allowed HTTP methods.
    if (!ALLOWED_HTTP_METHODS.has(request.method)) return;

    // Skip our own Algolia crawler — it follows rel="alternate" links and
    // would otherwise index the .md variants.
    const userAgent = request.headers.get("user-agent") ?? "";
    if (/algolia/i.test(userAgent)) return;

    const url = new URL(request.url);
    const { pathname } = url;

    // Respond with index.md for llms.txt and llms-full.txt,
    // as index.md is well suited for this purpose.
    if (LLMS_REWRITES.has(pathname)) {
      return buildTarget("/index.md", url);
    }

    const ext = extname(pathname);
    if (ext === ".html" || ext === ".md") {
      // Direct request for a concrete representation: only advertise the
      // other format, no content negotiation.
      return modifyHeaders(await context.next(), (headers) => {
        addAlternateLink(headers, url);
      });
    }

    // Other extensions are static assets that shouldn't get alternate links.
    if (ext) return;

    if (prefersMarkdown(request.headers.get("accept"))) {
      // e.g. /foo -> /foo/index.md
      const target = buildTarget(joinIndexMD(pathname), url);
      // Mirror the incoming method so that a HEAD request does not trigger a
      // full GET download of the Markdown document.
      const markdown = await fetch(target, { method: request.method });
      if (markdown.status !== 404) return finalize(markdown, url);

      // The 404 response is discarded; cancel its body so the unread stream
      // does not stay open while we fall back to normal handling.
      await markdown.body?.cancel();
    }

    // For all other cases, proceed with the normal request handling.
    return finalize(await context.next(), url);
  } catch (error) {
    console.error("Error in LLM middleware:", error);
    // In case of any error, proceed with the normal request handling.
    return context.next();
  }
}
| 82 | + |
// Resolves `pathname` against `base` and carries over `base`'s query string.
// Any query string contained in `pathname` itself is replaced by `base.search`.
// `base` is not modified; a new URL object is returned.
function buildTarget(pathname: string, base: URL): URL {
  return Object.assign(new URL(pathname, base), { search: base.search });
}
| 90 | + |
// Maps a directory-style pathname to the index.md inside it,
// e.g. "/foo" and "/foo/" both become "/foo/index.md", and "/" becomes "/index.md".
function joinIndexMD(pathname: string): string {
  const directory = pathname.endsWith("/") ? pathname : `${pathname}/`;
  return `${directory}index.md`;
}
| 95 | + |
// Decides from an Accept header whether the client prefers Markdown over HTML.
//
// Returns true only when "text/markdown" is explicitly listed with q > 0 and
// its q is at least as high as the q that applies to HTML. The q for HTML is
// taken from "text/html" if listed, otherwise from "text/*" if listed,
// otherwise from "*/*". A missing or empty header yields false.
function prefersMarkdown(accept: string | null): boolean {
  if (!accept) return false;

  // Highest quality value seen per media range of interest.
  // A negative value means the range was not present in the header.
  let markdownQ = -1;
  let htmlQ = -1;
  let textQ = -1;
  let anyQ = -1;

  // The header is a comma-separated list of media ranges, each with optional
  // ";"-separated parameters, e.g. "text/html; q=0.9".
  for (const part of accept.split(",")) {
    const [rawType = "", ...params] = part.split(";");
    const type = rawType.trim().toLowerCase();
    if (!type) continue;

    // A range without an explicit q parameter has quality 1.
    let q = 1;
    for (const rawParam of params) {
      const param = rawParam.trim();
      // Parameter names are case-insensitive, so accept both "q=" and "Q=".
      if (param.slice(0, 2).toLowerCase() !== "q=") continue;
      const value = Number.parseFloat(param.slice(2));
      // Unparseable q values are ignored and leave the current q in place.
      if (!Number.isNaN(value)) q = value;
    }

    switch (type) {
      case "text/markdown":
        markdownQ = Math.max(q, markdownQ);
        break;
      case "text/html":
        htmlQ = Math.max(q, htmlQ);
        break;
      case "text/*":
        textQ = Math.max(q, textQ);
        break;
      case "*/*":
        anyQ = Math.max(q, anyQ);
        break;
      default:
        // Other media types do not influence the decision.
        break;
    }
  }

  // If "text/html" isn't explicitly listed, fall back to the most specific
  // matching range. "text/*" wins whenever it is present — including with
  // q=0, which explicitly rejects HTML — and "*/*" is used only when
  // "text/*" is absent.
  if (htmlQ < 0) htmlQ = textQ >= 0 ? textQ : anyQ;

  return markdownQ > 0 && markdownQ >= htmlQ;
}
| 144 | + |
// Produces the final response for a request to a path without a file
// extension (e.g. /foo or /foo/) by decorating its headers:
//   - "Accept" is added to Vary, because the representation served for these
//     paths depends on the request's Accept header and caches need to know.
//   - A Link header advertises the alternate format (Markdown or HTML). The
//     alternate is derived from the response's own URL, resolved against `url`.
// Direct requests for .html or .md files do not go through this function;
// the main handler adds their alternate link itself.
function finalize(response: Response, url: URL): Response {
  const servedUrl = new URL(response.url, url);

  const decorate = (headers: Headers): void => {
    appendVary(headers, "Accept");
    addAlternateLink(headers, servedUrl);
  };

  return modifyHeaders(response, decorate);
}
| 161 | + |
// Returns a new Response that reuses the body, status and status text of
// `response`, with headers produced by copying the original headers and
// letting `fn` mutate the copy. The headers of `response` are left unchanged.
function modifyHeaders(
  response: Response,
  fn: (headers: Headers) => void,
): Response {
  const { body, status, statusText } = response;

  const headers = new Headers(response.headers);
  fn(headers);

  return new Response(body, { status, statusText, headers });
}
| 177 | + |
// Ensures `value` is listed in the Vary header without creating duplicates.
// With no (or an empty) Vary header the header is set to `value`; otherwise
// `value` is appended unless one of the comma-separated tokens already
// matches it case-insensitively.
function appendVary(headers: Headers, value: string) {
  const current = headers.get("vary");

  if (!current) {
    headers.set("vary", value);
    return;
  }

  const wanted = value.toLowerCase();
  for (const token of current.split(",")) {
    // Already present: leave the header untouched.
    if (token.trim().toLowerCase() === wanted) return;
  }

  headers.set("vary", `${current}, ${value}`);
}
| 195 | + |
// Adds a Link rel="alternate" header that points at the other representation
// (Markdown or HTML) of the page identified by `url`:
//   - "*.html"        -> same path with ".md",   type "text/markdown"
//   - "*.md"          -> same path with ".html", type "text/html"
//   - no extension    -> "<path>/index.md",      type "text/markdown"
//   - other extension -> no header is added
// The alternate URL keeps the query string of `url`.
//
// Link headers already present on the response are preserved: the new link is
// appended to them rather than replacing them, and nothing is added when the
// identical link is already there.
function addAlternateLink(headers: Headers, url: URL) {
  const { pathname } = url;

  let alternatePath: string;
  let alternateType = "text/markdown";

  switch (extname(pathname)) {
    case ".html":
      // For an HTML page, the alternate format is the corresponding Markdown file.
      alternatePath = pathname.replace(/\.html$/, ".md");
      break;
    case ".md":
      // For a Markdown page, the alternate format is the corresponding HTML file.
      alternatePath = pathname.replace(/\.md$/, ".html");
      alternateType = "text/html";
      break;
    case "":
      // Paths without an extension most likely point to /path/index.html,
      // so the alternate is /path/index.md.
      alternatePath = joinIndexMD(pathname);
      break;
    default:
      // No sensible alternate for other file types: don't add a Link header.
      return;
  }

  const alternateUrl = buildTarget(alternatePath, url);
  const link = `<${alternateUrl.href}>; rel="alternate"; type="${alternateType}"`;

  const existing = headers.get("link");
  if (!existing) {
    headers.set("link", link);
    return;
  }

  // Avoid emitting the same alternate link twice, e.g. when the response
  // already went through this function once.
  if (existing.includes(link)) return;

  // Keep unrelated links (preload, canonical, ...) set by earlier layers.
  headers.append("link", link);
}
0 commit comments