Skip to content

Commit e615b08

Browse files
committed
Improve LLM visibility
1 parent 3ab4d2f commit e615b08

37 files changed

Lines changed: 639 additions & 30 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@
1818
npm-debug.log*
1919
yarn-debug.log*
2020
yarn-error.log*
21+
22+
# Local Netlify folder
23+
.netlify

docs/intro.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
slug: '/'
33
title: ''
44
description: imgproxy is a fast and secure standalone server for resizing and converting remote images
5-
displayed_sidebar: tutorialSidebar
5+
displayed_sidebar: main
66
---
77

88
<h1>

docusaurus.config.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
Options as PresetClassicOptions,
44
ThemeConfig as PresetClassicThemeConfig,
55
} from "@docusaurus/preset-classic";
6+
import { join } from "node:path";
67

78
import badgeRemarkPlugin from "./src/remark/badge";
89
import codeAnchorRemarkPlugin from "./src/remark/code-anchor";
@@ -28,6 +29,12 @@ const config: Config = {
2829
baseUrl: "/",
2930

3031
onBrokenLinks: "throw",
32+
// Anchors for configuration options are generated dynamically,
33+
// so Docusaurus can't know them in advance.
34+
// It'd be nice to be able to verify anchors, but for now,
35+
// let's just ignore broken anchors instead of flooding the build
36+
// output with warnings.
37+
onBrokenAnchors: "ignore",
3138

3239
i18n: {
3340
defaultLocale: "en",
@@ -92,6 +99,8 @@ const config: Config = {
9299
},
93100
],
94101

102+
plugins: [join(__dirname, "src/plugins/llms.ts")],
103+
95104
presets: [
96105
[
97106
"classic",
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
import type { Config, Context } from "@netlify/edge-functions";
2+
import { extname } from "path";
3+
4+
// HTTP methods the middleware acts on; the handler returns early for any other method.
const ALLOWED_HTTP_METHODS = new Set(["GET", "HEAD"]);
5+
// Request paths that the handler answers with the site's root /index.md.
const LLMS_REWRITES = new Set(["/llms.txt", "/llms-full.txt"]);
6+
7+
// Routing configuration for this edge function.
export const config: Config = {
8+
// This middleware should run for all paths, but we explicitly exclude common static asset types
9+
// and some specific files to avoid unnecessary middleware execution.
10+
path: "/*",
11+
excludedPath: [
12+
"/**/*.js",
13+
"/**/*.css",
14+
"/**/*.png",
15+
"/**/*.jpg",
16+
"/**/*.jpeg",
17+
"/**/*.svg",
18+
"/**/*.ico",
19+
"/**/*.xml",
20+
"/img/**",
21+
"/robots.txt",
22+
"/404.html",
23+
"/_redirects",
24+
"/.nojekyll",
25+
],
26+
};
27+
28+
// This middleware serves Markdown content to clients that prefer it (like LLMs),
// while still supporting regular HTML for browsers and other clients.
// It also adds Link headers to indicate alternate formats and ensures proper Vary headers.
//
// Return value:
// - undefined: the request is not handled here and continues through the normal chain,
// - a URL: the request is rewritten to that URL (used for /llms.txt and /llms-full.txt),
// - a Response: the response to send, with the extra headers applied.
export default async function handler(request: Request, context: Context) {
  try {
    // Only handle allowed HTTP methods.
    if (!ALLOWED_HTTP_METHODS.has(request.method)) return;

    // Skip our own Algolia crawler — it follows rel="alternate" links and
    // would otherwise index the .md variants.
    const userAgent = request.headers.get("user-agent") || "";
    if (/algolia/i.test(userAgent)) return;

    const url = new URL(request.url);
    const { pathname } = url;

    // Respond with index.md for llms.txt and llms-full.txt,
    // as index.md is well suited for this purpose.
    if (LLMS_REWRITES.has(pathname)) {
      return buildTarget("/index.md", url);
    }

    const ext = extname(pathname);
    if (ext === ".html" || ext === ".md") {
      // For direct requests to .html or .md files,
      // add a Link header pointing to the alternate format.
      return modifyHeaders(await context.next(), (headers) => {
        addAlternateLink(headers, url);
      });
    } else if (ext) {
      // Skip other requests with file extensions,
      // as they are static assets that shouldn't have alternate links.
      return;
    }

    // For other requests, check if the client prefers Markdown over HTML.
    // If so, try to serve the corresponding Markdown file
    // (e.g., /foo -> /foo/index.md).
    if (prefersMarkdown(request.headers.get("accept"))) {
      const target = buildTarget(joinIndexMD(pathname), url);
      const response = await fetch(target);

      // Only a successful (2xx) fetch is a usable Markdown variant.
      // A missing file (404) as well as any other failure (5xx, 401/403, ...)
      // falls back to the normal request handling instead of surfacing
      // an error for a page that can still be served as HTML.
      if (response.ok) return finalize(response, url);

      // Release the body of the rejected response so the connection isn't left dangling.
      // A failure to cancel must not prevent the fallback below.
      await response.body?.cancel().catch(() => undefined);
    }

    // For all other cases, proceed with the normal request handling.
    return finalize(await context.next(), url);
  } catch (error) {
    console.error("Error in LLM middleware:", error);
    // In case of any error, proceed with the normal request handling.
    return context.next();
  }
}
82+
83+
// Resolves `pathname` against `base` and carries over the query string of `base`.
// Any query string contained in `pathname` itself is replaced by the one from `base`.
function buildTarget(pathname: string, base: URL): URL {
  const { search } = base;
  const resolved = new URL(pathname, base);
  resolved.search = search;
  return resolved;
}
90+
91+
// Maps a page pathname to the path of its Markdown index file,
// e.g. "/foo" and "/foo/" both become "/foo/index.md".
function joinIndexMD(pathname: string): string {
  const separator = pathname.endsWith("/") ? "" : "/";
  return `${pathname}${separator}index.md`;
}
95+
96+
// Parses the Accept header to determine if the client prefers Markdown over HTML.
//
// Returns true only when "text/markdown" is explicitly listed with a quality value
// greater than 0 that is greater than or equal to the effective quality value of HTML.
// A missing or empty header never prefers Markdown.
function prefersMarkdown(accept: string | null): boolean {
  if (!accept) return false;

  // Quality values (q) indicate the client's preference for different content types.
  // A negative value means that the type wasn't found in the Accept header.
  let markdownQ = -1;
  let htmlQ = -1;
  let textQ = -1;
  let anyQ = -1;

  // The Accept header is a comma-separated list of media ranges.
  for (const part of accept.split(",")) {
    // Each range can have parameters separated by semicolons, e.g., "text/html; q=0.9".
    const [rawType = "", ...params] = part.split(";");
    const type = rawType.trim().toLowerCase();
    if (!type) continue;

    // Default quality value is 1 if the type is present without an explicit q parameter.
    let q = 1;
    for (const rawParam of params) {
      const eq = rawParam.indexOf("=");
      if (eq < 0) continue;

      // Parameter names are case-insensitive, so "Q=0.5" is the same as "q=0.5".
      if (rawParam.slice(0, eq).trim().toLowerCase() !== "q") continue;

      const value = Number.parseFloat(rawParam.slice(eq + 1));
      // Clamp to the valid 0..1 range so that an out-of-range value can't be
      // mistaken for the negative "not listed" marker or outrank a valid one.
      if (!Number.isNaN(value)) q = Math.min(Math.max(value, 0), 1);
    }

    // Remember the highest quality value seen for each relevant media range.
    if (type === "text/markdown") {
      markdownQ = Math.max(q, markdownQ);
    } else if (type === "text/html") {
      htmlQ = Math.max(q, htmlQ);
    } else if (type === "text/*") {
      textQ = Math.max(q, textQ);
    } else if (type === "*/*") {
      anyQ = Math.max(q, anyQ);
    }
  }

  // If "text/html" isn't explicitly listed, fall back to the most specific
  // matching range: "text/*" if it is listed at all (even with q=0, which
  // explicitly rejects HTML), otherwise "*/*".
  if (htmlQ < 0) htmlQ = textQ >= 0 ? textQ : anyQ;

  // Markdown is preferred if it was explicitly listed with a quality value greater than 0,
  // and its quality value is greater than or equal to that of HTML.
  return markdownQ > 0 && markdownQ >= htmlQ;
}
144+
145+
// Decorates a response for an extension-less path (e.g., /foo or /foo/)
// with the headers required for content negotiation.
// Direct requests to .html or .md files don't go through this function:
// the main handler adds their alternate Link header itself.
function finalize(response: Response, url: URL): Response {
  // The alternate link is derived from the URL the response came from,
  // resolved against the request URL.
  const servedUrl = new URL(response.url, url);

  const decorate = (headers: Headers): void => {
    // The response depends on the Accept header, so caching CDNs and browsers
    // must be told to key on it.
    appendVary(headers, "Accept");
    // Advertise the other format (Markdown or HTML) to clients that can use it.
    addAlternateLink(headers, servedUrl);
  };

  return modifyHeaders(response, decorate);
}
161+
162+
// Builds a new Response that reuses the body, status and status text of `response`
// and carries a copy of its headers after `fn` has been applied to that copy.
// The headers of the original response are left untouched.
function modifyHeaders(
  response: Response,
  fn: (headers: Headers) => void,
): Response {
  const { body, status, statusText } = response;

  const patched = new Headers(response.headers);
  fn(patched);

  return new Response(body, { status, statusText, headers: patched });
}
177+
178+
// Adds `value` to the Vary header unless it is already listed (compared case-insensitively).
function appendVary(headers: Headers, value: string) {
  const current = headers.get("vary");

  // No Vary header yet: the new value becomes the whole header.
  if (!current) {
    headers.set("vary", value);
    return;
  }

  // Already listed: leave the header as it is to avoid duplicates.
  const wanted = value.toLowerCase();
  for (const token of current.split(",")) {
    if (token.trim().toLowerCase() === wanted) return;
  }

  // Not listed yet: extend the existing list.
  headers.set("vary", `${current}, ${value}`);
}
195+
196+
// Adds a Link header pointing to the alternate format (Markdown or HTML) for a given URL.
// A Link header that is already present is kept: the alternate link is merged into it
// instead of replacing it, and an identical alternate link is not added twice.
function addAlternateLink(headers: Headers, url: URL) {
  let alternatePath: string | null = null;
  let alternateType = "text/markdown";

  const ext = extname(url.pathname);
  if (ext === ".html") {
    // For an HTML page, the alternate format is the corresponding Markdown file.
    alternatePath = url.pathname.replace(/\.html$/, ".md");
  } else if (ext === ".md") {
    // For a Markdown page, the alternate format is the corresponding HTML file.
    alternatePath = url.pathname.replace(/\.md$/, ".html");
    alternateType = "text/html";
  } else if (ext === "") {
    // Paths without an extension most likely point to /path/index.html,
    // so the alternate path is /path/index.md.
    alternatePath = joinIndexMD(url.pathname);
  }

  // If we couldn't determine a valid alternate path, don't add a Link header.
  if (!alternatePath) return;

  // Build the full URL for the alternate format.
  const alternateUrl = buildTarget(alternatePath, url);
  const reference = `<${alternateUrl}>`;
  const link = `${reference}; rel="alternate"; type="${alternateType}"`;

  const existing = headers.get("link");
  if (!existing) {
    headers.set("link", link);
    return;
  }

  // The response may already advertise this exact alternate
  // (e.g. when it has passed through this middleware before).
  if (existing.includes(reference)) return;

  // Keep unrelated links (such as preload hints) and add ours to the list.
  headers.set("link", `${existing}, ${link}`);
}

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"@docusaurus/types": "^3.9.2",
3434
"@eslint/js": "^9.39.1",
3535
"@evilmartians/lefthook": "^2.0.4",
36+
"@netlify/edge-functions": "^3.0.6",
3637
"@types/mdast": "4.0.4",
3738
"eslint": "^9.39.1",
3839
"eslint-config-prettier": "^10.1.8",

pnpm-lock.yaml

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sidebars.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { SidebarsConfig } from "@docusaurus/plugin-content-docs";
22

33
const sidebars: SidebarsConfig = {
4-
tutorialSidebar: [
4+
main: [
55
"getting_started",
66
{
77
type: "link",

0 commit comments

Comments
 (0)