Skip to content

Commit 9f2c83f

Browse files
committed
Enable Markdown content negotiation and implement Markdown file generation
- Add middleware to serve `.md` files when the `Accept: text/markdown` header is present.
- Introduce the `markdown-output` Astro plugin to generate `.md` files alongside rendered HTML during builds.
- Add tests for content negotiation and Markdown file handling.
1 parent 8f52738 commit 9f2c83f

7 files changed

Lines changed: 339 additions & 0 deletions

File tree

astro/astro.config.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import * as fs from "node:fs";
1717
import { duendeOpenGraphImage } from "./src/plugins/duende-og-image.js";
1818
import removeMarkdownExtensions from "./src/plugins/remove-markdown-extensions.js";
1919
import staticRedirects from "./src/plugins/static-redirects.js";
20+
import markdownOutput from "./src/plugins/markdown-output.js";
2021

2122
// https://astro.build/config
2223
export default defineConfig({
@@ -233,6 +234,7 @@ export default defineConfig({
233234
contentDir: "./src/content/docs",
234235
}),
235236
staticRedirects(),
237+
markdownOutput(),
236238
opengraphImages({
237239
options: {
238240
fonts: [

astro/package-lock.json

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

astro/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,13 @@
3434
"astro-opengraph-images": "^1.14.3",
3535
"astro-redirect-from": "^1.3.5",
3636
"astro-rehype-relative-markdown-links": "^0.19.0",
37+
"hast-util-to-text": "^4.0.2",
3738
"jsdom": "^29.0.2",
3839
"patch-package": "^8.0.1",
3940
"react": "^19.2.4",
4041
"rehype-external-links": "^3.0.0",
42+
"rehype-parse": "^9.0.1",
43+
"rehype-remark": "^10.0.1",
4144
"satori": "^0.26.0",
4245
"sharp": "^0.34.5",
4346
"starlight-auto-sidebar": "^0.2.0",
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
import url from "node:url";
2+
import path from "node:path";
3+
import fs from "node:fs/promises";
4+
import type { AstroIntegrationLogger } from "astro";
5+
import { unified } from "unified";
6+
import rehypeParse from "rehype-parse";
7+
import rehypeRemark from "rehype-remark";
8+
import remarkStringify from "remark-stringify";
9+
import remarkGfm from "remark-gfm";
10+
import { JSDOM } from "jsdom";
11+
import { toText } from "hast-util-to-text";
12+
13+
/**
14+
* Astro integration that generates a Markdown file (index.md) next to every
15+
* rendered HTML file (index.html) in the build output.
16+
*
17+
* The Markdown is derived from the rendered HTML, so all links, includes,
18+
* components, etc. are already resolved.
19+
*/
20+
export default function markdownOutput() {
21+
return {
22+
name: "markdown-output",
23+
hooks: {
24+
"astro:build:done": async ({
25+
dir,
26+
pages,
27+
logger,
28+
}: {
29+
dir: URL;
30+
pages: Array<{ pathname: string }>;
31+
logger: AstroIntegrationLogger;
32+
}) => {
33+
const outDir = url.fileURLToPath(dir);
34+
const processor = unified()
35+
.use(rehypeParse, { fragment: true })
36+
.use(rehypeRemark, {
37+
handlers: {
38+
// Preserve language hints on code fences from <pre data-language="...">
39+
pre(state: any, node: any) {
40+
const lang =
41+
node.properties?.dataLanguage || "";
42+
const value = toText(node);
43+
const result = {
44+
type: "code" as const,
45+
lang: lang || null,
46+
meta: null,
47+
value: value.replace(/\n$/, ""),
48+
};
49+
state.patch(node, result);
50+
return result;
51+
},
52+
// Handle <figure> with code blocks: extract title from figcaption
53+
figure(state: any, node: any) {
54+
// Find figcaption title
55+
const figcaption = node.children?.find(
56+
(c: any) => c.tagName === "figcaption",
57+
);
58+
const titleSpan = figcaption?.children?.find(
59+
(c: any) =>
60+
c.properties?.className?.includes("title"),
61+
);
62+
const title = titleSpan ? toText(titleSpan).trim() : "";
63+
64+
// Find <pre> child
65+
const pre = node.children?.find(
66+
(c: any) => c.tagName === "pre",
67+
);
68+
if (!pre) {
69+
// Not a code figure, fall back to default
70+
return state.all(node);
71+
}
72+
73+
const lang = pre.properties?.dataLanguage || "";
74+
const value = toText(pre);
75+
const codeNode = {
76+
type: "code" as const,
77+
lang: lang || null,
78+
meta: null,
79+
value: value.replace(/\n$/, ""),
80+
};
81+
state.patch(pre, codeNode);
82+
83+
if (title) {
84+
const titleNode = {
85+
type: "paragraph" as const,
86+
children: [
87+
{
88+
type: "inlineCode" as const,
89+
value: title,
90+
},
91+
{
92+
type: "text" as const,
93+
value: ":",
94+
},
95+
],
96+
};
97+
return [titleNode, codeNode];
98+
}
99+
100+
return codeNode;
101+
},
102+
},
103+
})
104+
.use(remarkGfm)
105+
.use(remarkStringify, {
106+
bullet: "-",
107+
emphasis: "*",
108+
strong: "*",
109+
rule: "-",
110+
});
111+
112+
let count = 0;
113+
let errors = 0;
114+
115+
await Promise.all(
116+
pages.map(async ({ pathname }) => {
117+
const htmlPath = path.join(outDir, pathname, "index.html");
118+
const mdPath = path.join(outDir, pathname, "index.md");
119+
120+
try {
121+
const html = await fs.readFile(htmlPath, "utf-8");
122+
const dom = new JSDOM(html);
123+
const doc = dom.window.document;
124+
125+
const main = doc.querySelector("main");
126+
if (!main) return;
127+
128+
// Remove banner
129+
main.querySelectorAll(".sl-banner").forEach((el) => el.remove());
130+
131+
// Remove "Section titled" anchor links in headings
132+
main.querySelectorAll("a").forEach((el) => {
133+
if (el.textContent?.trim().startsWith("Section titled")) el.remove();
134+
});
135+
136+
// Remove "Edit page" link and "Last updated" meta section
137+
main.querySelectorAll("footer .meta").forEach((el) => el.remove());
138+
139+
// Remove giscus comments
140+
main.querySelectorAll("giscus-comments").forEach((el) => el.remove());
141+
142+
// Remove copyright footer (the <hr> + copyright div)
143+
main.querySelectorAll("footer > hr").forEach((el) => el.remove());
144+
main.querySelectorAll("footer > div:not(.pagination-links)").forEach((el) => el.remove());
145+
146+
// Flatten pagination links so Previous/Next text is on one line
147+
// Structure: <a> <svg/> <span> Previous <br> <span class="link-title">Title</span> </span> </a>
148+
main.querySelectorAll(".pagination-links a").forEach((a) => {
149+
a.querySelectorAll("svg").forEach((svg) => svg.remove());
150+
a.querySelectorAll("br").forEach((br) => br.remove());
151+
const label = a.querySelector("span")?.childNodes[0]?.textContent?.trim(); // "Previous" or "Next"
152+
const title = a.querySelector(".link-title")?.textContent?.trim();
153+
if (label && title) {
154+
a.textContent = `${label}: ${title}`;
155+
}
156+
});
157+
158+
const content = main.innerHTML;
159+
const result = await processor.process(content);
160+
161+
// Add page title as YAML frontmatter
162+
const pageTitle = doc.querySelector("title")?.textContent?.trim() || "";
163+
const frontmatter = `---\ntitle: ${pageTitle}\n---\n\n`;
164+
165+
await fs.writeFile(mdPath, frontmatter + String(result));
166+
count++;
167+
} catch (e: any) {
168+
if (e.code === "ENOENT") {
169+
// No index.html for this page (e.g. redirects, API routes)
170+
return;
171+
}
172+
errors++;
173+
logger.warn(`Failed to generate Markdown for ${pathname}: ${e.message}`);
174+
}
175+
}),
176+
);
177+
178+
logger.info(
179+
`Generated ${count} Markdown files${errors > 0 ? ` (${errors} errors)` : ""}`,
180+
);
181+
},
182+
},
183+
};
184+
}

server/src/Docs.Web/Program.cs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,37 @@
5151
// Enable response compression
5252
app.UseResponseCompression();
5353

54+
// Content negotiation: serve the pre-generated .md file when the client sends Accept: text/markdown.
// Falls through to the rest of the pipeline when no Markdown variant exists for the request path.
app.Use(async (context, next) =>
{
    // Only read requests are eligible; other methods go to the rest of the pipeline untouched.
    var isReadRequest = HttpMethods.IsGet(context.Request.Method) || HttpMethods.IsHead(context.Request.Method);
    var accept = context.Request.Headers.Accept.ToString();

    if (isReadRequest && accept.Contains("text/markdown", StringComparison.OrdinalIgnoreCase))
    {
        var webHostEnvironment = context.RequestServices.GetRequiredService<IWebHostEnvironment>();
        var webRoot = webHostEnvironment.WebRootPath;

        // WebRootPath may be unset; Path.Combine would throw on null.
        if (!string.IsNullOrEmpty(webRoot))
        {
            var webRootFull = Path.GetFullPath(webRoot);
            var webRootPrefix = Path.EndsInDirectorySeparator(webRootFull)
                ? webRootFull
                : webRootFull + Path.DirectorySeparatorChar;
            var relativePath = (context.Request.Path.Value ?? "").Trim('/');

            // Try the exact path with .md extension, then index.md inside the directory
            var candidates = new[]
            {
                Path.Combine(webRootFull, relativePath + ".md"),
                Path.Combine(webRootFull, relativePath, "index.md")
            };

            foreach (var candidate in candidates)
            {
                // Normalize and make sure the request path cannot escape the web root
                // (e.g. via rooted segments or encoded backslashes).
                var mdPath = Path.GetFullPath(candidate);
                if (!mdPath.StartsWith(webRootPrefix, StringComparison.Ordinal))
                {
                    continue;
                }

                if (File.Exists(mdPath))
                {
                    context.Response.ContentType = "text/markdown; charset=utf-8";
                    context.Response.Headers["content-signal"] = "ai-train=yes, search=yes, ai-input=yes";
                    // The representation depends on the Accept header; tell caches so.
                    // NOTE(review): the HTML response for the same URL should carry Vary: Accept too.
                    context.Response.Headers.Append("Vary", "Accept");
                    await context.Response.SendFileAsync(mdPath);
                    return;
                }
            }
        }
    }

    await next();
});
84+
5485
// Add trailing slash redirect middleware (replicate nginx behavior)
5586
app.Use(async (context, next) =>
5687
{
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
using System.Net;
2+
using System.Net.Http.Headers;
3+
using Microsoft.AspNetCore.Mvc.Testing;
4+
5+
namespace Docs.Web.Tests;
6+
7+
/// <summary>
/// Verifies that requests with <c>Accept: text/markdown</c> are answered from the
/// .md files in the web root, and that other requests are not.
/// </summary>
public class ContentNegotiationTests : IClassFixture<MarkdownWebApplicationFactory>
{
    private const string MarkdownMediaType = "text/markdown";
    private const string ExpectedContentSignal = "ai-train=yes, search=yes, ai-input=yes";

    private readonly HttpClient _client;

    public ContentNegotiationTests(MarkdownWebApplicationFactory factory)
    {
        // Redirects are not followed so each test observes the first response.
        _client = factory.CreateClient(new WebApplicationFactoryClientOptions
        {
            AllowAutoRedirect = false
        });
    }

    [Fact]
    public async Task AcceptMarkdown_WithMatchingMdFile_ReturnsMarkdownContent()
    {
        using var response = await GetAsync("/docs/guide/", MarkdownMediaType);

        await AssertMarkdownResponseAsync(response, "# Guide");
    }

    [Fact]
    public async Task AcceptMarkdown_WithExactPathMdFile_ReturnsMarkdownContent()
    {
        // /docs/page has no trailing slash — middleware tries /docs/page.md which exists
        using var response = await GetAsync("/docs/page", MarkdownMediaType);

        await AssertMarkdownResponseAsync(response, "# Page");
    }

    [Fact]
    public async Task AcceptMarkdown_NoMdFile_FallsThrough()
    {
        using var response = await GetAsync("/docs/nonexistent/", MarkdownMediaType);

        Assert.NotEqual(MarkdownMediaType, response.Content.Headers.ContentType?.MediaType);
    }

    [Fact]
    public async Task AcceptHtml_WithMdFilePresent_DoesNotReturnMarkdown()
    {
        using var response = await GetAsync("/docs/guide/", "text/html");

        Assert.NotEqual(MarkdownMediaType, response.Content.Headers.ContentType?.MediaType);
    }

    /// <summary>Sends a GET for <paramref name="path"/> with a single Accept media type.</summary>
    private async Task<HttpResponseMessage> GetAsync(string path, string accept)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, path);
        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(accept));
        return await _client.SendAsync(request);
    }

    /// <summary>Asserts a 200 Markdown response with the content-signal header and the given body.</summary>
    private static async Task AssertMarkdownResponseAsync(HttpResponseMessage response, string expectedBody)
    {
        Assert.Equal(HttpStatusCode.OK, response.StatusCode);
        Assert.Equal(MarkdownMediaType, response.Content.Headers.ContentType?.MediaType);
        Assert.Equal(ExpectedContentSignal, response.Headers.GetValues("content-signal").Single());
        var content = await response.Content.ReadAsStringAsync();
        Assert.Equal(expectedBody, content);
    }
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
using System.Text.Json;
2+
using Microsoft.AspNetCore.Hosting;
3+
using Microsoft.AspNetCore.Mvc.Testing;
4+
5+
namespace Docs.Web.Tests;
6+
7+
/// <summary>
/// Test host factory backed by a throwaway web root that is pre-populated with
/// the .md files the content negotiation tests request.
/// </summary>
public sealed class MarkdownWebApplicationFactory : WebApplicationFactory<Program>
{
    private readonly string _tempWebRoot;

    public MarkdownWebApplicationFactory()
    {
        _tempWebRoot = Path.Combine(Path.GetTempPath(), "Docs.Web.Tests.Md", Guid.NewGuid().ToString());

        var docsDir = Path.Combine(_tempWebRoot, "docs");
        var guideDir = Path.Combine(docsDir, "guide");

        // Creating the deepest directory also creates the web root and docs/.
        Directory.CreateDirectory(guideDir);

        // Empty redirects.json so Program.cs doesn't warn
        File.WriteAllText(Path.Combine(_tempWebRoot, "redirects.json"), "{}");

        // Markdown fixtures: one directory-style page and one file-style page
        File.WriteAllText(Path.Combine(guideDir, "index.md"), "# Guide");
        File.WriteAllText(Path.Combine(docsDir, "page.md"), "# Page");
    }

    protected override void ConfigureWebHost(IWebHostBuilder builder) =>
        builder
            .UseWebRoot(_tempWebRoot)
            .UseEnvironment("Testing");

    protected override void Dispose(bool disposing)
    {
        base.Dispose(disposing);

        if (!disposing || !Directory.Exists(_tempWebRoot))
        {
            return;
        }

        // Remove the temporary web root together with everything written into it.
        Directory.Delete(_tempWebRoot, recursive: true);
    }
}

0 commit comments

Comments
 (0)