Skip to content

Commit 29cc6c4

Browse files
cscheid and claude committed
Preserve code annotations in llms-txt output
Save original code block text (with annotation markers) before code-annotation.lua strips them, then restore during HTML-to-markdown conversion. Annotation definition lists are converted to ordered lists. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c32970e commit 29cc6c4

5 files changed

Lines changed: 104 additions & 4 deletions

File tree

src/project/types/website/website-llms.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ function extractMainContent(doc: Document): string {
128128
return "";
129129
}
130130

131+
// Preprocess annotated code blocks before converting to markdown
132+
preprocessAnnotatedCodeBlocks(clone, main as Element);
133+
131134
// Return a minimal HTML document with just the content
132135
return `<!DOCTYPE html>
133136
<html>
@@ -138,6 +141,65 @@ ${main.innerHTML}
138141
</html>`;
139142
}
140143

144+
/**
 * Preprocess annotated code blocks for llms output.
 *
 * Mutates `container` in place in three passes:
 *  1. For every element carrying a `data-llms-code-original` attribute, the
 *     text of its `<code>` element (or of the element itself when it is the
 *     `<code>`) is replaced with the attribute value, and the attribute is
 *     removed.
 *  2. Elements with class `code-annotation-gutter` or
 *     `code-annotation-gutter-bg` are removed.
 *  3. Every `dl.code-annotation-container-grid` is replaced by an `<ol>` with
 *     one `<li>` per direct `<dd>` child. When the `<dl>` sits directly inside
 *     a `.cell-annotation` element, that wrapper is replaced instead.
 *
 * @param doc Document used to create the replacement `<ol>`/`<li>` elements.
 * @param container Element whose descendants are rewritten.
 */
function preprocessAnnotatedCodeBlocks(doc: Document, container: Element): void {
  const originalAttr = "data-llms-code-original";

  // Restore original code text in annotated code blocks.
  // The llms-code-annotations.lua filter saves the original text
  // (before code-annotation.lua strips markers) as a data attribute.
  const annotated = container.querySelectorAll("[data-llms-code-original]");
  for (const node of annotated) {
    const el = node as Element;
    const originalText = el.getAttribute(originalAttr);

    // Always drop the helper attribute, even when it is empty, so it can
    // never leak into the generated output.
    el.removeAttribute(originalAttr);
    if (!originalText) continue;

    // The attribute may be on a wrapper; find the <code> element inside.
    const codeEl = el.tagName === "CODE"
      ? el
      : el.querySelector("code") as Element | null;
    if (codeEl) {
      // Replacing the text content discards all child markup of <code>
      // (e.g. highlighting spans and annotation buttons).
      codeEl.textContent = originalText;
    }
  }

  // Remove annotation gutter elements.
  const gutters = container.querySelectorAll(
    ".code-annotation-gutter, .code-annotation-gutter-bg",
  );
  for (const gutter of gutters) {
    (gutter as Element).remove();
  }

  // Convert annotation definition lists to ordered lists.
  // The annotation text is in <dd> elements; <dt> elements have just the number.
  const dls = container.querySelectorAll("dl.code-annotation-container-grid");
  for (const dlNode of dls) {
    const dl = dlNode as Element;
    const ol = doc.createElement("ol");

    // Only direct <dd> children are annotation entries. A descendant query
    // would also match <dd> elements of a definition list nested inside an
    // annotation's text, emitting them a second time as top-level items.
    for (const childNode of Array.from(dl.children)) {
      const child = childNode as Element;
      if (child.tagName !== "DD") continue;
      const li = doc.createElement("li");
      li.innerHTML = child.innerHTML;
      ol.appendChild(li);
    }

    // Replace the DL (or its cell-annotation wrapper div if present).
    const parent = dl.parentElement;
    const target = parent !== null && parent.classList.contains("cell-annotation")
      ? parent
      : dl;
    target.parentElement?.replaceChild(ol, target);
  }
}
202+
141203
/**
142204
* Convert HTML content to markdown using Pandoc with the llms.lua filter.
143205
*/

src/project/types/website/website.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import { projectOffset, projectOutputDir } from "../../project-shared.ts";
3232
import { isHtmlFileOutput } from "../../../config/format.ts";
3333

3434
import {
35+
kFilterParams,
3536
kIncludeInHeader,
3637
kPageTitle,
3738
kTitle,
@@ -358,6 +359,8 @@ export const websiteProjectType: ProjectType = {
358359

359360
// Add llms.txt finalizer if enabled
360361
if (websiteConfigBoolean(kLlmsTxt, false, project.config)) {
362+
extras[kFilterParams] = extras[kFilterParams] || {};
363+
extras[kFilterParams]["llms-txt"] = true;
361364
extras.html[kHtmlFinalizers]?.push(
362365
llmsHtmlFinalizer(source, project, format),
363366
);

src/resources/filters/main.lua

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ import("./quarto-pre/bibliography-formats.lua")
143143
import("./quarto-pre/book-links.lua")
144144
import("./quarto-pre/book-numbering.lua")
145145
import("./quarto-pre/code-annotation.lua")
146+
import("./quarto-pre/llms-code-annotations.lua")
146147
import("./quarto-pre/code-filename.lua")
147148
import("./quarto-pre/contentsshortcode.lua")
148149
import("./quarto-pre/engine-escape.lua")
@@ -336,6 +337,15 @@ local quarto_pre_filters = {
336337
traverser = 'jog',
337338
},
338339

340+
{ name = "pre-llms-save-code-annotations",
341+
filter = filterIf(
342+
function() return param("llms-txt", false) end,
343+
llms_save_code_annotations()
344+
),
345+
flags = { "has_code_annotations" },
346+
traverser = 'jog',
347+
},
348+
339349
{ name = "pre-code-annotations",
340350
filter = code_annotations(),
341351
flags = { "has_code_annotations" },
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
-- llms-code-annotations.lua
2+
-- Copyright (C) 2020-2026 Posit Software, PBC
3+
--
4+
-- Saves original CodeBlock text before code-annotation.lua strips markers.
5+
-- Only runs when llms-txt is enabled (guarded by filterIf in main.lua).
6+
7+
function llms_save_code_annotations()
8+
return {
9+
CodeBlock = function(el)
10+
if el.text:match("<%d+>") then
11+
el.attributes["data-llms-code-original"] = el.text
12+
end
13+
return el
14+
end
15+
}
16+
end

tests/docs/smoke-all/website/llms-txt/index.qmd

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ _quarto:
1212
# Second array: patterns that must NOT match (empty)
1313
- []
1414
ensureLlmsMdRegexMatches:
15-
# First array: patterns that MUST match - verify anchor links are converted
16-
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)"]
17-
# Second array: patterns that must NOT match (no .html or .html# links)
18-
- ["\\.html\\)", "\\.html#"]
15+
# First array: patterns that MUST match - verify anchor links and code annotations
16+
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "#<1>", "#<2>", "Load tidyverse", "Open help for ggplot"]
17+
# Second array: patterns that must NOT match (no .html links, no annotation UI)
18+
- ["\\.html\\)", "\\.html#", "code-annotation-anchor"]
1919
---
2020

2121
## Test Content
@@ -25,3 +25,12 @@ This is a test website for the llms-txt feature.
2525
See the [about page](about.qmd) for more information.
2626

2727
Also see the [callout examples](about.qmd#callout-examples).
28+
29+
## Code Annotations
30+
31+
```r
32+
library(tidyverse) # <1>
33+
?ggplot # <2>
34+
```
35+
1. Load tidyverse
36+
2. Open help for ggplot

0 commit comments

Comments
 (0)