Skip to content

Commit d121ca0

Browse files
cscheid and claude committed
Support conditional content for llms-txt format
Allow users to include/exclude content in .llms.md output using content-visible/content-hidden with when-format="llms-txt". A pre-filter intercepts ConditionalBlock nodes before they are cleared, wrapping them in marker divs that llms.lua and the HTML finalizer handle independently. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 29cc6c4 commit d121ca0

5 files changed

Lines changed: 111 additions & 5 deletions

File tree

src/project/types/website/website-llms.ts

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { basename, join, relative } from "../../../deno_ral/path.ts";
88
import { existsSync } from "../../../deno_ral/fs.ts";
99
import { pathWithForwardSlashes } from "../../../core/path.ts";
1010

11-
import { Document, Element } from "../../../core/deno-dom.ts";
11+
import { Document, Element, Node } from "../../../core/deno-dom.ts";
1212
import { execProcess } from "../../../core/process.ts";
1313
import { pandocBinaryPath, resourcePath } from "../../../core/resources.ts";
1414

@@ -83,9 +83,36 @@ export function llmsHtmlFinalizer(
8383

8484
// Convert HTML to markdown using Pandoc with the llms.lua filter
8585
await convertHtmlToLlmsMarkdown(htmlContent, llmsOutputPath);
86+
87+
// Clean up conditional content markers from the original HTML doc
88+
cleanupConditionalContent(doc);
8689
};
8790
}
8891

92+
/**
 * Strip the llms-txt conditional-content markers out of the HTML document.
 *
 * Two marker classes are handled:
 * - `.llms-conditional-content`: the element is removed entirely, since this
 *   content is meant for the llms output only and should not appear in HTML.
 * - `.llms-hidden-content`: only the wrapper is removed; its children are
 *   moved up into the wrapper's parent, in their original order.
 *
 * @param doc HTML document to clean up; it is modified in place.
 */
function cleanupConditionalContent(doc: Document): void {
  // Pass 1: drop llms-only elements together with everything inside them.
  const llmsOnly = doc.querySelectorAll(".llms-conditional-content");
  for (const node of llmsOnly) {
    (node as Element).remove();
  }

  // Pass 2: unwrap llms-hidden markers, keeping their content in the page.
  const hiddenWrappers = doc.querySelectorAll(".llms-hidden-content");
  for (const node of hiddenWrappers) {
    const wrapper = node as Element;
    const container = wrapper.parentElement;
    if (!container) {
      // A wrapper without a parent element is left untouched.
      continue;
    }
    // Re-insert each child just before the wrapper; taking firstChild every
    // iteration preserves document order as the wrapper empties out.
    for (let child = wrapper.firstChild; child; child = wrapper.firstChild) {
      container.insertBefore(child as Node, wrapper as Node);
    }
    wrapper.remove();
  }
}
115+
89116
/**
90117
* Extract the main content from an HTML document, removing navigation,
91118
* sidebars, footers, scripts, and styles.

src/resources/filters/llms/llms.lua

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ local skippable_classes = {
1515
["quarto-float"] = true,
1616
["quarto-float-fig"] = true,
1717
["figure"] = true,
18+
["llms-conditional-content"] = true,
1819
}
1920
local droppable_classes = {
2021
["navbar-container"] = true,
@@ -26,6 +27,7 @@ local droppable_classes = {
2627
["quarto-listing-category"] = true, -- category filter sidebar
2728
["listing-category"] = true, -- individual category badges
2829
["quarto-page-breadcrumbs"] = true, -- breadcrumb navigation
30+
["llms-hidden-content"] = true,
2931
}
3032
local droppable_ids = {
3133
["quarto-header"] = true,

src/resources/filters/main.lua

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ import("./quarto-pre/book-links.lua")
144144
import("./quarto-pre/book-numbering.lua")
145145
import("./quarto-pre/code-annotation.lua")
146146
import("./quarto-pre/llms-code-annotations.lua")
147+
import("./quarto-pre/llms-conditional-content.lua")
147148
import("./quarto-pre/code-filename.lua")
148149
import("./quarto-pre/contentsshortcode.lua")
149150
import("./quarto-pre/engine-escape.lua")
@@ -322,6 +323,15 @@ local quarto_pre_filters = {
322323
traverser = 'jog',
323324
},
324325

326+
{ name = "pre-llms-conditional-content",
327+
filter = filterIf(
328+
function() return param("llms-txt", false) end,
329+
llms_resolve_conditional_content()
330+
),
331+
flags = { "has_conditional_content" },
332+
traverser = 'jog',
333+
},
334+
325335
{ name = "pre-combined-hidden",
326336
filter = combineFilters({
327337
hidden(),
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
-- llms-conditional-content.lua
2+
-- Copyright (C) 2020-2026 Posit Software, PBC
3+
--
4+
-- Pre-filter that intercepts ConditionalBlock nodes referencing llms-txt
5+
-- and replaces them with marker Divs so content can be included/excluded
6+
-- from llms.md output independently of the HTML format.
7+
-- Only runs when llms-txt is enabled (guarded by filterIf in main.lua).
8+
9+
local constants = require("modules/constants")
10+
11+
--- Report whether a sequence holds a given value.
-- @param list sequence to scan with ipairs; nil (or false) counts as empty
-- @param value value to look for, compared with ==
-- @return true if some element equals value, false otherwise
local function list_contains(list, value)
  if list then
    for _, item in ipairs(list) do
      if item == value then
        return true
      end
    end
  end
  return false
end
18+
19+
--- Decide whether a ConditionalBlock should appear in llms-txt output.
--
-- Only the when-format / unless-format lists of the block's condition are
-- consulted, and only for the literal value "llms-txt".
--
--   content-visible when-format="llms-txt"    -> true  (include)
--   content-visible unless-format="llms-txt"  -> false (exclude)
--   content-hidden  when-format="llms-txt"    -> false (exclude)
--   content-hidden  unless-format="llms-txt"  -> true  (include)
--
-- NOTE(review): any other condition on the same block is not looked at
-- here -- confirm that is the intended behavior.
--
-- @param tbl ConditionalBlock table; reads tbl.condition and tbl.behavior
-- @return true to include, false to exclude, nil if the block has no
--         llms-txt format condition at all
local function is_llms_visible(tbl)
  local conditions = tbl.condition
  local in_when = list_contains(conditions[constants.kWhenFormat], "llms-txt")
  local in_unless = list_contains(conditions[constants.kUnlessFormat], "llms-txt")

  -- Nothing on this block mentions llms-txt: let the caller leave it alone.
  if not (in_when or in_unless) then
    return nil
  end

  -- Any behavior other than content-visible is treated as content-hidden.
  if tbl.behavior ~= constants.kContentVisible then
    return in_unless
  end
  return in_when
end
38+
39+
--- Build the filter that tags ConditionalBlock nodes whose visibility for
-- llms-txt differs from their visibility for the format being rendered.
--
-- Such a block is replaced by a clone of its original node carrying one of
-- two marker classes:
--   "llms-conditional-content"  -- wanted for llms-txt, not for this format
--   "llms-hidden-content"       -- wanted for this format, not for llms-txt
-- Blocks with no llms-txt condition, or where both outputs agree, are left
-- untouched (the handler returns nil).
--
-- @return filter table with a single ConditionalBlock handler
function llms_resolve_conditional_content()
  local function mark_conditional_block(tbl)
    local visible_for_llms = is_llms_visible(tbl)
    if visible_for_llms == nil then
      return nil
    end

    -- is_visible comes from content-hidden.lua
    if visible_for_llms == is_visible(tbl) then
      return nil -- both outputs agree, no intervention needed
    end

    local marker_class = visible_for_llms
      and "llms-conditional-content"
      or "llms-hidden-content"

    -- NOTE(review): this clones original_node rather than the current node;
    -- confirm edits made to the block by earlier filters are not expected
    -- to carry over into the marker div.
    local marker = tbl.original_node:clone()
    marker.classes:insert(marker_class)
    return marker
  end

  return {
    ConditionalBlock = mark_conditional_block
  }
end

tests/docs/smoke-all/website/llms-txt/index.qmd

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ _quarto:
1212
# Second array: patterns that must NOT match (empty)
1313
- []
1414
ensureLlmsMdRegexMatches:
15-
# First array: patterns that MUST match - verify anchor links and code annotations
16-
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "#<1>", "#<2>", "Load tidyverse", "Open help for ggplot"]
17-
# Second array: patterns that must NOT match (no .html links, no annotation UI)
18-
- ["\\.html\\)", "\\.html#", "code-annotation-anchor"]
15+
# First array: patterns that MUST match - verify anchor links, code annotations, and conditional content
16+
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "#<1>", "#<2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"]
17+
# Second array: patterns that must NOT match (no .html links, no annotation UI, no hidden content)
18+
- ["\\.html\\)", "\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
1919
---
2020

2121
## Test Content
@@ -26,6 +26,16 @@ See the [about page](about.qmd) for more information.
2626

2727
Also see the [callout examples](about.qmd#callout-examples).
2828

29+
## Conditional Content
30+
31+
::: {.content-visible when-format="llms-txt"}
32+
This content is only for LLM consumption.
33+
:::
34+
35+
::: {.content-hidden when-format="llms-txt"}
36+
This content should not appear in LLM output.
37+
:::
38+
2939
## Code Annotations
3040

3141
```r

0 commit comments

Comments
 (0)