Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/resources/filters/llms/llms.lua
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ function Link(link)
return link.content
end

if link.target and (link.target:match("%.html$") or link.target:match("%.html#")) then
local is_absolute = link.target:match("^%a[%w+%-%.]*:") or link.target:match("^//")
if link.target and not is_absolute and (link.target:match("%.html$") or link.target:match("%.html#")) then
link.target = link.target:gsub("%.html#", ".llms.md#")
link.target = link.target:gsub("%.html$", ".llms.md")
link.target = link.target:gsub("^%./", "")
Expand Down
12 changes: 9 additions & 3 deletions tests/docs/smoke-all/website/llms-txt/about.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ _quarto:
ensureLlmsMdExists: true
ensureLlmsMdRegexMatches:
# First array: patterns that MUST match
- ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here"]
# Second array: patterns that must NOT match (no .html links, no breadcrumbs, no empty tab links)
- ["\\.html\\)", "\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"]
- ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here", "https://pandoc.org/lua-filters.html", "https://www.lua.org/manual/5.3/manual.html#6.4"]
# Second array: patterns that must NOT match (no internal .html links, no breadcrumbs, no empty tab links)
- ["\\(index\\.html\\)", "\\(index\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"]
---

About this test site.
Expand Down Expand Up @@ -58,3 +58,9 @@ Beta content here.
Go back to the [home page](index.qmd).

Go to the [test site intro](index.qmd#test-content).

## External Link Example

Check out [Pandoc filters](https://pandoc.org/lua-filters.html) for more info.

See the [Lua manual](https://www.lua.org/manual/5.3/manual.html#6.4) for string functions.
4 changes: 2 additions & 2 deletions tests/docs/smoke-all/website/llms-txt/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ _quarto:
ensureLlmsMdRegexMatches:
# First array: patterns that MUST match - verify anchor links, code annotations, and conditional content
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "# <1>", "# <2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"]
# Second array: patterns that must NOT match (no .html links, no annotation UI, no hidden content)
- ["\\.html\\)", "\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
# Second array: patterns that must NOT match (no internal .html links, no annotation UI, no hidden content)
- ["\\(about\\.html\\)", "\\(about\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
---

## Test Content
Expand Down
Loading