diff --git a/src/resources/filters/llms/llms.lua b/src/resources/filters/llms/llms.lua index 916457c505..f0b2ff1df9 100644 --- a/src/resources/filters/llms/llms.lua +++ b/src/resources/filters/llms/llms.lua @@ -190,7 +190,8 @@ function Link(link) return link.content end - if link.target and (link.target:match("%.html$") or link.target:match("%.html#")) then + local is_absolute = link.target and (link.target:match("^%a[%w+%-%.]*:") or link.target:match("^//")) + if link.target and not is_absolute and (link.target:match("%.html$") or link.target:match("%.html#")) then link.target = link.target:gsub("%.html#", ".llms.md#") link.target = link.target:gsub("%.html$", ".llms.md") link.target = link.target:gsub("^%./", "") diff --git a/tests/docs/smoke-all/website/llms-txt/about.qmd b/tests/docs/smoke-all/website/llms-txt/about.qmd index 1da14ade37..f0b8dd0e9f 100644 --- a/tests/docs/smoke-all/website/llms-txt/about.qmd +++ b/tests/docs/smoke-all/website/llms-txt/about.qmd @@ -6,9 +6,9 @@ _quarto: ensureLlmsMdExists: true ensureLlmsMdRegexMatches: # First array: patterns that MUST match - - ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here"] - # Second array: patterns that must NOT match (no .html links, no breadcrumbs, no empty tab links) - - ["\\.html\\)", "\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"] + - ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here", "https://pandoc\\.org/lua-filters\\.html", "https://www\\.lua\\.org/manual/5\\.3/manual\\.html#6\\.4"] + # Second
array: patterns that must NOT match (no internal .html links, no breadcrumbs, no empty tab links) + - ["\\(index\\.html\\)", "\\(index\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"] --- About this test site. @@ -58,3 +58,9 @@ Beta content here. Go back to the [home page](index.qmd). Go to the [test site intro](index.qmd#test-content). + +## External Link Example + +Check out [Pandoc filters](https://pandoc.org/lua-filters.html) for more info. + +See the [Lua manual](https://www.lua.org/manual/5.3/manual.html#6.4) for string functions. diff --git a/tests/docs/smoke-all/website/llms-txt/index.qmd b/tests/docs/smoke-all/website/llms-txt/index.qmd index ca33b482e8..1465063221 100644 --- a/tests/docs/smoke-all/website/llms-txt/index.qmd +++ b/tests/docs/smoke-all/website/llms-txt/index.qmd @@ -14,8 +14,8 @@ _quarto: ensureLlmsMdRegexMatches: # First array: patterns that MUST match - verify anchor links, code annotations, and conditional content - ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "# <1>", "# <2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"] - # Second array: patterns that must NOT match (no .html links, no annotation UI, no hidden content) - - ["\\.html\\)", "\\.html#", "code-annotation-anchor", "should not appear in LLM output"] + # Second array: patterns that must NOT match (no internal .html links, no annotation UI, no hidden content) + - ["\\(about\\.html\\)", "\\(about\\.html#", "code-annotation-anchor", "should not appear in LLM output"] --- ## Test Content