Skip to content

Commit 8780040

Browse files
authored
docs for quarto-markdown desugaring (#67)
* parse yaml markdown and interpret tags * be more robust in the presence of unexpected AST * fix scanner matching rules inside code blocks * new binary: syntax conversion helper * defn list checking, glob expansion * div whitespace * restructure syntax helping tool * clean up warnings * --json output * desugar and table attributes * docs on desugaring
1 parent bcc1a1b commit 8780040

12 files changed

Lines changed: 488 additions & 3 deletions

File tree

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/postprocess.rs

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,45 @@ pub fn postprocess<E: ErrorCollector>(doc: Pandoc, error_collector: &mut E) -> R
450450
)
451451
})
452452
.with_inlines(|inlines| {
453+
// Combined filter: Handle Math + Attr pattern, then citation suffix pattern
454+
455+
// Step 1: Handle Math nodes followed by Attr
456+
// Pattern: Math, Space (optional), Attr -> Span with "quarto-math-with-attribute" class
457+
let mut math_processed = vec![];
458+
let mut i = 0;
459+
460+
while i < inlines.len() {
461+
if let Inline::Math(math) = &inlines[i] {
462+
// Check if followed by Space then Attr, or just Attr
463+
let has_space = i + 1 < inlines.len() && matches!(inlines[i + 1], Inline::Space(_));
464+
let attr_idx = if has_space { i + 2 } else { i + 1 };
465+
466+
if attr_idx < inlines.len() {
467+
if let Inline::Attr(attr) = &inlines[attr_idx] {
468+
// Found Math + (Space?) + Attr pattern
469+
// Wrap Math in a Span with the attribute
470+
let mut classes = vec!["quarto-math-with-attribute".to_string()];
471+
classes.extend(attr.1.clone());
472+
473+
math_processed.push(Inline::Span(Span {
474+
attr: (attr.0.clone(), classes, attr.2.clone()),
475+
content: vec![Inline::Math(math.clone())],
476+
source_info: empty_source_info(),
477+
}));
478+
479+
// Skip the Math, optional Space, and Attr
480+
i = attr_idx + 1;
481+
continue;
482+
}
483+
}
484+
}
485+
486+
// Not a Math + Attr pattern, add as is
487+
math_processed.push(inlines[i].clone());
488+
i += 1;
489+
}
490+
491+
// Step 2: Handle citation suffix pattern on the math-processed result
453492
let mut result = vec![];
454493
// states in this state machine:
455494
// 0. normal state, where we just collect inlines
@@ -461,7 +500,7 @@ pub fn postprocess<E: ErrorCollector>(doc: Pandoc, error_collector: &mut E) -> R
461500
let mut state = 0;
462501
let mut pending_cite: Option<crate::pandoc::inline::Cite> = None;
463502

464-
for inline in inlines {
503+
for inline in math_processed {
465504
match state {
466505
0 => {
467506
// Normal state - check if we see a valid cite
@@ -629,11 +668,40 @@ pub fn postprocess<E: ErrorCollector>(doc: Pandoc, error_collector: &mut E) -> R
629668
if let Block::CaptionBlock(caption_block) = block {
630669
// Look for a preceding Table
631670
if let Some(Block::Table(table)) = result.last_mut() {
632-
// Attach caption to the table
671+
// Extract any trailing Inline::Attr from caption content
672+
let mut caption_content = caption_block.content.clone();
673+
let mut caption_attr: Option<Attr> = None;
674+
675+
if let Some(Inline::Attr(attr)) = caption_content.last() {
676+
caption_attr = Some(attr.clone());
677+
caption_content.pop(); // Remove the Attr from caption content
678+
}
679+
680+
// If we found attributes in the caption, merge them with the table's attr
681+
if let Some(caption_attr_value) = caption_attr {
682+
// Merge: caption attributes override table attributes
683+
// table.attr is (id, classes, key_values)
684+
// Merge key-value pairs from caption into table
685+
for (key, value) in caption_attr_value.2 {
686+
table.attr.2.insert(key, value);
687+
}
688+
// Merge classes from caption into table
689+
for class in caption_attr_value.1 {
690+
if !table.attr.1.contains(&class) {
691+
table.attr.1.push(class);
692+
}
693+
}
694+
// Use caption id if table doesn't have one
695+
if table.attr.0.is_empty() && !caption_attr_value.0.is_empty() {
696+
table.attr.0 = caption_attr_value.0;
697+
}
698+
}
699+
700+
// Attach caption to the table (with Attr removed from content)
633701
table.caption = Caption {
634702
short: None,
635703
long: Some(vec![Block::Plain(Plain {
636-
content: caption_block.content.clone(),
704+
content: caption_content,
637705
source_info: caption_block.source_info.clone(),
638706
})]),
639707
};
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Inline math with attribute: $E = mc^2$ {#eq-einstein}
2+
3+
Display math with attribute:
4+
5+
$$
6+
\int_0^\infty e^{-x^2} dx = \frac{\sqrt{\pi}}{2}
7+
$$ {#eq-gaussian}
8+
9+
Another inline example: $a^2 + b^2 = c^2$ {#eq-pythagorean}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"astContext":{"filenames":["tests/snapshots/json/math-with-attr.qmd"]},"blocks":[{"c":[{"c":"Inline","l":{"end":{"column":6,"offset":6,"row":0},"filenameIndex":0,"start":{"column":0,"offset":0,"row":0}},"t":"Str"},{"l":{"end":{"column":7,"offset":7,"row":0},"filenameIndex":0,"start":{"column":6,"offset":6,"row":0}},"t":"Space"},{"c":"math","l":{"end":{"column":11,"offset":11,"row":0},"filenameIndex":0,"start":{"column":7,"offset":7,"row":0}},"t":"Str"},{"l":{"end":{"column":12,"offset":12,"row":0},"filenameIndex":0,"start":{"column":11,"offset":11,"row":0}},"t":"Space"},{"c":"with","l":{"end":{"column":16,"offset":16,"row":0},"filenameIndex":0,"start":{"column":12,"offset":12,"row":0}},"t":"Str"},{"l":{"end":{"column":17,"offset":17,"row":0},"filenameIndex":0,"start":{"column":16,"offset":16,"row":0}},"t":"Space"},{"c":"attribute:","l":{"end":{"column":27,"offset":27,"row":0},"filenameIndex":0,"start":{"column":17,"offset":17,"row":0}},"t":"Str"},{"l":{"end":{"column":28,"offset":28,"row":0},"filenameIndex":0,"start":{"column":27,"offset":27,"row":0}},"t":"Space"},{"c":[["eq-einstein",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"E = 
mc^2"],"l":{"end":{"column":38,"offset":38,"row":0},"filenameIndex":0,"start":{"column":28,"offset":28,"row":0}},"t":"Math"}]],"l":{"end":{"column":0,"offset":0,"row":0},"filenameIndex":null,"start":{"column":0,"offset":0,"row":0}},"t":"Span"}],"l":{"end":{"column":0,"offset":54,"row":1},"filenameIndex":0,"start":{"column":0,"offset":0,"row":0}},"t":"Para"},{"c":[{"c":"Display","l":{"end":{"column":7,"offset":62,"row":2},"filenameIndex":0,"start":{"column":0,"offset":55,"row":2}},"t":"Str"},{"l":{"end":{"column":8,"offset":63,"row":2},"filenameIndex":0,"start":{"column":7,"offset":62,"row":2}},"t":"Space"},{"c":"math","l":{"end":{"column":12,"offset":67,"row":2},"filenameIndex":0,"start":{"column":8,"offset":63,"row":2}},"t":"Str"},{"l":{"end":{"column":13,"offset":68,"row":2},"filenameIndex":0,"start":{"column":12,"offset":67,"row":2}},"t":"Space"},{"c":"with","l":{"end":{"column":17,"offset":72,"row":2},"filenameIndex":0,"start":{"column":13,"offset":68,"row":2}},"t":"Str"},{"l":{"end":{"column":18,"offset":73,"row":2},"filenameIndex":0,"start":{"column":17,"offset":72,"row":2}},"t":"Space"},{"c":"attribute:","l":{"end":{"column":28,"offset":83,"row":2},"filenameIndex":0,"start":{"column":18,"offset":73,"row":2}},"t":"Str"}],"l":{"end":{"column":0,"offset":84,"row":3},"filenameIndex":0,"start":{"column":0,"offset":55,"row":2}},"t":"Para"},{"c":[{"c":[["eq-gaussian",["quarto-math-with-attribute"],[]],[{"c":[{"t":"DisplayMath"},"\n\\int_0^\\infty e^{-x^2} dx = 
\\frac{\\sqrt{\\pi}}{2}\n"],"l":{"end":{"column":2,"offset":139,"row":6},"filenameIndex":0,"start":{"column":0,"offset":85,"row":4}},"t":"Math"}]],"l":{"end":{"column":0,"offset":0,"row":0},"filenameIndex":null,"start":{"column":0,"offset":0,"row":0}},"t":"Span"}],"l":{"end":{"column":0,"offset":155,"row":7},"filenameIndex":0,"start":{"column":0,"offset":85,"row":4}},"t":"Para"},{"c":[{"c":"Another","l":{"end":{"column":7,"offset":163,"row":8},"filenameIndex":0,"start":{"column":0,"offset":156,"row":8}},"t":"Str"},{"l":{"end":{"column":8,"offset":164,"row":8},"filenameIndex":0,"start":{"column":7,"offset":163,"row":8}},"t":"Space"},{"c":"inline","l":{"end":{"column":14,"offset":170,"row":8},"filenameIndex":0,"start":{"column":8,"offset":164,"row":8}},"t":"Str"},{"l":{"end":{"column":15,"offset":171,"row":8},"filenameIndex":0,"start":{"column":14,"offset":170,"row":8}},"t":"Space"},{"c":"example:","l":{"end":{"column":23,"offset":179,"row":8},"filenameIndex":0,"start":{"column":15,"offset":171,"row":8}},"t":"Str"},{"l":{"end":{"column":24,"offset":180,"row":8},"filenameIndex":0,"start":{"column":23,"offset":179,"row":8}},"t":"Space"},{"c":[["eq-pythagorean",["quarto-math-with-attribute"],[]],[{"c":[{"t":"InlineMath"},"a^2 + b^2 = c^2"],"l":{"end":{"column":41,"offset":197,"row":8},"filenameIndex":0,"start":{"column":24,"offset":180,"row":8}},"t":"Math"}]],"l":{"end":{"column":0,"offset":0,"row":0},"filenameIndex":null,"start":{"column":0,"offset":0,"row":0}},"t":"Span"}],"l":{"end":{"column":0,"offset":216,"row":9},"filenameIndex":0,"start":{"column":0,"offset":156,"row":8}},"t":"Para"}],"meta":{},"pandoc-api-version":[1,23,1]}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
| Column 1 | Column 2 |
2+
|----------|----------|
3+
| Data 1 | Data 2 |
4+
5+
: Table caption {tbl-colwidths="[30,70]"}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"astContext":{"filenames":["tests/snapshots/json/table-caption-attr.qmd"]},"blocks":[{"c":[["",[],[["tbl-colwidths","[30,70]"]]],[null,[{"c":[{"c":"Table","l":{"end":{"column":7,"offset":80,"row":4},"filenameIndex":0,"start":{"column":2,"offset":75,"row":4}},"t":"Str"},{"l":{"end":{"column":8,"offset":81,"row":4},"filenameIndex":0,"start":{"column":7,"offset":80,"row":4}},"t":"Space"},{"c":"caption","l":{"end":{"column":15,"offset":88,"row":4},"filenameIndex":0,"start":{"column":8,"offset":81,"row":4}},"t":"Str"},{"l":{"end":{"column":16,"offset":89,"row":4},"filenameIndex":0,"start":{"column":15,"offset":88,"row":4}},"t":"Space"}],"l":{"end":{"column":0,"offset":115,"row":5},"filenameIndex":0,"start":{"column":0,"offset":72,"row":3}},"t":"Plain"}]],[[{"t":"AlignDefault"},{"t":"ColWidthDefault"}],[{"t":"AlignDefault"},{"t":"ColWidthDefault"}]],[["",[],[]],[[["",[],[]],[[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","l":{"end":{"column":8,"offset":8,"row":0},"filenameIndex":0,"start":{"column":2,"offset":2,"row":0}},"t":"Str"},{"l":{"end":{"column":9,"offset":9,"row":0},"filenameIndex":0,"start":{"column":8,"offset":8,"row":0}},"t":"Space"},{"c":"1","l":{"end":{"column":10,"offset":10,"row":0},"filenameIndex":0,"start":{"column":9,"offset":9,"row":0}},"t":"Str"}],"l":{"end":{"column":11,"offset":11,"row":0},"filenameIndex":0,"start":{"column":2,"offset":2,"row":0}},"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Column","l":{"end":{"column":19,"offset":19,"row":0},"filenameIndex":0,"start":{"column":13,"offset":13,"row":0}},"t":"Str"},{"l":{"end":{"column":20,"offset":20,"row":0},"filenameIndex":0,"start":{"column":19,"offset":19,"row":0}},"t":"Space"},{"c":"2","l":{"end":{"column":21,"offset":21,"row":0},"filenameIndex":0,"start":{"column":20,"offset":20,"row":0}},"t":"Str"}],"l":{"end":{"column":22,"offset":22,"row":0},"filenameIndex":0,"start":{"column":13,"offset":13,"row":0}},"t":"Plain"}]]]]]],[[["",[],[]],0,[],[[["",[],[]],[[["",[]
,[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","l":{"end":{"column":6,"offset":54,"row":2},"filenameIndex":0,"start":{"column":2,"offset":50,"row":2}},"t":"Str"},{"l":{"end":{"column":7,"offset":55,"row":2},"filenameIndex":0,"start":{"column":6,"offset":54,"row":2}},"t":"Space"},{"c":"1","l":{"end":{"column":8,"offset":56,"row":2},"filenameIndex":0,"start":{"column":7,"offset":55,"row":2}},"t":"Str"}],"l":{"end":{"column":11,"offset":59,"row":2},"filenameIndex":0,"start":{"column":2,"offset":50,"row":2}},"t":"Plain"}]],[["",[],[]],{"t":"AlignDefault"},1,1,[{"c":[{"c":"Data","l":{"end":{"column":17,"offset":65,"row":2},"filenameIndex":0,"start":{"column":13,"offset":61,"row":2}},"t":"Str"},{"l":{"end":{"column":18,"offset":66,"row":2},"filenameIndex":0,"start":{"column":17,"offset":65,"row":2}},"t":"Space"},{"c":"2","l":{"end":{"column":19,"offset":67,"row":2},"filenameIndex":0,"start":{"column":18,"offset":66,"row":2}},"t":"Str"}],"l":{"end":{"column":22,"offset":70,"row":2},"filenameIndex":0,"start":{"column":13,"offset":61,"row":2}},"t":"Plain"}]]]]]]],[["",[],[]],[]]],"l":{"end":{"column":0,"offset":72,"row":3},"filenameIndex":0,"start":{"column":0,"offset":0,"row":0}},"t":"Table"}],"meta":{},"pandoc-api-version":[1,23,1]}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
title: "Definition Lists"
3+
---
4+
5+
## Overview
6+
7+
Quarto Markdown supports definition lists through a special div syntax with the `definition-list` class. During post-processing, divs meeting the structural requirements are transformed into Pandoc's native `DefinitionList` blocks.
8+
9+
## Transformation
10+
11+
A div with class `definition-list` containing a specific bullet list structure is converted to a `DefinitionList` block.
12+
13+
### Required Structure
14+
15+
```markdown
16+
::: {.definition-list}
17+
- Term 1
18+
- Definition 1a
19+
- Definition 1b
20+
- Term 2
21+
- Definition 2
22+
:::
23+
```
24+
25+
The structure must follow these rules:
26+
27+
1. Div must have `definition-list` class
28+
2. Div contains exactly one bullet list
29+
3. Each list item has exactly two blocks:
30+
- First: Plain or Paragraph (the term)
31+
- Second: BulletList (the definitions)
32+
33+
## Example
34+
35+
### Input QMD
36+
37+
```markdown
38+
::: {.definition-list}
39+
- **Markdown**
40+
- A lightweight markup language
41+
- Easy to read and write
42+
- **Pandoc**
43+
- A universal document converter
44+
:::
45+
```
46+
47+
### Output Structure
48+
49+
Transforms to a `DefinitionList` block:
50+
51+
```json
52+
{
53+
"t": "DefinitionList",
54+
"c": [
55+
[
56+
[{"t": "Strong", "c": [{"t": "Str", "c": "Markdown"}]}],
57+
[
58+
[[{"t": "Plain", "c": [{"t": "Str", "c": "A lightweight markup language"}]}]],
59+
[[{"t": "Plain", "c": [{"t": "Str", "c": "Easy to read and write"}]}]]
60+
]
61+
],
62+
[
63+
[{"t": "Strong", "c": [{"t": "Str", "c": "Pandoc"}]}],
64+
[
65+
[[{"t": "Plain", "c": [{"t": "Str", "c": "A universal document converter"}]}]]
66+
]
67+
]
68+
]
69+
}
70+
```
71+
72+
## Validation
73+
74+
Invalid structures are left as regular divs. Common validation failures:
75+
76+
- Div does not contain exactly one bullet list (for example, it contains two)
77+
- List items don't have exactly two blocks
78+
- First block is not Plain or Paragraph
79+
- Second block is not a BulletList
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
---
2+
title: "Editorial Marks"
3+
---
4+
5+
## Overview
6+
7+
Quarto Markdown's editorial marks (`[!! highlight]`, `[++ insert]`, `[-- delete]`, `[>> comment]`) are custom inline node types that don't exist in Pandoc's AST. During post-processing, these nodes are desugared into standard `Span` nodes with special classes.
8+
9+
## Transformation
10+
11+
All four editorial mark types follow the same desugaring pattern:
12+
13+
| Original Node | Special Class | Example |
14+
|---------------|---------------|---------|
15+
| `Insert` | `quarto-insert` | `[++ text]` |
16+
| `Delete` | `quarto-delete` | `[-- text]` |
17+
| `Highlight` | `quarto-highlight` | `[!! text]` |
18+
| `EditComment` | `quarto-edit-comment` | `[>> text]` |
19+
20+
The content is trimmed (leading/trailing spaces removed) before being placed in the Span.
21+
22+
## Example
23+
24+
### Input QMD
25+
26+
```markdown
27+
This has [++ added text]{#my-add .important} and [!! highlighted]{.warn}.
28+
```
29+
30+
### Output AST (simplified)
31+
32+
```json
33+
[
34+
{"t": "Str", "c": "This"},
35+
{"t": "Space"},
36+
{"t": "Str", "c": "has"},
37+
{"t": "Space"},
38+
{
39+
"t": "Span",
40+
"c": [
41+
["my-add", ["quarto-insert", "important"], []],
42+
[{"t": "Str", "c": "added"}, {"t": "Space"}, {"t": "Str", "c": "text"}]
43+
]
44+
},
45+
{"t": "Space"},
46+
{"t": "Str", "c": "and"},
47+
{"t": "Space"},
48+
{
49+
"t": "Span",
50+
"c": [
51+
["", ["quarto-highlight", "warn"], []],
52+
[{"t": "Str", "c": "highlighted"}]
53+
]
54+
}
55+
]
56+
```
57+
58+
## Recognition
59+
60+
Downstream tools can identify desugared editorial marks by checking for the special classes:
61+
62+
```lua
63+
if span.classes:includes("quarto-insert") then
64+
-- Handle insertion suggestion
65+
elseif span.classes:includes("quarto-delete") then
66+
-- Handle deletion suggestion
67+
elseif span.classes:includes("quarto-highlight") then
68+
-- Handle highlight
69+
elseif span.classes:includes("quarto-edit-comment") then
70+
-- Handle editorial comment
71+
end
72+
```

docs/syntax/desugaring/index.qmd

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
---
2+
title: "AST Desugaring"
3+
---
4+
5+
Desugaring is the process of transforming extended syntax constructs into simpler, equivalent representations in the AST. Quarto Markdown includes several syntax features that don't have direct equivalents in Pandoc's standard AST. During the parsing and post-processing phases, `quarto-markdown-pandoc` transforms these extended constructs into standard Pandoc AST nodes with special attributes or classes, allowing downstream tools to recognize and process them appropriately.
6+
7+
## Desugaring Transformations
8+
9+
The following transformations are applied during AST post-processing:
10+
11+
- [**Math with Attributes**](math-attributes.qmd) - Math expressions followed by attributes are wrapped in Span nodes with a special class
12+
- [**Editorial Marks**](editorial-marks.qmd) - Insert, Delete, Highlight, and EditComment nodes are converted to Span nodes with identifying classes
13+
- [**Table Caption Attributes**](table-captions.qmd) - Attributes in table captions are extracted and merged with the table's attribute field
14+
- [**Definition Lists**](definition-lists.qmd) - Divs with `definition-list` class are transformed into DefinitionList blocks
15+
- [**Note References**](note-references.qmd) - NoteReference nodes are converted to Span nodes with reference metadata
16+
- **Figures** - Single-image paragraphs are automatically promoted to Figure blocks with captions
17+
- **Shortcodes** - Shortcode nodes are transformed into Span nodes
18+
- **Citation Suffixes** - A citation followed by a space and a span is merged into a single citation with a suffix
19+
20+
## Implementation
21+
22+
All desugaring transformations are implemented in `crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/postprocess.rs`. The transformations are applied using a filter-based traversal system that walks the AST and applies pattern-matching transformations.

0 commit comments

Comments
 (0)