Skip to content

Commit 56f72fd

Browse files
fix codeblock content split across Text events
Fixes: #1
1 parent 70c3849 commit 56f72fd

2 files changed

Lines changed: 94 additions & 39 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
- Fixed: handle fenced code block split across Text events (#1)
11+
1012
## [0.8.0 2025-06-29]
1113

1214
- Changed: updated to svgdx 0.21.0

src/lib.rs

Lines changed: 92 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -59,37 +59,12 @@ impl Preprocessor for SvgdxProc {
5959
}
6060
}
6161

62-
fn inject_xml(events: &mut Vec<Event>, content: &str) {
63-
events.push(Html(
64-
"\n\n<div style='overflow-x: auto; font-size: 0.9em;'>\n".into(),
65-
));
66-
events.push(Start(Tag::CodeBlock(CodeBlockKind::Fenced("xml".into()))));
67-
events.push(Text(content.to_owned().into()));
68-
events.push(End(TagEnd::CodeBlock));
69-
events.push(Html("\n</div>\n".into()));
70-
}
71-
72-
fn inject_svgdx(events: &mut Vec<Event>, content: &str) {
73-
events.push(Start(Tag::Paragraph));
74-
// Need to avoid blank lines in the rendered SVG, as they can cause
75-
// markdown to resume 'normal' md processing, especially when e.g.
76-
// indentation can cause an implicit code block to be started.
77-
// See https://talk.commonmark.org/t/inline-html-breaks-when-using-indentation/3317
78-
// and https://spec.commonmark.org/0.31.2/#html-blocks
79-
let svg_output = svgdx_handler(content)
80-
.lines()
81-
.filter(|line| !line.trim().is_empty())
82-
.collect::<Vec<_>>()
83-
.join("\n");
84-
events.push(Html(svg_output.into()));
85-
events.push(End(TagEnd::Paragraph));
86-
}
87-
8862
fn codeblock_parser(chapter: &mut Chapter) -> Result<String, std::fmt::Error> {
8963
let md_events = mdbook::utils::new_cmark_parser(&chapter.content, false);
9064

9165
let mut in_block = None;
9266
let mut events = Vec::new();
67+
let mut block_content = Vec::new();
9368
for ev in md_events {
9469
match (&mut in_block, ev.clone()) {
9570
(None, Start(Tag::CodeBlock(Fenced(Borrowed(block_type)))))
@@ -108,31 +83,73 @@ fn codeblock_parser(chapter: &mut Chapter) -> Result<String, std::fmt::Error> {
10883
));
10984
in_block = Some(block_type.to_string());
11085
}
111-
(Some(block_type), Text(content)) => {
112-
if block_type.starts_with("xml-svgdx") {
113-
// Special case this fence type to display the XML input
114-
// prior to the rendered SVG output.
115-
inject_xml(&mut events, &content);
116-
}
117-
inject_svgdx(&mut events, &content);
118-
if block_type.starts_with("svgdx-xml") {
119-
// Special case this fence type to display the XML input
120-
// prior to the rendered SVG output.
121-
inject_xml(&mut events, &content);
122-
}
86+
(Some(_), Text(content)) => {
87+
// content of code block isn't necessarily in a single Text event;
88+
// CRLF sources in particular seem to generate a Text event per line.
89+
block_content.push(content.clone());
12390
}
124-
(Some(_), End(TagEnd::CodeBlock)) => {
91+
(Some(block_type), End(TagEnd::CodeBlock)) => {
92+
handle_content(&block_content.concat(), block_type, &mut events);
12593
events.push(Html("</div>".into()));
94+
block_content.clear();
12695
in_block = None;
12796
}
12897
_ => events.push(ev),
12998
}
13099
}
100+
if let Some(block_type) = in_block {
101+
// The CommonMark spec allows for non-terminated code blocks, treating
102+
// the end of the document as an implicit end-of-fence.
103+
// https://spec.commonmark.org/0.31.2/#fenced-code-blocks
104+
handle_content(&block_content.concat(), &block_type, &mut events);
105+
events.push(Html("</div>".into()));
106+
}
107+
131108
let mut buf = String::new();
132109
cmark(&mut events.iter(), &mut buf)?;
133110
Ok(buf)
134111
}
135112

113+
fn handle_content(content: &str, block_type: &str, events: &mut Vec<Event>) {
114+
if block_type.starts_with("xml-svgdx") {
115+
// Special case this fence type to display the XML input
116+
// prior to the rendered SVG output.
117+
inject_xml(events, content);
118+
}
119+
inject_svgdx(events, content);
120+
if block_type.starts_with("svgdx-xml") {
121+
// Special case this fence type to display the XML input
122+
// prior to the rendered SVG output.
123+
inject_xml(events, content);
124+
}
125+
}
126+
127+
/// Appends `content` to `events` as a fenced `xml` code block, wrapped in a
/// `<div>` whose inline style sets `overflow-x: auto` and a reduced font size.
fn inject_xml(events: &mut Vec<Event>, content: &str) {
    events.extend([
        // Opening wrapper around the code block.
        Html("\n\n<div style='overflow-x: auto; font-size: 0.9em;'>\n".into()),
        Start(Tag::CodeBlock(CodeBlockKind::Fenced("xml".into()))),
        // Owned copy, so the event does not borrow from `content`.
        Text(content.to_owned().into()),
        End(TagEnd::CodeBlock),
        Html("\n</div>\n".into()),
    ]);
}
136+
137+
fn inject_svgdx(events: &mut Vec<Event>, content: &str) {
138+
events.push(Start(Tag::Paragraph));
139+
// Need to avoid blank lines in the rendered SVG, as they can cause
140+
// markdown to resume 'normal' md processing, especially when e.g.
141+
// indentation can cause an implicit code block to be started.
142+
// See https://talk.commonmark.org/t/inline-html-breaks-when-using-indentation/3317
143+
// and https://spec.commonmark.org/0.31.2/#html-blocks
144+
let svg_output = svgdx_handler(content)
145+
.lines()
146+
.filter(|line| !line.trim().is_empty())
147+
.collect::<Vec<_>>()
148+
.join("\n");
149+
events.push(Html(svg_output.into()));
150+
events.push(End(TagEnd::Paragraph));
151+
}
152+
136153
fn svgdx_handler(s: &str) -> String {
137154
let cfg = svgdx::TransformConfig {
138155
svg_style: Some("min-width: 25%; max-width: 100%; height: auto;".to_string()),
@@ -185,4 +202,40 @@ Some **markdown** text
185202
let mut z = Book::new();
186203
z.push_item(chapter);
187204
}
205+
206+
// Regression test: builds a chapter whose lines are joined with "\r\n", runs
// it through `codeblock_parser`, and checks that the output contains the
// expected wrapper-div, `<svg` opening and closing `</svg></div>` fragments.
#[test]
207+
fn process_with_crlf() {
208+
// crlf-separated text seems to be parsed into multiple Text events;
209+
// check the fenced code block is still processed as a single unit.
210+
let content = vec![
211+
"Some **markdown** text",
212+
"",
213+
"```svgdx",
214+
"<svg>",
215+
r#" <rect wh="20 5"/>"#,
216+
r#" <rect xy="^|h" wh="20 5"/>"#,
217+
"</svg>",
218+
"```",
219+
]
220+
.join("\r\n");
221+
222+
let expected1 = r##"Some **markdown** text
223+
224+
<div style="##;
225+
let expected2 = r##" class='svgdx'>
226+
227+
228+
<svg "##;
229+
let expected3 = r##"
230+
<rect x="20" y="0" width="20" height="5"/>
231+
</svg></div>"##;
232+
let mut chapter = Chapter::new("test", content.to_owned(), ".", Vec::new());
233+
let result = codeblock_parser(&mut chapter).unwrap();
234+
assert_contains!(result, expected1);
235+
assert_contains!(result, expected2);
236+
assert_contains!(result, expected3);
237+
238+
let mut z = Book::new();
239+
z.push_item(chapter);
240+
}
188241
}

0 commit comments

Comments
 (0)