Skip to content

Commit 614f576

Browse files
committed
fix(formatting): collapse multiple blank lines after frontmatter and lists (#399)
1 parent 2a4a42b commit 614f576

4 files changed

Lines changed: 66 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10-
## [3.6.0] - 2026-05-31
11-
1210
### Added
1311

1412
- **`convert()` accepts options as a bare `ConversionOptions`** in addition to
@@ -26,6 +24,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2624

2725
### Fixed
2826

27+
- **spurious blank lines after frontmatter and lists (MD012)** (resolves #399). The final
28+
Markdown output is now normalized so that runs of three or more consecutive newlines collapse into exactly two, so
29+
the frontmatter→body and list→next-block transitions no longer produce extra blank lines
30+
that violate markdownlint MD012.
2931
- **autolinks: bare paths and filenames are no longer wrapped as autolinks** (resolves #397).
3032
Per GFM §6.5, autolinks require an absolute URI with a scheme — but the previous check only
3133
compared the link text to the `href`, so `<a href="foobar.png">foobar.png</a>` became the

crates/html-to-markdown/src/converter/main.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ use std::collections::{BTreeMap, HashSet};
1515

1616
use crate::converter::dom_context::DomContext;
1717
use crate::converter::main_helpers::{
18-
extract_head_metadata, format_metadata_frontmatter, has_custom_element_tags, repair_with_html5ever,
19-
trim_line_end_whitespace, trim_trailing_whitespace,
18+
collapse_excess_blank_lines, extract_head_metadata, format_metadata_frontmatter, has_custom_element_tags,
19+
repair_with_html5ever, trim_line_end_whitespace, trim_trailing_whitespace,
2020
};
2121
use crate::converter::plain_text::extract_plain_text;
2222
use crate::converter::preprocessing_helpers::{has_inline_block_misnest, should_drop_for_preprocessing};
@@ -299,6 +299,7 @@ pub fn convert_html_impl(
299299
extract_plain_text(&dom, parser, options)
300300
} else {
301301
trim_line_end_whitespace(&mut output);
302+
collapse_excess_blank_lines(&mut output);
302303
output
303304
};
304305
let (document, tables) = finish_structure_collector(structure_collector);

crates/html-to-markdown/src/converter/main_helpers.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,34 @@ pub fn trim_trailing_whitespace(output: &mut String) {
1919
}
2020
}
2121

22+
/// Collapse runs of three or more consecutive newlines into exactly two.
///
/// Block-level emitters append their own trailing newlines and the next block
/// emitter typically prepends a leading newline, which can produce `\n\n\n`
/// runs in transitions such as frontmatter → first block or list → next block.
/// markdownlint's MD012 rule forbids multiple consecutive blank lines, so the
/// final emission is normalized here. This intentionally preserves single
/// blank lines (`\n\n`) — only runs of three or more newlines are collapsed.
///
/// The string is filtered in place, so no second buffer is allocated. Only
/// `'\n'` characters count towards a run; any other character (including
/// `'\r'`, spaces and tabs) ends the current run.
///
/// NOTE(review): the pass does not look at Markdown structure, so newline runs
/// inside preformatted/code content are collapsed as well — confirm that this
/// is acceptable for callers that need such content preserved verbatim.
pub fn collapse_excess_blank_lines(output: &mut String) {
    // Fast path: most documents have nothing to collapse, so skip the
    // per-character pass entirely.
    if !output.contains("\n\n\n") {
        return;
    }

    // Length of the newline run that ends at the character being inspected.
    let mut newline_run = 0usize;
    output.retain(|ch| {
        if ch == '\n' {
            newline_run += 1;
            // Keep the first two newlines of a run (one blank line), drop the rest.
            newline_run <= 2
        } else {
            newline_run = 0;
            true
        }
    });
}
49+
2250
/// Remove trailing spaces/tabs from every line while preserving newlines.
2351
pub fn trim_line_end_whitespace(output: &mut String) {
2452
if output.is_empty() {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#![allow(missing_docs)]
2+
3+
//! Regression test for issue #399: spurious blank lines after the YAML
4+
//! frontmatter and after lists produced markdown that violates markdownlint
5+
//! MD012 (no multiple consecutive blank lines). Block-level emission must
6+
//! collapse runs of more than one blank line so the output contains at most a
7+
//! single empty line between blocks.
8+
9+
use html_to_markdown_rs::{ConversionOptions, convert};
10+
11+
/// Issue #399: with metadata extraction enabled, the frontmatter block and the
/// list must each be followed by exactly one blank line in the output.
#[test]
fn no_double_blank_line_after_frontmatter_or_list() {
    let options = ConversionOptions {
        extract_metadata: true,
        ..Default::default()
    };

    let markdown = convert(
        "<head>\n <title>Foobar</title>\n</head>\n<body>\n <p>Baz</p><ul><li>qux</li></ul><p>Thud</p>\n</body>",
        Some(options),
    )
    .expect("conversion should succeed")
    .content
    .unwrap_or_default();

    let expected = "---\ntitle: Foobar\n---\n\nBaz\n\n- qux\n\nThud\n";
    assert_eq!(markdown, expected);
}
23+
24+
/// Issue #399: with default options, paragraph → list → paragraph must be
/// separated by single blank lines only.
#[test]
fn no_triple_newline_between_blocks_without_frontmatter() {
    let markdown = convert(
        "<p>Baz</p><ul><li>qux</li></ul><p>Thud</p>",
        Some(ConversionOptions::default()),
    )
    .expect("conversion should succeed")
    .content
    .unwrap_or_default();

    let expected = "Baz\n\n- qux\n\nThud\n";
    assert_eq!(markdown, expected);
}

0 commit comments

Comments
 (0)