Convert Markdown soft line breaks to <br> tags in HTML output

st0012 · st0012 · commit c168d425ce2b · 2026-02-28T22:33:16.000Z
Replace the gsub in accept_paragraph that collapsed newlines between
word characters into spaces. Instead, convert soft line breaks (\n)
to <br> tags to produce visible line breaks matching GFM rendering.

The RDoc markup parser already converts newlines to spaces at parse
time (in RDoc::Markup::Parser#build_paragraph), so only Markdown
content has bare \n in paragraph text. Hard breaks (already represented
as <br>\n) are preserved as-is via a negative lookbehind in the regex.
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
@@ -313,10 +313,14 @@ def accept_block_quote(block_quote)
   def accept_paragraph(paragraph)
     @res << "\n<p>"
     text = paragraph.text @hard_break
-    text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
-      defined?($2) && ' '
-    }
-    @res << to_html(text)
+    text = to_html(text)
+    # Convert soft line breaks (\n) to <br> tags to match GFM rendering.
+    # The RDoc markup parser already converts newlines to spaces at parse
+    # time (see RDoc::Markup::Parser#build_paragraph), so only Markdown
+    # content has bare \n in paragraph text. Hard breaks are already
+    # represented as <br>\n, so we skip those with a negative lookbehind.
+    text = text.gsub(/(?<!<br>)\n/, "<br>\n")
+    @res << text
     @res << "</p>\n"
   end
 
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
@@ -236,7 +236,7 @@ def accept_paragraph_br
   end
 
   def accept_paragraph_break
-    assert_equal "\n<p>hello<br> world</p>\n", @to.res.join
+    assert_equal "\n<p>hello<br>\nworld</p>\n", @to.res.join
   end
 
   def accept_paragraph_i
@@ -411,43 +411,17 @@ def test_accept_heading_dedup_resets_on_start_accepting
   end
 
   def test_accept_paragraph_newline
-    hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
-    worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
-    ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
-
-    hellos.product(worlds) do |hello, world|
-      @to.start_accepting
-      @to.accept_paragraph para("#{hello}\n", "#{world}\n")
-      assert_equal "\n<p>#{hello} #{world}</p>\n", @to.res.join
-    end
-
-    hellos.each do |hello|
-      @to.start_accepting
-      @to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
-      assert_equal "\n<p>#{hello}#{sekai}</p>\n", @to.res.join
-    end
-
-    worlds.each do |world|
-      @to.start_accepting
-      @to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
-      assert_equal "\n<p>#{ohayo}#{world}</p>\n", @to.res.join
-    end
-
+    # Soft line breaks (\n) are converted to <br> tags in HTML output to
+    # match GFM rendering. The RDoc markup parser already converts newlines
+    # to spaces at parse time (in parser.rb), so this only affects Markdown
+    # content where newlines represent GFM soft breaks.
     @to.start_accepting
-    @to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
-    assert_equal "\n<p>#{ohayo}#{sekai}</p>\n", @to.res.join
+    @to.accept_paragraph para("hello\n", "world\n")
+    assert_equal "\n<p>hello<br>\nworld<br>\n</p>\n", @to.res.join
 
     @to.start_accepting
     @to.accept_paragraph para("+hello+\n", "world\n")
-    assert_equal "\n<p><code>hello</code> world</p>\n", @to.res.join
-
-    @to.start_accepting
-    @to.accept_paragraph para("hello\n", "+world+\n")
-    assert_equal "\n<p>hello <code>world</code></p>\n", @to.res.join
-
-    @to.start_accepting
-    @to.accept_paragraph para("+hello+\n", "+world+\n")
-    assert_equal "\n<p><code>hello</code> <code>world</code></p>\n", @to.res.join
+    assert_equal "\n<p><code>hello</code><br>\nworld<br>\n</p>\n", @to.res.join
   end
 
   def test_accept_heading_output_decoration
diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb
@@ -75,6 +75,17 @@ def test_parse_block_quote_continue
     assert_equal expected, doc
   end
 
+  def test_parse_block_quote_continue_html
+    doc = parse <<-BLOCK_QUOTE
+> this is
+a block quote
+    BLOCK_QUOTE
+
+    html = doc.accept(RDoc::Markup::ToHtml.new)
+
+    assert_include html, "<p>this is<br>\na block quote</p>"
+  end
+
   def test_parse_block_quote_list
     doc = parse <<-BLOCK_QUOTE
 > text
@@ -120,12 +131,77 @@ def test_parse_block_quote_separate
     expected =
       doc(
         block(
-          para("this is\na block quote"),
+          para("this is\na block quote")),
+        block(
           para("that continues")))
 
     assert_equal expected, doc
   end
 
+  def test_parse_block_quote_no_lazy_continuation_for_list
+    doc = parse <<-BLOCK_QUOTE
+> foo
+- bar
+    BLOCK_QUOTE
+
+    expected =
+      doc(
+        block(
+          para("foo")),
+        list(:BULLET,
+          item(nil, para("bar"))))
+
+    assert_equal expected, doc
+  end
+
+  def test_parse_block_quote_no_lazy_continuation_for_ordered_list
+    doc = parse <<-BLOCK_QUOTE
+> foo
+1. bar
+    BLOCK_QUOTE
+
+    expected =
+      doc(
+        block(
+          para("foo")),
+        list(:NUMBER,
+          item(nil, para("bar"))))
+
+    assert_equal expected, doc
+  end
+
+  def test_parse_block_quote_no_lazy_continuation_for_heading
+    doc = parse <<-BLOCK_QUOTE
+> foo
+# bar
+    BLOCK_QUOTE
+
+    expected =
+      doc(
+        block(
+          para("foo")),
+        head(1, "bar"))
+
+    assert_equal expected, doc
+  end
+
+  def test_parse_block_quote_no_lazy_continuation_for_code_fence
+    doc = parse <<~BLOCK_QUOTE
+      > foo
+      ```
+      code
+      ```
+    BLOCK_QUOTE
+
+    expected =
+      doc(
+        block(
+          para("foo")),
+        verb("code\n"))
+
+    assert_equal expected, doc
+  end
+
   def test_parse_char_entity
     doc = parse '&pi; &nn;'