Preserve newlines in Markdown paragraph HTML output

st0012 · st0012 · commit 1959fd17f90b · 2026-02-28T21:16:39.000Z
The accept_paragraph method in ToHtml had a gsub that collapsed
newlines into spaces (or removed them for CJK text). This only
affected Markdown content since the RDoc markup parser already
converts newlines to spaces at parse time (parser.rb:221).

Remove the gsub so that Markdown soft breaks are preserved as
newlines in the HTML output, matching GFM behavior. For example,
`> foo\nbar` now produces `<p>foo\nbar</p>` instead of
`<p>foo bar</p>`.
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
@@ -313,9 +313,6 @@ def accept_block_quote(block_quote)
   def accept_paragraph(paragraph)
     @res << "\n<p>"
     text = paragraph.text @hard_break
-    text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
-      defined?($2) && ' '
-    }
     @res << to_html(text)
     @res << "</p>\n"
   end
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
@@ -236,7 +236,7 @@ def accept_paragraph_br
   end
 
   def accept_paragraph_break
-    assert_equal "\n<p>hello<br> world</p>\n", @to.res.join
+    assert_equal "\n<p>hello<br>\nworld</p>\n", @to.res.join
   end
 
   def accept_paragraph_i
@@ -411,43 +411,17 @@ def test_accept_heading_dedup_resets_on_start_accepting
   end
 
   def test_accept_paragraph_newline
-    hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
-    worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
-    ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
-
-    hellos.product(worlds) do |hello, world|
-      @to.start_accepting
-      @to.accept_paragraph para("#{hello}\n", "#{world}\n")
-      assert_equal "\n<p>#{hello} #{world}</p>\n", @to.res.join
-    end
-
-    hellos.each do |hello|
-      @to.start_accepting
-      @to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
-      assert_equal "\n<p>#{hello}#{sekai}</p>\n", @to.res.join
-    end
-
-    worlds.each do |world|
-      @to.start_accepting
-      @to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
-      assert_equal "\n<p>#{ohayo}#{world}</p>\n", @to.res.join
-    end
-
+    # Newlines in paragraph parts are preserved as-is in HTML output.
+    # The RDoc markup parser already converts newlines to spaces at parse
+    # time (in parser.rb), so this only affects Markdown content where
+    # newlines represent soft breaks and should be preserved per GFM spec.
     @to.start_accepting
-    @to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
-    assert_equal "\n<p>#{ohayo}#{sekai}</p>\n", @to.res.join
+    @to.accept_paragraph para("hello\n", "world\n")
+    assert_equal "\n<p>hello\nworld\n</p>\n", @to.res.join
 
     @to.start_accepting
     @to.accept_paragraph para("+hello+\n", "world\n")
-    assert_equal "\n<p><code>hello</code> world</p>\n", @to.res.join
-
-    @to.start_accepting
-    @to.accept_paragraph para("hello\n", "+world+\n")
-    assert_equal "\n<p>hello <code>world</code></p>\n", @to.res.join
-
-    @to.start_accepting
-    @to.accept_paragraph para("+hello+\n", "+world+\n")
-    assert_equal "\n<p><code>hello</code> <code>world</code></p>\n", @to.res.join
+    assert_equal "\n<p><code>hello</code>\nworld\n</p>\n", @to.res.join
   end
 
   def test_accept_heading_output_decoration
diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb
@@ -75,6 +75,17 @@ def test_parse_block_quote_continue
     assert_equal expected, doc
   end
 
+  def test_parse_block_quote_continue_html
+    doc = parse <<-BLOCK_QUOTE
+> this is
+a block quote
+    BLOCK_QUOTE
+
+    html = doc.accept(RDoc::Markup::ToHtml.new)
+
+    assert_include html, "<p>this is\na block quote</p>"
+  end
+
   def test_parse_block_quote_list
     doc = parse <<-BLOCK_QUOTE
 > text