From 41889a49705599bf6cdd85e17b05b7fe6e8d69b7 Mon Sep 17 00:00:00 2001 From: tomoya ishida Date: Thu, 1 Jan 2026 22:13:13 +0900 Subject: [PATCH 1/2] Simplify newline handling of comment token in TokenStream#to_html (#1532) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic of newline handling to make the HTML `<span>` tags pretty does not need to be restricted to comment tokens. Remove the comment-specific condition to keep it simple. ### What this code is doing Moves trailing `"\n"` outside of the closing `</span>` tag to make the generated HTML pretty. ```html
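<span class="ruby-comment"># comment1
</span><span class="ruby-comment"># comment2
</span><span class="ruby-value">42</span>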
``` ↓ ```html
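<span class="ruby-comment"># comment1</span>
<span class="ruby-comment"># comment2</span>
<span class="ruby-value">42</span>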
``` ### Background In most cases, only `:on_comment` tokens (and weird percent literal string tokens) might end with `"\n"` because `RDoc::Parser::Ruby` has token-squashing. I'm writing a new syntax highlighter for RDoc and want this pretty-HTML generation to apply to other tokens too. Removing token-squashing will generate more tokens that end with `"\n"`. --- lib/rdoc/token_stream.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/rdoc/token_stream.rb b/lib/rdoc/token_stream.rb index 5a4ca82a67..abf5fc0980 100644 --- a/lib/rdoc/token_stream.rb +++ b/lib/rdoc/token_stream.rb @@ -45,13 +45,7 @@ def self.to_html(token_stream) then 'ruby-identifier' end - comment_with_nl = false - if :on_comment == t[:kind] or :on_embdoc == t[:kind] or :on_heredoc_end == t[:kind] - comment_with_nl = true if "\n" == t[:text][-1] - text = t[:text].rstrip - else - text = t[:text] - end + text = t[:text] if :on_ident == t[:kind] && starting_title starting_title = false @@ -65,7 +59,9 @@ def self.to_html(token_stream) text = CGI.escapeHTML text if style then - "#{text}#{"\n" if comment_with_nl}" + end_with_newline = text.end_with?("\n") + text = text.chomp if end_with_newline + "#{text}#{"\n" if end_with_newline}" else text end From bd9cada31860cbe9cc888282c07004570897aaa3 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 1 Jan 2026 17:43:07 +0000 Subject: [PATCH 2/2] Prevent style rendering in code blocks (#1536) ### Before Screenshot 2025-12-31 at 23 44 19 ### After Screenshot 2025-12-31 at 23 43 58 --- lib/rdoc/markup/attribute_manager.rb | 23 ++++++++++++++++++++- test/rdoc/markup/attribute_manager_test.rb | 24 ++++++++++++++++++++++ test/rdoc/rdoc_markdown_test.rb | 10 +++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/lib/rdoc/markup/attribute_manager.rb b/lib/rdoc/markup/attribute_manager.rb index 4fa0a9d6aa..e21560ff08 100644 --- a/lib/rdoc/markup/attribute_manager.rb +++ b/lib/rdoc/markup/attribute_manager.rb @@ -95,6 
+95,12 @@ def initialize add_html "b", :BOLD, true add_html "tt", :TT, true add_html "code", :TT, true + + @word_pair_chars = @matching_word_pairs.keys.join + + # Matches a word pair delimiter (*, _, +) that is NOT already protected. + # Used by #protect_code_markup to escape delimiters inside <code>/<tt> tags. + @unprotected_word_pair_regexp = /([#{@word_pair_chars}])(?!#{PROTECT_ATTR})/ end ## @@ -164,7 +170,7 @@ def convert_attrs_matching_word_pairs(str, attrs, exclusive) }.keys return if tags.empty? tags = "[#{tags.join("")}](?!#{PROTECT_ATTR})" - all_tags = "[#{@matching_word_pairs.keys.join("")}](?!#{PROTECT_ATTR})" + all_tags = "[#{@word_pair_chars}](?!#{PROTECT_ATTR})" re = /(?:^|\W|#{all_tags})\K(#{tags})(\1*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\1(?!\1)(?=#{all_tags}|\W|$)/ @@ -245,6 +251,20 @@ def mask_protected_sequences @str.gsub!(/\\(\\[#{Regexp.escape @protectable.join}])/m, "\\1") end + ## + # Protects word pair delimiters (*, _, +) inside + # <code> and <tt> tags from being processed as inline formatting. + # For example, *bold* in +*bold*+ will NOT be rendered as bold. 
+ + def protect_code_markup + @str.gsub!(/<(code|tt)>(.*?)<\/\1>/im) do + tag = $1 + content = $2 + escaped = content.gsub(@unprotected_word_pair_regexp, "\\1#{PROTECT_ATTR}") + "<#{tag}>#{escaped}" + end + end + ## # Unescapes regexp handling sequences of text @@ -308,6 +328,7 @@ def flow(str) @str = str.dup mask_protected_sequences + protect_code_markup @attrs = RDoc::Markup::AttrSpan.new @str.length, @exclusive_bitmap diff --git a/test/rdoc/markup/attribute_manager_test.rb b/test/rdoc/markup/attribute_manager_test.rb index a60bd4bf15..0e267aabf4 100644 --- a/test/rdoc/markup/attribute_manager_test.rb +++ b/test/rdoc/markup/attribute_manager_test.rb @@ -202,6 +202,30 @@ def test_convert_attrs_ignores_code assert_equal 'foo __send__ bar', output('foo __send__ bar') end + def test_convert_attrs_ignores_bold_inside_code + assert_equal 'foo *bold* bar', output('foo *bold* bar') + end + + def test_convert_attrs_ignores_em_inside_code + assert_equal 'foo _em_ bar', output('foo _em_ bar') + end + + def test_convert_attrs_ignores_tt_inside_code + assert_equal 'foo +tt+ bar', output('foo +tt+ bar') + end + + def test_convert_attrs_ignores_bold_inside_tt + assert_equal 'foo *bold* bar', output('foo *bold* bar') + end + + def test_convert_attrs_ignores_em_inside_tt + assert_equal 'foo _em_ bar', output('foo _em_ bar') + end + + def test_convert_attrs_ignores_tt_inside_tt + assert_equal 'foo +tt+ bar', output('foo +tt+ bar') + end + def test_convert_attrs_ignores_tt assert_equal 'foo __send__ bar', output('foo __send__ bar') end diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb index 5ca4ddfc0e..82ea9f87e0 100644 --- a/test/rdoc/rdoc_markdown_test.rb +++ b/test/rdoc/rdoc_markdown_test.rb @@ -1283,6 +1283,16 @@ def test_markdown_link_with_styled_label assert_includes html, 'Link to Foo and Bar and Baz' end + def test_code_span_preserves_inline_formatting_chars + # Code spans should display formatting characters literally, not as styling + doc = 
parse "Code: `*bold*` and `_em_` and `+tt+`" + html = @to_html.convert doc + + assert_includes html, '*bold*' + assert_includes html, '_em_' + assert_includes html, '+tt+' + end + def parse(text) @parser.parse text end