From d3e62fd5d3cf8e11065fafbbf7fd01ab80734092 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 22 Feb 2025 19:16:06 +0900 Subject: [PATCH 1/5] Extract repeated regexp as a constant --- lib/rdoc/generator/darkfish.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb index e4e20831f3..02fab37379 100644 --- a/lib/rdoc/generator/darkfish.rb +++ b/lib/rdoc/generator/darkfish.rb @@ -700,6 +700,8 @@ def template_for file, page = true, klass = ERB template end + ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./ + # Returns an excerpt of the comment for usage in meta description tags def excerpt(comment) text = case comment @@ -711,11 +713,11 @@ def excerpt(comment) # Match from a capital letter to the first period, discarding any links, so # that we don't end up matching badges in the README - first_paragraph_match = text.match(/[A-Z][^\.:\/]+\./) + first_paragraph_match = text.match(ParagraphExcerptRegexp) return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match extracted_text = first_paragraph_match[0] - second_paragraph = first_paragraph_match.post_match.match(/[A-Z][^\.:\/]+\./) + second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp) extracted_text << " " << second_paragraph[0] if second_paragraph extracted_text[0...150].gsub(/\n/, " ").squeeze(" ") From 029388736c79854e290e460e1d2c8cfa5abcb025 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 27 Feb 2025 15:50:26 +0900 Subject: [PATCH 2/5] Prefer `tr` over `gsub` to translate single letters --- lib/rdoc/generator/darkfish.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb index 02fab37379..d87dc338b2 100644 --- a/lib/rdoc/generator/darkfish.rb +++ b/lib/rdoc/generator/darkfish.rb @@ -714,13 +714,13 @@ def excerpt(comment) # Match from a capital letter to the first period, discarding any 
links, so # that we don't end up matching badges in the README first_paragraph_match = text.match(ParagraphExcerptRegexp) - return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match + return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match extracted_text = first_paragraph_match[0] second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp) extracted_text << " " << second_paragraph[0] if second_paragraph - extracted_text[0...150].gsub(/\n/, " ").squeeze(" ") + extracted_text[0...150].tr_s("\n", " ").squeeze(" ") end def generate_ancestor_list(ancestors, klass) From 3351d5b1f07611004bce31e1fed16ebc98d205ca Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 5 Mar 2025 19:40:53 +0900 Subject: [PATCH 3/5] Relax paragraph pattern Fix #1298 Not all paragraphs in documentation start with a capital letter, as they would in ordinary English text. --- lib/rdoc/generator/darkfish.rb | 18 +++++++++++++++--- test/rdoc/test_rdoc_generator_darkfish.rb | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb index d87dc338b2..b6919b3f55 100644 --- a/lib/rdoc/generator/darkfish.rb +++ b/lib/rdoc/generator/darkfish.rb @@ -700,7 +700,11 @@ def template_for file, page = true, klass = ERB template end - ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./ + # :stopdoc: + ParagraphExcerptRegexpOther = %r[\b\w[^./:]++\.] + # use \p/\P{letter} instead of \w/\W in Unicode + ParagraphExcerptRegexpUnicode = %r[\b\p{letter}[^./:]++\.] 
+ # :startdoc: # Returns an excerpt of the comment for usage in meta description tags def excerpt(comment) @@ -713,11 +717,19 @@ def excerpt(comment) # Match from a capital letter to the first period, discarding any links, so # that we don't end up matching badges in the README - first_paragraph_match = text.match(ParagraphExcerptRegexp) + pattern = ParagraphExcerptRegexpUnicode + begin + first_paragraph_match = text.match(pattern) + rescue Encoding::CompatibilityError + # The doc is non-ASCII text and encoded in other than Unicode base encodings. + raise unless pattern.eaual?(ParagraphExcerptRegexpUnicode) + pattern = ParagraphExcerptRegexpOther + retry + end return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match extracted_text = first_paragraph_match[0] - second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp) + second_paragraph = text.match(pattern, first_paragraph_match.end(0)) extracted_text << " " << second_paragraph[0] if second_paragraph extracted_text[0...150].tr_s("\n", " ").squeeze(" ") diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb index ed84543ee1..ee3a72b70e 100644 --- a/test/rdoc/test_rdoc_generator_darkfish.rb +++ b/test/rdoc/test_rdoc_generator_darkfish.rb @@ -449,6 +449,27 @@ def test_meta_tags_for_rdoc_files ) end + def test_meta_tags_for_markdwon_files_paragraph + top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple) + top_level.comment = <<~MARKDOWN + # Distributed Ruby: dRuby + + dRuby is a distributed object system for Ruby. It allows an object in one + Ruby process to invoke methods on an object in another Ruby process. 
+ MARKDOWN + + @g.generate + + content = File.binread("README_md.html") + assert_include( + content, + " Date: Fri, 7 Mar 2025 20:51:10 +0900 Subject: [PATCH 4/5] Make retry condition more defensive not to loop infinitely --- lib/rdoc/generator/darkfish.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb index b6919b3f55..558e58c53b 100644 --- a/lib/rdoc/generator/darkfish.rb +++ b/lib/rdoc/generator/darkfish.rb @@ -722,7 +722,7 @@ def excerpt(comment) first_paragraph_match = text.match(pattern) rescue Encoding::CompatibilityError # The doc is non-ASCII text and encoded in other than Unicode base encodings. - raise unless pattern.eaual?(ParagraphExcerptRegexpUnicode) + raise if pattern == ParagraphExcerptRegexpOther pattern = ParagraphExcerptRegexpOther retry end From 0ed8f0179f38f9e0f01c55401b5045ed24b57f1b Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 7 Mar 2025 22:23:11 +0900 Subject: [PATCH 5/5] Shorten the description text to get rid of JRuby exception --- test/rdoc/test_rdoc_generator_darkfish.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb index ee3a72b70e..680c663287 100644 --- a/test/rdoc/test_rdoc_generator_darkfish.rb +++ b/test/rdoc/test_rdoc_generator_darkfish.rb @@ -454,8 +454,7 @@ def test_meta_tags_for_markdwon_files_paragraph top_level.comment = <<~MARKDOWN # Distributed Ruby: dRuby - dRuby is a distributed object system for Ruby. It allows an object in one - Ruby process to invoke methods on an object in another Ruby process. + dRuby is a distributed object system for Ruby. It allows an object. MARKDOWN @g.generate @@ -466,7 +465,7 @@ def test_meta_tags_for_markdwon_files_paragraph "