From d3e62fd5d3cf8e11065fafbbf7fd01ab80734092 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Sat, 22 Feb 2025 19:16:06 +0900
Subject: [PATCH 1/5] Extract repeated regexp as a constant

---
 lib/rdoc/generator/darkfish.rb | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb
index e4e20831f3..02fab37379 100644
--- a/lib/rdoc/generator/darkfish.rb
+++ b/lib/rdoc/generator/darkfish.rb
@@ -700,6 +700,8 @@ def template_for file, page = true, klass = ERB
     template
   end
 
+  ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./
+
   # Returns an excerpt of the comment for usage in meta description tags
   def excerpt(comment)
     text = case comment
@@ -711,11 +713,11 @@ def excerpt(comment)
 
     # Match from a capital letter to the first period, discarding any links, so
     # that we don't end up matching badges in the README
-    first_paragraph_match = text.match(/[A-Z][^\.:\/]+\./)
+    first_paragraph_match = text.match(ParagraphExcerptRegexp)
     return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match
 
     extracted_text = first_paragraph_match[0]
-    second_paragraph = first_paragraph_match.post_match.match(/[A-Z][^\.:\/]+\./)
+    second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp)
     extracted_text << " " << second_paragraph[0] if second_paragraph
 
     extracted_text[0...150].gsub(/\n/, " ").squeeze(" ")

From 029388736c79854e290e460e1d2c8cfa5abcb025 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Thu, 27 Feb 2025 15:50:26 +0900
Subject: [PATCH 2/5] Prefer `tr` over `gsub` to translate single letters

---
 lib/rdoc/generator/darkfish.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb
index 02fab37379..d87dc338b2 100644
--- a/lib/rdoc/generator/darkfish.rb
+++ b/lib/rdoc/generator/darkfish.rb
@@ -714,13 +714,13 @@ def excerpt(comment)
     # Match from a capital letter to the first period, discarding any links, so
     # that we don't end up matching badges in the README
     first_paragraph_match = text.match(ParagraphExcerptRegexp)
-    return text[0...150].gsub(/\n/, " ").squeeze(" ") unless first_paragraph_match
+    return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match
 
     extracted_text = first_paragraph_match[0]
     second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp)
     extracted_text << " " << second_paragraph[0] if second_paragraph
 
-    extracted_text[0...150].gsub(/\n/, " ").squeeze(" ")
+    extracted_text[0...150].tr_s("\n", " ").squeeze(" ")
   end
 
   def generate_ancestor_list(ancestors, klass)

From 3351d5b1f07611004bce31e1fed16ebc98d205ca Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Wed, 5 Mar 2025 19:40:53 +0900
Subject: [PATCH 3/5] Relax paragraph pattern

Fix #1298
Not all paragraphs in documentation start with a capital letter, as
ordinary English text usually does.
---
 lib/rdoc/generator/darkfish.rb            | 18 +++++++++++++++---
 test/rdoc/test_rdoc_generator_darkfish.rb | 21 +++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb
index d87dc338b2..b6919b3f55 100644
--- a/lib/rdoc/generator/darkfish.rb
+++ b/lib/rdoc/generator/darkfish.rb
@@ -700,7 +700,11 @@ def template_for file, page = true, klass = ERB
     template
   end
 
-  ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./
+  # :stopdoc:
+  ParagraphExcerptRegexpOther = %r[\b\w[^./:]++\.]
+  # use \p/\P{letter} instead of \w/\W in Unicode
+  ParagraphExcerptRegexpUnicode = %r[\b\p{letter}[^./:]++\.]
+  # :startdoc:
 
   # Returns an excerpt of the comment for usage in meta description tags
   def excerpt(comment)
@@ -713,11 +717,19 @@ def excerpt(comment)
 
     # Match from a capital letter to the first period, discarding any links, so
     # that we don't end up matching badges in the README
-    first_paragraph_match = text.match(ParagraphExcerptRegexp)
+    pattern = ParagraphExcerptRegexpUnicode
+    begin
+      first_paragraph_match = text.match(pattern)
+    rescue Encoding::CompatibilityError
+      # The doc is non-ASCII text and encoded in other than Unicode base encodings.
+      raise unless pattern.eaual?(ParagraphExcerptRegexpUnicode)
+      pattern = ParagraphExcerptRegexpOther
+      retry
+    end
     return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match
 
     extracted_text = first_paragraph_match[0]
-    second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp)
+    second_paragraph = text.match(pattern, first_paragraph_match.end(0))
     extracted_text << " " << second_paragraph[0] if second_paragraph
 
     extracted_text[0...150].tr_s("\n", " ").squeeze(" ")
diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb
index ed84543ee1..ee3a72b70e 100644
--- a/test/rdoc/test_rdoc_generator_darkfish.rb
+++ b/test/rdoc/test_rdoc_generator_darkfish.rb
@@ -449,6 +449,27 @@ def test_meta_tags_for_rdoc_files
     )
   end
 
+  def test_meta_tags_for_markdwon_files_paragraph
+    top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple)
+    top_level.comment = <<~MARKDOWN
+      # Distributed Ruby: dRuby
+
+      dRuby is a distributed object system for Ruby.  It allows an object in one
+      Ruby process to invoke methods on an object in another Ruby process.
+    MARKDOWN
+
+    @g.generate
+
+    content = File.binread("README_md.html")
+    assert_include(
+      content,
+      "<meta name=\"description\" content=\"" \
+      "README: dRuby " \
+      "dRuby is a distributed object system for Ruby. " \
+      "It allows an object in one Ruby process to invoke methods on an object"
+    )
+  end
+
   def test_meta_tags_for_markdown_files
     top_level = @store.add_file("MyPage.md", parser: RDoc::Parser::Markdown)
     top_level.comment = <<~MARKDOWN

From eb3a5ba6f2e38199758266124847e57174314179 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Fri, 7 Mar 2025 20:51:10 +0900
Subject: [PATCH 4/5] Make retry condition more defensive not to loop
 infinitely

---
 lib/rdoc/generator/darkfish.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/rdoc/generator/darkfish.rb b/lib/rdoc/generator/darkfish.rb
index b6919b3f55..558e58c53b 100644
--- a/lib/rdoc/generator/darkfish.rb
+++ b/lib/rdoc/generator/darkfish.rb
@@ -722,7 +722,7 @@ def excerpt(comment)
       first_paragraph_match = text.match(pattern)
     rescue Encoding::CompatibilityError
       # The doc is non-ASCII text and encoded in other than Unicode base encodings.
-      raise unless pattern.eaual?(ParagraphExcerptRegexpUnicode)
+      raise if pattern == ParagraphExcerptRegexpOther
       pattern = ParagraphExcerptRegexpOther
       retry
     end

From 0ed8f0179f38f9e0f01c55401b5045ed24b57f1b Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Fri, 7 Mar 2025 22:23:11 +0900
Subject: [PATCH 5/5] Shorten the description text to get rid of JRuby
 exception

---
 test/rdoc/test_rdoc_generator_darkfish.rb | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/rdoc/test_rdoc_generator_darkfish.rb b/test/rdoc/test_rdoc_generator_darkfish.rb
index ee3a72b70e..680c663287 100644
--- a/test/rdoc/test_rdoc_generator_darkfish.rb
+++ b/test/rdoc/test_rdoc_generator_darkfish.rb
@@ -454,8 +454,7 @@ def test_meta_tags_for_markdwon_files_paragraph
     top_level.comment = <<~MARKDOWN
       # Distributed Ruby: dRuby
 
-      dRuby is a distributed object system for Ruby.  It allows an object in one
-      Ruby process to invoke methods on an object in another Ruby process.
+      dRuby is a distributed object system for Ruby.  It allows an object.
     MARKDOWN
 
     @g.generate
@@ -466,7 +465,7 @@ def test_meta_tags_for_markdwon_files_paragraph
       "<meta name=\"description\" content=\"" \
       "README: dRuby " \
       "dRuby is a distributed object system for Ruby. " \
-      "It allows an object in one Ruby process to invoke methods on an object"
+      "It allows an object."
     )
   end