From c272a8b138a87922a1622b8f06509032e5255690 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 4 Jun 2026 17:36:46 +0900 Subject: [PATCH 1/4] Strip C1 control characters in Gem::Text#clean_text Match C1 controls (U+0080-U+009F) as codepoints and only for valid UTF-8 text, so multibyte characters are preserved and other encodings are left unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/rubygems/text.rb | 10 +++++++++- test/rubygems/test_gem_text.rb | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/text.rb b/lib/rubygems/text.rb index 88d4ce59b4b9..8c78304d4ec2 100644 --- a/lib/rubygems/text.rb +++ b/lib/rubygems/text.rb @@ -8,7 +8,15 @@ module Gem::Text # Remove any non-printable characters and make the text suitable for # printing. def clean_text(text) - text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".") + text = text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".") + + # C1 control characters (U+0080-U+009F) only occur in UTF-8 text and must + # be matched as codepoints so that multibyte characters are preserved. + if text.encoding == Encoding::UTF_8 && text.valid_encoding? + text = text.gsub(/[\u0080-\u009f]/, ".") + end + + text end def truncate_text(text, description, max_length = 100_000) diff --git a/test/rubygems/test_gem_text.rb b/test/rubygems/test_gem_text.rb index 8e9961094612..ad35210c5968 100644 --- a/test/rubygems/test_gem_text.rb +++ b/test/rubygems/test_gem_text.rb @@ -100,4 +100,19 @@ def test_truncate_text def test_clean_text assert_equal ".]2;nyan.", clean_text("\e]2;nyan\a") end + + def test_clean_text_strips_c1_control_characters + text = [0x41, 0x9b, 0x42].pack("U*") # "A", CSI (U+009B), "B" + assert_equal "A.B", clean_text(text) + end + + def test_clean_text_preserves_multibyte_characters + text = [0xe9, 0x85].pack("U*") # U+00E9 kept, NEL (U+0085) stripped + assert_equal [0xe9, 0x2e].pack("U*"), clean_text(text) + end + + def test_clean_text_passes_through_non_unicode_encodings + text = "x\x9by".dup.force_encoding("ISO-8859-1") + assert_equal text, clean_text(text) + end end From cc62ee89ab1f829176933b4682a578fc8f467a2b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 4 Jun 2026 17:36:46 +0900 Subject: [PATCH 2/4] Clean control characters from the post-install message Route the post-install message through Gem::Text#clean_text before printing it so a crafted message cannot emit raw terminal control sequences. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/rubygems/installer.rb | 2 +- test/rubygems/test_gem_installer.rb | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/installer.rb b/lib/rubygems/installer.rb index 15d6aac0fd1b..5b4504aa96b6 100644 --- a/lib/rubygems/installer.rb +++ b/lib/rubygems/installer.rb @@ -299,7 +299,7 @@ def install File.chmod(dir_mode, gem_dir) if dir_mode - say spec.post_install_message if options[:post_install_message] && !spec.post_install_message.nil? + say clean_text(spec.post_install_message) if options[:post_install_message] && !spec.post_install_message.nil? Gem::Specification.add_spec(spec) unless @install_dir diff --git a/test/rubygems/test_gem_installer.rb b/test/rubygems/test_gem_installer.rb index bf7a4a8dfc81..44d14e8150c4 100644 --- a/test/rubygems/test_gem_installer.rb +++ b/test/rubygems/test_gem_installer.rb @@ -1481,6 +1481,23 @@ def test_install_with_skipped_message refute_match(/I am a shiny gem!/, @ui.output) end + def test_install_sanitizes_post_install_message + # Use for_spec so the in-memory message reaches the installer verbatim; + # building a gem would escape the control characters during serialization. + @spec = setup_base_spec + @spec.post_install_message = "shiny \e]2;pwn\a gem" + + installer = Gem::Installer.for_spec @spec, post_install_message: true + installer.gem_home = @gemhome + + use_ui @ui do + installer.install + end + + assert_match(/shiny \.\]2;pwn\. gem/, @ui.output) + refute_match(/\e\]2;pwn/, @ui.output) + end + def test_install_extension_dir gemhome2 = "#{@gemhome}2" From 0db489ff2e0b5c52d6ba5a48b8480643e54b51c7 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 4 Jun 2026 18:39:44 +0900 Subject: [PATCH 3/4] Clarify the C1 comment and strengthen the multibyte test Reword the comment to explain that the UTF-8 guard avoids splitting multibyte sequences, and assert preservation with U+0400, whose continuation byte falls in the C1 byte range. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/rubygems/text.rb | 5 +++-- test/rubygems/test_gem_text.rb | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/rubygems/text.rb b/lib/rubygems/text.rb index 8c78304d4ec2..0550dc473d33 100644 --- a/lib/rubygems/text.rb +++ b/lib/rubygems/text.rb @@ -10,8 +10,9 @@ module Gem::Text def clean_text(text) text = text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".") - # C1 control characters (U+0080-U+009F) only occur in UTF-8 text and must - # be matched as codepoints so that multibyte characters are preserved. + # Match C1 control characters (U+0080-U+009F) as codepoints. This requires + # a valid UTF-8 string so the regexp does not split a multibyte sequence; + # strings in other encodings are left unchanged. if text.encoding == Encoding::UTF_8 && text.valid_encoding? text = text.gsub(/[\u0080-\u009f]/, ".") end diff --git a/test/rubygems/test_gem_text.rb b/test/rubygems/test_gem_text.rb index ad35210c5968..60739e613198 100644 --- a/test/rubygems/test_gem_text.rb +++ b/test/rubygems/test_gem_text.rb @@ -107,8 +107,10 @@ def test_clean_text_strips_c1_control_characters end def test_clean_text_preserves_multibyte_characters - text = [0xe9, 0x85].pack("U*") # U+00E9 kept, NEL (U+0085) stripped - assert_equal [0xe9, 0x2e].pack("U*"), clean_text(text) + # U+0400 encodes to bytes D0 80, whose 0x80 continuation byte must not be + # mistaken for a C1 control byte. NEL (U+0085) is stripped. + text = [0x400, 0x85].pack("U*") + assert_equal [0x400, 0x2e].pack("U*"), clean_text(text) end def test_clean_text_passes_through_non_unicode_encodings From 95b6bfbd16dbb7a9b4a53e4007e612f6d8777861 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 4 Jun 2026 18:39:44 +0900 Subject: [PATCH 4/4] Coerce the post-install message to a String before sanitizing post_install_message may be a non-String such as an array, so call to_s before clean_text to avoid raising during install. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/rubygems/installer.rb | 2 +- test/rubygems/test_gem_installer.rb | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/installer.rb b/lib/rubygems/installer.rb index 5b4504aa96b6..15d241d633d4 100644 --- a/lib/rubygems/installer.rb +++ b/lib/rubygems/installer.rb @@ -299,7 +299,7 @@ def install File.chmod(dir_mode, gem_dir) if dir_mode - say clean_text(spec.post_install_message) if options[:post_install_message] && !spec.post_install_message.nil? + say clean_text(spec.post_install_message.to_s) if options[:post_install_message] && !spec.post_install_message.nil? Gem::Specification.add_spec(spec) unless @install_dir diff --git a/test/rubygems/test_gem_installer.rb b/test/rubygems/test_gem_installer.rb index 44d14e8150c4..2f08024ef048 100644 --- a/test/rubygems/test_gem_installer.rb +++ b/test/rubygems/test_gem_installer.rb @@ -1498,6 +1498,22 @@ def test_install_sanitizes_post_install_message refute_match(/\e\]2;pwn/, @ui.output) end + def test_install_handles_non_string_post_install_message + # post_install_message may be a non-String (the gemspec schema allows an + # array), so sanitizing must not assume it responds to gsub. + @spec = setup_base_spec + @spec.post_install_message = %w[one two] + + installer = Gem::Installer.for_spec @spec, post_install_message: true + installer.gem_home = @gemhome + + use_ui @ui do + installer.install + end + + assert_match(/one/, @ui.output) + end + def test_install_extension_dir gemhome2 = "#{@gemhome}2"