From 8247c42f41557e5b7c0fdbdba1a959d4405b176b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 08:39:27 +0900 Subject: [PATCH 1/8] Intern link elements in SafeMarshal reader specs.4.8 from rubygems.org contains 4.7M object links pointing at only 156k unique offsets, so allocating an element per occurrence makes links the largest part of the AST. Caching them by offset reduces peak RSS of loading the full index from 573MB to 448MB and subsumes the hard-coded OBJECT_LINKS table. https://github.com/ruby/rubygems/issues/9368 Co-Authored-By: Claude Fable 5 --- lib/rubygems/safe_marshal/reader.rb | 40 +++-------------------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/lib/rubygems/safe_marshal/reader.rb b/lib/rubygems/safe_marshal/reader.rb index 4362d65fd681..4d5796f93703 100644 --- a/lib/rubygems/safe_marshal/reader.rb +++ b/lib/rubygems/safe_marshal/reader.rb @@ -28,6 +28,8 @@ class NegativeLengthError < Error def initialize(io) @io = io + @object_links = {} + @symbol_links = {} end def read! @@ -191,7 +193,7 @@ def read_object_with_ivars def read_symbol_link offset = read_integer - Elements::SymbolLink.new(offset) + @symbol_links[offset] ||= Elements::SymbolLink.new(offset) end def read_user_marshal @@ -200,43 +202,9 @@ def read_user_marshal Elements::UserMarshal.new(name, data) end - # profiling bundle install --full-index shows that - # offset 6 is by far the most common object link, - # so we special case it to avoid allocating a new - # object a third of the time. - # the following are all the object links that - # appear more than 10000 times in my profiling - - OBJECT_LINKS = { - 6 => Elements::ObjectLink.new(6).freeze, - 30 => Elements::ObjectLink.new(30).freeze, - 81 => Elements::ObjectLink.new(81).freeze, - 34 => Elements::ObjectLink.new(34).freeze, - 38 => Elements::ObjectLink.new(38).freeze, - 50 => Elements::ObjectLink.new(50).freeze, - 91 => Elements::ObjectLink.new(91).freeze, - 42 => Elements::ObjectLink.new(42).freeze, - 46 => Elements::ObjectLink.new(46).freeze, - 150 => Elements::ObjectLink.new(150).freeze, - 100 => Elements::ObjectLink.new(100).freeze, - 104 => Elements::ObjectLink.new(104).freeze, - 108 => Elements::ObjectLink.new(108).freeze, - 242 => Elements::ObjectLink.new(242).freeze, - 246 => Elements::ObjectLink.new(246).freeze, - 139 => Elements::ObjectLink.new(139).freeze, - 143 => Elements::ObjectLink.new(143).freeze, - 114 => Elements::ObjectLink.new(114).freeze, - 308 => Elements::ObjectLink.new(308).freeze, - 200 => Elements::ObjectLink.new(200).freeze, - 54 => Elements::ObjectLink.new(54).freeze, - 62 => Elements::ObjectLink.new(62).freeze, - 1_286_245 => Elements::ObjectLink.new(1_286_245).freeze, - }.freeze - private_constant :OBJECT_LINKS - def read_object_link offset = read_integer - OBJECT_LINKS[offset] || Elements::ObjectLink.new(offset) + @object_links[offset] ||= Elements::ObjectLink.new(offset) end EMPTY_HASH = Elements::Hash.new([].freeze).freeze From f22539403b39e0128218cda8d10c04695fcaeea9 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 09:15:32 +0900 Subject: [PATCH 2/8] Release resolution memory before installing gems Resolution retains an EndpointSpecification for every version of every gem in the dependency closure, plus parsed checksums for all ~190k gems in the compact index, none of which are needed once the resolution result has been materialized. Dropping them (and forcing a major GC, since they are mostly old generation by that point) reduces peak RSS of a cold bundle install of a 103-gem app from 427MB to 342MB. https://github.com/ruby/rubygems/issues/9368 Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/definition.rb | 15 +++++++++++++++ bundler/lib/bundler/fetcher.rb | 4 ++++ bundler/lib/bundler/fetcher/base.rb | 3 +++ bundler/lib/bundler/fetcher/compact_index.rb | 7 +++++++ bundler/lib/bundler/installer.rb | 4 +++- bundler/lib/bundler/source/rubygems.rb | 6 ++++++ bundler/lib/bundler/source_list.rb | 4 ++++ 7 files changed, 42 insertions(+), 1 deletion(-) diff --git a/bundler/lib/bundler/definition.rb b/bundler/lib/bundler/definition.rb index 7a9567147103..d8cf8681b90a 100644 --- a/bundler/lib/bundler/definition.rb +++ b/bundler/lib/bundler/definition.rb @@ -236,6 +236,21 @@ def prefer_local! sources.prefer_local! end + # Releases memory only needed during resolution, such as remote spec + # indexes and resolver state. Only safe to call once resolution is + # complete and the result has been materialized, since any further + # resolution will need to refetch remote specs. + def release_resolution_memory! + @resolver = nil + @resolution_base = nil + sources.release_resolution_memory! + + # Most of the released objects are old generation, so they won't be + # reclaimed by minor GCs and would otherwise keep the heap from + # shrinking until a major GC happens to run. + GC.start + end + # For given dependency list returns a SpecSet with Gemspec of all the required # dependencies. # 1. The method first resolves the dependencies specified in Gemfile diff --git a/bundler/lib/bundler/fetcher.rb b/bundler/lib/bundler/fetcher.rb index 0b6ced6f395c..cce005dd34be 100644 --- a/bundler/lib/bundler/fetcher.rb +++ b/bundler/lib/bundler/fetcher.rb @@ -243,6 +243,10 @@ def api_fetcher? fetchers.first.api_fetcher? end + def release_resolution_memory! + @fetchers&.each(&:release_resolution_memory!) + end + def gem_remote_fetcher @gem_remote_fetcher ||= begin require_relative "fetcher/gem_remote_fetcher" diff --git a/bundler/lib/bundler/fetcher/base.rb b/bundler/lib/bundler/fetcher/base.rb index cfec2f8e9419..8012bbc71641 100644 --- a/bundler/lib/bundler/fetcher/base.rb +++ b/bundler/lib/bundler/fetcher/base.rb @@ -38,6 +38,9 @@ def api_fetcher? false end + def release_resolution_memory! + end + private def log_specs(&block) diff --git a/bundler/lib/bundler/fetcher/compact_index.rb b/bundler/lib/bundler/fetcher/compact_index.rb index 52168111fea7..e002c6323dfd 100644 --- a/bundler/lib/bundler/fetcher/compact_index.rb +++ b/bundler/lib/bundler/fetcher/compact_index.rb @@ -63,6 +63,13 @@ def api_fetcher? true end + # The client holds the parsed checksums of all info files in the + # index. Dropping it is always safe because it is rebuilt from the + # local cache on demand. + def release_resolution_memory! + @compact_index_client = nil + end + private def compact_index_client diff --git a/bundler/lib/bundler/installer.rb b/bundler/lib/bundler/installer.rb index 87d9a7562740..0d9a5b46f91b 100644 --- a/bundler/lib/bundler/installer.rb +++ b/bundler/lib/bundler/installer.rb @@ -195,7 +195,9 @@ def install(options) force = options[:force] local = options[:local] || options[:"prefer-local"] jobs = Bundler.settings.installation_parallelization - spec_installations = ParallelInstaller.call(self, @definition.specs, jobs, standalone, force, local: local) + specs = @definition.specs + @definition.release_resolution_memory! + spec_installations = ParallelInstaller.call(self, specs, jobs, standalone, force, local: local) spec_installations.each do |installation| post_install_messages[installation.name] = installation.post_install_message if installation.has_post_install_message? end diff --git a/bundler/lib/bundler/source/rubygems.rb b/bundler/lib/bundler/source/rubygems.rb index 9109f399a7d7..b128ec23a659 100644 --- a/bundler/lib/bundler/source/rubygems.rb +++ b/bundler/lib/bundler/source/rubygems.rb @@ -337,6 +337,12 @@ def clear_cache @cached_specs = nil end + def release_resolution_memory! + @specs = nil + @remote_specs = nil + @fetchers&.each(&:release_resolution_memory!) + end + protected def remote_names diff --git a/bundler/lib/bundler/source_list.rb b/bundler/lib/bundler/source_list.rb index 954efbb65fc1..23c73c23f1b2 100644 --- a/bundler/lib/bundler/source_list.rb +++ b/bundler/lib/bundler/source_list.rb @@ -140,6 +140,10 @@ def clear_cache rubygems_sources.each(&:clear_cache) end + def release_resolution_memory! + rubygems_sources.each(&:release_resolution_memory!) + end + private def map_sources(replacement_sources) From 1fdc2a5683733ae958b456bf7a63c0dd2f8652a9 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 09:37:27 +0900 Subject: [PATCH 3/8] Drop explicit GC.start when releasing resolution memory This was the only forced GC in the codebase and would inject a full mark pause into any future caller of release_resolution_memory!. Allocation pressure during installation triggers major GCs soon enough anyway: peak RSS of the same cold install measures 360MB without the explicit GC versus 342MB with it, still well below the 427MB baseline. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/definition.rb | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bundler/lib/bundler/definition.rb b/bundler/lib/bundler/definition.rb index d8cf8681b90a..03e56fdc3a8d 100644 --- a/bundler/lib/bundler/definition.rb +++ b/bundler/lib/bundler/definition.rb @@ -244,11 +244,6 @@ def release_resolution_memory! @resolver = nil @resolution_base = nil sources.release_resolution_memory! - - # Most of the released objects are old generation, so they won't be - # reclaimed by minor GCs and would otherwise keep the heap from - # shrinking until a major GC happens to run. - GC.start end # For given dependency list returns a SpecSet with Gemspec of all the required From 0aea65b7f77968168def238a4fc36ab833007a8c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 10:03:19 +0900 Subject: [PATCH 4/8] Keep resolution memory when installing default gems Downloading a default gem always goes through cached_built_in_gem, which searches the remote index, so releasing it up front would force a full rebuild in the middle of parallel installation. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/installer.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bundler/lib/bundler/installer.rb b/bundler/lib/bundler/installer.rb index 0d9a5b46f91b..4beb839fe571 100644 --- a/bundler/lib/bundler/installer.rb +++ b/bundler/lib/bundler/installer.rb @@ -196,7 +196,9 @@ def install(options) local = options[:local] || options[:"prefer-local"] jobs = Bundler.settings.installation_parallelization specs = @definition.specs - @definition.release_resolution_memory! + # Installing default gems may need the remote index again to cache + # their .gem files, so keep resolution memory around in that case. + @definition.release_resolution_memory! if specs.none?(&:default_gem?) spec_installations = ParallelInstaller.call(self, specs, jobs, standalone, force, local: local) spec_installations.each do |installation| post_install_messages[installation.name] = installation.post_install_message if installation.has_post_install_message? From e897ca62ed6045519da5b9958796a14aa92e6c07 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 10:05:28 +0900 Subject: [PATCH 5/8] Fetch a single name when caching built-in gems cached_built_in_gem only needs the remote versions of one gem, so searching the full remote index is wasteful when it is not already materialized, for example after resolution memory has been released. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/source/rubygems.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bundler/lib/bundler/source/rubygems.rb b/bundler/lib/bundler/source/rubygems.rb index b128ec23a659..bc70b9797a36 100644 --- a/bundler/lib/bundler/source/rubygems.rb +++ b/bundler/lib/bundler/source/rubygems.rb @@ -243,7 +243,7 @@ def cache(spec, custom_path = nil) def cached_built_in_gem(spec, local: false) cached_path = cached_gem(spec) if cached_path.nil? && !local - remote_spec = remote_specs.search(spec).first + remote_spec = remote_spec_for(spec) if remote_spec cached_path = fetch_gem(remote_spec) spec.remote = remote_spec.remote @@ -431,6 +431,17 @@ def remote_specs end end + # Looks up a single spec in the remote sources, fetching only its own + # name when the full remote index is not already materialized. + def remote_spec_for(spec) + return remote_specs.search(spec).first if @remote_specs || api_fetchers.empty? + + index = Index.build do |idx| + fetch_names(api_fetchers, [spec.name], idx) + end + index.search(spec).first + end + def fetch_names(fetchers, dependency_names, index) fetchers.each do |f| if dependency_names From d0d353954ec3a17a4554373cf384010c752d8435 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 10:08:08 +0900 Subject: [PATCH 6/8] Synchronize remote_specs memoization The remote index can now be released and lazily rebuilt while parallel install workers are running, so guard the memoization with a mutex to avoid concurrent duplicate builds. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/source/rubygems.rb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bundler/lib/bundler/source/rubygems.rb b/bundler/lib/bundler/source/rubygems.rb index bc70b9797a36..0a0914f6c6e3 100644 --- a/bundler/lib/bundler/source/rubygems.rb +++ b/bundler/lib/bundler/source/rubygems.rb @@ -25,6 +25,7 @@ def initialize(options = {}) @checksum_store = Checksum::Store.new @gem_installers = {} @gem_installers_mutex = Mutex.new + @remote_specs_mutex = Mutex.new cooldown = options["cooldown"] Array(options["remotes"]).reverse_each {|r| add_remote(r, cooldown: cooldown) } @@ -339,7 +340,7 @@ def clear_cache def release_resolution_memory! @specs = nil - @remote_specs = nil + @remote_specs_mutex.synchronize { @remote_specs = nil } @fetchers&.each(&:release_resolution_memory!) end @@ -420,13 +421,15 @@ def api_fetchers end def remote_specs - @remote_specs ||= Index.build do |idx| - index_fetchers = fetchers - api_fetchers + @remote_specs ||= @remote_specs_mutex.synchronize do + @remote_specs ||= Index.build do |idx| + index_fetchers = fetchers - api_fetchers - if index_fetchers.empty? - fetch_names(api_fetchers, dependency_names, idx) - else - fetch_names(fetchers, nil, idx) + if index_fetchers.empty? + fetch_names(api_fetchers, dependency_names, idx) + else + fetch_names(fetchers, nil, idx) + end end end end From d405378fa62fd65c7c37b7221d6ecc3508755b69 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 10:14:44 +0900 Subject: [PATCH 7/8] Scope the default gem check to RubyGems sources The bundler spec comes from the metadata source and is a default gem on any modern Ruby, so checking all specs disabled the memory release for every install. Only default gems from RubyGems sources go through cached_built_in_gem during installation. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/installer.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bundler/lib/bundler/installer.rb b/bundler/lib/bundler/installer.rb index 4beb839fe571..aac91087cd20 100644 --- a/bundler/lib/bundler/installer.rb +++ b/bundler/lib/bundler/installer.rb @@ -198,7 +198,9 @@ def install(options) specs = @definition.specs # Installing default gems may need the remote index again to cache # their .gem files, so keep resolution memory around in that case. - @definition.release_resolution_memory! if specs.none?(&:default_gem?) + # The bundler spec itself is excluded because it comes from the + # metadata source and never goes through that path. + @definition.release_resolution_memory! if specs.none? {|s| s.default_gem? && s.source.is_a?(Source::Rubygems) } spec_installations = ParallelInstaller.call(self, specs, jobs, standalone, force, local: local) spec_installations.each do |installation| post_install_messages[installation.name] = installation.post_install_message if installation.has_post_install_message? From 0649eb11ed97a0b2106ac57f9fad58dc1f482018 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 12 Jun 2026 10:59:38 +0900 Subject: [PATCH 8/8] Update the versions file before fetching infos in parallel When the compact index client has been released after resolution and is lazily rebuilt, for example by bundle cache --all-platforms, its first use happens inside the parallel fetch workers. They then race to create the client and update the versions file, whose temp file name is based on the pid, so concurrent renames fail with ENOENT, the fetcher falls back to an empty index, and caching crashes on an unmaterialized LazySpecification. Warm the client up on the calling thread instead. Co-Authored-By: Claude Fable 5 --- bundler/lib/bundler/fetcher/compact_index.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bundler/lib/bundler/fetcher/compact_index.rb b/bundler/lib/bundler/fetcher/compact_index.rb index e002c6323dfd..b41d42b794fd 100644 --- a/bundler/lib/bundler/fetcher/compact_index.rb +++ b/bundler/lib/bundler/fetcher/compact_index.rb @@ -80,6 +80,12 @@ def compact_index_client end def fetch_gem_infos(names) + # Create the client and update the versions file on this thread. + # Otherwise the workers race to lazily create the client and update + # the versions file concurrently, e.g. when the client was released + # after resolution and is being rebuilt for `bundle cache`. + compact_index_client.available? + in_parallel(names) {|name| compact_index_client.info(name) } rescue TooManyRequestsError # rubygems.org is rate limiting us, slow down. @bundle_worker&.stop