Skip to content

Commit 67adc68

Browse files
authored
Merge pull request #9606 from ruby/gem-compact-index
Adopt the compact index for gem commands
2 parents a92ed76 + 995e21e commit 67adc68

26 files changed

Lines changed: 1943 additions & 74 deletions

Manifest.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,12 @@ lib/rubygems/commands/unpack_command.rb
404404
lib/rubygems/commands/update_command.rb
405405
lib/rubygems/commands/which_command.rb
406406
lib/rubygems/commands/yank_command.rb
407+
lib/rubygems/compact_index_client.rb
408+
lib/rubygems/compact_index_client/cache.rb
409+
lib/rubygems/compact_index_client/cache_file.rb
410+
lib/rubygems/compact_index_client/http_fetcher.rb
411+
lib/rubygems/compact_index_client/parser.rb
412+
lib/rubygems/compact_index_client/updater.rb
407413
lib/rubygems/config_file.rb
408414
lib/rubygems/core_ext/kernel_gem.rb
409415
lib/rubygems/core_ext/kernel_require.rb
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# frozen_string_literal: true
2+
3+
##
4+
# The CompactIndexClient fetches and parses the compact index files
5+
# (names, versions and info/[gem]) served by a gem server, keeping a
6+
# local cache so subsequent fetches only transfer what changed.
7+
#
8+
# This is an independent RubyGems port of Bundler::CompactIndexClient.
9+
# Both implementations are intentionally kept separate so that changes
10+
# on either side cannot affect the other; this one only depends on
11+
# RubyGems itself.
12+
13+
class Gem::CompactIndexClient
  # Digest names accepted by the client, mapped to the Digest class to use.
  SUPPORTED_DIGESTS = { "sha-256" => :SHA256 }.freeze

  # Held while a debug line is written by ::debug.
  DEBUG_MUTEX = Thread::Mutex.new

  # info returns an Array of INFO Arrays. Each INFO Array has the following indices:
  INFO_NAME = 0
  INFO_VERSION = 1
  INFO_PLATFORM = 2
  INFO_DEPS = 3
  INFO_REQS = 4

  # Emits a debug line through +warn+, prefixed with the receiver's name,
  # when the DEBUG_COMPACT_INDEX environment variable is set. The message is
  # taken from the given block, which is only evaluated when debugging is on.
  def self.debug
    if ENV["DEBUG_COMPACT_INDEX"]
      DEBUG_MUTEX.synchronize do
        warn("[#{self}] #{yield}")
      end
    end
  end

  # Base class for errors raised by the compact index client.
  class Error < StandardError; end

  # Loaded here, after the constants and Error above have been defined.
  require_relative "compact_index_client/cache"
  require_relative "compact_index_client/cache_file"
  require_relative "compact_index_client/http_fetcher"
  require_relative "compact_index_client/parser"
  require_relative "compact_index_client/updater"

  # The client is instantiated with:
  # - `directory`: the root directory where the cache files are stored.
  # - `fetcher`: (optional) an object that responds to #call(uri_path, headers)
  #   and returns a Gem::Net::HTTP response. When the fetcher is not provided,
  #   the client only reads cached files from disk.
  def initialize(directory, fetcher = nil)
    @cache = Cache.new(directory, fetcher)
    @parser = Parser.new(@cache)
  end

  # Delegates to the parser's +names+.
  def names
    Gem::CompactIndexClient.debug { "names" }
    @parser.names
  end

  # Delegates to the parser's +versions+.
  def versions
    Gem::CompactIndexClient.debug { "versions" }
    @parser.versions
  end

  # Returns an Array holding the result of #info for each entry of +names+,
  # in the same order.
  def dependencies(names)
    Gem::CompactIndexClient.debug { "dependencies(#{names})" }
    names.map {|gem_name| info(gem_name) }
  end

  # Delegates to the parser's +info+ for the gem called +name+.
  def info(name)
    Gem::CompactIndexClient.debug { "info(#{name})" }
    @parser.info(name)
  end

  # Fetches a single gem's info without consulting the versions index,
  # using a conditional request to refresh the cached file. Useful when
  # only a few gems are needed and the versions index download would
  # dominate, as in gem install.
  def fetch_info(name)
    Gem::CompactIndexClient.debug { "fetch_info(#{name})" }
    raw_info = @cache.fetch_info(name)
    @parser.parse_info(raw_info, name)
  end

  # Returns the greatest Gem::Version among the parsed info entries of
  # +name+ (nil when there are no entries).
  def latest_version(name)
    Gem::CompactIndexClient.debug { "latest_version(#{name})" }
    candidates = @parser.info(name).map do |entry|
      Gem::Version.new(entry[INFO_VERSION])
    end
    candidates.max
  end

  # Delegates to the parser's +available?+.
  def available?
    Gem::CompactIndexClient.debug { "available?" }
    @parser.available?
  end

  # Delegates to the cache's +reset!+.
  def reset!
    Gem::CompactIndexClient.debug { "reset!" }
    @cache.reset!
  end
end
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# frozen_string_literal: true
2+
3+
require "digest"
4+
require "fileutils"
5+
require "pathname" unless defined?(Pathname)
6+
require "set"
7+
8+
class Gem::CompactIndexClient
  # Returns the contents of the cached compact index files, asking the
  # Updater (present only when a fetcher was given) to refresh a file the
  # first time its endpoint is requested.
  class Cache
    attr_reader :directory

    # +directory+ is the cache root; it is expanded to an absolute path and
    # the info sub-directories are created beneath it. +fetcher+ is optional:
    # without it no Updater is built and only the files on disk are read.
    def initialize(directory, fetcher = nil)
      @directory = Pathname.new(directory).expand_path
      @updater = fetcher ? Updater.new(fetcher) : nil
      @mutex = Thread::Mutex.new
      @endpoints = Set.new

      @info_root, @special_characters_info_root, @info_etag_root =
        %w[info info-special-characters info-etags].map {|subdir| mkdir(subdir) }
    end

    # Contents of the cached "names" file, or nil when it does not exist.
    def names
      fetch("names", names_path, names_etag_path)
    end

    # Contents of the cached "versions" file, or nil when it does not exist.
    def versions
      fetch("versions", versions_path, versions_etag_path)
    end

    # Contents of the cached info file for +name+. The file is only refreshed
    # when +remote_checksum+ is given and differs from the MD5 checksum of
    # the local copy; otherwise the local copy is returned as is.
    def info(name, remote_checksum = nil)
      path = info_path(name)
      outdated = remote_checksum && remote_checksum != checksum_for_file(path)
      return fetch("info/#{name}", path, info_etag_path(name)) if outdated

      Gem::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? "versions index checksum matches local" : "versions index checksum is nil"})" }
      read(path)
    end

    # Fetch a single gem's info file without consulting the versions
    # index, refreshing the cached file with a conditional request.
    def fetch_info(name)
      fetch("info/#{name}", info_path(name), info_etag_path(name))
    end

    # Forgets which endpoints were already requested, so the next request
    # for each of them goes through the updater again.
    def reset!
      @mutex.synchronize { @endpoints.clear }
    end

    private

    def names_path = directory.join("names")
    def names_etag_path = directory.join("names.etag")
    def versions_path = directory.join("versions")
    def versions_etag_path = directory.join("versions.etag")

    # Location of the info file for +name+. Names made only of the characters
    # a-z, 0-9, "_" and "-" are stored under "info"; any other name is stored
    # under "info-special-characters" with its MD5 hex digest appended.
    def info_path(name)
      gem_name = name.to_s
      return @info_root.join(gem_name) unless gem_name.match?(/[^a-z0-9_-]/)

      @special_characters_info_root.join("#{gem_name}-#{Digest::MD5.hexdigest(gem_name).downcase}")
    end

    # Location of the etag file for +name+; the MD5 hex digest of the name is
    # always appended.
    def info_etag_path(name)
      gem_name = name.to_s
      suffix = Digest::MD5.hexdigest(gem_name).downcase
      @info_etag_root.join("#{gem_name}-#{suffix}")
    end

    # MD5 hex digest of the file at +path+, or nil when it is not a file.
    def checksum_for_file(path)
      Digest::MD5.file(path).hexdigest if path.file?
    end

    # Creates +name+ (and any missing parents) under the cache root and
    # returns its path.
    def mkdir(name)
      dir = directory.join(name)
      FileUtils.mkdir_p(dir)
      dir
    end

    # Runs the updater for +remote_path+ unless that endpoint was already
    # requested, then returns whatever is on disk at +path+.
    def fetch(remote_path, path, etag_path)
      first_request = !already_fetched?(remote_path)

      Gem::CompactIndexClient.debug do
        first_request ? "fetching #{remote_path}" : "already fetched #{remote_path}"
      end
      @updater&.update(remote_path, path, etag_path) if first_request

      read(path)
    end

    # Records +remote_path+ as requested. Returns true when it had been
    # recorded before, false when this call recorded it.
    def already_fetched?(remote_path)
      @mutex.synchronize { @endpoints.add?(remote_path).nil? }
    end

    # Contents of +path+, or nil when it is not a file.
    def read(path)
      path.read if path.file?
    end
  end
end
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# frozen_string_literal: true
2+
3+
require "digest"
4+
require "fileutils"
5+
require_relative "../package"
6+
7+
class Gem::CompactIndexClient
  # Stages changes to a cache file in a temp file next to it (named after the
  # current process id) and moves the temp file over the original on commit.
  # When expected digests are supplied, the written data is checked against
  # them before the original file is replaced.
  class CacheFile
    DEFAULT_FILE_MODE = 0o644
    private_constant :DEFAULT_FILE_MODE

    class Error < RuntimeError; end
    class ClosedError < Error; end

    # Raised by #commit! when the calculated digests differ from the
    # expected ones.
    class DigestMismatchError < Error
      def initialize(digests, expected_digests)
        super("Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}.")
      end
    end

    # Builds an instance whose temp file starts as a copy of +path+ (digests
    # are computed over the copied bytes), then yields the instance.
    def self.copy(path, &block)
      new(path) do |cache_file|
        cache_file.initialize_digests

        path.open("rb") do |source|
          cache_file.open {|target| IO.copy_stream(source, target) }
        end

        yield cache_file
      end
    end

    # Writes +data+ to a temp file and replaces +path+ with it, verifying
    # +digests+ first when they are given. Does nothing when +data+ is nil.
    def self.write(path, data, digests = nil)
      return unless data

      new(path) do |cache_file|
        cache_file.digests = digests
        cache_file.write(data)
      end
    end

    attr_reader :original_path, :path

    # +original_path+ is the Pathname of the file to replace. With a block,
    # the instance is yielded and always closed afterwards, which removes the
    # temp file if it was not committed.
    def initialize(original_path, &block)
      @original_path = original_path
      @perm = if original_path.file?
        original_path.stat.mode
      else
        DEFAULT_FILE_MODE
      end
      @path = original_path.sub(/$/, ".#{Process.pid}.tmp")

      if block_given?
        begin
          yield self
        ensure
          close
        end
      end
    end

    # Size of the temp file.
    def size
      path.size
    end

    # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
    def initialize_digests(keys = nil)
      algorithms = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
      @digests = algorithms.transform_values {|algo_class| Digest(algo_class).new }
    end

    # reset the digests so they don't contain any previously read data
    def reset_digests
      @digests.each_value(&:reset) if @digests
    end

    # set the digests that will be verified at the end
    def digests=(expected_digests)
      @expected_digests = expected_digests

      if expected_digests.nil?
        @digests = nil
      elsif @digests
        # keep the running digests, restricted to the algorithms to verify
        @digests = @digests.slice(*expected_digests.keys)
      else
        initialize_digests(expected_digests.keys)
      end
    end

    # Truthy when at least one digest is being calculated.
    def digests?
      @digests && @digests.any?
    end

    # Open the temp file for writing, reusing original permissions, yielding the IO object.
    # When digests are being calculated, the yielded object is a DigestIO
    # wrapping the file, so that writes also update the digests.
    def open(write_mode = "wb", perm = @perm, &block)
      raise ClosedError, "Cannot reopen closed file" if @closed

      path.open(write_mode, perm) do |io|
        sink = digests? ? Gem::Package::DigestIO.new(io, @digests) : io
        yield sink
      end
    end

    # Returns false without appending when no digests since appending is too error prone to do without digests.
    def append(data)
      if digests?
        open("a") {|io| io.write data }
        verify && commit
      else
        false
      end
    end

    # Replaces the temp file's contents with +data+ and commits, raising
    # DigestMismatchError when the digests do not match.
    def write(data)
      reset_digests
      open {|io| io.write data }
      commit!
    end

    # Commits after a successful #verify; raises DigestMismatchError otherwise.
    def commit!
      raise DigestMismatchError.new(@base64digests, @expected_digests) unless verify

      commit
    end

    # Verify the digests, returning true on match, false on mismatch.
    # Also returns true when there is nothing to compare. The running digests
    # are discarded once compared, so a later call finds nothing to compare.
    def verify
      return true unless @expected_digests && digests?

      @base64digests = @digests.transform_values(&:base64digest)
      @digests = nil
      @base64digests.all? do |algo, digest|
        @expected_digests[algo] == digest
      end
    end

    # Replace the original file with the temp file without verifying digests.
    # The file is permanently closed.
    def commit
      raise ClosedError, "Cannot commit closed file" if @closed

      FileUtils.mv(path, original_path)
      @closed = true
    end

    # Remove the temp file without replacing the original file.
    # The file is permanently closed.
    def close
      return if @closed

      FileUtils.remove_file(path) if @path&.file?
      @closed = true
    end
  end
end

0 commit comments

Comments
 (0)