Skip to content

Commit 65dec3b

Browse files
feat(storage): setting default checksum (#33331)
1 parent ae22466 commit 65dec3b

6 files changed

Lines changed: 197 additions & 28 deletions

File tree

google-cloud-storage/lib/google/cloud/storage/bucket.rb

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,19 +1821,20 @@ def file path,
18211821
# changed to a time in the future. If custom_time must be unset, you
18221822
# must either perform a rewrite operation, or upload the data again
18231823
# and create a new file.
1824-
# @param [Symbol, nil] checksum The type of checksum for the client to
1824+
# @param [Symbol, nil, Boolean] checksum The type of checksum for the client to
18251825
# automatically calculate and send with the create request to verify
18261826
# the integrity of the object. If provided, Cloud Storage will only
18271827
# create the file if the value calculated by the client matches the
18281828
# value calculated by the service.
18291829
#
18301830
# Acceptable values are:
18311831
#
1832+
# * `true` [Boolean] - Calculate and provide a checksum using the CRC32c hash.
1833+
# * `false` [Boolean] - Do not calculate or provide a checksum.
18321834
# * `md5` - Calculate and provide a checksum using the MD5 hash.
18331835
# * `crc32c` - Calculate and provide a checksum using the CRC32c hash.
18341836
# * `all` - Calculate and provide checksums for all available verifications.
1835-
#
1836-
# Optional. The default is `nil`. Do not provide if also providing a
1837+
# Optional. The default is `crc32c`. Do not provide if also providing a
18371838
# corresponding `crc32c` or `md5` argument. See
18381839
# [Validation](https://cloud.google.com/storage/docs/hashes-etags)
18391840
# for more information.
@@ -1998,6 +1999,11 @@ def create_file file,
19981999
path ||= file.path if file.respond_to? :path
19992000
path ||= file if file.is_a? String
20002001
raise ArgumentError, "must provide path" if path.nil?
2002+
# If no checksum type or specific value is provided, or if checksum is true, the default will be set to crc32c.
2003+
# If the checksum is set to false, it will be disabled.
2004+
if [checksum, crc32c, md5].all?(&:nil?) || checksum == true
2005+
checksum = :crc32c
2006+
end
20012007
crc32c = crc32c_for file, checksum, crc32c
20022008
md5 = md5_for file, checksum, md5
20032009

google-cloud-storage/lib/google/cloud/storage/file/verifier.rb

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,29 +49,47 @@ def self.verify_crc32c gcloud_file, local_file
4949
gcloud_file.crc32c == crc32c_for(local_file)
5050
end
5151

52+
# Calculates MD5 digest using either file path or open stream.
5253
def self.md5_for local_file
53-
if local_file.respond_to? :to_path
54-
::File.open Pathname(local_file).to_path, "rb" do |f|
55-
::Digest::MD5.file(f).base64digest
56-
end
57-
else # StringIO
58-
local_file.rewind
59-
md5 = ::Digest::MD5.base64digest local_file.read
60-
local_file.rewind
61-
md5
62-
end
54+
_digest_for local_file, ::Digest::MD5
6355
end
6456

57+
# Calculates CRC32c digest using either file path or open stream.
6558
def self.crc32c_for local_file
66-
if local_file.respond_to? :to_path
59+
_digest_for local_file, ::Digest::CRC32c
60+
end
61+
62+
# @private
63+
# Computes a base64-encoded digest for a local file or IO stream.
64+
#
65+
# This method handles two types of inputs for `local_file`:
66+
# 1. A path or path-like object (String, Pathname, or anything responding to
67+
# `to_path`, such as File): It opens the file by path and streams it to compute the digest without loading the entire file into memory.
68+
# 2. Any other IO-like stream (e.g., StringIO): It reads the stream's
69+
# content to compute the digest. The stream is rewound before and after
70+
# reading, so it is left at the start rather than at its prior position.
71+
#
72+
# @param local_file [String, Pathname, IO] The local file path or IO
73+
# stream for which to compute the digest.
74+
# @param digest_class [Class] The digest class to use for the
75+
# calculation (e.g., `Digest::MD5`). It must respond to `.file` and
76+
# `.base64digest`.
77+
#
78+
# @return [String] The base64-encoded digest of the file's content.
79+
#
80+
def self._digest_for local_file, digest_class
81+
82+
if local_file.respond_to?(:to_path) || local_file.is_a?(String)
83+
# Case 1: Input is a file path (String, Pathname, or object that responds to :to_path).
6784
::File.open Pathname(local_file).to_path, "rb" do |f|
68-
::Digest::CRC32c.file(f).base64digest
85+
digest_class.file(f).base64digest
6986
end
70-
else # StringIO
87+
else
88+
# Case 2: Input is a stream with no path (e.g., StringIO).
7189
local_file.rewind
72-
crc32c = ::Digest::CRC32c.base64digest local_file.read
90+
digest = digest_class.base64digest local_file.read
7391
local_file.rewind
74-
crc32c
92+
digest
7593
end
7694
end
7795
end

google-cloud-storage/test/google/cloud/storage/bucket_encryption_test.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,12 @@ def create_file_gapi bucket=nil, name = nil
235235
def empty_file_gapi cache_control: nil, content_disposition: nil,
236236
content_encoding: nil, content_language: nil,
237237
content_type: nil, crc32c: nil, md5: nil, metadata: nil,
238-
storage_class: nil
238+
storage_class: nil, checksum: nil, content: nil
239+
240+
# If no checksum type or specific value is provided, the default will be set to crc32c.
241+
# If the checksum is set to false, it will be disabled.
242+
crc32c ||= set_crc32c_as_default md5, crc32c, checksum, content
243+
239244
params = {
240245
cache_control: cache_control, content_type: content_type,
241246
content_disposition: content_disposition, md5_hash: md5,

google-cloud-storage/test/google/cloud/storage/bucket_test.rb

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,40 @@
101101
_(bucket_complete.autoclass_enabled).must_equal bucket_autoclass_enabled
102102
_(bucket_complete.autoclass_terminal_storage_class).must_equal bucket_autoclass_terminal_storage_class
103103
end
104+
105+
it "creates a file with checksum: :crc32c by default" do
106+
new_file_name = random_file_path
107+
108+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
109+
tmpfile.write "Hello world!"
110+
tmpfile.rewind
111+
112+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile
113+
mock = Minitest::Mock.new
114+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
115+
[bucket.name, empty_file_gapi(content: tmpfile.read)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
116+
117+
bucket.service.mocked_service = mock
118+
bucket.create_file tmpfile, new_file_name
119+
120+
mock.verify
121+
end
122+
end
123+
124+
it "creates a file with a StringIO and checksum: :crc32c by default" do
125+
new_file_name = random_file_path
126+
new_file_contents = StringIO.new "Hello world"
127+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for new_file_contents
128+
mock = Minitest::Mock.new
129+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
130+
[bucket.name, empty_file_gapi(content: new_file_contents.read)], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0})
131+
132+
bucket.service.mocked_service = mock
133+
134+
bucket.create_file new_file_contents, new_file_name
135+
136+
mock.verify
137+
end
104138

105139
it "returns frozen cors" do
106140
bucket_complete.cors.each do |cors|
@@ -405,6 +439,41 @@
405439
end
406440
end
407441

442+
it "creates a file with no checksum" do
443+
new_file_name = random_file_path
444+
445+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
446+
tmpfile.write "Hello world!"
447+
tmpfile.rewind
448+
449+
mock = Minitest::Mock.new
450+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
451+
[bucket.name, empty_file_gapi(checksum: false)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
452+
453+
bucket.service.mocked_service = mock
454+
bucket.create_file tmpfile, new_file_name, checksum: false
455+
mock.verify
456+
end
457+
end
458+
459+
it "creates a file with crc32c if checksum is true" do
460+
new_file_name = random_file_path
461+
462+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
463+
tmpfile.write "Hello world!"
464+
tmpfile.rewind
465+
466+
mock = Minitest::Mock.new
467+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
468+
[bucket.name, empty_file_gapi(checksum: true, crc32c: "e5jnUQ==")], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
469+
bucket.service.mocked_service = mock
470+
471+
bucket.create_file tmpfile, new_file_name, checksum: true
472+
473+
mock.verify
474+
end
475+
end
476+
408477
it "creates a file with attributes" do
409478
new_file_name = random_file_path
410479

@@ -595,9 +664,11 @@
595664
new_file_name = random_file_path
596665

597666
Tempfile.create ["google-cloud", ".txt"] do |tmpfile|
667+
668+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile
598669
mock = Minitest::Mock.new
599670
mock.expect :insert_object, create_file_gapi(bucket_user_project.name, new_file_name),
600-
[bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0})
671+
[bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0})
601672

602673
bucket_user_project.service.mocked_service = mock
603674

@@ -608,13 +679,13 @@
608679
end
609680
end
610681

611-
it "creates an file with a StringIO" do
682+
it "creates a file with StringIO" do
612683
new_file_name = random_file_path
613-
new_file_contents = StringIO.new
614-
684+
new_file_contents = StringIO.new("Hello world string_io")
685+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for new_file_contents
615686
mock = Minitest::Mock.new
616687
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
617-
[bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0})
688+
[bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0})
618689

619690
bucket.service.mocked_service = mock
620691

@@ -1416,7 +1487,11 @@ def empty_file_gapi cache_control: nil, content_disposition: nil,
14161487
content_encoding: nil, content_language: nil,
14171488
content_type: nil, crc32c: nil, md5: nil, metadata: nil,
14181489
storage_class: nil, temporary_hold: nil,
1419-
event_based_hold: nil
1490+
event_based_hold: nil, checksum: nil, content: nil
1491+
1492+
# If no checksum type or specific value is provided, the default will be set to crc32c.
1493+
# If the checksum is set to false, it will be disabled.
1494+
crc32c ||= set_crc32c_as_default md5, crc32c, checksum, content
14201495
params = {
14211496
cache_control: cache_control, content_type: content_type,
14221497
content_disposition: content_disposition, md5_hash: md5,

google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,57 @@
244244
mock.verify
245245
end
246246
end
247+
248+
it "creates a file with checksum: :crc32c by default" do
249+
new_file_name = random_file_path
250+
251+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
252+
tmpfile.write "Hello world 123"
253+
tmpfile.rewind
254+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile
255+
mock = Minitest::Mock.new
256+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
257+
[bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
258+
259+
bucket.service.mocked_service = mock
260+
bucket.create_file tmpfile, new_file_name
261+
mock.verify
262+
end
263+
end
264+
265+
it "creates a file with no checksum" do
266+
new_file_name = random_file_path
267+
268+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
269+
tmpfile.write "Hello world!"
270+
tmpfile.rewind
271+
272+
mock = Minitest::Mock.new
273+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
274+
[bucket.name, empty_file_gapi(checksum: false, content: tmpfile.read)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
275+
276+
bucket.service.mocked_service = mock
277+
bucket.create_file tmpfile, new_file_name, checksum: false
278+
mock.verify
279+
end
280+
end
281+
282+
it "creates a file with crc32c if checksum is true" do
283+
new_file_name = random_file_path
284+
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
285+
tmpfile.write "Hello world!"
286+
tmpfile.rewind
287+
mock = Minitest::Mock.new
288+
mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name),
289+
[bucket.name, empty_file_gapi(checksum: true, crc32c: "e5jnUQ==")], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0})
290+
291+
bucket.service.mocked_service = mock
292+
293+
bucket.create_file tmpfile, new_file_name, checksum: true
294+
295+
mock.verify
296+
end
297+
end
247298

248299
it "creates a file with attributes" do
249300
new_file_name = random_file_path
@@ -279,7 +330,6 @@
279330
Tempfile.open ["google-cloud", ".txt"] do |tmpfile|
280331
tmpfile.write "Hello world"
281332
tmpfile.rewind
282-
283333
metadata = {
284334
"player" => "Bob",
285335
score: 10
@@ -340,9 +390,10 @@
340390
new_file_name = random_file_path
341391

342392
Tempfile.create ["google-cloud", ".txt"] do |tmpfile|
393+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile
343394
mock = Minitest::Mock.new
344395
mock.expect :insert_object, create_file_gapi(bucket_user_project.name, new_file_name),
345-
[bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0})
396+
[bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0})
346397

347398
bucket_user_project.service.mocked_service = mock
348399

@@ -1090,7 +1141,11 @@ def create_file_gapi bucket=nil, name = nil
10901141
def empty_file_gapi cache_control: nil, content_disposition: nil,
10911142
content_encoding: nil, content_language: nil,
10921143
content_type: nil, crc32c: nil, md5: nil, metadata: nil,
1093-
storage_class: nil
1144+
storage_class: nil, checksum: nil, content: nil
1145+
1146+
# If no checksum type or specific value is provided, the default will be set to crc32c.
1147+
# If the checksum is set to false, it will be disabled.
1148+
crc32c ||= set_crc32c_as_default md5, crc32c, checksum, content
10941149
params = {
10951150
cache_control: cache_control, content_type: content_type,
10961151
content_disposition: content_disposition, md5_hash: md5,

google-cloud-storage/test/helper.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,4 +653,14 @@ def restore_file_gapi bucket, file_name, generation=nil
653653
file_hash = random_file_hash(bucket, file_name, generation).to_json
654654
Google::Apis::StorageV1::Object.from_json file_hash
655655
end
656+
657+
def set_crc32c_as_default md5, crc32c, checksum, content = nil
658+
# If no checksum type or specific value is provided, the default will be set to crc32c.
659+
# If the checksum is set to false, it will be disabled.
660+
if [checksum, crc32c, md5].all?(&:nil?) || checksum == true
661+
# Calculate crc32c from content when it is given; otherwise fall back to the "Hello world" fixture string.
662+
crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new(content || "Hello world"))
663+
end
664+
crc32c
665+
end
656666
end

0 commit comments

Comments
 (0)