Skip to content

Commit 752f0ed

Browse files
authored
feat(storage): Add full object checksum validation at finalization for json path (#15815)
* feat(storage): Add full object checksum validation at finalization for JSON path * Add tests * remove unnecesary changes * remove unnecessary change * remove debug statement * added integration test * fix the test * fix the format * update the grpc path for checksum * correct the test * remove the test for grpc as it is not present for backend * remove the bucket creation * fix clang tidy * fix the format
1 parent 3c1d254 commit 752f0ed

File tree

11 files changed

+247
-37
lines changed

11 files changed

+247
-37
lines changed

google/cloud/storage/internal/grpc/object_request_parser.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,15 @@ Status Finalize(google::storage::v2::WriteObjectRequest& write_request,
851851
Merge(std::move(hashes), hash_function.Finish()));
852852
}
853853

854+
Status Finalize(google::storage::v2::WriteObjectRequest& write_request,
855+
grpc::WriteOptions& options,
856+
storage::internal::HashValues const& hashes) {
857+
write_request.set_finish_write(true);
858+
options.set_last_message();
859+
return FinalizeChecksums(*write_request.mutable_object_checksums(),
860+
std::move(hashes));
861+
}
862+
854863
Status Finalize(google::storage::v2::BidiWriteObjectRequest& write_request,
855864
grpc::WriteOptions& options,
856865
storage::internal::HashFunction& hash_function,
@@ -879,8 +888,11 @@ Status MaybeFinalize(google::storage::v2::WriteObjectRequest& write_request,
879888
bool chunk_has_more) {
880889
if (!chunk_has_more) options.set_last_message();
881890
if (!request.last_chunk() || chunk_has_more) return {};
882-
return Finalize(write_request, options, request.hash_function(),
883-
request.known_object_hashes());
891+
if (request.hash_function_ptr()) {
892+
return Finalize(write_request, options, request.hash_function(),
893+
request.known_object_hashes());
894+
}
895+
return Finalize(write_request, options, request.known_object_hashes());
884896
}
885897

886898
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END

google/cloud/storage/internal/grpc/object_request_parser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ Status Finalize(google::storage::v2::WriteObjectRequest& write_request,
8787
storage::internal::HashFunction& hash_function,
8888
storage::internal::HashValues = {});
8989

90+
Status Finalize(google::storage::v2::WriteObjectRequest& write_request,
91+
grpc::WriteOptions& options,
92+
storage::internal::HashValues const& hashes);
93+
9094
Status Finalize(google::storage::v2::BidiWriteObjectRequest& write_request,
9195
grpc::WriteOptions& options,
9296
storage::internal::HashFunction& hash_function,

google/cloud/storage/internal/grpc/stub.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,9 @@ StatusOr<storage::internal::QueryResumableUploadResponse> GrpcStub::UploadChunk(
630630
auto& data = *proto_request.mutable_checksummed_data();
631631
SetMutableContent(data, splitter.Next());
632632
data.set_crc32c(Crc32c(GetContent(data)));
633-
request.hash_function().Update(offset, GetContent(data), data.crc32c());
633+
if (request.hash_function_ptr()) {
634+
request.hash_function().Update(offset, GetContent(data), data.crc32c());
635+
}
634636
offset += GetContent(data).size();
635637

636638
auto wopts = grpc::WriteOptions();

google/cloud/storage/internal/object_requests.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,10 @@ UploadChunkRequest UploadChunkRequest::RemainingChunk(
533533
HashValues FinishHashes(UploadChunkRequest const& request) {
534534
// Prefer the hashes provided via *Value options in the request. If those
535535
// are not set, use the computed hashes from the data.
536-
return Merge(request.known_object_hashes(), request.hash_function().Finish());
536+
if (auto hf = request.hash_function_ptr()) {
537+
return Merge(request.known_object_hashes(), hf->Finish());
538+
}
539+
return request.known_object_hashes();
537540
}
538541

539542
std::ostream& operator<<(std::ostream& os, UploadChunkRequest const& r) {

google/cloud/storage/internal/object_requests.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,9 @@ class UploadChunkRequest
534534
HashValues const& known_object_hashes() const { return known_object_hashes_; }
535535

536536
HashFunction& hash_function() const { return *hash_function_; }
537+
std::shared_ptr<HashFunction> hash_function_ptr() const {
538+
return hash_function_;
539+
}
537540

538541
bool last_chunk() const { return upload_size_.has_value(); }
539542
std::size_t payload_size() const { return TotalBytes(payload_); }

google/cloud/storage/internal/object_write_streambuf.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,26 @@ void ObjectWriteStreambuf::FlushFinal() {
144144

145145
// Calculate the portion of the buffer that needs to be uploaded, if any.
146146
auto const actual_size = put_area_size();
147+
HashValues final_hashes = known_hashes_;
148+
if (hash_function_) {
149+
hash_function_->Update(committed_size_, {pbase(), actual_size});
150+
final_hashes = hash_function_->Finish();
151+
hash_function_.reset();
152+
}
147153

148154
// After this point the session will be closed, and no more calls to the hash
149155
// function are possible.
150156
auto upload_request = UploadChunkRequest(upload_id_, committed_size_,
151157
{ConstBuffer(pbase(), actual_size)},
152-
hash_function_, known_hashes_);
158+
hash_function_, final_hashes);
153159
request_.ForEachOption(internal::CopyCommonOptions(upload_request));
154160
OptionsSpan const span(span_options_);
155161
auto response = connection_->UploadChunk(upload_request);
156162
if (!response) {
157163
last_status_ = std::move(response).status();
158164
return;
159165
}
160-
161-
auto function = std::move(hash_function_);
162-
hash_values_ = std::move(*function).Finish();
166+
hash_values_ = final_hashes;
163167

164168
committed_size_ = response->committed_size.value_or(0);
165169
metadata_ = std::move(response->payload);

google/cloud/storage/internal/object_write_streambuf_test.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,37 @@ TEST(ObjectWriteStreambufTest, WriteObjectWithCustomHeader) {
670670
EXPECT_STATUS_OK(response);
671671
}
672672

673+
/// @test Verify that hashes are computed and passed in FlushFinal.
674+
TEST(ObjectWriteStreambufTest, FlushFinalWithHashes) {
675+
auto mock = std::make_unique<testing::MockClient>();
676+
auto const quantum = UploadChunkRequest::kChunkSizeQuantum;
677+
std::string const payload = "small test payload";
678+
679+
EXPECT_CALL(*mock, UploadChunk).WillOnce([&](UploadChunkRequest const& r) {
680+
EXPECT_EQ(payload.size(), r.payload_size());
681+
EXPECT_EQ(0, r.offset());
682+
EXPECT_TRUE(r.last_chunk());
683+
EXPECT_EQ(r.hash_function_ptr(), nullptr);
684+
EXPECT_EQ(r.known_object_hashes().crc32c, ComputeCrc32cChecksum(payload));
685+
EXPECT_EQ(r.known_object_hashes().md5, ComputeMD5Hash(payload));
686+
return QueryResumableUploadResponse{payload.size(), ObjectMetadata()};
687+
});
688+
689+
ResumableUploadRequest request;
690+
request.set_option(DisableCrc32cChecksum(false));
691+
request.set_option(DisableMD5Hash(false));
692+
ObjectWriteStreambuf streambuf(
693+
std::move(mock), request, "test-only-upload-id",
694+
/*committed_size=*/0, absl::nullopt, /*max_buffer_size=*/quantum,
695+
CreateHashFunction(Crc32cChecksumValue(), DisableCrc32cChecksum(false),
696+
MD5HashValue(), DisableMD5Hash(false)),
697+
HashValues{}, CreateHashValidator(request), AutoFinalizeConfig::kEnabled);
698+
699+
streambuf.sputn(payload.data(), payload.size());
700+
auto response = streambuf.Close();
701+
EXPECT_STATUS_OK(response);
702+
}
703+
673704
} // namespace
674705
} // namespace internal
675706
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END

google/cloud/storage/internal/rest/stub.cc

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -770,11 +770,22 @@ StatusOr<QueryResumableUploadResponse> RestStub::UploadChunk(
770770
// default (at least in this case), and that wastes bandwidth as the content
771771
// length is known.
772772
builder.AddHeader("Transfer-Encoding", {});
773-
auto offset = request.offset();
774-
for (auto const& b : request.payload()) {
775-
request.hash_function().Update(offset,
776-
absl::string_view{b.data(), b.size()});
777-
offset += b.size();
773+
auto hash_function = request.hash_function_ptr();
774+
if (hash_function) {
775+
auto offset = request.offset();
776+
for (auto const& b : request.payload()) {
777+
hash_function->Update(offset, absl::string_view{b.data(), b.size()});
778+
offset += b.size();
779+
}
780+
}
781+
if (request.last_chunk()) {
782+
auto const& hashes = request.known_object_hashes();
783+
if (!hashes.crc32c.empty()) {
784+
builder.AddHeader("x-goog-hash", "crc32c=" + hashes.crc32c);
785+
}
786+
if (!hashes.md5.empty()) {
787+
builder.AddHeader("x-goog-hash", "md5=" + hashes.md5);
788+
}
778789
}
779790

780791
auto failure_predicate = [](rest::HttpStatusCode code) {

google/cloud/storage/internal/rest/stub_test.cc

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
#include "google/cloud/storage/internal/rest/stub.h"
16+
#include "google/cloud/storage/internal/hash_function.h"
1617
#include "google/cloud/storage/testing/canonical_errors.h"
1718
#include "google/cloud/internal/api_client_header.h"
1819
#include "google/cloud/testing_util/mock_rest_client.h"
@@ -45,6 +46,29 @@ using ::testing::Pair;
4546
using ::testing::ResultOf;
4647
using ::testing::Return;
4748

49+
class NoOpHashFunction : public HashFunction {
50+
public:
51+
std::string Name() const override { return "NoOp"; }
52+
void Update(absl::string_view b) override { Cormorant(b); }
53+
Status Update(std::int64_t o, absl::string_view b) override {
54+
Cormorant(o, b);
55+
return Status{};
56+
}
57+
Status Update(std::int64_t o, absl::string_view b, std::uint32_t c) override {
58+
Cormorant(o, b, c);
59+
return Status{};
60+
}
61+
Status Update(std::int64_t o, absl::Cord const& b, std::uint32_t c) override {
62+
Cormorant(o, b, c);
63+
return Status{};
64+
}
65+
HashValues Finish() override { return {}; }
66+
67+
private:
68+
template <typename... Args>
69+
void Cormorant(Args const&...) {}
70+
};
71+
4872
TEST(RestStubTest, ResolveStorageAuthorityProdEndpoint) {
4973
auto options =
5074
Options{}.set<RestEndpointOption>("https://storage.googleapis.com");
@@ -921,6 +945,92 @@ TEST(RestStubTest, DeleteNotification) {
921945
StatusIs(PermanentError().code(), PermanentError().message()));
922946
}
923947

948+
TEST(RestStubTest, UploadChunkLastChunkWithCrc32c) {
949+
auto mock = std::make_shared<MockRestClient>();
950+
EXPECT_CALL(
951+
*mock,
952+
Put(ExpectedContext(),
953+
ResultOf(
954+
"request headers contain x-goog-hash with crc32c",
955+
[](RestRequest const& r) { return r.headers(); },
956+
Contains(Pair("x-goog-hash", ElementsAre("crc32c=test-crc32c")))),
957+
ExpectedPayload()))
958+
.WillOnce(Return(PermanentError()));
959+
auto tested = std::make_unique<RestStub>(Options{}, mock, mock);
960+
auto context = TestContext();
961+
auto status = tested->UploadChunk(
962+
context, TestOptions(),
963+
UploadChunkRequest("test-url", 0, {},
964+
std::make_shared<NoOpHashFunction>(),
965+
{"test-crc32c", ""}));
966+
EXPECT_THAT(status,
967+
StatusIs(PermanentError().code(), PermanentError().message()));
968+
}
969+
970+
TEST(RestStubTest, UploadChunkLastChunkWithMd5) {
971+
auto mock = std::make_shared<MockRestClient>();
972+
EXPECT_CALL(
973+
*mock,
974+
Put(ExpectedContext(),
975+
ResultOf(
976+
"request headers contain x-goog-hash with md5",
977+
[](RestRequest const& r) { return r.headers(); },
978+
Contains(Pair("x-goog-hash", ElementsAre("md5=test-md5")))),
979+
ExpectedPayload()))
980+
.WillOnce(Return(PermanentError()));
981+
auto tested = std::make_unique<RestStub>(Options{}, mock, mock);
982+
auto context = TestContext();
983+
auto status = tested->UploadChunk(
984+
context, TestOptions(),
985+
UploadChunkRequest("test-url", 0, {},
986+
std::make_shared<NoOpHashFunction>(),
987+
{"", "test-md5"}));
988+
EXPECT_THAT(status,
989+
StatusIs(PermanentError().code(), PermanentError().message()));
990+
}
991+
992+
TEST(RestStubTest, UploadChunkLastChunkWithBoth) {
993+
auto mock = std::make_shared<MockRestClient>();
994+
EXPECT_CALL(
995+
*mock,
996+
Put(ExpectedContext(),
997+
ResultOf(
998+
"request headers contain x-goog-hash with crc32c and md5",
999+
[](RestRequest const& r) { return r.headers(); },
1000+
Contains(Pair("x-goog-hash", ElementsAre("crc32c=test-crc32c",
1001+
"md5=test-md5")))),
1002+
ExpectedPayload()))
1003+
.WillOnce(Return(PermanentError()));
1004+
auto tested = std::make_unique<RestStub>(Options{}, mock, mock);
1005+
auto context = TestContext();
1006+
auto status = tested->UploadChunk(
1007+
context, TestOptions(),
1008+
UploadChunkRequest("test-url", 0, {},
1009+
std::make_shared<NoOpHashFunction>(),
1010+
{"test-crc32c", "test-md5"}));
1011+
EXPECT_THAT(status,
1012+
StatusIs(PermanentError().code(), PermanentError().message()));
1013+
}
1014+
1015+
TEST(RestStubTest, UploadChunkIntermediate) {
1016+
auto mock = std::make_shared<MockRestClient>();
1017+
EXPECT_CALL(*mock, Put(ExpectedContext(),
1018+
ResultOf(
1019+
"request headers do not contain x-goog-hash",
1020+
[](RestRequest const& r) { return r.headers(); },
1021+
Not(Contains(Pair("x-goog-hash", _)))),
1022+
ExpectedPayload()))
1023+
.WillOnce(Return(PermanentError()));
1024+
auto tested = std::make_unique<RestStub>(Options{}, mock, mock);
1025+
auto context = TestContext();
1026+
auto status = tested->UploadChunk(
1027+
context, TestOptions(),
1028+
UploadChunkRequest("test-url", 0, {},
1029+
std::make_shared<NoOpHashFunction>()));
1030+
EXPECT_THAT(status,
1031+
StatusIs(PermanentError().code(), PermanentError().message()));
1032+
}
1033+
9241034
} // namespace
9251035
} // namespace internal
9261036
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END

google/cloud/storage/tests/object_checksum_integration_test.cc

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,14 +145,8 @@ TEST_F(ObjectChecksumIntegrationTest, WriteObjectDefault) {
145145
EXPECT_THAT(os.computed_hash(),
146146
HasSubstr(ComputeCrc32cChecksum(LoremIpsum())));
147147
if (meta->has_metadata("x_emulator_upload")) {
148-
if (UsingGrpc()) {
149-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_crc32c", _)));
150-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
151-
} else {
152-
// Streaming uploads over REST cannot include checksums
153-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_crc32c", _)));
154-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
155-
}
148+
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_crc32c", _)));
149+
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
156150
}
157151
}
158152

@@ -200,14 +194,8 @@ TEST_F(ObjectChecksumIntegrationTest, WriteObjectExplicitEnable) {
200194
EXPECT_THAT(os.computed_hash(),
201195
HasSubstr(ComputeCrc32cChecksum(LoremIpsum())));
202196
if (meta->has_metadata("x_emulator_upload")) {
203-
if (UsingGrpc()) {
204-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_crc32c", _)));
205-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
206-
} else {
207-
// Streaming uploads over REST cannot include checksums
208-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_crc32c", _)));
209-
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
210-
}
197+
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_crc32c", _)));
198+
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", _)));
211199
}
212200
}
213201

@@ -289,6 +277,54 @@ TEST_F(ObjectChecksumIntegrationTest, WriteObjectUploadBadChecksum) {
289277
ASSERT_THAT(stream.metadata(), Not(IsOk()));
290278
}
291279

280+
/// @test Verify that full object checksums are sent in the final chunk.
281+
TEST_F(ObjectChecksumIntegrationTest, WriteObjectWithFullChecksumValidation) {
282+
auto client = MakeIntegrationTestClient();
283+
auto object_name = MakeRandomObjectName();
284+
auto content = LoremIpsum();
285+
auto expected_crc32c = ComputeCrc32cChecksum(content);
286+
287+
auto os = client.WriteObject(bucket_name_, object_name,
288+
DisableCrc32cChecksum(false),
289+
DisableMD5Hash(true), IfGenerationMatch(0));
290+
os << content;
291+
os.Close();
292+
auto meta = os.metadata();
293+
ASSERT_STATUS_OK(meta);
294+
ScheduleForDelete(*meta);
295+
296+
EXPECT_EQ(os.computed_hash(), expected_crc32c);
297+
298+
if (meta->has_metadata("x_emulator_upload")) {
299+
EXPECT_THAT(meta->metadata(),
300+
Contains(Pair("x_emulator_crc32c", expected_crc32c)));
301+
EXPECT_THAT(meta->metadata(), Contains(Pair("x_emulator_no_md5", "true")));
302+
}
303+
}
304+
305+
/// @test Verify that the upload fails when the provided CRC32C checksum does
306+
/// not match the data.
307+
TEST_F(ObjectChecksumIntegrationTest, WriteObjectWithIncorrectChecksumValue) {
308+
// TODO(#14385) - the emulator does not support this feature for gRPC.
309+
if (UsingEmulator() && UsingGrpc()) GTEST_SKIP();
310+
auto client = MakeIntegrationTestClient();
311+
auto object_name = MakeRandomObjectName();
312+
auto content = LoremIpsum();
313+
314+
auto bad_crc32c =
315+
ComputeCrc32cChecksum("this is not the data being uploaded");
316+
317+
auto os = client.WriteObject(bucket_name_, object_name,
318+
Crc32cChecksumValue(bad_crc32c),
319+
DisableMD5Hash(true), IfGenerationMatch(0));
320+
321+
os << content;
322+
os.Close();
323+
EXPECT_TRUE(os.bad());
324+
auto meta = os.metadata();
325+
EXPECT_THAT(meta, Not(IsOk()));
326+
}
327+
292328
/// @test Verify that CRC32C checksums are computed by default on downloads.
293329
TEST_F(ObjectChecksumIntegrationTest, ReadObjectDefault) {
294330
// TODO(#14385) - the emulator does not support this feature for gRPC.

0 commit comments

Comments
 (0)