Skip to content

Commit 872d7b7

Browse files
authored
feat(storage): add checksum validation in the json read channel (#13270)
Enabled default full object checksum validation for the following: `Storage#reader(blobId)` `Storage#reader(bucketName, blobName)`
1 parent 396b042 commit 872d7b7

3 files changed

Lines changed: 259 additions & 4 deletions

File tree

java-storage/google-cloud-storage/src/main/java/com/google/cloud/storage/ApiaryUnbufferedReadableByteChannel.java

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@
3838
import com.google.common.collect.ImmutableMap;
3939
import com.google.common.hash.HashFunction;
4040
import com.google.common.hash.Hashing;
41+
import com.google.common.hash.HashingInputStream;
4142
import com.google.common.io.BaseEncoding;
43+
import com.google.common.primitives.Ints;
4244
import com.google.gson.Gson;
4345
import com.google.gson.stream.JsonReader;
4446
import java.io.IOException;
@@ -61,13 +63,15 @@
6163
import org.checkerframework.checker.nullness.qual.NonNull;
6264
import org.checkerframework.checker.nullness.qual.Nullable;
6365

66+
@SuppressWarnings("UnstableApiUsage")
6467
class ApiaryUnbufferedReadableByteChannel implements UnbufferedReadableByteChannel {
6568

6669
private final ApiaryReadRequest apiaryReadRequest;
6770
private final Storage storage;
6871
private final SettableApiFuture<StorageObject> result;
6972
private final ResultRetryAlgorithm<?> resultRetryAlgorithm;
7073
private final Retrier retrier;
74+
private final Hasher hasher;
7175

7276
private long position;
7377
private ScatteringByteChannel sbc;
@@ -77,16 +81,21 @@ class ApiaryUnbufferedReadableByteChannel implements UnbufferedReadableByteChann
7781
// returned X-Goog-Generation header value
7882
private Long xGoogGeneration;
7983

84+
private HashingInputStream hashingInputStream;
85+
private String expectedCrc32cBase64;
86+
8087
ApiaryUnbufferedReadableByteChannel(
8188
ApiaryReadRequest apiaryReadRequest,
8289
Storage storage,
8390
SettableApiFuture<StorageObject> result,
8491
Retrier retrier,
85-
ResultRetryAlgorithm<?> resultRetryAlgorithm) {
92+
ResultRetryAlgorithm<?> resultRetryAlgorithm,
93+
Hasher hasher) {
8694
this.apiaryReadRequest = apiaryReadRequest;
8795
this.storage = storage;
8896
this.result = result;
8997
this.retrier = retrier;
98+
this.hasher = hasher;
9099
this.resultRetryAlgorithm =
91100
new BasicResultRetryAlgorithm<Object>() {
92101
@Override
@@ -126,6 +135,16 @@ public long read(ByteBuffer[] dsts, int offset, int length) throws IOException {
126135
long read = sbc.read(dsts, offset, length);
127136
if (read == -1) {
128137
returnEOF = true;
138+
if (hashingInputStream != null && expectedCrc32cBase64 != null) {
139+
int calculatedCrc32c = hashingInputStream.hash().asInt();
140+
byte[] decoded = BaseEncoding.base64().decode(expectedCrc32cBase64);
141+
int expectedVal = Ints.fromByteArray(decoded);
142+
143+
Crc32cValue<?> expected = Crc32cValue.of(expectedVal, 0);
144+
Crc32cValue.Crc32cLengthKnown actual = Crc32cValue.of(calculatedCrc32c, 0);
145+
146+
hasher.validate(expected, actual);
147+
}
129148
} else {
130149
totalRead += read;
131150
}
@@ -180,6 +199,10 @@ private ScatteringByteChannel open() {
180199

181200
HttpResponse media = get.executeMedia();
182201
InputStream content = media.getContent();
202+
203+
Map<String, String> hashes = ChecksumResponseParser.extractHashesFromHeader(media);
204+
this.expectedCrc32cBase64 = hashes.get("crc32c");
205+
183206
if (xGoogGeneration == null) {
184207
HttpHeaders responseHeaders = media.getHeaders();
185208

@@ -214,6 +237,14 @@ private ScatteringByteChannel open() {
214237
}
215238
}
216239

240+
boolean isHasherEnabled = !(hasher instanceof Hasher.NoOpHasher);
241+
boolean shouldValidate =
242+
isHasherEnabled && HttpStorageRpcHasherHelper.INSTANCE.shouldValidate(media);
243+
if (shouldValidate && expectedCrc32cBase64 != null) {
244+
this.hashingInputStream = new HashingInputStream(Hashing.crc32c(), content);
245+
content = this.hashingInputStream;
246+
}
247+
217248
ReadableByteChannel rbc = Channels.newChannel(content);
218249
return StorageByteChannels.readable().asScatteringByteChannel(rbc);
219250
} catch (HttpResponseException e) {

java-storage/google-cloud-storage/src/main/java/com/google/cloud/storage/HttpDownloadSessionBuilder.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ public static final class ReadableByteChannelSessionBuilder {
5454
private final BlobReadChannelContext blobReadChannelContext;
5555
private boolean autoGzipDecompression;
5656

57-
// private Hasher hasher; // TODO: wire in Hasher
57+
private final Hasher hasher;
5858

5959
private ReadableByteChannelSessionBuilder(BlobReadChannelContext blobReadChannelContext) {
6060
this.blobReadChannelContext = blobReadChannelContext;
61+
this.hasher = Hasher.defaultHasher();
6162
this.autoGzipDecompression = false;
6263
}
6364

@@ -96,7 +97,8 @@ public UnbufferedReadableByteChannelSessionBuilder unbuffered() {
9697
blobReadChannelContext.getApiaryClient(),
9798
resultFuture,
9899
blobReadChannelContext.getRetrier(),
99-
blobReadChannelContext.getRetryAlgorithmManager().idempotent()),
100+
blobReadChannelContext.getRetryAlgorithmManager().idempotent(),
101+
hasher),
100102
ApiFutures.transform(
101103
resultFuture, StorageObject::getContentEncoding, MoreExecutors.directExecutor()));
102104
} else {
@@ -105,7 +107,8 @@ public UnbufferedReadableByteChannelSessionBuilder unbuffered() {
105107
blobReadChannelContext.getApiaryClient(),
106108
resultFuture,
107109
blobReadChannelContext.getRetrier(),
108-
blobReadChannelContext.getRetryAlgorithmManager().idempotent());
110+
blobReadChannelContext.getRetryAlgorithmManager().idempotent(),
111+
hasher);
109112
}
110113
};
111114
}
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.storage;
18+
19+
import static org.junit.Assert.assertArrayEquals;
20+
import static org.junit.Assert.assertThrows;
21+
import static org.junit.Assert.assertTrue;
22+
23+
import com.google.api.client.http.HttpTransport;
24+
import com.google.api.client.json.gson.GsonFactory;
25+
import com.google.api.client.testing.http.MockLowLevelHttpResponse;
26+
import com.google.api.core.SettableApiFuture;
27+
import com.google.api.services.storage.Storage;
28+
import com.google.api.services.storage.model.StorageObject;
29+
import com.google.cloud.storage.ApiaryUnbufferedReadableByteChannel.ApiaryReadRequest;
30+
import com.google.cloud.storage.Retrying.RetrierWithAlg;
31+
import com.google.common.collect.ImmutableMap;
32+
import java.io.ByteArrayOutputStream;
33+
import java.io.IOException;
34+
import java.nio.ByteBuffer;
35+
import java.nio.channels.Channels;
36+
import java.nio.channels.WritableByteChannel;
37+
import java.util.Map;
38+
import org.junit.Test;
39+
40+
public class ApiaryUnbufferedReadableByteChannelTest {
41+
42+
private static final byte[] CONTENT_BYTES = "Hello, World!".getBytes();
43+
private static final String CORRECT_CRC32C_BASE64 = "TVUQaA==";
44+
private static final String WRONG_CRC32C_BASE64 = "AAAAAA==";
45+
46+
private Storage createMockStorageClient(String googHashHeader) {
47+
return createMockStorageClient(200, googHashHeader, null);
48+
}
49+
50+
private Storage createMockStorageClient(
51+
int statusCode, String googHashHeader, Map<String, String> extraHeaders) {
52+
HttpTransport transport =
53+
new HttpTransport() {
54+
@Override
55+
protected com.google.api.client.http.LowLevelHttpRequest buildRequest(
56+
String method, String url) throws IOException {
57+
return new com.google.api.client.testing.http.MockLowLevelHttpRequest() {
58+
@Override
59+
public com.google.api.client.http.LowLevelHttpResponse execute() throws IOException {
60+
MockLowLevelHttpResponse lowLevelResponse =
61+
new MockLowLevelHttpResponse()
62+
.setStatusCode(statusCode)
63+
.setContent(CONTENT_BYTES)
64+
.setContentLength(CONTENT_BYTES.length)
65+
.addHeader("Content-Length", String.valueOf(CONTENT_BYTES.length))
66+
.addHeader("x-goog-generation", "12345");
67+
if (googHashHeader != null) {
68+
lowLevelResponse.addHeader("x-goog-hash", googHashHeader);
69+
}
70+
if (extraHeaders != null) {
71+
for (Map.Entry<String, String> entry : extraHeaders.entrySet()) {
72+
lowLevelResponse.addHeader(entry.getKey(), entry.getValue());
73+
}
74+
}
75+
return lowLevelResponse;
76+
}
77+
};
78+
}
79+
};
80+
return new Storage.Builder(transport, GsonFactory.getDefaultInstance(), null)
81+
.setApplicationName("test")
82+
.build();
83+
}
84+
85+
@Test
86+
public void testRead_successfulCrc32cValidation() throws IOException {
87+
Storage storageClient = createMockStorageClient("crc32c=" + CORRECT_CRC32C_BASE64);
88+
89+
StorageObject from = new StorageObject().setBucket("bucket").setName("blob");
90+
ApiaryReadRequest apiaryReadRequest =
91+
new ApiaryReadRequest(from, ImmutableMap.of(), ByteRangeSpec.nullRange());
92+
93+
SettableApiFuture<StorageObject> resultFuture = SettableApiFuture.create();
94+
try (ApiaryUnbufferedReadableByteChannel channel =
95+
new ApiaryUnbufferedReadableByteChannel(
96+
apiaryReadRequest,
97+
storageClient,
98+
resultFuture,
99+
RetrierWithAlg.attemptOnce(),
100+
Retrying.neverRetry(),
101+
Hasher.defaultHasher()); ) {
102+
103+
ByteArrayOutputStream out = new ByteArrayOutputStream();
104+
try (WritableByteChannel w = Channels.newChannel(out)) {
105+
ByteBuffer buf = ByteBuffer.allocate(4096);
106+
while (channel.read(new ByteBuffer[] {buf}, 0, 1) != -1) {
107+
buf.flip();
108+
w.write(buf);
109+
buf.clear();
110+
}
111+
}
112+
113+
assertArrayEquals(CONTENT_BYTES, out.toByteArray());
114+
}
115+
}
116+
117+
@Test
118+
public void testRead_mismatchedCrc32cValidation_throwsChecksumMismatch() throws IOException {
119+
Storage storageClient = createMockStorageClient("crc32c=" + WRONG_CRC32C_BASE64);
120+
121+
StorageObject from = new StorageObject().setBucket("bucket").setName("blob");
122+
ApiaryReadRequest apiaryReadRequest =
123+
new ApiaryReadRequest(from, ImmutableMap.of(), ByteRangeSpec.nullRange());
124+
125+
SettableApiFuture<StorageObject> resultFuture = SettableApiFuture.create();
126+
try (ApiaryUnbufferedReadableByteChannel channel =
127+
new ApiaryUnbufferedReadableByteChannel(
128+
apiaryReadRequest,
129+
storageClient,
130+
resultFuture,
131+
RetrierWithAlg.attemptOnce(),
132+
Retrying.neverRetry(),
133+
Hasher.defaultHasher()); ) {
134+
135+
ByteBuffer buf = ByteBuffer.allocate(4096);
136+
Hasher.ChecksumMismatchException expected =
137+
assertThrows(
138+
Hasher.ChecksumMismatchException.class,
139+
() -> {
140+
while (channel.read(new ByteBuffer[] {buf}, 0, 1) != -1) {
141+
buf.clear();
142+
}
143+
});
144+
145+
assertTrue(expected.getMessage().contains("Mismatch checksum value"));
146+
}
147+
}
148+
149+
@Test
150+
public void testRead_suffixRangeFullObjectCrc32cValidation() throws IOException {
151+
// Suffix range request resulting in content-range: bytes 0-12/13
152+
Map<String, String> extraHeaders = ImmutableMap.of("Content-Range", "bytes 0-12/13");
153+
Storage storageClient =
154+
createMockStorageClient(206, "crc32c=" + CORRECT_CRC32C_BASE64, extraHeaders);
155+
156+
StorageObject from = new StorageObject().setBucket("bucket").setName("blob");
157+
ApiaryReadRequest apiaryReadRequest =
158+
new ApiaryReadRequest(from, ImmutableMap.of(), ByteRangeSpec.nullRange());
159+
160+
SettableApiFuture<StorageObject> resultFuture = SettableApiFuture.create();
161+
try (ApiaryUnbufferedReadableByteChannel channel =
162+
new ApiaryUnbufferedReadableByteChannel(
163+
apiaryReadRequest,
164+
storageClient,
165+
resultFuture,
166+
RetrierWithAlg.attemptOnce(),
167+
Retrying.neverRetry(),
168+
Hasher.defaultHasher()); ) {
169+
170+
ByteArrayOutputStream out = new ByteArrayOutputStream();
171+
try (WritableByteChannel w = Channels.newChannel(out)) {
172+
ByteBuffer buf = ByteBuffer.allocate(4096);
173+
while (channel.read(new ByteBuffer[] {buf}, 0, 1) != -1) {
174+
buf.flip();
175+
w.write(buf);
176+
buf.clear();
177+
}
178+
}
179+
180+
assertArrayEquals(CONTENT_BYTES, out.toByteArray());
181+
}
182+
}
183+
184+
@Test
185+
public void testRead_partialRangeNoCrc32cValidation() throws IOException {
186+
// Partial range request resulting in content-range: bytes 1-12/13
187+
Map<String, String> extraHeaders = ImmutableMap.of("Content-Range", "bytes 1-12/13");
188+
// Even if checksum is wrong, it shouldn't throw ChecksumMismatchException because validation is
189+
// skipped for partial downloads.
190+
Storage storageClient =
191+
createMockStorageClient(206, "crc32c=" + WRONG_CRC32C_BASE64, extraHeaders);
192+
193+
StorageObject from = new StorageObject().setBucket("bucket").setName("blob");
194+
ApiaryReadRequest apiaryReadRequest =
195+
new ApiaryReadRequest(from, ImmutableMap.of(), ByteRangeSpec.nullRange());
196+
197+
SettableApiFuture<StorageObject> resultFuture = SettableApiFuture.create();
198+
try (ApiaryUnbufferedReadableByteChannel channel =
199+
new ApiaryUnbufferedReadableByteChannel(
200+
apiaryReadRequest,
201+
storageClient,
202+
resultFuture,
203+
RetrierWithAlg.attemptOnce(),
204+
Retrying.neverRetry(),
205+
Hasher.defaultHasher()); ) {
206+
207+
ByteArrayOutputStream out = new ByteArrayOutputStream();
208+
try (WritableByteChannel w = Channels.newChannel(out)) {
209+
ByteBuffer buf = ByteBuffer.allocate(4096);
210+
while (channel.read(new ByteBuffer[] {buf}, 0, 1) != -1) {
211+
buf.flip();
212+
w.write(buf);
213+
buf.clear();
214+
}
215+
}
216+
217+
// We read the entire response content successfully (no exception thrown)
218+
assertArrayEquals(CONTENT_BYTES, out.toByteArray());
219+
}
220+
}
221+
}

0 commit comments

Comments
 (0)