Skip to content

Commit 37051bf

Browse files
authored
Support reading or downloading a GCS object range. (#1748)
With this change, applications can choose to download only a small portion of an object. This fixes #1732.
1 parent 608dd15 commit 37051bf

10 files changed

Lines changed: 229 additions & 68 deletions

File tree

google/cloud/storage/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ add_library(storage_client
7676
client.cc
7777
client_options.h
7878
client_options.cc
79+
download_options.h
7980
hashing_options.h
8081
hashing_options.cc
8182
idempotency_policy.h

google/cloud/storage/client.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,7 @@ class Client {
792792
* Valid types for this operation include `DisableCrc32cChecksum`,
793793
* `DisableMD5Hash`, `IfGenerationMatch`, `EncryptionKey`, `Generation`,
794794
* `IfGenerationMatch`, `IfGenerationNotMatch`, `IfMetagenerationMatch`,
795-
* `IfMetagenerationNotMatch`, and `UserProject`.
795+
* `IfMetagenerationNotMatch`, `ReadRange`, and `UserProject`.
796796
*
797797
* @throw std::runtime_error if there is a permanent failure, or if there were
798798
* more transient failures than allowed by the current retry policy.
@@ -943,7 +943,7 @@ class Client {
943943
* @param options a list of optional query parameters and/or request headers.
944944
* Valid types for this operation include `IfGenerationMatch`,
945945
* `IfGenerationNotMatch`, `IfMetagenerationMatch`,
946-
* `IfMetagenerationNotMatch`, `Generation`, and `UserProject`.
946+
* `IfMetagenerationNotMatch`, `Generation`, `ReadRange`, and `UserProject`.
947947
*
948948
* @throw std::runtime_error if there is a permanent failure, or if there were
949949
* more transient failures than allowed by the current retry policy.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright 2018 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_DOWNLOAD_OPTIONS_H_
16+
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_DOWNLOAD_OPTIONS_H_
17+
18+
#include "google/cloud/storage/internal/complex_option.h"
19+
#include <cstdint>
20+
#include <iostream>
21+
#include <string>
22+
23+
namespace google {
24+
namespace cloud {
25+
namespace storage {
26+
inline namespace STORAGE_CLIENT_NS {
27+
struct ReadRangeData {
28+
std::int64_t begin;
29+
std::int64_t end;
30+
};
31+
32+
/**
33+
* Request only a portion of the GCS object in a ReadObject operation.
34+
*
35+
* Note that the range is right-open, as is customary in C++. That is, it
36+
* excludes the `end` byte.
37+
*/
38+
struct ReadRange : public internal::ComplexOption<ReadRange, ReadRangeData> {
39+
ReadRange() : ComplexOption() {}
40+
explicit ReadRange(std::int64_t begin, std::int64_t end)
41+
: ComplexOption(ReadRangeData{begin, end}) {}
42+
static char const* name() { return "read-range"; }
43+
};
44+
45+
inline std::ostream& operator<<(std::ostream& os, ReadRangeData const& rhs) {
46+
return os << "ReadRangeData={begin=" << rhs.begin << ", end=" << rhs.end
47+
<< "}";
48+
}
49+
50+
} // namespace STORAGE_CLIENT_NS
51+
} // namespace storage
52+
} // namespace cloud
53+
} // namespace google
54+
55+
#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_DOWNLOAD_OPTIONS_H_

google/cloud/storage/internal/curl_client.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ std::unique_ptr<HashValidator> CreateHashValidator(bool disable_md5,
6868
/// Create a HashValidator for a download request.
6969
std::unique_ptr<HashValidator> CreateHashValidator(
7070
ReadObjectRangeRequest const& request) {
71+
if (request.HasOption<ReadRange>()) {
72+
return google::cloud::internal::make_unique<NullHashValidator>();
73+
}
7174
return CreateHashValidator(request.HasOption<DisableMD5Hash>(),
7275
request.HasOption<DisableCrc32cChecksum>());
7376
}
@@ -541,6 +544,18 @@ StatusOr<std::unique_ptr<ObjectReadStreambuf>> CurlClient::ReadObject(
541544
return status;
542545
}
543546
builder.AddQueryParameter("alt", "media");
547+
if (request.HasOption<ReadRange>()) {
548+
auto range = request.GetOption<ReadRange>().value();
549+
std::string header = "Range: bytes=" + std::to_string(range.begin) + "-" +
550+
std::to_string(range.end - 1);
551+
builder.AddHeader(header);
552+
// When doing a range read we need to disable decompression because range
553+
// reads do not work in that case:
554+
// https://cloud.google.com/storage/docs/transcoding#range
555+
// and
556+
// https://cloud.google.com/storage/docs/transcoding#decompressive_transcoding
557+
builder.AddHeader("Cache-Control: no-transform");
558+
}
544559

545560
std::unique_ptr<CurlReadStreambuf> buf(new CurlReadStreambuf(
546561
builder.BuildDownloadRequest(std::string{}),
@@ -1171,6 +1186,19 @@ StatusOr<std::unique_ptr<ObjectReadStreambuf>> CurlClient::ReadObjectXml(
11711186
// QuotaUser cannot be set, checked by the caller.
11721187
// UserIp cannot be set, checked by the caller.
11731188

1189+
if (request.HasOption<ReadRange>()) {
1190+
auto range = request.GetOption<ReadRange>().value();
1191+
std::string header = "Range: bytes=" + std::to_string(range.begin) + "-" +
1192+
std::to_string(range.end - 1);
1193+
builder.AddHeader(header);
1194+
// When doing a range read we need to disable decompression because range
1195+
// reads do not work in that case:
1196+
// https://cloud.google.com/storage/docs/transcoding#range
1197+
// and
1198+
// https://cloud.google.com/storage/docs/transcoding#decompressive_transcoding
1199+
builder.AddHeader("Cache-Control: no-transform");
1200+
}
1201+
11741202
std::unique_ptr<CurlReadStreambuf> buf(new CurlReadStreambuf(
11751203
builder.BuildDownloadRequest(std::string{}),
11761204
client_options().download_buffer_size(), CreateHashValidator(request)));

google/cloud/storage/internal/object_requests.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ std::ostream& operator<<(std::ostream& os, CopyObjectRequest const& r) {
9595

9696
std::ostream& operator<<(std::ostream& os, ReadObjectRangeRequest const& r) {
9797
os << "ReadObjectRangeRequest={bucket_name=" << r.bucket_name()
98-
<< ", object_name=" << r.object_name() << ", begin=" << r.begin()
99-
<< ", end=" << r.end();
98+
<< ", object_name=" << r.object_name();
10099
r.DumpOptions(os, ", ");
101100
return os << "}";
102101
}

google/cloud/storage/internal/object_requests.h

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_OBJECT_REQUESTS_H_
1616
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_OBJECT_REQUESTS_H_
1717

18+
#include "google/cloud/storage/download_options.h"
1819
#include "google/cloud/storage/hashing_options.h"
1920
#include "google/cloud/storage/internal/generic_object_request.h"
2021
#include "google/cloud/storage/internal/http_response.h"
@@ -178,29 +179,10 @@ class ReadObjectRangeRequest
178179
: public GenericObjectRequest<
179180
ReadObjectRangeRequest, DisableCrc32cChecksum, DisableMD5Hash,
180181
EncryptionKey, Generation, IfGenerationMatch, IfGenerationNotMatch,
181-
IfMetagenerationMatch, IfMetagenerationNotMatch, UserProject> {
182+
IfMetagenerationMatch, IfMetagenerationNotMatch, ReadRange,
183+
UserProject> {
182184
public:
183-
ReadObjectRangeRequest() : GenericObjectRequest(), begin_(0), end_(0) {}
184-
185-
explicit ReadObjectRangeRequest(std::string bucket_name,
186-
std::string object_name, std::int64_t begin,
187-
std::int64_t end)
188-
: GenericObjectRequest(std::move(bucket_name), std::move(object_name)),
189-
begin_(begin),
190-
end_(end) {}
191-
192-
explicit ReadObjectRangeRequest(std::string bucket_name,
193-
std::string object_name)
194-
: GenericObjectRequest(std::move(bucket_name), std::move(object_name)),
195-
begin_(0),
196-
end_(0) {}
197-
198-
std::int64_t begin() const { return begin_; }
199-
std::int64_t end() const { return end_; }
200-
201-
private:
202-
std::int64_t begin_;
203-
std::int64_t end_;
185+
using GenericObjectRequest::GenericObjectRequest;
204186
};
205187

206188
std::ostream& operator<<(std::ostream& os, ReadObjectRangeRequest const& r);

google/cloud/storage/internal/object_requests_test.cc

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -198,26 +198,25 @@ HttpResponse CreateRangeRequestResponse(
198198
}
199199

200200
TEST(ObjectRequestsTest, ReadObjectRange) {
201-
ReadObjectRangeRequest request("my-bucket", "my-object", 0, 1024);
201+
ReadObjectRangeRequest request("my-bucket", "my-object");
202202

203203
EXPECT_EQ("my-bucket", request.bucket_name());
204204
EXPECT_EQ("my-object", request.object_name());
205-
EXPECT_EQ(0, request.begin());
206-
EXPECT_EQ(1024, request.end());
207205

208206
request.set_option(storage::UserProject("my-project"));
209207
request.set_multiple_options(storage::IfGenerationMatch(7),
210-
storage::UserProject("my-project"));
208+
storage::UserProject("my-project"),
209+
storage::ReadRange(0, 1024));
211210

212211
std::ostringstream os;
213212
os << request;
214213
std::string actual = os.str();
215214
EXPECT_THAT(actual, HasSubstr("my-bucket"));
216215
EXPECT_THAT(actual, HasSubstr("my-object"));
217-
EXPECT_THAT(actual, HasSubstr("begin=0"));
218-
EXPECT_THAT(actual, HasSubstr("end=1024"));
219216
EXPECT_THAT(actual, HasSubstr("ifGenerationMatch=7"));
220217
EXPECT_THAT(actual, HasSubstr("my-project"));
218+
EXPECT_THAT(actual, HasSubstr("begin=0"));
219+
EXPECT_THAT(actual, HasSubstr("end=1024"));
221220
}
222221

223222
TEST(ObjectRequestsTest, RangeResponseParse) {

google/cloud/storage/storage_client.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ storage_client_hdrs = [
2121
"bucket_metadata.h",
2222
"client.h",
2323
"client_options.h",
24+
"download_options.h",
2425
"hashing_options.h",
2526
"idempotency_policy.h",
2627
"internal/access_control_common.h",

google/cloud/storage/testbench/testbench.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import httpbin
2424
import json
2525
import os
26+
import re
2627
import testbench_utils
2728
from werkzeug import serving
2829
from werkzeug import wsgi
@@ -445,14 +446,27 @@ def objects_get(bucket_name, object_name):
445446
if media != 'media':
446447
raise error_response.ErrorResponse('Invalid alt=%s parameter' % media)
447448
revision.validate_encryption_for_read(flask.request)
449+
# Respect the Range: header, if present.
450+
range_header = flask.request.headers.get('range')
451+
response_payload = revision.media
452+
begin = 0
453+
end = len(response_payload)
454+
if range_header is not None:
455+
print("\n\n\nrange_header = %s\n\n" % range_header)
456+
m = re.match('bytes=([0-9]+)-([0-9]+)', range_header)
457+
if m:
458+
print("\n\n\nmatch = %s\n\n" % m)
459+
begin = int(m.group(1))
460+
end = int(m.group(2))
461+
response_payload = response_payload[begin:end + 1]
462+
# Process custom headers to test error conditions.
448463
instructions = flask.request.headers.get('x-goog-testbench-instructions')
449464
if instructions == 'return-corrupted-data':
450-
response_payload = testbench_utils.corrupt_media(revision.media)
451-
else:
452-
response_payload = revision.media
465+
response_payload = testbench_utils.corrupt_media(response_payload)
453466
response = flask.make_response(response_payload)
454467
length = len(response_payload)
455-
response.headers['Content-Range'] = 'bytes 0-%d/%d' % (length - 1, length)
468+
content_range = 'bytes %d-%d/%d' % (begin, end - 1, length)
469+
response.headers['Content-Range'] = content_range
456470
response.headers['x-goog-hash'] = revision.x_goog_hash_header()
457471
return response
458472

@@ -693,14 +707,27 @@ def xmlapi_get_object(bucket_name, object_name):
693707
blob.check_preconditions_by_value(generation_match, None,
694708
metageneration_match, None)
695709
revision = blob.get_revision(flask.request)
710+
# Respect the Range: header, if present.
711+
range_header = flask.request.headers.get('range')
712+
response_payload = revision.media
713+
begin = 0
714+
end = len(response_payload)
715+
if range_header is not None:
716+
print("\n\n\nrange_header = %s\n\n" % range_header)
717+
m = re.match('bytes=([0-9]+)-([0-9]+)', range_header)
718+
if m:
719+
print("\n\n\nmatch = %s\n\n" % m)
720+
begin = int(m.group(1))
721+
end = int(m.group(2))
722+
response_payload = response_payload[begin:end + 1]
723+
# Process custom headers to test error conditions.
696724
instructions = flask.request.headers.get('x-goog-testbench-instructions')
697725
if instructions == 'return-corrupted-data':
698-
response_payload = testbench_utils.corrupt_media(revision.media)
699-
else:
700-
response_payload = revision.media
726+
response_payload = testbench_utils.corrupt_media(response_payload)
701727
response = flask.make_response(response_payload)
702728
length = len(response_payload)
703-
response.headers['Content-Range'] = 'bytes 0-%d/%d' % (length - 1, length)
729+
content_range = 'bytes %d-%d/%d' % (begin, end - 1, length)
730+
response.headers['Content-Range'] = content_range
704731
response.headers['x-goog-hash'] = revision.x_goog_hash_header()
705732
return response
706733

0 commit comments

Comments
 (0)