Skip to content
This repository was archived by the owner on Nov 12, 2025. It is now read-only.

Commit 45faf97

Browse files
authored
fix: don't fail with 429 when downloading wide tables (#79)
* fix: don't fail with 429 when downloading wide tables
* make ssl_credentials match more generic
* make synth.py more robust
* update synth to update tests
* fix updates to synth.py
1 parent e290752 commit 45faf97

5 files changed

Lines changed: 97 additions & 11 deletions

File tree

google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ def __init__(
145145
ssl_credentials=ssl_credentials,
146146
scopes=scopes or self.AUTH_SCOPES,
147147
quota_project_id=quota_project_id,
148+
options=(
149+
("grpc.max_send_message_length", -1),
150+
("grpc.max_receive_message_length", -1),
151+
),
148152
)
149153
else:
150154
host = host if ":" in host else host + ":443"
@@ -162,6 +166,10 @@ def __init__(
162166
ssl_credentials=ssl_channel_credentials,
163167
scopes=scopes or self.AUTH_SCOPES,
164168
quota_project_id=quota_project_id,
169+
options=(
170+
("grpc.max_send_message_length", -1),
171+
("grpc.max_receive_message_length", -1),
172+
),
165173
)
166174

167175
self._stubs = {} # type: Dict[str, Callable]

google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ def __init__(
190190
ssl_credentials=ssl_credentials,
191191
scopes=scopes or self.AUTH_SCOPES,
192192
quota_project_id=quota_project_id,
193+
options=(
194+
("grpc.max_send_message_length", -1),
195+
("grpc.max_receive_message_length", -1),
196+
),
193197
)
194198
else:
195199
host = host if ":" in host else host + ":443"
@@ -207,6 +211,10 @@ def __init__(
207211
ssl_credentials=ssl_channel_credentials,
208212
scopes=scopes or self.AUTH_SCOPES,
209213
quota_project_id=quota_project_id,
214+
options=(
215+
("grpc.max_send_message_length", -1),
216+
("grpc.max_receive_message_length", -1),
217+
),
210218
)
211219

212220
# Run the base constructor.

synth.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@
7777
unit_test_dependencies=optional_deps,
7878
cov_level=95,
7979
)
80-
s.move(templated_files, excludes=[".coveragerc"]) # microgenerator has a good .coveragerc file
80+
s.move(
81+
templated_files, excludes=[".coveragerc"]
82+
) # microgenerator has a good .coveragerc file
8183

8284

8385
# ----------------------------------------------------------------------------
@@ -94,13 +96,51 @@
9496
'\g<0>\n\n session.install("google-cloud-bigquery")',
9597
)
9698

99+
# Remove client-side validation of message length.
100+
# https://github.com/googleapis/python-bigquery-storage/issues/78
101+
s.replace(
102+
[
103+
"google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py",
104+
"google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py",
105+
],
106+
(
107+
r"type\(self\).create_channel\(\s*"
108+
r"host,\s*"
109+
r"credentials=credentials,\s*"
110+
r"credentials_file=credentials_file,\s*"
111+
r"ssl_credentials=ssl_[a-z_]*credentials,\s*"
112+
r"scopes=scopes or self.AUTH_SCOPES,\s*"
113+
r"quota_project_id=quota_project_id"
114+
),
115+
"""\g<0>,
116+
options=(
117+
('grpc.max_send_message_length', -1),
118+
('grpc.max_receive_message_length', -1)
119+
)""",
120+
)
121+
s.replace(
122+
"tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py",
123+
(
124+
r"grpc_create_channel\.assert_called_once_with\([^()]+"
125+
r"scopes=\([^()]+\),\s*"
126+
r"ssl_credentials=[a-z_]+,\s*"
127+
r"quota_project_id=None"
128+
),
129+
"""\g<0>,
130+
options=(
131+
('grpc.max_send_message_length', -1),
132+
('grpc.max_receive_message_length', -1)
133+
)""",
134+
)
135+
136+
97137
# We don't want the generated client to be accessible through
98138
# "google.cloud.bigquery_storage", replace it with the hand written client that
99139
# wraps it.
100140
s.replace(
101141
"google/cloud/bigquery_storage/__init__.py",
102142
r"from google\.cloud\.bigquery_storage_v1\.services.big_query_read.client import",
103-
"from google.cloud.bigquery_storage_v1 import"
143+
"from google.cloud.bigquery_storage_v1 import",
104144
)
105145

106146
# We also don't want to expose the async client just yet, at least not until
@@ -115,7 +155,7 @@
115155
)
116156
s.replace(
117157
"google/cloud/bigquery_storage/__init__.py",
118-
r"""["']BigQueryReadAsyncClient["'],\n""",
158+
r"""["']BigQueryReadAsyncClient["'],\n""",
119159
"",
120160
)
121161

@@ -133,11 +173,7 @@
133173
s.replace(
134174
"google/cloud/bigquery_storage/__init__.py",
135175
r"""["']ArrowRecordBatch["']""",
136-
(
137-
'"__version__",\n'
138-
' "types",\n'
139-
" \g<0>"
140-
),
176+
('"__version__",\n' ' "types",\n' " \g<0>"),
141177
)
142178

143179
# We want to expose all types through "google.cloud.bigquery_storage.types",
@@ -190,9 +226,7 @@
190226
),
191227
)
192228
s.replace(
193-
"noxfile.py",
194-
r'--cov=tests\.unit',
195-
'--cov=tests/unit',
229+
"noxfile.py", r"--cov=tests\.unit", "--cov=tests/unit",
196230
)
197231

198232
# TODO(busunkim): Use latest sphinx after microgenerator transition

tests/system/v1/test_reader_v1.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,3 +461,31 @@ def test_resuming_read_from_offset(
461461
expected_len = 164656 # total rows in shakespeare table
462462
actual_len = remaining_rows_count + some_rows.row_count + more_rows.row_count
463463
assert actual_len == expected_len
464+
465+
466+
def test_read_rows_to_dataframe_with_wide_table(client, project_id):
467+
# Use a wide table to boost the chance of getting a large message size.
468+
# https://github.com/googleapis/python-bigquery-storage/issues/78
469+
read_session = types.ReadSession()
470+
read_session.table = "projects/{}/datasets/{}/tables/{}".format(
471+
"bigquery-public-data", "geo_census_tracts", "us_census_tracts_national"
472+
)
473+
read_session.data_format = types.DataFormat.ARROW
474+
475+
session = client.create_read_session(
476+
request={
477+
"parent": "projects/{}".format(project_id),
478+
"read_session": read_session,
479+
"max_stream_count": 1,
480+
}
481+
)
482+
483+
stream = session.streams[0].name
484+
485+
read_rows_stream = client.read_rows(stream)
486+
487+
# fetch only the first page (batch) of rows
488+
pages_iter = iter(read_rows_stream.rows(session).pages)
489+
some_rows = next(pages_iter)
490+
491+
assert all(len(row["tract_geom"].as_py()) > 0 for row in some_rows)

tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,10 @@ def test_big_query_read_transport_channel_mtls_with_client_cert_source(transport
12321232
),
12331233
ssl_credentials=mock_ssl_cred,
12341234
quota_project_id=None,
1235+
options=(
1236+
("grpc.max_send_message_length", -1),
1237+
("grpc.max_receive_message_length", -1),
1238+
),
12351239
)
12361240
assert transport.grpc_channel == mock_grpc_channel
12371241

@@ -1273,6 +1277,10 @@ def test_big_query_read_transport_channel_mtls_with_adc(transport_class):
12731277
),
12741278
ssl_credentials=mock_ssl_cred,
12751279
quota_project_id=None,
1280+
options=(
1281+
("grpc.max_send_message_length", -1),
1282+
("grpc.max_receive_message_length", -1),
1283+
),
12761284
)
12771285
assert transport.grpc_channel == mock_grpc_channel
12781286

0 commit comments

Comments
 (0)