Skip to content

Commit 384724c

Browse files
authored
feat: support row_range in sample_row_keys method (googleapis#17330)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/google-cloud-python/issues) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)

Fixes googleapis#17329
1 parent 568b2ea commit 384724c

8 files changed

Lines changed: 132 additions & 6 deletions

File tree

packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@
8484
from google.cloud.bigtable.data.execute_query.values import ExecuteQueryValueType
8585
from google.cloud.bigtable.data.mutations import Mutation, RowMutationEntry
8686
from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule
87-
from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery
87+
from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery, RowRange
8888
from google.cloud.bigtable.data.row import Row
8989
from google.cloud.bigtable.data.row_filters import (
9090
CellsRowLimitFilter,
@@ -1412,6 +1412,7 @@ async def row_exists(
14121412
async def sample_row_keys(
14131413
self,
14141414
*,
1415+
row_range: RowRange | None = None,
14151416
operation_timeout: float | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT,
14161417
attempt_timeout: float | None | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT,
14171418
retryable_errors: Sequence[type[Exception]]
@@ -1429,6 +1430,8 @@ async def sample_row_keys(
14291430
row_keys, along with offset positions in the table
14301431
14311432
Args:
1433+
row_range: the range of rows to sample. If not provided, samples the
1434+
entire table.
14321435
operation_timeout: the time budget for the entire operation, in seconds.
14331436
Failed requests will be retried within the budget.
14341437
Defaults to the Table's default_operation_timeout
@@ -1466,7 +1469,9 @@ async def sample_row_keys(
14661469
async def execute_rpc():
14671470
results = await self.client._gapic_client.sample_row_keys(
14681471
request=SampleRowKeysRequest(
1469-
app_profile_id=self.app_profile_id, **self._request_path
1472+
app_profile_id=self.app_profile_id,
1473+
row_range=row_range._to_pb() if row_range is not None else None,
1474+
**self._request_path,
14701475
),
14711476
timeout=next(attempt_timeout_gen),
14721477
retry=None,

packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@
8585
from google.cloud.bigtable.data.execute_query.values import ExecuteQueryValueType
8686
from google.cloud.bigtable.data.mutations import Mutation, RowMutationEntry
8787
from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule
88-
from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery
88+
from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery, RowRange
8989
from google.cloud.bigtable.data.row import Row
9090
from google.cloud.bigtable.data.row_filters import (
9191
CellsRowLimitFilter,
@@ -1160,6 +1160,7 @@ def row_exists(
11601160
def sample_row_keys(
11611161
self,
11621162
*,
1163+
row_range: RowRange | None = None,
11631164
operation_timeout: float | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT,
11641165
attempt_timeout: float | None | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT,
11651166
retryable_errors: Sequence[type[Exception]]
@@ -1176,6 +1177,8 @@ def sample_row_keys(
11761177
row_keys, along with offset positions in the table
11771178
11781179
Args:
1180+
row_range: the range of rows to sample. If not provided, samples the
1181+
entire table.
11791182
operation_timeout: the time budget for the entire operation, in seconds.
11801183
Failed requests will be retried within the budget.
11811184
Defaults to the Table's default_operation_timeout
@@ -1208,7 +1211,9 @@ def sample_row_keys(
12081211
def execute_rpc():
12091212
results = self.client._gapic_client.sample_row_keys(
12101213
request=SampleRowKeysRequest(
1211-
app_profile_id=self.app_profile_id, **self._request_path
1214+
app_profile_id=self.app_profile_id,
1215+
row_range=row_range._to_pb() if row_range is not None else None,
1216+
**self._request_path,
12121217
),
12131218
timeout=next(attempt_timeout_gen),
12141219
retry=None,

packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_async.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,11 @@ async def SampleRowKeys(self, request, **kwargs):
250250
kwargs["operation_timeout"] = (
251251
kwargs.get("operation_timeout", self.per_operation_timeout) or 20
252252
)
253-
result = CrossSync.rm_aio(await table.sample_row_keys(**kwargs))
253+
row_range = None
254+
if "row_range" in request:
255+
from google.cloud.bigtable.data.read_rows_query import RowRange
256+
row_range = RowRange._from_dict(request["row_range"])
257+
result = CrossSync.rm_aio(await table.sample_row_keys(row_range=row_range, **kwargs))
254258
return result
255259

256260
@error_safe

packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_sync_autogen.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,12 @@ async def SampleRowKeys(self, request, **kwargs):
187187
kwargs["operation_timeout"] = (
188188
kwargs.get("operation_timeout", self.per_operation_timeout) or 20
189189
)
190-
result = table.sample_row_keys(**kwargs)
190+
row_range = None
191+
if "row_range" in request:
192+
from google.cloud.bigtable.data.read_rows_query import RowRange
193+
194+
row_range = RowRange._from_dict(request["row_range"])
195+
result = table.sample_row_keys(row_range=row_range, **kwargs)
191196
return result
192197

193198
@error_safe

packages/google-cloud-bigtable/tests/system/data/test_system_async.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,40 @@ async def test_sample_row_keys(
346346
assert results[-1][0] == b""
347347
assert isinstance(results[-1][1], int)
348348

349+
@pytest.mark.skipif(
350+
bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't use splits"
351+
)
352+
@pytest.mark.usefixtures("client")
353+
@pytest.mark.usefixtures("target")
354+
@CrossSync.Retry(
355+
predicate=retry.if_exception_type(ClientError), initial=1, maximum=5
356+
)
357+
@CrossSync.pytest
358+
async def test_sample_row_keys_w_row_range(
359+
self, client, target, column_split_config
360+
):
361+
"""
362+
Sample keys with row range should return samples within the range,
363+
with the last key matching the end of the range.
364+
"""
365+
if len(column_split_config) < 4:
366+
pytest.skip("Not enough splits in column_split_config for this test")
367+
368+
from google.cloud.bigtable.data import RowRange
369+
370+
start_key = column_split_config[1]
371+
end_key = column_split_config[3]
372+
row_range = RowRange(start_key=start_key, end_key=end_key)
373+
374+
results = await target.sample_row_keys(row_range=row_range)
375+
assert len(results) == 2
376+
377+
assert results[0][0] == column_split_config[2]
378+
assert results[1][0] == column_split_config[3]
379+
380+
for _, offset in results:
381+
assert isinstance(offset, int)
382+
349383
@pytest.mark.usefixtures("client")
350384
@pytest.mark.usefixtures("target")
351385
@CrossSync.pytest

packages/google-cloud-bigtable/tests/system/data/test_system_autogen.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -270,6 +270,31 @@ def test_sample_row_keys(self, client, target, temp_rows, column_split_config):
270270
assert results[-1][0] == b""
271271
assert isinstance(results[-1][1], int)
272272

273+
@pytest.mark.skipif(
274+
bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't use splits"
275+
)
276+
@pytest.mark.usefixtures("client")
277+
@pytest.mark.usefixtures("target")
278+
@CrossSync._Sync_Impl.Retry(
279+
predicate=retry.if_exception_type(ClientError), initial=1, maximum=5
280+
)
281+
def test_sample_row_keys_w_row_range(self, client, target, column_split_config):
282+
"""Sample keys with row range should return samples within the range,
283+
with the last key matching the end of the range."""
284+
if len(column_split_config) < 4:
285+
pytest.skip("Not enough splits in column_split_config for this test")
286+
from google.cloud.bigtable.data import RowRange
287+
288+
start_key = column_split_config[1]
289+
end_key = column_split_config[3]
290+
row_range = RowRange(start_key=start_key, end_key=end_key)
291+
results = target.sample_row_keys(row_range=row_range)
292+
assert len(results) == 2
293+
assert results[0][0] == column_split_config[2]
294+
assert results[1][0] == column_split_config[3]
295+
for _, offset in results:
296+
assert isinstance(offset, int)
297+
273298
@pytest.mark.usefixtures("client")
274299
@pytest.mark.usefixtures("target")
275300
def test_bulk_mutations_set_cell(self, client, target, temp_rows):

packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2451,6 +2451,32 @@ async def test_sample_row_keys(self):
24512451
assert result[1] == samples[1]
24522452
assert result[2] == samples[2]
24532453

2454+
@CrossSync.pytest
2455+
async def test_sample_row_keys_w_row_range(self):
2456+
"""
2457+
Test that method returns the expected key samples when row_range is provided
2458+
"""
2459+
samples = [
2460+
(b"a_key1", 100),
2461+
(b"b", 200),
2462+
]
2463+
from google.cloud.bigtable.data import RowRange
2464+
2465+
row_range = RowRange(start_key=b"a", end_key=b"b")
2466+
async with self._make_client() as client:
2467+
async with client.get_table("instance", "table") as table:
2468+
with mock.patch.object(
2469+
table.client._gapic_client, "sample_row_keys", CrossSync.Mock()
2470+
) as sample_row_keys:
2471+
sample_row_keys.return_value = self._make_gapic_stream(samples)
2472+
result = await table.sample_row_keys(row_range=row_range)
2473+
assert len(result) == 2
2474+
assert result[0] == samples[0]
2475+
assert result[1] == samples[1]
2476+
sample_row_keys.assert_called_once()
2477+
called_request = sample_row_keys.call_args[1]["request"]
2478+
assert called_request.row_range == row_range._to_pb()
2479+
24542480
@CrossSync.pytest
24552481
async def test_sample_row_keys_bad_timeout(self):
24562482
"""

packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2030,6 +2030,28 @@ def test_sample_row_keys(self):
20302030
assert result[1] == samples[1]
20312031
assert result[2] == samples[2]
20322032

2033+
def test_sample_row_keys_w_row_range(self):
2034+
"""Test that method returns the expected key samples when row_range is provided"""
2035+
samples = [(b"a_key1", 100), (b"b", 200)]
2036+
from google.cloud.bigtable.data import RowRange
2037+
2038+
row_range = RowRange(start_key=b"a", end_key=b"b")
2039+
with self._make_client() as client:
2040+
with client.get_table("instance", "table") as table:
2041+
with mock.patch.object(
2042+
table.client._gapic_client,
2043+
"sample_row_keys",
2044+
CrossSync._Sync_Impl.Mock(),
2045+
) as sample_row_keys:
2046+
sample_row_keys.return_value = self._make_gapic_stream(samples)
2047+
result = table.sample_row_keys(row_range=row_range)
2048+
assert len(result) == 2
2049+
assert result[0] == samples[0]
2050+
assert result[1] == samples[1]
2051+
sample_row_keys.assert_called_once()
2052+
called_request = sample_row_keys.call_args[1]["request"]
2053+
assert called_request.row_range == row_range._to_pb()
2054+
20332055
def test_sample_row_keys_bad_timeout(self):
20342056
"""should raise error if timeout is negative"""
20352057
with self._make_client() as client:

0 commit comments

Comments
 (0)