Skip to content

Commit 0e6abd6

Browse files
author
zhanghaobo@kanzhun.com
committed
feat: hive2 namespace listTables in python supports pagination
1 parent c7cd761 commit 0e6abd6

5 files changed

Lines changed: 168 additions & 7 deletions

File tree

python/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@ test-glue:
3333

3434
.PHONY: lint-hive
3535
lint-hive:
36-
uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py
37-
uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py
36+
uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py
37+
uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py
3838

3939
.PHONY: install-hive
4040
install-hive:
4141
uv sync --extra hive2 --extra hive3 --extra dev
4242

4343
.PHONY: test-hive
4444
test-hive:
45-
uv run pytest tests/test_hive2.py tests/test_hive3.py
45+
uv run pytest tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py
4646

4747
# ============================================================================
4848
# Unity

python/src/lance_namespace_impls/hive2.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
)
8686

8787
from lance_namespace_impls.rest_client import InvalidInputException
88+
from lance_namespace_impls.hive2_page_util import apply_list_tables_pagination
8889

8990
logger = logging.getLogger(__name__)
9091

@@ -326,11 +327,15 @@ def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse
326327
raise
327328

328329
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
329-
"""List tables in a database."""
330+
"""List tables in a database.
331+
332+
Pagination matches the Java Hive2Namespace implementation, ``limit`` defaults to 100 when unset
333+
or non-positive.
334+
"""
330335
try:
331336
# Root namespace has no tables
332337
if self._is_root_namespace(request.id):
333-
return ListTablesResponse(tables=[])
338+
return ListTablesResponse(tables=[], page_token=None)
334339

335340
if len(request.id) != 1:
336341
raise ValueError(f"Invalid namespace identifier: {request.id}")
@@ -357,7 +362,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
357362
# Skip tables we can't read
358363
continue
359364

360-
return ListTablesResponse(tables=tables)
365+
tables.sort()
366+
page_items, next_token = apply_list_tables_pagination(
367+
tables, request.page_token, request.limit
368+
)
369+
return ListTablesResponse(tables=page_items, page_token=next_token)
361370
except Exception as e:
362371
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
363372
raise ValueError(f"Namespace {request.id} does not exist")
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""
2+
List-tables pagination aligned with Java ``PageUtil`` (Hive2 ``listTables``).
3+
"""
4+
5+
from __future__ import annotations
6+
7+
from typing import List, Optional, Tuple
8+
9+
DEFAULT_LIST_TABLES_PAGE_SIZE = 100
10+
11+
12+
def normalize_list_tables_page_size(limit: Optional[int]) -> int:
    """Return the effective page size for a list-tables request.

    A missing (``None``) or non-positive ``limit`` falls back to
    ``DEFAULT_LIST_TABLES_PAGE_SIZE``; any positive value is returned as is.
    """
    use_default = limit is None or limit <= 0
    return DEFAULT_LIST_TABLES_PAGE_SIZE if use_default else limit
16+
17+
18+
def split_list_tables_page(
    items: List[str], page_token: Optional[str], page_size: int
) -> Tuple[List[str], Optional[str]]:
    """Slice an ordered list of identifiers into a single page.

    The token/offset rules are intended to mirror Java ``PageUtil.splitPage``.

    Args:
        items: Full, already-ordered list of identifiers to paginate.
        page_token: Start offset (as a decimal string) returned by a previous
            call, or ``None``/empty for the first page. A token that is not an
            integer, or that is negative, restarts from the beginning.
        page_size: Maximum number of items in the returned page. Expected to
            be positive; non-positive values are not validated here.

    Returns:
        ``(page_items, next_token)`` where ``next_token`` is the start offset
        of the following page as a string, or ``None`` when nothing remains.
    """
    start_index = 0
    if page_token:
        try:
            start_index = int(page_token)
        except ValueError:
            # Unparseable tokens are treated as "start from the first item".
            start_index = 0

    # A negative offset would be interpreted by slicing as "count from the
    # end", returning the wrong items and a token that skips entries. Handle
    # it like any other invalid token.
    if start_index < 0:
        start_index = 0

    if start_index >= len(items):
        return [], None

    end_index = min(start_index + page_size, len(items))
    page_items = items[start_index:end_index]
    # Only hand out a token when there is something left to fetch.
    next_token = str(end_index) if end_index < len(items) else None
    return page_items, next_token
36+
37+
38+
def apply_list_tables_pagination(
    sorted_table_names: List[str],
    page_token: Optional[str],
    limit: Optional[int],
) -> Tuple[List[str], Optional[str]]:
    """Return one page of table names plus the token for the following page.

    ``limit`` is first normalized by ``normalize_list_tables_page_size``; the
    resulting page size is then passed to ``split_list_tables_page`` together
    with the caller's ``page_token`` (Hive2 ``listTables``).
    """
    return split_list_tables_page(
        sorted_table_names,
        page_token,
        normalize_list_tables_page_size(limit),
    )

python/tests/test_hive2.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,43 @@ def test_list_tables(self, hive_namespace, mock_hive_client):
190190
request = ListTablesRequest(id=["test_db"])
191191
response = hive_namespace.list_tables(request)
192192

193-
# Should only return Lance table names
193+
# Should only return Lance table names (sorted, single page)
194194
assert response.tables == ["table1", "table3"]
195+
assert response.page_token is None
195196
mock_client_instance.get_all_tables.assert_called_once_with("test_db")
196197

198+
def test_list_tables_with_pagination(self, hive_namespace, mock_hive_client):
    """``list_tables`` uses the same token/limit rules as Java ``PageUtil``.

    Walks five Lance tables two at a time and checks the page contents and
    the token returned for each of the three pages.
    """
    names = [f"t{i:02d}" for i in range(5)]

    # Every table is reported as a Lance table. A single ``return_value`` is
    # used instead of a list-valued ``side_effect``: such a list yields one
    # element per ``get_table`` call and raises ``StopIteration`` once it is
    # exhausted, and ``list_tables`` is invoked three times below.
    # NOTE(review): assumes list_tables calls get_table for each table name on
    # every invocation -- confirm against the implementation.
    lance_table = MagicMock()
    lance_table.parameters = {"table_type": "lance"}

    mock_client_instance = MagicMock()
    mock_client_instance.get_all_tables.return_value = names
    mock_client_instance.get_table.return_value = lance_table
    mock_hive_client.__enter__.return_value = mock_client_instance

    # First page: no token supplied.
    r1 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token=None)
    )
    assert r1.tables == ["t00", "t01"]
    assert r1.page_token == "2"

    # Second page: resume from the token returned by the first page.
    r2 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token="2")
    )
    assert r2.tables == ["t02", "t03"]
    assert r2.page_token == "4"

    # Final page: one table left and no further token.
    r3 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token="4")
    )
    assert r3.tables == ["t04"]
    assert r3.page_token is None
229+
197230
def test_describe_table(self, hive_namespace, mock_hive_client):
198231
"""Test describing a table returns location only.
199232
@@ -272,6 +305,7 @@ def test_root_namespace_operations(self, hive_namespace):
272305
request = ListTablesRequest(id=[])
273306
response = hive_namespace.list_tables(request)
274307
assert response.tables == []
308+
assert response.page_token is None
275309

276310
# Test create_namespace for root (should fail)
277311
request = CreateNamespaceRequest(id=[])
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright The Lance Authors
3+
4+
"""
5+
Unit tests for Hive2 list-tables pagination (Java PageUtil parity).
6+
7+
Loaded via file path so the suite does not import ``lance_namespace_impls`` package
8+
``__init__`` (which pulls in Lance / JVM-dependent modules).
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import importlib.util
14+
from pathlib import Path
15+
16+
17+
def _load_hive2_page_util():
    """Load ``hive2_page_util.py`` directly from its file path.

    The path is resolved relative to the parent of this test file's
    directory, and the module is executed from that file rather than being
    imported through its package.
    """
    project_root = Path(__file__).resolve().parents[1]
    module_path = project_root / "src/lance_namespace_impls/hive2_page_util.py"
    module_spec = importlib.util.spec_from_file_location(
        "hive2_page_util", module_path
    )
    assert module_spec is not None and module_spec.loader is not None
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
25+
26+
27+
pu = _load_hive2_page_util()
28+
29+
30+
class TestHive2PageUtil:
    """Checks for the pagination helpers exposed by the loaded ``pu`` module."""

    def test_normalize_page_size(self):
        """``None``, zero and negative limits fall back to the default size."""
        default_size = pu.DEFAULT_LIST_TABLES_PAGE_SIZE
        for unusable_limit in (None, 0, -1):
            assert pu.normalize_list_tables_page_size(unusable_limit) == default_size
        assert pu.normalize_list_tables_page_size(50) == 50

    def test_split_page_first_and_next_token(self):
        """Two pages of two items; only the first page carries a token."""
        letters = ["a", "b", "c", "d"]

        first_page, first_token = pu.split_list_tables_page(letters, None, 2)
        assert first_page == ["a", "b"]
        assert first_token == "2"

        second_page, second_token = pu.split_list_tables_page(letters, "2", 2)
        assert second_page == ["c", "d"]
        assert second_token is None

    def test_split_page_invalid_token_resets_to_start(self):
        """A non-integer token is handled as if no token had been given."""
        page, token = pu.split_list_tables_page(["x", "y"], "not-an-int", 1)
        assert page == ["x"]
        assert token == "1"

    def test_split_page_start_beyond_end(self):
        """An offset past the last item yields an empty page and no token."""
        page, token = pu.split_list_tables_page(["only"], "99", 10)
        assert page == []
        assert token is None

    def test_apply_pagination_multi_page(self):
        """Five names with a limit of two are served as three pages."""
        names = [f"t{i:02d}" for i in range(5)]

        page1, token1 = pu.apply_list_tables_pagination(names, None, 2)
        assert (page1, token1) == (["t00", "t01"], "2")

        page2, token2 = pu.apply_list_tables_pagination(names, "2", 2)
        assert (page2, token2) == (["t02", "t03"], "4")

        page3, token3 = pu.apply_list_tables_pagination(names, "4", 2)
        assert (page3, token3) == (["t04"], None)

    def test_default_limit_full_single_page(self):
        """With no limit, a short list fits in one page without a token."""
        names = ["a", "b"]
        page, token = pu.apply_list_tables_pagination(names, None, None)
        assert page == names
        assert token is None

0 commit comments

Comments
 (0)