diff --git a/python/Makefile b/python/Makefile index 3408479..ea8fceb 100644 --- a/python/Makefile +++ b/python/Makefile @@ -33,8 +33,8 @@ test-glue: .PHONY: lint-hive lint-hive: - uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py - uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py + uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py + uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py .PHONY: install-hive install-hive: @@ -42,7 +42,7 @@ install-hive: .PHONY: test-hive test-hive: - uv run pytest tests/test_hive2.py tests/test_hive3.py + uv run pytest tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py # ============================================================================ # Unity diff --git a/python/src/lance_namespace_impls/hive2.py b/python/src/lance_namespace_impls/hive2.py index 1e7bdcc..132ed81 100644 --- a/python/src/lance_namespace_impls/hive2.py +++ b/python/src/lance_namespace_impls/hive2.py @@ -85,6 +85,7 @@ ) from lance_namespace_impls.rest_client import InvalidInputException +from lance_namespace_impls.hive2_page_util import apply_list_tables_pagination logger = logging.getLogger(__name__) @@ -330,7 +331,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: try: # Root namespace has no tables if self._is_root_namespace(request.id): - return ListTablesResponse(tables=[]) + return ListTablesResponse(tables=[], page_token=None) if len(request.id) != 1: raise ValueError(f"Invalid namespace identifier: {request.id}") @@ -357,7 +358,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: # Skip tables we can't read continue - return ListTablesResponse(tables=tables) + tables.sort() + page_items, next_token = apply_list_tables_pagination( + tables, request.page_token, request.limit + ) + return ListTablesResponse(tables=page_items, page_token=next_token) except Exception as e: if NoSuchObjectException and isinstance(e, NoSuchObjectException): raise ValueError(f"Namespace {request.id} does not exist") diff --git a/python/src/lance_namespace_impls/hive2_page_util.py b/python/src/lance_namespace_impls/hive2_page_util.py new file mode 100644 index 0000000..a159e3a --- /dev/null +++ b/python/src/lance_namespace_impls/hive2_page_util.py @@ -0,0 +1,45 @@ +""" +List-tables pagination aligned with Java ``PageUtil`` (Hive2 ``listTables``). +""" + +from __future__ import annotations + +from typing import List, Optional, Tuple + +DEFAULT_LIST_TABLES_PAGE_SIZE = 20 + + +def normalize_list_tables_page_size(limit: Optional[int]) -> int: + if limit is None or limit <= 0: + return DEFAULT_LIST_TABLES_PAGE_SIZE + return limit + + +def split_list_tables_page( + items: List[str], page_token: Optional[str], page_size: int +) -> Tuple[List[str], Optional[str]]: + """Slice sorted identifiers using the same rules as Java ``PageUtil.splitPage``.""" + start_index = 0 + if page_token: + try: + start_index = int(page_token) + except ValueError: + start_index = 0 + + if start_index >= len(items): + return [], None + + end_index = min(start_index + page_size, len(items)) + page_items = items[start_index:end_index] + next_token = str(end_index) if end_index < len(items) else None + return page_items, next_token + + +def apply_list_tables_pagination( + sorted_table_names: List[str], + page_token: Optional[str], + limit: Optional[int], +) -> Tuple[List[str], Optional[str]]: + """Apply default page size and token slicing (Hive2 ``listTables``).""" + page_size = normalize_list_tables_page_size(limit) + return split_list_tables_page(sorted_table_names, page_token, page_size) diff --git a/python/tests/test_hive2.py b/python/tests/test_hive2.py index 01c9cae..3dc5f3d 100644 --- a/python/tests/test_hive2.py +++ b/python/tests/test_hive2.py @@ -190,10 +190,43 @@ def test_list_tables(self, hive_namespace, mock_hive_client): request = ListTablesRequest(id=["test_db"]) response = hive_namespace.list_tables(request) - # Should only return Lance table names + # Should only return Lance table names (sorted, single page) assert response.tables == ["table1", "table3"] + assert response.page_token is None mock_client_instance.get_all_tables.assert_called_once_with("test_db") + def test_list_tables_with_pagination(self, hive_namespace, mock_hive_client): + """``list_tables`` uses the same token/limit rules as Java ``PageUtil``.""" + names = [f"t{i:02d}" for i in range(5)] + mock_tables = [] + for _ in names: + mt = MagicMock() + mt.parameters = {"table_type": "lance"} + mock_tables.append(mt) + + mock_client_instance = MagicMock() + mock_client_instance.get_all_tables.return_value = names + mock_client_instance.get_table.side_effect = mock_tables + mock_hive_client.__enter__.return_value = mock_client_instance + + r1 = hive_namespace.list_tables( + ListTablesRequest(id=["db"], limit=2, page_token=None) + ) + assert r1.tables == ["t00", "t01"] + assert r1.page_token == "2" + + r2 = hive_namespace.list_tables( + ListTablesRequest(id=["db"], limit=2, page_token="2") + ) + assert r2.tables == ["t02", "t03"] + assert r2.page_token == "4" + + r3 = hive_namespace.list_tables( + ListTablesRequest(id=["db"], limit=2, page_token="4") + ) + assert r3.tables == ["t04"] + assert r3.page_token is None + def test_describe_table(self, hive_namespace, mock_hive_client): """Test describing a table returns location only. @@ -272,6 +305,7 @@ def test_root_namespace_operations(self, hive_namespace): request = ListTablesRequest(id=[]) response = hive_namespace.list_tables(request) assert response.tables == [] + assert response.page_token is None # Test create_namespace for root (should fail) request = CreateNamespaceRequest(id=[]) diff --git a/python/tests/test_hive2_page_util.py b/python/tests/test_hive2_page_util.py new file mode 100644 index 0000000..f1925ba --- /dev/null +++ b/python/tests/test_hive2_page_util.py @@ -0,0 +1,70 @@ +""" +Unit tests for Hive2 list-tables pagination (Java PageUtil parity). + +Loaded via file path so the suite does not import ``lance_namespace_impls`` package +``__init__`` (which pulls in Lance / JVM-dependent modules). +""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + + +def _load_hive2_page_util(): + root = Path(__file__).resolve().parents[1] + path = root / "src/lance_namespace_impls/hive2_page_util.py" + spec = importlib.util.spec_from_file_location("hive2_page_util", path) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +pu = _load_hive2_page_util() + + +class TestHive2PageUtil: + def test_normalize_page_size(self): + assert pu.normalize_list_tables_page_size(None) == pu.DEFAULT_LIST_TABLES_PAGE_SIZE + assert pu.normalize_list_tables_page_size(0) == pu.DEFAULT_LIST_TABLES_PAGE_SIZE + assert pu.normalize_list_tables_page_size(-1) == pu.DEFAULT_LIST_TABLES_PAGE_SIZE + assert pu.normalize_list_tables_page_size(50) == 50 + + def test_split_page_first_and_next_token(self): + items = ["a", "b", "c", "d"] + page, token = pu.split_list_tables_page(items, None, 2) + assert page == ["a", "b"] + assert token == "2" + + page2, token2 = pu.split_list_tables_page(items, "2", 2) + assert page2 == ["c", "d"] + assert token2 is None + + def test_split_page_invalid_token_resets_to_start(self): + items = ["x", "y"] + page, token = pu.split_list_tables_page(items, "not-an-int", 1) + assert page == ["x"] + assert token == "1" + + def test_split_page_start_beyond_end(self): + page, token = pu.split_list_tables_page(["only"], "99", 10) + assert page == [] + assert token is None + + def test_apply_pagination_multi_page(self): + names = [f"t{i:02d}" for i in range(5)] + r1 = pu.apply_list_tables_pagination(names, None, 2) + assert r1 == (["t00", "t01"], "2") + + r2 = pu.apply_list_tables_pagination(names, "2", 2) + assert r2 == (["t02", "t03"], "4") + + r3 = pu.apply_list_tables_pagination(names, "4", 2) + assert r3 == (["t04"], None) + + def test_default_limit_full_single_page(self): + names = ["a", "b"] + page, token = pu.apply_list_tables_pagination(names, None, None) + assert page == names + assert token is None