Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ test-glue:

.PHONY: lint-hive
lint-hive:
uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py
uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py
uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py
uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py src/lance_namespace_impls/hive2_page_util.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py tests/test_hive2_integration.py tests/test_hive3_integration.py

.PHONY: install-hive
install-hive:
uv sync --extra hive2 --extra hive3 --extra dev

.PHONY: test-hive
test-hive:
uv run pytest tests/test_hive2.py tests/test_hive3.py
uv run pytest tests/test_hive2.py tests/test_hive3.py tests/test_hive2_page_util.py

# ============================================================================
# Unity
Expand Down
9 changes: 7 additions & 2 deletions python/src/lance_namespace_impls/hive2.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
)

from lance_namespace_impls.rest_client import InvalidInputException
from lance_namespace_impls.hive2_page_util import apply_list_tables_pagination

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -330,7 +331,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
try:
# Root namespace has no tables
if self._is_root_namespace(request.id):
return ListTablesResponse(tables=[])
return ListTablesResponse(tables=[], page_token=None)

if len(request.id) != 1:
raise ValueError(f"Invalid namespace identifier: {request.id}")
Expand All @@ -357,7 +358,11 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
# Skip tables we can't read
continue

return ListTablesResponse(tables=tables)
tables.sort()
page_items, next_token = apply_list_tables_pagination(
tables, request.page_token, request.limit
)
return ListTablesResponse(tables=page_items, page_token=next_token)
except Exception as e:
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
raise ValueError(f"Namespace {request.id} does not exist")
Expand Down
45 changes: 45 additions & 0 deletions python/src/lance_namespace_impls/hive2_page_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
List-tables pagination aligned with Java ``PageUtil`` (Hive2 ``listTables``).
"""

from __future__ import annotations

from typing import List, Optional, Tuple

DEFAULT_LIST_TABLES_PAGE_SIZE = 20


def normalize_list_tables_page_size(limit: Optional[int]) -> int:
    """Return the effective page size for a list-tables request.

    A missing (``None``) or non-positive ``limit`` falls back to
    ``DEFAULT_LIST_TABLES_PAGE_SIZE``; any positive ``limit`` is returned
    unchanged.
    """
    return (
        DEFAULT_LIST_TABLES_PAGE_SIZE if (limit is None or limit <= 0) else limit
    )


def split_list_tables_page(
    items: List[str], page_token: Optional[str], page_size: int
) -> Tuple[List[str], Optional[str]]:
    """Slice identifiers into a single page using an offset-style token.

    The token scheme follows what this module documents as aligned with Java
    ``PageUtil.splitPage``: the token is the decimal start offset into
    ``items``.

    Args:
        items: The full list of identifiers to paginate. The caller is
            responsible for ordering it consistently between calls.
        page_token: Start offset (as a string) returned by a previous call, or
            ``None``/empty for the first page. Tokens that do not parse as an
            integer, or that parse to a negative offset, restart from the
            beginning.
        page_size: Maximum number of identifiers in the returned page. Expected
            to be positive; this function does not normalize it.

    Returns:
        A ``(page_items, next_token)`` tuple. ``next_token`` is the start
        offset of the following page as a string, or ``None`` when the
        returned page reaches the end of ``items`` (or the offset is already
        past the end).
    """
    start_index = 0
    if page_token:
        try:
            start_index = int(page_token)
        except ValueError:
            # Unparseable token: fall back to the first page.
            start_index = 0

    # A negative offset is never emitted by this function. Without this guard
    # Python's negative indexing would slice from the END of the list and emit
    # a negative next token, so treat it like any other invalid token.
    if start_index < 0:
        start_index = 0

    total = len(items)
    if start_index >= total:
        return [], None

    end_index = min(start_index + page_size, total)
    page_items = items[start_index:end_index]
    # Only hand out a continuation token when something is left to read.
    next_token = str(end_index) if end_index < total else None
    return page_items, next_token


def apply_list_tables_pagination(
    sorted_table_names: List[str],
    page_token: Optional[str],
    limit: Optional[int],
) -> Tuple[List[str], Optional[str]]:
    """Paginate table names for Hive2 ``listTables``.

    Normalizes ``limit`` into an effective page size via
    ``normalize_list_tables_page_size`` and then delegates the token-based
    slicing to ``split_list_tables_page``.

    Returns:
        The ``(page_items, next_token)`` tuple produced by
        ``split_list_tables_page``.
    """
    return split_list_tables_page(
        sorted_table_names,
        page_token,
        normalize_list_tables_page_size(limit),
    )
36 changes: 35 additions & 1 deletion python/tests/test_hive2.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,43 @@ def test_list_tables(self, hive_namespace, mock_hive_client):
request = ListTablesRequest(id=["test_db"])
response = hive_namespace.list_tables(request)

# Should only return Lance table names
# Should only return Lance table names (sorted, single page)
assert response.tables == ["table1", "table3"]
assert response.page_token is None
mock_client_instance.get_all_tables.assert_called_once_with("test_db")

def test_list_tables_with_pagination(self, hive_namespace, mock_hive_client):
    """``list_tables`` pages through sorted table names using limit/page_token.

    Five Lance tables are listed with ``limit=2``; the three successive pages
    must be ``[t00, t01]``, ``[t02, t03]`` and ``[t04]`` with tokens ``"2"``,
    ``"4"`` and ``None``.
    """
    names = [f"t{i:02d}" for i in range(5)]

    lance_table = MagicMock()
    lance_table.parameters = {"table_type": "lance"}

    mock_client_instance = MagicMock()
    mock_client_instance.get_all_tables.return_value = names
    # Use return_value rather than a finite side_effect list: list_tables is
    # invoked three times below and looks tables up on every call, so a
    # five-element side_effect iterable would be exhausted after the first
    # page and raise StopIteration on later lookups.
    mock_client_instance.get_table.return_value = lance_table
    mock_hive_client.__enter__.return_value = mock_client_instance

    r1 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token=None)
    )
    assert r1.tables == ["t00", "t01"]
    assert r1.page_token == "2"

    r2 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token="2")
    )
    assert r2.tables == ["t02", "t03"]
    assert r2.page_token == "4"

    r3 = hive_namespace.list_tables(
        ListTablesRequest(id=["db"], limit=2, page_token="4")
    )
    assert r3.tables == ["t04"]
    assert r3.page_token is None

def test_describe_table(self, hive_namespace, mock_hive_client):
"""Test describing a table returns location only.

Expand Down Expand Up @@ -272,6 +305,7 @@ def test_root_namespace_operations(self, hive_namespace):
request = ListTablesRequest(id=[])
response = hive_namespace.list_tables(request)
assert response.tables == []
assert response.page_token is None

# Test create_namespace for root (should fail)
request = CreateNamespaceRequest(id=[])
Expand Down
70 changes: 70 additions & 0 deletions python/tests/test_hive2_page_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Unit tests for Hive2 list-tables pagination (Java PageUtil parity).

Loaded via file path so the suite does not import ``lance_namespace_impls`` package
``__init__`` (which pulls in Lance / JVM-dependent modules).
"""

from __future__ import annotations

import importlib.util
from pathlib import Path


def _load_hive2_page_util():
    """Load ``hive2_page_util.py`` straight from its source path.

    The module is executed from a file-location spec under the name
    ``hive2_page_util`` instead of being imported through the
    ``lance_namespace_impls`` package, so the package ``__init__`` is not run.
    """
    module_path = (
        Path(__file__).resolve().parents[1]
        / "src/lance_namespace_impls/hive2_page_util.py"
    )
    spec = importlib.util.spec_from_file_location("hive2_page_util", module_path)
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


pu = _load_hive2_page_util()


class TestHive2PageUtil:
    """Checks page-size normalization and offset-token slicing helpers."""

    def test_normalize_page_size(self):
        """Missing or non-positive limits use the default; others pass through."""
        default_size = pu.DEFAULT_LIST_TABLES_PAGE_SIZE
        for limit in (None, 0, -1):
            assert pu.normalize_list_tables_page_size(limit) == default_size
        assert pu.normalize_list_tables_page_size(50) == 50

    def test_split_page_first_and_next_token(self):
        """First page yields a continuation token; the last page yields none."""
        items = ["a", "b", "c", "d"]

        first_page, first_token = pu.split_list_tables_page(items, None, 2)
        assert (first_page, first_token) == (["a", "b"], "2")

        second_page, second_token = pu.split_list_tables_page(items, "2", 2)
        assert second_page == ["c", "d"]
        assert second_token is None

    def test_split_page_invalid_token_resets_to_start(self):
        """A token that is not an integer restarts from the first item."""
        page, token = pu.split_list_tables_page(["x", "y"], "not-an-int", 1)
        assert (page, token) == (["x"], "1")

    def test_split_page_start_beyond_end(self):
        """An offset past the end returns an empty page and no token."""
        page, token = pu.split_list_tables_page(["only"], "99", 10)
        assert page == []
        assert token is None

    def test_apply_pagination_multi_page(self):
        """Five names with limit 2 are served as three consecutive pages."""
        names = [f"t{i:02d}" for i in range(5)]
        expected_by_token = [
            (None, (["t00", "t01"], "2")),
            ("2", (["t02", "t03"], "4")),
            ("4", (["t04"], None)),
        ]
        for token, expected in expected_by_token:
            assert pu.apply_list_tables_pagination(names, token, 2) == expected

    def test_default_limit_full_single_page(self):
        """With no limit, a short list fits in one default-sized page."""
        names = ["a", "b"]
        page, token = pu.apply_list_tables_pagination(names, None, None)
        assert page == names
        assert token is None
Loading