Skip to content

Commit 769728d

Browse files
committed
feat(pagination): add count, index, reversed, copy, and + to PaginationList
These were inherited from list's C implementation, which reads the underlying array directly and bypasses _fetch_all()/_fetch_through_index(). On a multi-page result they silently operated on only the already-fetched pages. Each is now overridden to fetch all remaining pages first, then delegate to list's implementation. __add__ also needed an @overload to match list.__add__'s own overloaded signature for mypy.
1 parent 329b1bd commit 769728d

2 files changed

Lines changed: 110 additions & 0 deletions

File tree

pyiceberg/typedef.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717
from __future__ import annotations
1818

19+
import sys
1920
from abc import abstractmethod
2021
from collections.abc import Callable, Iterator
2122
from datetime import date, datetime, time
@@ -155,6 +156,7 @@ def model_dump_json(
155156

156157

157158
T = TypeVar("T")
159+
S = TypeVar("S")
158160

159161

160162
class IcebergRootModel(RootModel[T], Generic[T]):
@@ -305,3 +307,39 @@ def __getitem__(self, idx: SupportsIndex | slice) -> T | list[T]:
305307
else:
306308
self._fetch_through_index(i)
307309
return list.__getitem__(self, idx)
310+
311+
def count(self, value: T) -> int:
312+
"""Return the number of occurrences of value, fetching all pages first."""
313+
self._fetch_all()
314+
return list.count(self, value)
315+
316+
def index(self, value: T, start: SupportsIndex = 0, stop: SupportsIndex = sys.maxsize, /) -> int:
317+
"""Return the index of the first occurrence of value, fetching all pages first."""
318+
self._fetch_all()
319+
return list.index(self, value, start, stop)
320+
321+
def __reversed__(self) -> Iterator[T]:
322+
"""Return an iterator over the items in reverse order, fetching all pages first."""
323+
self._fetch_all()
324+
return list.__reversed__(self)
325+
326+
def copy(self) -> list[T]:
327+
"""Return a plain list with all items, fetching all pages first."""
328+
self._fetch_all()
329+
return list.copy(self)
330+
331+
@overload
332+
def __add__(self, other: list[T]) -> list[T]: ...
333+
334+
@overload
335+
def __add__(self, other: list[S]) -> list[S | T]: ...
336+
337+
def __add__(self, other: list[Any]) -> list[Any]:
338+
"""Return self + other as a plain list, fetching all pages first."""
339+
self._fetch_all()
340+
return list.__add__(self, other)
341+
342+
def __radd__(self, other: list[T]) -> list[T]:
343+
"""Return other + self as a plain list, fetching all pages first."""
344+
self._fetch_all()
345+
return list.__add__(other, self)

tests/utils/test_pagination.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
from __future__ import annotations
1919

20+
import pytest
21+
2022
from pyiceberg.typedef import PaginationList
2123

2224

@@ -77,6 +79,38 @@ def test_single_page_is_list_subclass() -> None:
7779
assert isinstance(pl, list)
7880

7981

82+
def test_single_page_count() -> None:
83+
pl, _ = _simple_pagination_list([[1, 2, 2, 3]])
84+
assert pl.count(2) == 2
85+
86+
87+
def test_single_page_index() -> None:
88+
pl, _ = _simple_pagination_list([[10, 20, 30]])
89+
assert pl.index(30) == 2
90+
91+
92+
def test_single_page_reversed() -> None:
93+
pl, _ = _simple_pagination_list([[1, 2, 3]])
94+
assert list(reversed(pl)) == [3, 2, 1]
95+
96+
97+
def test_single_page_copy() -> None:
98+
pl, expected = _simple_pagination_list([[1, 2, 3]])
99+
copied = pl.copy()
100+
assert copied == expected
101+
assert type(copied) is list
102+
103+
104+
def test_single_page_add() -> None:
105+
pl, _ = _simple_pagination_list([[1, 2]])
106+
assert pl + [3] == [1, 2, 3]
107+
108+
109+
def test_single_page_radd() -> None:
110+
pl, _ = _simple_pagination_list([[2, 3]])
111+
assert [1] + pl == [1, 2, 3]
112+
113+
80114
# ---------------------------------------------------------------------------
81115
# Multi-page: lazily fetches subsequent pages
82116
# ---------------------------------------------------------------------------
@@ -178,6 +212,44 @@ def test_multi_page_equality_with_plain_list() -> None:
178212
assert pl == expected
179213

180214

215+
def test_multi_page_count_fetches_all() -> None:
216+
pl, _ = _simple_pagination_list([[1, 2], [2, 3]])
217+
assert pl.count(2) == 2
218+
219+
220+
def test_multi_page_index_fetches_all() -> None:
221+
pl, _ = _simple_pagination_list([[1, 2], [3, 4]])
222+
assert pl.index(4) == 3
223+
224+
225+
def test_multi_page_index_raises_for_missing_value() -> None:
226+
pl, _ = _simple_pagination_list([[1, 2], [3, 4]])
227+
with pytest.raises(ValueError):
228+
pl.index(99)
229+
230+
231+
def test_multi_page_reversed_fetches_all() -> None:
232+
pl, expected = _simple_pagination_list([[1, 2], [3, 4]])
233+
assert list(reversed(pl)) == list(reversed(expected))
234+
235+
236+
def test_multi_page_copy_fetches_all() -> None:
237+
pl, expected = _simple_pagination_list([[1, 2], [3, 4]])
238+
copied = pl.copy()
239+
assert copied == expected
240+
assert type(copied) is list
241+
242+
243+
def test_multi_page_add_fetches_all() -> None:
244+
pl, expected = _simple_pagination_list([[1, 2], [3, 4]])
245+
assert pl + [5] == expected + [5]
246+
247+
248+
def test_multi_page_radd_fetches_all() -> None:
249+
pl, expected = _simple_pagination_list([[1, 2], [3, 4]])
250+
assert [0] + pl == [0] + expected
251+
252+
181253
# ---------------------------------------------------------------------------
182254
# Performance: len() should not eagerly fetch all pages for a single-page list
183255
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)