Skip to content

Commit b097214

Browse files
authored
Merge branch 'main' into fix/cache-store-byte-range
2 parents 8380af7 + 23596c1 commit b097214

11 files changed

Lines changed: 123 additions & 55 deletions

File tree

changes/3657.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix the obstore `_transform_list_dir` implementation to correctly relativize paths (removing the `lstrip` usage).

changes/3704.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Remove an expensive `isinstance` check from the bytes codec decoding routine.

changes/3705.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a performance bug in Morton curve generation.

changes/3706.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow NumPy ints as input when declaring a shape.

src/zarr/codecs/bytes.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@
55
from enum import Enum
66
from typing import TYPE_CHECKING
77

8-
import numpy as np
9-
108
from zarr.abc.codec import ArrayBytesCodec
11-
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
9+
from zarr.core.buffer import Buffer, NDBuffer
1210
from zarr.core.common import JSON, parse_enum, parse_named_configuration
1311
from zarr.core.dtype.common import HasEndianness
1412

@@ -72,20 +70,15 @@ async def _decode_single(
7270
chunk_bytes: Buffer,
7371
chunk_spec: ArraySpec,
7472
) -> NDBuffer:
75-
assert isinstance(chunk_bytes, Buffer)
7673
# TODO: remove endianness enum in favor of literal union
7774
endian_str = self.endian.value if self.endian is not None else None
7875
if isinstance(chunk_spec.dtype, HasEndianness):
7976
dtype = replace(chunk_spec.dtype, endianness=endian_str).to_native_dtype() # type: ignore[call-arg]
8077
else:
8178
dtype = chunk_spec.dtype.to_native_dtype()
8279
as_array_like = chunk_bytes.as_array_like()
83-
if isinstance(as_array_like, NDArrayLike):
84-
as_nd_array_like = as_array_like
85-
else:
86-
as_nd_array_like = np.asanyarray(as_array_like)
8780
chunk_array = chunk_spec.prototype.nd_buffer.from_ndarray_like(
88-
as_nd_array_like.view(dtype=dtype)
81+
as_array_like.view(dtype=dtype) # type: ignore[attr-defined]
8982
)
9083

9184
# ensure correct chunk shape

src/zarr/core/common.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
overload,
2222
)
2323

24+
import numpy as np
2425
from typing_extensions import ReadOnly
2526

2627
from zarr.core.config import config as zarr_config
@@ -37,7 +38,7 @@
3738
ZMETADATA_V2_JSON = ".zmetadata"
3839

3940
BytesLike = bytes | bytearray | memoryview
40-
ShapeLike = Iterable[int] | int
41+
ShapeLike = Iterable[int | np.integer[Any]] | int | np.integer[Any]
4142
# For backwards compatibility
4243
ChunkCoords = tuple[int, ...]
4344
ZarrFormat = Literal[2, 3]
@@ -185,23 +186,28 @@ def parse_named_configuration(
185186

186187

187188
def parse_shapelike(data: ShapeLike) -> tuple[int, ...]:
188-
if isinstance(data, int):
189+
"""
190+
Parse a shape-like input into an explicit shape.
191+
"""
192+
if isinstance(data, int | np.integer):
189193
if data < 0:
190194
raise ValueError(f"Expected a non-negative integer. Got {data} instead")
191-
return (data,)
195+
return (int(data),)
192196
try:
193197
data_tuple = tuple(data)
194198
except TypeError as e:
195199
msg = f"Expected an integer or an iterable of integers. Got {data} instead."
196200
raise TypeError(msg) from e
197201

198-
if not all(isinstance(v, int) for v in data_tuple):
202+
if not all(isinstance(v, int | np.integer) for v in data_tuple):
199203
msg = f"Expected an iterable of integers. Got {data} instead."
200204
raise TypeError(msg)
201205
if not all(v > -1 for v in data_tuple):
202206
msg = f"Expected all values to be non-negative. Got {data} instead."
203207
raise ValueError(msg)
204-
return data_tuple
208+
209+
# cast NumPy scalars to plain python ints
210+
return tuple(int(x) for x in data_tuple)
205211

206212

207213
def parse_fill_value(data: Any) -> Any:

src/zarr/core/indexing.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from collections.abc import Iterator, Sequence
88
from dataclasses import dataclass
99
from enum import Enum
10-
from functools import reduce
10+
from functools import lru_cache, reduce
1111
from types import EllipsisType
1212
from typing import (
1313
TYPE_CHECKING,
@@ -1467,16 +1467,21 @@ def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]:
14671467
return tuple(out)
14681468

14691469

1470-
def morton_order_iter(chunk_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]:
1471-
i = 0
1470+
@lru_cache
1471+
def _morton_order(chunk_shape: tuple[int, ...]) -> tuple[tuple[int, ...], ...]:
1472+
n_total = product(chunk_shape)
14721473
order: list[tuple[int, ...]] = []
1473-
while len(order) < product(chunk_shape):
1474+
i = 0
1475+
while len(order) < n_total:
14741476
m = decode_morton(i, chunk_shape)
1475-
if m not in order and all(x < y for x, y in zip(m, chunk_shape, strict=False)):
1477+
if all(x < y for x, y in zip(m, chunk_shape, strict=False)):
14761478
order.append(m)
14771479
i += 1
1478-
for j in range(product(chunk_shape)):
1479-
yield order[j]
1480+
return tuple(order)
1481+
1482+
1483+
def morton_order_iter(chunk_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]:
1484+
return iter(_morton_order(tuple(chunk_shape)))
14801485

14811486

14821487
def c_order_iter(chunks_per_shard: tuple[int, ...]) -> Iterator[tuple[int, ...]]:

src/zarr/storage/_obstore.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import contextlib
55
import pickle
66
from collections import defaultdict
7+
from itertools import chain
8+
from operator import itemgetter
79
from typing import TYPE_CHECKING, Generic, Self, TypedDict, TypeVar
810

911
from zarr.abc.store import (
@@ -15,6 +17,7 @@
1517
)
1618
from zarr.core.common import concurrent_map
1719
from zarr.core.config import config
20+
from zarr.storage._utils import _relativize_path
1821

1922
if TYPE_CHECKING:
2023
from collections.abc import AsyncGenerator, Coroutine, Iterable, Sequence
@@ -263,10 +266,11 @@ async def _transform_list_dir(
263266
# We assume that the underlying object-store implementation correctly handles the
264267
# prefix, so we don't double-check that the returned results actually start with the
265268
# given prefix.
266-
prefixes = [obj.lstrip(prefix).lstrip("/") for obj in list_result["common_prefixes"]]
267-
objects = [obj["path"].removeprefix(prefix).lstrip("/") for obj in list_result["objects"]]
268-
for item in prefixes + objects:
269-
yield item
269+
prefix = prefix.rstrip("/")
270+
for path in chain(
271+
list_result["common_prefixes"], map(itemgetter("path"), list_result["objects"])
272+
):
273+
yield _relativize_path(path=path, prefix=prefix)
270274

271275

272276
class _BoundedRequest(TypedDict):

src/zarr/testing/store.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -492,24 +492,36 @@ async def test_list_empty_path(self, store: S) -> None:
492492
assert observed_prefix_sorted == expected_prefix_sorted
493493

494494
async def test_list_dir(self, store: S) -> None:
495-
root = "foo"
496-
store_dict = {
497-
root + "/zarr.json": self.buffer_cls.from_bytes(b"bar"),
498-
root + "/c/1": self.buffer_cls.from_bytes(b"\x01"),
499-
}
495+
roots_and_keys: list[tuple[str, dict[str, Buffer]]] = [
496+
(
497+
"foo",
498+
{
499+
"foo/zarr.json": self.buffer_cls.from_bytes(b"bar"),
500+
"foo/c/1": self.buffer_cls.from_bytes(b"\x01"),
501+
},
502+
),
503+
(
504+
"foo/bar",
505+
{
506+
"foo/bar/foobar_first_child": self.buffer_cls.from_bytes(b"1"),
507+
"foo/bar/foobar_second_child/zarr.json": self.buffer_cls.from_bytes(b"2"),
508+
},
509+
),
510+
]
500511

501512
assert await _collect_aiterator(store.list_dir("")) == ()
502-
assert await _collect_aiterator(store.list_dir(root)) == ()
503513

504-
await store._set_many(store_dict.items())
514+
for root, store_dict in roots_and_keys:
515+
assert await _collect_aiterator(store.list_dir(root)) == ()
505516

506-
keys_observed = await _collect_aiterator(store.list_dir(root))
507-
keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict}
517+
await store._set_many(store_dict.items())
508518

509-
assert sorted(keys_observed) == sorted(keys_expected)
519+
keys_observed = await _collect_aiterator(store.list_dir(root))
520+
keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict}
521+
assert sorted(keys_observed) == sorted(keys_expected)
510522

511-
keys_observed = await _collect_aiterator(store.list_dir(root + "/"))
512-
assert sorted(keys_expected) == sorted(keys_observed)
523+
keys_observed = await _collect_aiterator(store.list_dir(root + "/"))
524+
assert sorted(keys_expected) == sorted(keys_observed)
513525

514526
async def test_set_if_not_exists(self, store: S) -> None:
515527
key = "k"

tests/test_codecs/test_codecs.py

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
TransposeCodec,
1919
)
2020
from zarr.core.buffer import default_buffer_prototype
21-
from zarr.core.indexing import BasicSelection, morton_order_iter
21+
from zarr.core.indexing import BasicSelection, decode_morton, morton_order_iter
2222
from zarr.core.metadata.v3 import ArrayV3Metadata
2323
from zarr.dtype import UInt8
2424
from zarr.errors import ZarrUserWarning
@@ -171,7 +171,8 @@ def test_open(store: Store) -> None:
171171
assert a.metadata == b.metadata
172172

173173

174-
def test_morton() -> None:
174+
def test_morton_exact_order() -> None:
175+
"""Test exact morton ordering for power-of-2 shapes."""
175176
assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)]
176177
assert list(morton_order_iter((2, 2, 2))) == [
177178
(0, 0, 0),
@@ -206,21 +207,58 @@ def test_morton() -> None:
206207
@pytest.mark.parametrize(
207208
"shape",
208209
[
209-
[2, 2, 2],
210-
[5, 2],
211-
[2, 5],
212-
[2, 9, 2],
213-
[3, 2, 12],
214-
[2, 5, 1],
215-
[4, 3, 6, 2, 7],
216-
[3, 2, 1, 6, 4, 5, 2],
210+
(2, 2, 2),
211+
(5, 2),
212+
(2, 5),
213+
(2, 9, 2),
214+
(3, 2, 12),
215+
(2, 5, 1),
216+
(4, 3, 6, 2, 7),
217+
(3, 2, 1, 6, 4, 5, 2),
218+
(1,),
219+
(1, 1),
220+
(5, 1, 3),
221+
(1, 4, 1, 2),
217222
],
218223
)
219-
def test_morton2(shape: tuple[int, ...]) -> None:
224+
def test_morton_is_permutation(shape: tuple[int, ...]) -> None:
225+
"""Test that morton_order_iter produces every valid coordinate exactly once."""
226+
import itertools
227+
228+
from zarr.core.common import product
229+
230+
order = list(morton_order_iter(shape))
231+
expected_len = product(shape)
232+
# completeness: every valid coordinate is present
233+
assert len(order) == expected_len
234+
# no duplicates
235+
assert len(set(order)) == expected_len
236+
# all coordinates are within bounds
237+
assert all(all(c < s for c, s in zip(coord, shape, strict=True)) for coord in order)
238+
# the set of coordinates equals the full cartesian product
239+
assert set(order) == set(itertools.product(*(range(s) for s in shape)))
240+
241+
242+
@pytest.mark.parametrize(
243+
"shape",
244+
[
245+
(2, 2),
246+
(4, 4),
247+
(2, 2, 2),
248+
(4, 4, 4),
249+
(2, 2, 2, 2),
250+
],
251+
)
252+
def test_morton_ordering(shape: tuple[int, ...]) -> None:
253+
"""Test that the iteration order matches consecutive decode_morton outputs.
254+
255+
For power-of-2 shapes, every decode_morton output is in-bounds,
256+
so the ordering should be exactly decode_morton(0), decode_morton(1), ...
257+
"""
258+
220259
order = list(morton_order_iter(shape))
221-
for i, x in enumerate(order):
222-
assert x not in order[:i] # no duplicates
223-
assert all(x[j] < shape[j] for j in range(len(shape))) # all indices are within bounds
260+
for i, coord in enumerate(order):
261+
assert coord == decode_morton(i, shape)
224262

225263

226264
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])

0 commit comments

Comments
 (0)