Skip to content

Commit e3c4e44

Browse files
authored
Merge branch 'main' into fix/stateful-delete-group-precondition
2 parents 0137a74 + f8b3d38 commit e3c4e44

File tree

15 files changed

+448
-58
lines changed

15 files changed

+448
-58
lines changed

changes/3655.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed a bug in the sharding codec that prevented nested shard reads in certain cases.

changes/3657.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix obstore _transform_list_dir implementation to correctly relativize paths (removing lstrip usage).

changes/3702.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Skip chunk coordinate enumeration in resize when the array is only growing, avoiding unbounded memory usage for large arrays.

changes/3704.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Remove an expensive `isinstance` check from the bytes codec decoding routine.

changes/3708.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up Morton order computation via a hypercube fast path, vectorized decoding, and singleton-dimension removal, giving a 10–45x speedup for typical chunk shapes.

changes/3712.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added benchmarks for Morton order computation in sharded arrays.

src/zarr/codecs/bytes.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@
55
from enum import Enum
66
from typing import TYPE_CHECKING
77

8-
import numpy as np
9-
108
from zarr.abc.codec import ArrayBytesCodec
11-
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
9+
from zarr.core.buffer import Buffer, NDBuffer
1210
from zarr.core.common import JSON, parse_enum, parse_named_configuration
1311
from zarr.core.dtype.common import HasEndianness
1412

@@ -72,20 +70,15 @@ async def _decode_single(
7270
chunk_bytes: Buffer,
7371
chunk_spec: ArraySpec,
7472
) -> NDBuffer:
75-
assert isinstance(chunk_bytes, Buffer)
7673
# TODO: remove endianness enum in favor of literal union
7774
endian_str = self.endian.value if self.endian is not None else None
7875
if isinstance(chunk_spec.dtype, HasEndianness):
7976
dtype = replace(chunk_spec.dtype, endianness=endian_str).to_native_dtype() # type: ignore[call-arg]
8077
else:
8178
dtype = chunk_spec.dtype.to_native_dtype()
8279
as_array_like = chunk_bytes.as_array_like()
83-
if isinstance(as_array_like, NDArrayLike):
84-
as_nd_array_like = as_array_like
85-
else:
86-
as_nd_array_like = np.asanyarray(as_array_like)
8780
chunk_array = chunk_spec.prototype.nd_buffer.from_ndarray_like(
88-
as_nd_array_like.view(dtype=dtype)
81+
as_array_like.view(dtype=dtype) # type: ignore[attr-defined]
8982
)
9083

9184
# ensure correct chunk shape

src/zarr/codecs/sharding.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
)
5353
from zarr.core.metadata.v3 import parse_codecs
5454
from zarr.registry import get_ndbuffer_class, get_pipeline_class
55+
from zarr.storage._utils import _normalize_byte_range_index
5556

5657
if TYPE_CHECKING:
5758
from collections.abc import Iterator
@@ -86,11 +87,16 @@ class _ShardingByteGetter(ByteGetter):
8687
async def get(
8788
self, prototype: BufferPrototype, byte_range: ByteRequest | None = None
8889
) -> Buffer | None:
89-
assert byte_range is None, "byte_range is not supported within shards"
9090
assert prototype == default_buffer_prototype(), (
9191
f"prototype is not supported within shards currently. diff: {prototype} != {default_buffer_prototype()}"
9292
)
93-
return self.shard_dict.get(self.chunk_coords)
93+
value = self.shard_dict.get(self.chunk_coords)
94+
if value is None:
95+
return None
96+
if byte_range is None:
97+
return value
98+
start, stop = _normalize_byte_range_index(value, byte_range)
99+
return value[start:stop]
94100

95101

96102
@dataclass(frozen=True)
@@ -597,7 +603,8 @@ async def _decode_shard_index(
597603
)
598604
)
599605
)
600-
assert index_array is not None
606+
# This cannot be None because we have the bytes already
607+
index_array = cast(NDBuffer, index_array)
601608
return _ShardIndex(index_array.as_numpy_array())
602609

603610
async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:

src/zarr/core/array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5990,7 +5990,10 @@ async def _resize(
59905990
assert len(new_shape) == len(array.metadata.shape)
59915991
new_metadata = array.metadata.update_shape(new_shape)
59925992

5993-
if delete_outside_chunks:
5993+
# ensure deletion is only run if array is shrinking as the delete_outside_chunks path is unbounded in memory
5994+
only_growing = all(new >= old for new, old in zip(new_shape, array.metadata.shape, strict=True))
5995+
5996+
if delete_outside_chunks and not only_growing:
59945997
# Remove all chunks outside of the new shape
59955998
old_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(array.metadata.shape))
59965999
new_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(new_shape))

src/zarr/core/indexing.py

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,7 @@ def make_slice_selection(selection: Any) -> list[slice]:
14521452
def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]:
14531453
# Inspired by compressed morton code as implemented in Neuroglancer
14541454
# https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code
1455-
bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape)
1455+
bits = tuple((c - 1).bit_length() for c in chunk_shape)
14561456
max_coords_bits = max(bits)
14571457
input_bit = 0
14581458
input_value = z
@@ -1467,16 +1467,102 @@ def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]:
14671467
return tuple(out)
14681468

14691469

1470-
@lru_cache
1470+
def decode_morton_vectorized(
    z: npt.NDArray[np.intp], chunk_shape: tuple[int, ...]
) -> npt.NDArray[np.intp]:
    """Vectorized Morton code decoding for multiple z values.

    Parameters
    ----------
    z : ndarray
        1D array of Morton codes to decode.
    chunk_shape : tuple of int
        Shape defining the coordinate space.

    Returns
    -------
    ndarray
        2D array of shape (len(z), len(chunk_shape)) containing decoded coordinates.
    """
    ndim = len(chunk_shape)
    # Bits needed per dimension: ceil(log2(extent)) for extents >= 1.
    dim_bits = [(extent - 1).bit_length() for extent in chunk_shape]
    coords = np.zeros((len(z), ndim), dtype=np.intp)

    src_bit = 0
    for dst_bit in range(max(dim_bits) if dim_bits else 0):
        for axis, nbits in enumerate(dim_bits):
            if dst_bit >= nbits:
                # This dimension contributes no more bits at this level.
                continue
            # Pull bit `src_bit` out of every code at once and deposit it at
            # position `dst_bit` of the coordinate along `axis`.
            coords[:, axis] |= ((z >> src_bit) & 1) << dst_bit
            src_bit += 1

    return coords
1504+
1505+
1506+
@lru_cache(maxsize=16)
def _morton_order(chunk_shape: tuple[int, ...]) -> tuple[tuple[int, ...], ...]:
    """Return every coordinate of ``chunk_shape`` in Morton (Z-curve) order.

    Results are memoized with a bounded cache, since the same chunk shape is
    typically requested repeatedly while iterating shards.
    """
    n_total = product(chunk_shape)
    # An empty hyper-rectangle has no coordinates at all.
    if n_total == 0:
        return ()

    # Optimization: remove singleton dimensions so the hypercube fast path
    # below applies to shapes like (1, 1, 32, 32, 32). Compute the Morton
    # order on the squeezed shape, then re-insert a constant 0 for every
    # singleton axis.
    singleton_dims = tuple(i for i, s in enumerate(chunk_shape) if s == 1)
    if singleton_dims:
        squeezed_shape = tuple(s for s in chunk_shape if s != 1)
        if squeezed_shape:
            # Recursive call — also served by the lru_cache above.
            squeezed_order = _morton_order(squeezed_shape)
            # Expand coordinates to include singleton dimensions (always 0).
            expanded: list[tuple[int, ...]] = []
            for coord in squeezed_order:
                full_coord: list[int] = []
                squeezed_idx = 0
                for i in range(len(chunk_shape)):
                    if chunk_shape[i] == 1:
                        full_coord.append(0)
                    else:
                        full_coord.append(coord[squeezed_idx])
                        squeezed_idx += 1
                expanded.append(tuple(full_coord))
            return tuple(expanded)
        else:
            # All dimensions are singletons: the only point is the origin.
            return ((0,) * len(chunk_shape),)

    n_dims = len(chunk_shape)

    # Find the largest power-of-2 hypercube that fits within chunk_shape.
    # Within this hypercube, Morton codes are guaranteed to be in bounds,
    # so no per-code bounds checking is required.
    min_dim = min(chunk_shape)
    if min_dim >= 1:
        power = min_dim.bit_length() - 1  # floor(log2(min_dim))
        hypercube_size = 1 << power  # 2^power
        n_hypercube = hypercube_size**n_dims
    else:
        # Defensive: only reachable for non-positive extents — TODO confirm
        # whether such shapes can occur at all given the n_total == 0 guard.
        n_hypercube = 0

    # Within the hypercube, decode all codes at once (vectorized).
    order: list[tuple[int, ...]]
    if n_hypercube > 0:
        z_values = np.arange(n_hypercube, dtype=np.intp)
        hypercube_coords = decode_morton_vectorized(z_values, chunk_shape)
        order = [tuple(row) for row in hypercube_coords]
    else:
        order = []

    # For the remaining codes past the hypercube, decode one at a time and
    # discard any coordinate that falls outside the (non-power-of-2) shape.
    i = n_hypercube
    while len(order) < n_total:
        m = decode_morton(i, chunk_shape)
        if all(x < y for x, y in zip(m, chunk_shape, strict=False)):
            order.append(m)
        i += 1

    return tuple(order)
14811567

14821568

0 commit comments

Comments
 (0)