Skip to content

Commit 00af954

Browse files
authored
Merge branch 'main' into glossary
2 parents d3e7f0b + fccf372 commit 00af954

File tree

7 files changed

+96
-14
lines changed

7 files changed

+96
-14
lines changed

docs/_static/favicon-96x96.png

12.4 KB
Loading

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ theme:
8585
name: material
8686
custom_dir: docs/overrides
8787
logo: _static/logo_bw.png
88+
favicon: _static/favicon-96x96.png
8889

8990
palette:
9091
# Light mode

src/zarr/core/array.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,24 +1235,28 @@ def basename(self) -> str:
12351235
@property
12361236
def cdata_shape(self) -> tuple[int, ...]:
12371237
"""
1238-
The shape of the chunk grid for this array.
1238+
The number of chunks along each dimension.
1239+
1240+
When sharding is used, this counts inner chunks (not shards) per dimension.
12391241
12401242
Returns
12411243
-------
12421244
tuple[int, ...]
1243-
The shape of the chunk grid for this array.
1245+
The number of chunks along each dimension.
12441246
"""
12451247
return self._chunk_grid_shape
12461248

12471249
@property
12481250
def _chunk_grid_shape(self) -> tuple[int, ...]:
12491251
"""
1250-
The shape of the chunk grid for this array.
1252+
The number of chunks along each dimension.
1253+
1254+
When sharding is used, this counts inner chunks (not shards) per dimension.
12511255
12521256
Returns
12531257
-------
12541258
tuple[int, ...]
1255-
The shape of the chunk grid for this array.
1259+
The number of chunks along each dimension.
12561260
"""
12571261
return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True)))
12581262

@@ -2399,14 +2403,23 @@ def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]:
23992403
@property
24002404
def cdata_shape(self) -> tuple[int, ...]:
24012405
"""
2402-
The shape of the chunk grid for this array.
2406+
The number of chunks along each dimension.
2407+
2408+
When sharding is used, this counts inner chunks (not shards) per dimension.
24032409
"""
24042410
return self.async_array._chunk_grid_shape
24052411

24062412
@property
24072413
def _chunk_grid_shape(self) -> tuple[int, ...]:
24082414
"""
2409-
The shape of the chunk grid for this array.
2415+
The number of chunks along each dimension.
2416+
2417+
When sharding is used, this counts inner chunks (not shards) per dimension.
2418+
2419+
Returns
2420+
-------
2421+
tuple[int, ...]
2422+
The number of chunks along each dimension.
24102423
"""
24112424
return self.async_array._chunk_grid_shape
24122425

src/zarr/core/chunk_grids.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,14 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl
126126
chunks = tuple(int(chunks) for _ in shape)
127127

128128
# handle dask-style chunks (iterable of iterables)
129-
if all(isinstance(c, (tuple | list)) for c in chunks):
130-
# take first chunk size for each dimension
131-
chunks = tuple(
132-
c[0] for c in chunks
133-
) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1])
129+
if all(isinstance(c, (tuple, list)) for c in chunks):
130+
for i, c in enumerate(chunks):
131+
if any(x != y for x, y in itertools.pairwise(c[:-1])) or (len(c) > 1 and c[-1] > c[0]):
132+
raise ValueError(
133+
f"Irregular chunk sizes in dimension {i}: {tuple(c)}. "
134+
"Only uniform chunks (with an optional smaller final chunk) are supported."
135+
)
136+
chunks = tuple(c[0] for c in chunks)
134137

135138
# handle bad dimensionality
136139
if len(chunks) > len(shape):

src/zarr/core/codec_pipeline.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,8 @@ async def read_batch(
263263
chunk_array_batch, batch_info, strict=False
264264
):
265265
if chunk_array is not None:
266+
if drop_axes:
267+
chunk_array = chunk_array.squeeze(axis=drop_axes)
266268
out[out_selection] = chunk_array
267269
else:
268270
out[out_selection] = fill_value_or_default(chunk_spec)
@@ -285,7 +287,7 @@ async def read_batch(
285287
):
286288
if chunk_array is not None:
287289
tmp = chunk_array[chunk_selection]
288-
if drop_axes != ():
290+
if drop_axes:
289291
tmp = tmp.squeeze(axis=drop_axes)
290292
out[out_selection] = tmp
291293
else:
@@ -324,7 +326,7 @@ def _merge_chunk_array(
324326
else:
325327
chunk_value = value[out_selection]
326328
# handle missing singleton dimensions
327-
if drop_axes != ():
329+
if drop_axes:
328330
item = tuple(
329331
None # equivalent to np.newaxis
330332
if idx in drop_axes

tests/test_chunk_grids.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None:
3535
((30, None, None), (100, 20, 10), 1, (30, 20, 10)),
3636
((30, 20, None), (100, 20, 10), 1, (30, 20, 10)),
3737
((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)),
38+
# dask-style chunks (uniform with optional smaller final chunk)
39+
(((100, 100, 100), (50, 50)), (300, 100), 1, (100, 50)),
40+
(((100, 100, 50),), (250,), 1, (100,)),
41+
(((100,),), (100,), 1, (100,)),
3842
# auto chunking
3943
(None, (100,), 1, (100,)),
4044
(-1, (100,), 1, (100,)),
@@ -52,3 +56,8 @@ def test_normalize_chunks_errors() -> None:
5256
normalize_chunks("foo", (100,), 1)
5357
with pytest.raises(ValueError):
5458
normalize_chunks((100, 10), (100,), 1)
59+
# dask-style irregular chunks should raise
60+
with pytest.raises(ValueError, match="Irregular chunk sizes"):
61+
normalize_chunks(((10, 20, 30),), (60,), 1)
62+
with pytest.raises(ValueError, match="Irregular chunk sizes"):
63+
normalize_chunks(((100, 100), (10, 20)), (200, 30), 1)

tests/test_codecs/test_sharding.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,8 @@ def test_invalid_shard_shape() -> None:
490490
with pytest.raises(
491491
ValueError,
492492
match=re.escape(
493-
"The array's `chunk_shape` (got (16, 16)) needs to be divisible by the shard's inner `chunk_shape` (got (9,))."
493+
"The array's `chunk_shape` (got (16, 16)) needs to be divisible "
494+
"by the shard's inner `chunk_shape` (got (9,))."
494495
),
495496
):
496497
zarr.create_array(
@@ -501,3 +502,56 @@ def test_invalid_shard_shape() -> None:
501502
dtype=np.dtype("uint8"),
502503
fill_value=0,
503504
)
505+
506+
507+
@pytest.mark.parametrize("store", ["local"], indirect=["store"])
508+
def test_sharding_mixed_integer_list_indexing(store: Store) -> None:
509+
"""Regression test for https://github.com/zarr-developers/zarr-python/issues/3691.
510+
511+
Mixed integer/list indexing on sharded arrays should return the same
512+
shape and data as on equivalent chunked arrays.
513+
"""
514+
import numpy as np
515+
516+
data = np.arange(200 * 100 * 10, dtype=np.uint8).reshape(200, 100, 10)
517+
518+
chunked = zarr.create_array(
519+
store,
520+
name="chunked",
521+
shape=(200, 100, 10),
522+
dtype=np.uint8,
523+
chunks=(200, 100, 1),
524+
overwrite=True,
525+
)
526+
chunked[:, :, :] = data
527+
528+
sharded = zarr.create_array(
529+
store,
530+
name="sharded",
531+
shape=(200, 100, 10),
532+
dtype=np.uint8,
533+
chunks=(200, 100, 1),
534+
shards=(200, 100, 10),
535+
overwrite=True,
536+
)
537+
sharded[:, :, :] = data
538+
539+
# Mixed integer + list indexing
540+
c = chunked[0:10, 0, [0, 1]] # type: ignore[index]
541+
s = sharded[0:10, 0, [0, 1]] # type: ignore[index]
542+
assert c.shape == s.shape == (10, 2), ( # type: ignore[union-attr]
543+
f"Expected (10, 2), got chunked={c.shape}, sharded={s.shape}" # type: ignore[union-attr]
544+
)
545+
np.testing.assert_array_equal(c, s)
546+
547+
# Multiple integer axes
548+
c2 = chunked[0, 0, [0, 1, 2]] # type: ignore[index]
549+
s2 = sharded[0, 0, [0, 1, 2]] # type: ignore[index]
550+
assert c2.shape == s2.shape == (3,) # type: ignore[union-attr]
551+
np.testing.assert_array_equal(c2, s2)
552+
553+
# Slice + integer + slice
554+
c3 = chunked[0:5, 1, 0:3]
555+
s3 = sharded[0:5, 1, 0:3]
556+
assert c3.shape == s3.shape == (5, 3) # type: ignore[union-attr]
557+
np.testing.assert_array_equal(c3, s3)

0 commit comments

Comments
 (0)