Skip to content

Commit 306e480

Browse files
jakenotjayd-v-bdcherian
authored
Skip chunk coordinate enumeration in resize when array is only growing (#3650) (#3702)
* Skip chunk coordinate enumeration in resize when array is only growing (#3650)
* Add tests for growth, shrink, and mixed resize operations to ensure chunk enumeration is skipped correctly
* changelog

---------

Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 23596c1 commit 306e480

File tree

3 files changed

+72
-1
lines changed

3 files changed

+72
-1
lines changed

changes/3702.bugfix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Skip chunk coordinate enumeration in resize when the array is only growing, avoiding unbounded memory usage for large arrays.

src/zarr/core/array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5990,7 +5990,10 @@ async def _resize(
59905990
assert len(new_shape) == len(array.metadata.shape)
59915991
new_metadata = array.metadata.update_shape(new_shape)
59925992

5993-
if delete_outside_chunks:
5993+
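# Only run the deletion path when at least one dimension shrinks: it materializes every old and new chunk coordinate in memory, which is prohibitive for very large arrays, and a grow-only resize leaves no chunks outside the new shape to delete.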
5994+
only_growing = all(new >= old for new, old in zip(new_shape, array.metadata.shape, strict=True))
5995+
5996+
if delete_outside_chunks and not only_growing:
59945997
# Remove all chunks outside of the new shape
59955998
old_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(array.metadata.shape))
59965999
new_chunk_coords = set(array.metadata.chunk_grid.all_chunk_coords(new_shape))

tests/test_array.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,73 @@ def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
781781
assert new_shape == result.shape
782782

783783

784+
@pytest.mark.parametrize("store", ["memory"], indirect=True)
785+
def test_resize_growing_skips_chunk_enumeration(
786+
store: MemoryStore, zarr_format: ZarrFormat
787+
) -> None:
788+
"""Growing an array should not enumerate chunk coords for deletion (#3650 mitigation)."""
789+
from zarr.core.chunk_grids import RegularChunkGrid
790+
791+
z = zarr.create(
792+
shape=(10, 10),
793+
chunks=(5, 5),
794+
dtype="i4",
795+
fill_value=0,
796+
store=store,
797+
zarr_format=zarr_format,
798+
)
799+
z[:] = np.ones((10, 10), dtype="i4")
800+
801+
# growth only - ensure no chunk coords are enumerated
802+
with mock.patch.object(
803+
RegularChunkGrid,
804+
"all_chunk_coords",
805+
wraps=z.metadata.chunk_grid.all_chunk_coords,
806+
) as mock_coords:
807+
z.resize((20, 20))
808+
mock_coords.assert_not_called()
809+
810+
assert z.shape == (20, 20)
811+
np.testing.assert_array_equal(np.ones((10, 10), dtype="i4"), z[:10, :10])
812+
np.testing.assert_array_equal(np.zeros((10, 10), dtype="i4"), z[10:, 10:])
813+
814+
# shrink - ensure no regression of behaviour
815+
with mock.patch.object(
816+
RegularChunkGrid,
817+
"all_chunk_coords",
818+
wraps=z.metadata.chunk_grid.all_chunk_coords,
819+
) as mock_coords:
820+
z.resize((5, 5))
821+
assert mock_coords.call_count > 0
822+
823+
assert z.shape == (5, 5)
824+
np.testing.assert_array_equal(np.ones((5, 5), dtype="i4"), z[:])
825+
826+
# mixed: grow dim 0, shrink dim 1 - ensure deletion path runs
827+
z2 = zarr.create(
828+
shape=(10, 10),
829+
chunks=(5, 5),
830+
dtype="i4",
831+
fill_value=0,
832+
store=store,
833+
zarr_format=zarr_format,
834+
overwrite=True,
835+
)
836+
z2[:] = np.ones((10, 10), dtype="i4")
837+
838+
with mock.patch.object(
839+
RegularChunkGrid,
840+
"all_chunk_coords",
841+
wraps=z2.metadata.chunk_grid.all_chunk_coords,
842+
) as mock_coords:
843+
z2.resize((20, 5))
844+
assert mock_coords.call_count > 0
845+
846+
assert z2.shape == (20, 5)
847+
np.testing.assert_array_equal(np.ones((10, 5), dtype="i4"), z2[:10, :])
848+
np.testing.assert_array_equal(np.zeros((10, 5), dtype="i4"), z2[10:, :])
849+
850+
784851
@pytest.mark.parametrize("store", ["memory"], indirect=True)
785852
def test_append_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None:
786853
a = np.arange(105)

0 commit comments

Comments
 (0)