|
5 | 5 | import numpy as np |
6 | 6 | import pytest |
7 | 7 |
|
8 | | -from zarr.abc.codec import ArrayBytesCodec |
| 8 | +from zarr.abc.codec import ArrayBytesCodec, Codec |
9 | 9 | from zarr.codecs.bytes import BytesCodec |
10 | 10 | from zarr.codecs.crc32c_ import Crc32cCodec |
11 | 11 | from zarr.codecs.gzip import GzipCodec |
@@ -47,144 +47,99 @@ def _make_nd_buffer(arr: np.ndarray[Any, np.dtype[Any]]) -> NDBuffer: |
47 | 47 | return default_buffer_prototype().nd_buffer.from_numpy_array(arr) |
48 | 48 |
|
49 | 49 |
|
50 | | -class TestChunkTransform: |
51 | | - def test_construction_bytes_only(self) -> None: |
52 | | - # Construction succeeds when all codecs implement SupportsSyncCodec. |
53 | | - spec = _make_array_spec((100,), np.dtype("float64")) |
54 | | - ChunkTransform(codecs=(BytesCodec(),), array_spec=spec) |
55 | | - |
56 | | - def test_construction_with_compression(self) -> None: |
57 | | - # AB + BB codec chain where both implement SupportsSyncCodec. |
58 | | - spec = _make_array_spec((100,), np.dtype("float64")) |
59 | | - ChunkTransform(codecs=(BytesCodec(), GzipCodec()), array_spec=spec) |
60 | | - |
61 | | - def test_construction_full_chain(self) -> None: |
62 | | - # All three codec types (AA + AB + BB), all implementing SupportsSyncCodec. |
63 | | - spec = _make_array_spec((3, 4), np.dtype("float64")) |
64 | | - ChunkTransform( |
65 | | - codecs=(TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec()), array_spec=spec |
66 | | - ) |
67 | | - |
68 | | - def test_encode_decode_roundtrip_bytes_only(self) -> None: |
69 | | - # Minimal round-trip: BytesCodec serializes the array to bytes and back. |
70 | | - # No compression, no AA transform. |
71 | | - arr = np.arange(100, dtype="float64") |
72 | | - spec = _make_array_spec(arr.shape, arr.dtype) |
73 | | - chain = ChunkTransform(codecs=(BytesCodec(),), array_spec=spec) |
74 | | - nd_buf = _make_nd_buffer(arr) |
75 | | - |
76 | | - encoded = chain.encode(nd_buf) |
77 | | - assert encoded is not None |
78 | | - decoded = chain.decode(encoded) |
79 | | - np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
80 | | - |
81 | | - def test_encode_decode_roundtrip_with_compression(self) -> None: |
82 | | - # Round-trip with a BB codec (GzipCodec) to verify that bytes-bytes |
83 | | - # compression/decompression is wired correctly. |
84 | | - arr = np.arange(100, dtype="float64") |
85 | | - spec = _make_array_spec(arr.shape, arr.dtype) |
86 | | - chain = ChunkTransform(codecs=(BytesCodec(), GzipCodec(level=1)), array_spec=spec) |
87 | | - nd_buf = _make_nd_buffer(arr) |
88 | | - |
89 | | - encoded = chain.encode(nd_buf) |
90 | | - assert encoded is not None |
91 | | - decoded = chain.decode(encoded) |
92 | | - np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
93 | | - |
94 | | - def test_encode_decode_roundtrip_with_transpose(self) -> None: |
95 | | - # Full AA + AB + BB chain round-trip. Transpose permutes axes on encode, |
96 | | - # then BytesCodec serializes, then ZstdCodec compresses. Decode reverses |
97 | | - # all three stages. Verifies the full pipeline works end to end. |
98 | | - arr = np.arange(12, dtype="float64").reshape(3, 4) |
99 | | - spec = _make_array_spec(arr.shape, arr.dtype) |
100 | | - chain = ChunkTransform( |
101 | | - codecs=(TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec(level=1)), |
102 | | - array_spec=spec, |
103 | | - ) |
104 | | - nd_buf = _make_nd_buffer(arr) |
105 | | - |
106 | | - encoded = chain.encode(nd_buf) |
107 | | - assert encoded is not None |
108 | | - decoded = chain.decode(encoded) |
109 | | - np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
110 | | - |
111 | | - def test_rejects_non_sync_codec(self) -> None: |
112 | | - """Construction must raise TypeError when a codec lacks SupportsSyncCodec.""" |
113 | | - spec = _make_array_spec((100,), np.dtype("float64")) |
114 | | - with pytest.raises(TypeError, match="AsyncOnlyCodec"): |
115 | | - ChunkTransform(codecs=(AsyncOnlyCodec(),), array_spec=spec) |
116 | | - |
117 | | - def test_rejects_mixed_sync_and_non_sync(self) -> None: |
118 | | - """Even if some codecs support sync, a single non-sync codec causes failure.""" |
119 | | - spec = _make_array_spec((3, 4), np.dtype("float64")) |
120 | | - with pytest.raises(TypeError, match="AsyncOnlyCodec"): |
121 | | - ChunkTransform( |
122 | | - codecs=(TransposeCodec(order=(1, 0)), AsyncOnlyCodec()), |
123 | | - array_spec=spec, |
124 | | - ) |
125 | | - |
126 | | - def test_compute_encoded_size_bytes_only(self) -> None: |
127 | | - # BytesCodec is size-preserving: encoded size == input size. |
128 | | - spec = _make_array_spec((100,), np.dtype("float64")) |
129 | | - chain = ChunkTransform(codecs=(BytesCodec(),), array_spec=spec) |
130 | | - assert chain.compute_encoded_size(800, spec) == 800 |
131 | | - |
132 | | - def test_compute_encoded_size_with_crc32c(self) -> None: |
133 | | - # Crc32cCodec appends a 4-byte checksum, so encoded size = input + 4. |
134 | | - spec = _make_array_spec((100,), np.dtype("float64")) |
135 | | - chain = ChunkTransform(codecs=(BytesCodec(), Crc32cCodec()), array_spec=spec) |
136 | | - assert chain.compute_encoded_size(800, spec) == 804 |
137 | | - |
138 | | - def test_compute_encoded_size_with_transpose(self) -> None: |
139 | | - # TransposeCodec reorders axes but doesn't change the byte count. |
140 | | - # Verifies that compute_encoded_size walks through AA codecs correctly. |
141 | | - spec = _make_array_spec((3, 4), np.dtype("float64")) |
142 | | - chain = ChunkTransform(codecs=(TransposeCodec(order=(1, 0)), BytesCodec()), array_spec=spec) |
143 | | - assert chain.compute_encoded_size(96, spec) == 96 |
144 | | - |
145 | | - def test_encode_returns_none_propagation(self) -> None: |
146 | | - # When an AA codec returns None (signaling "this chunk is the fill value, |
147 | | - # don't store it"), encode must short-circuit and return None |
148 | | - # instead of passing None into the next codec. |
149 | | - |
150 | | - class NoneReturningAACodec(TransposeCodec): # type: ignore[misc] |
151 | | - """An ArrayArrayCodec that always returns None from encode.""" |
152 | | - |
153 | | - def _encode_sync(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer | None: |
154 | | - return None |
155 | | - |
156 | | - spec = _make_array_spec((3, 4), np.dtype("float64")) |
157 | | - chain = ChunkTransform( |
158 | | - codecs=(NoneReturningAACodec(order=(1, 0)), BytesCodec()), |
159 | | - array_spec=spec, |
160 | | - ) |
161 | | - arr = np.arange(12, dtype="float64").reshape(3, 4) |
162 | | - nd_buf = _make_nd_buffer(arr) |
163 | | - assert chain.encode(nd_buf) is None |
164 | | - |
165 | | - def test_encode_decode_roundtrip_with_crc32c(self) -> None: |
166 | | - # Round-trip through BytesCodec + Crc32cCodec. Crc32c appends a checksum |
167 | | - # on encode and verifies it on decode, so this tests that the BB codec |
168 | | - # pipeline runs correctly in both directions. |
169 | | - arr = np.arange(100, dtype="float64") |
170 | | - spec = _make_array_spec(arr.shape, arr.dtype) |
171 | | - chain = ChunkTransform(codecs=(BytesCodec(), Crc32cCodec()), array_spec=spec) |
172 | | - nd_buf = _make_nd_buffer(arr) |
173 | | - |
174 | | - encoded = chain.encode(nd_buf) |
175 | | - assert encoded is not None |
176 | | - decoded = chain.decode(encoded) |
177 | | - np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
178 | | - |
179 | | - def test_encode_decode_roundtrip_int32(self) -> None: |
180 | | - # Round-trip with int32 data to verify that the codec chain is not |
181 | | - # float-specific. Exercises a different dtype path through BytesCodec. |
182 | | - arr = np.arange(50, dtype="int32") |
183 | | - spec = _make_array_spec(arr.shape, arr.dtype) |
184 | | - chain = ChunkTransform(codecs=(BytesCodec(), ZstdCodec(level=1)), array_spec=spec) |
185 | | - nd_buf = _make_nd_buffer(arr) |
186 | | - |
187 | | - encoded = chain.encode(nd_buf) |
188 | | - assert encoded is not None |
189 | | - decoded = chain.decode(encoded) |
190 | | - np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
| 50 | +@pytest.mark.parametrize( |
| 51 | + ("shape", "codecs"), |
| 52 | + [ |
| 53 | + ((100,), (BytesCodec(),)), |
| 54 | + ((100,), (BytesCodec(), GzipCodec())), |
| 55 | + ((3, 4), (TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec())), |
| 56 | + ], |
| 57 | + ids=["bytes-only", "with-compression", "full-chain"], |
| 58 | +) |
| 59 | +def test_construction(shape: tuple[int, ...], codecs: tuple[Codec, ...]) -> None: |
| 60 | + """Construction succeeds when all codecs implement SupportsSyncCodec.""" |
| 61 | + spec = _make_array_spec(shape, np.dtype("float64")) |
| 62 | + ChunkTransform(codecs=codecs, array_spec=spec) |
| 63 | + |
| 64 | + |
| 65 | +@pytest.mark.parametrize( |
| 66 | + ("shape", "codecs"), |
| 67 | + [ |
| 68 | + ((100,), (AsyncOnlyCodec(),)), |
| 69 | + ((3, 4), (TransposeCodec(order=(1, 0)), AsyncOnlyCodec())), |
| 70 | + ], |
| 71 | + ids=["async-only", "mixed-sync-and-async"], |
| 72 | +) |
| 73 | +def test_construction_rejects_non_sync(shape: tuple[int, ...], codecs: tuple[Codec, ...]) -> None: |
| 74 | + """Construction raises TypeError when any codec lacks SupportsSyncCodec.""" |
| 75 | + spec = _make_array_spec(shape, np.dtype("float64")) |
| 76 | + with pytest.raises(TypeError, match="AsyncOnlyCodec"): |
| 77 | + ChunkTransform(codecs=codecs, array_spec=spec) |
| 78 | + |
| 79 | + |
| 80 | +@pytest.mark.parametrize( |
| 81 | + ("arr", "codecs"), |
| 82 | + [ |
| 83 | + (np.arange(100, dtype="float64"), (BytesCodec(),)), |
| 84 | + (np.arange(100, dtype="float64"), (BytesCodec(), GzipCodec(level=1))), |
| 85 | + ( |
| 86 | + np.arange(12, dtype="float64").reshape(3, 4), |
| 87 | + (TransposeCodec(order=(1, 0)), BytesCodec(), ZstdCodec(level=1)), |
| 88 | + ), |
| 89 | + (np.arange(100, dtype="float64"), (BytesCodec(), Crc32cCodec())), |
| 90 | + (np.arange(50, dtype="int32"), (BytesCodec(), ZstdCodec(level=1))), |
| 91 | + ], |
| 92 | + ids=["bytes-only", "gzip", "transpose+zstd", "crc32c", "int32"], |
| 93 | +) |
| 94 | +def test_encode_decode_roundtrip( |
| 95 | + arr: np.ndarray[Any, np.dtype[Any]], codecs: tuple[Codec, ...] |
| 96 | +) -> None: |
| 97 | + """Data survives a full encode/decode cycle.""" |
| 98 | + spec = _make_array_spec(arr.shape, arr.dtype) |
| 99 | + chain = ChunkTransform(codecs=codecs, array_spec=spec) |
| 100 | + nd_buf = _make_nd_buffer(arr) |
| 101 | + |
| 102 | + encoded = chain.encode(nd_buf) |
| 103 | + assert encoded is not None |
| 104 | + decoded = chain.decode(encoded) |
| 105 | + np.testing.assert_array_equal(arr, decoded.as_numpy_array()) |
| 106 | + |
| 107 | + |
| 108 | +@pytest.mark.parametrize( |
| 109 | + ("shape", "codecs", "input_size", "expected_size"), |
| 110 | + [ |
| 111 | + ((100,), (BytesCodec(),), 800, 800), |
| 112 | + ((100,), (BytesCodec(), Crc32cCodec()), 800, 804), |
| 113 | + ((3, 4), (TransposeCodec(order=(1, 0)), BytesCodec()), 96, 96), |
| 114 | + ], |
| 115 | + ids=["bytes-only", "crc32c", "transpose"], |
| 116 | +) |
| 117 | +def test_compute_encoded_size( |
| 118 | + shape: tuple[int, ...], |
| 119 | + codecs: tuple[Codec, ...], |
| 120 | + input_size: int, |
| 121 | + expected_size: int, |
| 122 | +) -> None: |
| 123 | + """compute_encoded_size returns the expected encoded byte length for the codec chain.""" |
| 124 | + spec = _make_array_spec(shape, np.dtype("float64")) |
| 125 | + chain = ChunkTransform(codecs=codecs, array_spec=spec) |
| 126 | + assert chain.compute_encoded_size(input_size, spec) == expected_size |
| 127 | + |
| 128 | + |
| 129 | +def test_encode_returns_none_propagation() -> None: |
| 130 | + """When an ArrayArrayCodec (AA) returns None, encode short-circuits and returns None.""" |
| 131 | + |
| 132 | + class NoneReturningAACodec(TransposeCodec): # type: ignore[misc] |
| 133 | + """An ArrayArrayCodec that always returns None from encode.""" |
| 134 | + |
| 135 | + def _encode_sync(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer | None: |
| 136 | + return None |
| 137 | + |
| 138 | + spec = _make_array_spec((3, 4), np.dtype("float64")) |
| 139 | + chain = ChunkTransform( |
| 140 | + codecs=(NoneReturningAACodec(order=(1, 0)), BytesCodec()), |
| 141 | + array_spec=spec, |
| 142 | + ) |
| 143 | + arr = np.arange(12, dtype="float64").reshape(3, 4) |
| 144 | + nd_buf = _make_nd_buffer(arr) |
| 145 | + assert chain.encode(nd_buf) is None |
0 commit comments