@@ -219,3 +219,49 @@ def compute_morton_order() -> None:
219219 list (morton_order_iter (shape ))
220220
221221 benchmark (compute_morton_order )
222+
223+
@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
@pytest.mark.parametrize("shards", large_morton_shards, ids=str)
def test_sharded_morton_write_single_chunk(
    store: Store,
    shards: tuple[int, ...],
    benchmark: BenchmarkFixture,
) -> None:
    """Benchmark writing a single chunk to a large shard.

    This is the clearest end-to-end demonstration of Morton order optimization.
    Writing a single chunk to a shard with 32^3 = 32768 chunks requires
    computing the full Morton order, but minimizes I/O overhead.

    Expected improvement: ~160ms (matching Morton computation speedup of ~178ms).
    The Morton order cache is cleared before each iteration.
    """
    import numpy as np

    from zarr.core.indexing import _morton_order

    # Derive dimensionality from the parametrized shard shape instead of
    # hard-coding 3 dims, so non-3-D entries in large_morton_shards also work.
    ndim = len(shards)

    # Two shards per dimension; with 1x1x1 chunks, chunks_per_shard == shards.
    shape = tuple(s * 2 for s in shards)
    chunks = (1,) * ndim

    data = create_array(
        store=store,
        shape=shape,
        dtype="uint8",
        chunks=chunks,
        shards=shards,
        compressors=None,
        filters=None,
        fill_value=0,
    )

    # Write data covering exactly one chunk (the first along every axis).
    write_data = np.ones((1,) * ndim, dtype="uint8")
    indexer = (slice(1),) * ndim

    def write_with_cache_clear() -> None:
        # Clear the memoized Morton order so every benchmark iteration pays
        # the full recomputation cost rather than hitting a warm cache.
        _morton_order.cache_clear()
        data[indexer] = write_data

    benchmark(write_with_cache_clear)
0 commit comments