@@ -71,7 +71,11 @@ def test_sharded_morton_indexing(
7171
7272 This benchmark exercises the Morton order iteration path in the sharding
7373 codec, which benefits from the hypercube and vectorization optimizations.
74+ The Morton order cache is cleared before each iteration to measure the
75+ full computation cost.
7476 """
77+ from zarr .core .indexing import _morton_order
78+
7579 # Create array where each shard contains many small chunks
7680 # e.g., shards=(32,32,32) with chunks=(2,2,2) means 16x16x16 = 4096 chunks per shard
7781 shape = tuple (s * 2 for s in shards ) # 2 shards per dimension
@@ -91,4 +95,56 @@ def test_sharded_morton_indexing(
9195 data [:] = 1
9296 # Read a sub-shard region to exercise Morton order iteration
9397 indexer = (slice (shards [0 ]),) * 3
94- benchmark (getitem , data , indexer )
98+
99+ def read_with_cache_clear () -> None :
100+ _morton_order .cache_clear ()
101+ getitem (data , indexer )
102+
103+ benchmark (read_with_cache_clear )
104+
105+
# Shard shapes used to stress Morton ordering: with 1x1x1 chunks each
# (32, 32, 32) shard holds 32 * 32 * 32 = 32768 chunks, making the Morton
# order computation a visible fraction of the benchmark time.
large_morton_shards = ((32, 32, 32),)
110+
111+
@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
@pytest.mark.parametrize("shards", large_morton_shards, ids=str)
def test_sharded_morton_indexing_large(
    store: Store,
    shards: tuple[int, ...],
    benchmark: BenchmarkFixture,
) -> None:
    """Benchmark sharded array indexing with large chunks_per_shard.

    Uses 1x1x1 chunks to maximize chunks_per_shard (e.g. 32^3 = 32768),
    making the Morton order computation a more significant portion of total
    time. The Morton order cache is cleared before each benchmark round so
    every iteration measures the full computation rather than a cache hit.
    """
    from zarr.core.indexing import _morton_order

    # Derive dimensionality from the parametrized shard shape instead of
    # hard-coding 3, so additional entries in large_morton_shards with a
    # different rank are handled correctly.
    ndim = len(shards)

    # 1x1x1 chunks means chunks_per_shard equals the shard shape.
    shape = tuple(s * 2 for s in shards)  # 2 shards per dimension
    chunks = (1,) * ndim

    data = create_array(
        store=store,
        shape=shape,
        dtype="uint8",
        chunks=chunks,
        shards=shards,
        compressors=None,
        filters=None,
        fill_value=0,
    )

    data[:] = 1
    # Read one full shard to exercise the Morton order iteration path.
    indexer = (slice(shards[0]),) * ndim

    def read_with_cache_clear() -> None:
        # Force _morton_order to recompute on every benchmark round.
        _morton_order.cache_clear()
        getitem(data, indexer)

    benchmark(read_with_cache_clear)
0 commit comments