@@ -50,3 +50,45 @@ def test_slice_indexing(
5050
5151 data [:] = 1
5252 benchmark (getitem , data , indexer )
53+
54+
# Shard shapes used by the Morton-order benchmark below. Both have
# power-of-2 edge lengths, so with chunk edge 2 every shard holds a
# power-of-2 number of chunks along each axis. Morton order is used
# internally by the sharding codec when iterating over chunks.
morton_shards = (
    (16, 16, 16),  # chunk edge 2 -> 8 chunks per axis, 8**3 = 512 chunks per shard
    (32, 32, 32),  # chunk edge 2 -> 16 chunks per axis, 16**3 = 4096 chunks per shard
)
61+
62+
@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
@pytest.mark.parametrize("shards", morton_shards, ids=str)
def test_sharded_morton_indexing(
    store: Store,
    shards: tuple[int, ...],
    benchmark: BenchmarkFixture,
) -> None:
    """Benchmark reading one whole shard that is made of many tiny chunks.

    The array is laid out with two shards along every axis and a chunk edge
    of 2, so a shard of shape (32, 32, 32) contains 16x16x16 = 4096 chunks.
    This exercises the Morton order iteration path in the sharding codec,
    which benefits from the hypercube and vectorization optimizations.

    Parameters
    ----------
    store
        Store fixture the array is created in (parametrized indirectly).
    shards
        Shard shape; one entry per array dimension.
    benchmark
        Benchmark fixture; it is called with the read operation to time.
    """
    # Derive the rank from ``shards`` rather than assuming 3-D, so that any
    # shard shape added to the parametrization is laid out consistently.
    ndim = len(shards)
    chunk_edge = 2  # Small chunks to maximize the number of chunks per shard
    chunks = (chunk_edge,) * ndim
    shape = tuple(2 * extent for extent in shards)  # 2 shards per dimension

    data = create_array(
        store=store,
        shape=shape,
        dtype="uint8",
        chunks=chunks,
        shards=shards,
        compressors=None,
        filters=None,
        fill_value=0,
    )

    # Populate the array before timing so the benchmark only measures the read.
    data[:] = 1

    # Select exactly the first shard along every axis. Each axis uses its own
    # shard extent, so the selection stays one full shard for non-cubic shapes.
    indexer = tuple(slice(extent) for extent in shards)
    benchmark(getitem, data, indexer)
0 commit comments