@@ -21,7 +21,7 @@ use zarrs::metadata_ext::data_type::NumpyTimeUnit;
2121use zarrs_icechunk:: AsyncIcechunkStore ;
2222
2323mod sentinel2_geometry;
24- use sentinel2_geometry:: generate_wkb_polygons;
24+ use sentinel2_geometry:: { generate_bbox_columns , generate_wkb_polygons} ;
2525
2626#[ global_allocator]
2727static ALLOC : dhat:: Alloc = dhat:: Alloc ;
@@ -32,9 +32,10 @@ const CHUNK_SIZE: u64 = 1_000_000; // 1M elements per chunk
3232
/// Selects which groups of arrays `generate_icechunk_store` writes into the store.
///
/// Each variant is tested with `matches!` guards inside `generate_icechunk_store`;
/// a section of that function runs only when its guard lists the chosen variant.
3333#[ derive( Debug , Clone , Copy ) ]
3434pub enum ArraysToGenerate {
35- DatetimeOnly ,
36- BboxOnly ,
37- Both ,
// Enables the section guarded by `Datetime | All` (the one that builds `date_blosc_codec`).
35+ Datetime ,
// The only variant listed in the guard around `generate_wkb_polygons`.
36+ Bbox ,
// Enables the section guarded by `BboxColumns | All`, which calls `generate_bbox_columns`
// and stores `/meta/xmin`, `/meta/xmax`, `/meta/ymin` and `/meta/ymax`.
37+ BboxColumns ,
// Listed in the `Datetime` and `BboxColumns` guards only.
// NOTE(review): `All` is NOT listed in the guard around `generate_wkb_polygons`, so despite
// its name it does not produce the WKB bbox data (the removed `Both` variant did) —
// confirm this is intentional.
38+ All ,
3839}
3940
4041// This generates:
@@ -84,10 +85,7 @@ fn generate_icechunk_store(
8485 let array_shape = vec ! [ date_data. len( ) as u64 ] ;
8586 let chunk_shape = vec ! [ CHUNK_SIZE ] ;
8687
87- if matches ! (
88- arrays,
89- ArraysToGenerate :: DatetimeOnly | ArraysToGenerate :: Both
90- ) {
88+ if matches ! ( arrays, ArraysToGenerate :: Datetime | ArraysToGenerate :: All ) {
9189 let date_blosc_codec: Arc < dyn zarrs:: array:: codec:: BytesToBytesCodecTraits > = Arc :: new (
9290 BloscCodec :: new (
9391 BloscCompressor :: Zstd ,
@@ -119,7 +117,7 @@ fn generate_icechunk_store(
119117 ) ) ?;
120118 }
121119
122- if matches ! ( arrays, ArraysToGenerate :: BboxOnly | ArraysToGenerate :: Both ) {
120+ if matches ! ( arrays, ArraysToGenerate :: Bbox ) {
123121 let bbox_data = generate_wkb_polygons ( array_shape[ 0 ] as usize ) ;
124122
125123 let bbox_blosc_codec: Arc < dyn zarrs:: array:: codec:: BytesToBytesCodecTraits > = Arc :: new (
@@ -150,6 +148,88 @@ fn generate_icechunk_store(
150148 ) ) ?;
151149 }
152150
151+ if matches ! (
152+ arrays,
153+ ArraysToGenerate :: BboxColumns | ArraysToGenerate :: All
154+ ) {
155+ let ( xmin, xmax, ymin, ymax) = generate_bbox_columns ( array_shape[ 0 ] as usize ) ;
156+
157+ let f64_blosc_codec: Arc < dyn zarrs:: array:: codec:: BytesToBytesCodecTraits > = Arc :: new (
158+ BloscCodec :: new (
159+ BloscCompressor :: Zstd ,
160+ BloscCompressionLevel :: try_from ( 9 ) . unwrap ( ) ,
161+ None ,
162+ BloscShuffleMode :: NoShuffle ,
163+ None ,
164+ )
165+ . map_err ( |e| Box :: new ( e) as Box < dyn std:: error:: Error > ) ?,
166+ ) ;
167+
168+ // Create and store xmin array
169+ let xmin_array = ArrayBuilder :: new (
170+ array_shape. clone ( ) ,
171+ chunk_shape. clone ( ) ,
172+ DataType :: Float64 ,
173+ FillValue :: from ( 0.0f64 ) ,
174+ )
175+ . bytes_to_bytes_codecs ( vec ! [ f64_blosc_codec. clone( ) ] )
176+ . build ( store. clone ( ) , "/meta/xmin" ) ?;
177+
178+ rt. block_on ( xmin_array. async_store_metadata ( ) ) ?;
179+ rt. block_on ( xmin_array. async_store_array_subset_elements (
180+ & ArraySubset :: new_with_shape ( array_shape. clone ( ) ) ,
181+ & xmin,
182+ ) ) ?;
183+
184+ // Create and store xmax array
185+ let xmax_array = ArrayBuilder :: new (
186+ array_shape. clone ( ) ,
187+ chunk_shape. clone ( ) ,
188+ DataType :: Float64 ,
189+ FillValue :: from ( 0.0f64 ) ,
190+ )
191+ . bytes_to_bytes_codecs ( vec ! [ f64_blosc_codec. clone( ) ] )
192+ . build ( store. clone ( ) , "/meta/xmax" ) ?;
193+
194+ rt. block_on ( xmax_array. async_store_metadata ( ) ) ?;
195+ rt. block_on ( xmax_array. async_store_array_subset_elements (
196+ & ArraySubset :: new_with_shape ( array_shape. clone ( ) ) ,
197+ & xmax,
198+ ) ) ?;
199+
200+ // Create and store ymin array
201+ let ymin_array = ArrayBuilder :: new (
202+ array_shape. clone ( ) ,
203+ chunk_shape. clone ( ) ,
204+ DataType :: Float64 ,
205+ FillValue :: from ( 0.0f64 ) ,
206+ )
207+ . bytes_to_bytes_codecs ( vec ! [ f64_blosc_codec. clone( ) ] )
208+ . build ( store. clone ( ) , "/meta/ymin" ) ?;
209+
210+ rt. block_on ( ymin_array. async_store_metadata ( ) ) ?;
211+ rt. block_on ( ymin_array. async_store_array_subset_elements (
212+ & ArraySubset :: new_with_shape ( array_shape. clone ( ) ) ,
213+ & ymin,
214+ ) ) ?;
215+
216+ // Create and store ymax array
217+ let ymax_array = ArrayBuilder :: new (
218+ array_shape. clone ( ) ,
219+ chunk_shape. clone ( ) ,
220+ DataType :: Float64 ,
221+ FillValue :: from ( 0.0f64 ) ,
222+ )
223+ . bytes_to_bytes_codecs ( vec ! [ f64_blosc_codec] )
224+ . build ( store. clone ( ) , "/meta/ymax" ) ?;
225+
226+ rt. block_on ( ymax_array. async_store_metadata ( ) ) ?;
227+ rt. block_on ( ymax_array. async_store_array_subset_elements (
228+ & ArraySubset :: new_with_shape ( array_shape. clone ( ) ) ,
229+ & ymax,
230+ ) ) ?;
231+ }
232+
153233 rt. block_on ( async {
154234 store
155235 . session ( )
@@ -176,6 +256,21 @@ pub fn generate_icechunk_store_local(
176256 Ok ( ( session, temp_dir) )
177257}
178258
/// Creates S3-backed object storage and populates it through `generate_icechunk_store`.
///
/// Blocks on `rt` while `ObjectStorage::new_s3` is constructed for `bucket`, with `prefix`
/// passed as `Some(prefix)` and the two remaining arguments left as `None`. The resulting
/// storage is wrapped in an `Arc` and handed to `generate_icechunk_store` together with
/// `ArraysToGenerate::All`.
///
/// # Errors
///
/// Returns the error from `ObjectStorage::new_s3` or from `generate_icechunk_store`,
/// propagated with `?`.
259+ pub fn generate_icechunk_store_s3 (
260+ rt : & Runtime ,
261+ bucket : String ,
262+ prefix : String ,
263+ ) -> Result < Session , Box < dyn std:: error:: Error > > {
264+ let storage = rt. block_on ( ObjectStorage :: new_s3 (
265+ bucket,
266+ Some ( prefix) ,
267+ None , // credentials - uses default AWS credential chain
268+ None , // config - uses default S3 options
269+ ) ) ?;
// NOTE(review): `All` is not matched by the guard around `generate_wkb_polygons` in
// `generate_icechunk_store` — confirm the S3 store is not expected to contain that data.
270+ let session = generate_icechunk_store ( rt, Arc :: new ( storage) , ArraysToGenerate :: All ) ?;
271+ Ok ( session)
272+ }
273+
179274pub fn run_bench (
180275 c : & mut Criterion ,
181276 rt : & Runtime ,
@@ -194,7 +289,10 @@ pub fn run_bench(
194289 group. bench_function ( bench_name, |b| {
195290 b. to_async ( rt) . iter ( || async {
196291 let df = ctx. sql ( black_box ( sql) ) . await . unwrap ( ) ;
197- df. collect ( ) . await . unwrap ( )
292+ let batches = df. collect ( ) . await . unwrap ( ) ;
293+ // let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum();
294+ // println!("Query returned {} rows", row_count);
295+ batches
198296 } ) ;
199297 } ) ;
200298
@@ -218,11 +316,16 @@ pub fn run_memory_profile(rt: &Runtime, ctx: &SessionContext, sql: &str) {
218316
/// SQL text that selects the `date` column from `zarr_data`, keeping rows whose `date` lies
/// strictly between two literal dates (`>` lower bound, `<` upper bound, both exclusive).
219317pub static DATETIME_SQL : & str = "\
220318 SELECT date FROM zarr_data WHERE \
221- date < CAST('2025-10-11 ' AS DATE) \
222- and date > CAST('2025-09-01 ' AS DATE)\
319+ date < CAST('2025-01-01 ' AS DATE) \
320+ and date > CAST('2024-12-25 ' AS DATE)\
223321 ";
224322
/// SQL text that selects the `bbox` column from `zarr_data`, keeping rows for which
/// `ST_Intersects` holds against a literal polygon whose corners span x in 0..5 and
/// y in -7..7.
225323pub static BBOX_SQL : & str = "\
226324 SELECT bbox FROM zarr_data \
227325 WHERE ST_Intersects(bbox, ST_GeomFromText('POLYGON((0 -7, 0 7, 5 7, 5 -7, 0 -7))')) \
228326 ";
327+
/// SQL text that selects `xmin`, `xmax`, `ymin` and `ymax` from `zarr_data`, keeping rows
/// whose column ranges overlap the box x in 0..5, y in -7..7 (bounds inclusive). These are
/// the same extents as the polygon literal in `BBOX_SQL`.
328+ pub static BBOX_COLUMNS_SQL : & str = "\
329+ SELECT xmin, xmax, ymin, ymax FROM zarr_data \
330+ WHERE xmin <= 5 AND xmax >= 0 AND ymin <= 7 AND ymax >= -7 \
331+ ";
0 commit comments