@@ -404,10 +404,93 @@ fn nested_benchmarks(c: &mut Criterion) {
404404 drop ( temp_file) ;
405405}
406406
407+ fn flat_schema ( ) -> SchemaRef {
408+ Arc :: new ( Schema :: new ( vec ! [
409+ Field :: new( "id" , DataType :: Int32 , false ) ,
410+ Field :: new( "large_string" , DataType :: Utf8 , false ) ,
411+ Field :: new( "small_int" , DataType :: Int32 , false ) ,
412+ ] ) )
413+ }
414+
415+ fn flat_batch ( batch_id : usize ) -> RecordBatch {
416+ let schema = flat_schema ( ) ;
417+ let len = WRITE_RECORD_BATCH_SIZE ;
418+
419+ let base_id = ( batch_id * len) as i32 ;
420+ let id_values: Vec < i32 > = ( 0 ..len) . map ( |i| base_id + i as i32 ) . collect ( ) ;
421+ let id_array = Arc :: new ( Int32Array :: from ( id_values. clone ( ) ) ) ;
422+ let small_int_array = Arc :: new ( Int32Array :: from ( id_values) ) ;
423+
424+ let large_string: String = "x" . repeat ( LARGE_STRING_LEN ) ;
425+ let mut string_builder = StringBuilder :: new ( ) ;
426+ for _ in 0 ..len {
427+ string_builder. append_value ( & large_string) ;
428+ }
429+ let large_string_array = Arc :: new ( string_builder. finish ( ) ) ;
430+
431+ RecordBatch :: try_new (
432+ schema,
433+ vec ! [ id_array, large_string_array as ArrayRef , small_int_array] ,
434+ )
435+ . unwrap ( )
436+ }
437+
438+ /// Compare selecting a small field from a flat (top-level) schema vs from
439+ /// inside a struct. Both files contain the same logical data — the only
440+ /// difference is whether `small_int` lives at the top level or nested inside
441+ /// a struct column.
442+ fn flat_vs_struct_benchmarks ( c : & mut Criterion ) {
443+ let flat_file = generate_file ( flat_schema ( ) , flat_batch, "flat" ) ;
444+ let flat_path = flat_file. path ( ) . display ( ) . to_string ( ) ;
445+ assert ! ( Path :: new( & flat_path) . exists( ) , "path not found" ) ;
446+
447+ let struct_file = generate_file ( narrow_schema ( ) , narrow_batch, "narrow_struct_cmp" ) ;
448+ let struct_path = struct_file. path ( ) . display ( ) . to_string ( ) ;
449+ assert ! ( Path :: new( & struct_path) . exists( ) , "path not found" ) ;
450+
451+ let rt = Runtime :: new ( ) . unwrap ( ) ;
452+ let flat_ctx = create_context ( & rt, & flat_path, "t" ) ;
453+ let struct_ctx = create_context ( & rt, & struct_path, "t" ) ;
454+
455+ let mut group = c. benchmark_group ( "flat_vs_struct" ) ;
456+ group. sample_size ( 10 ) ;
457+ group. warm_up_time ( Duration :: from_secs ( 1 ) ) ;
458+ group. measurement_time ( Duration :: from_secs ( 2 ) ) ;
459+
460+ // small int: top-level vs struct field
461+ group. bench_function ( "flat_select_small_int" , |b| {
462+ b. iter ( || query ( & flat_ctx, & rt, "SELECT small_int FROM t" ) )
463+ } ) ;
464+ group. bench_function ( "struct_select_small_int" , |b| {
465+ b. iter ( || query ( & struct_ctx, & rt, "SELECT s['small_int'] FROM t" ) )
466+ } ) ;
467+
468+ // large string: top-level vs struct field
469+ group. bench_function ( "flat_select_large_string" , |b| {
470+ b. iter ( || query ( & flat_ctx, & rt, "SELECT large_string FROM t" ) )
471+ } ) ;
472+ group. bench_function ( "struct_select_large_string" , |b| {
473+ b. iter ( || query ( & struct_ctx, & rt, "SELECT s['large_string'] FROM t" ) )
474+ } ) ;
475+
476+ // aggregation: SUM of small int
477+ group. bench_function ( "flat_sum_small_int" , |b| {
478+ b. iter ( || query ( & flat_ctx, & rt, "SELECT SUM(small_int) FROM t" ) )
479+ } ) ;
480+ group. bench_function ( "struct_sum_small_int" , |b| {
481+ b. iter ( || query ( & struct_ctx, & rt, "SELECT SUM(s['small_int']) FROM t" ) )
482+ } ) ;
483+
484+ group. finish ( ) ;
485+ drop ( flat_file) ;
486+ drop ( struct_file) ;
487+ }
488+
// Criterion registration: the benchmark functions listed here form the
// `benches` group, which `criterion_main!` then receives.
407489criterion_group ! (
408490 benches,
409491 narrow_benchmarks,
410492 wide_benchmarks,
411- nested_benchmarks
493+ nested_benchmarks,
494+ flat_vs_struct_benchmarks,
412495) ;
413496criterion_main ! ( benches) ;
0 commit comments