@@ -529,9 +529,11 @@ pub(crate) fn collect_stats(
529529#[ cfg( test) ]
530530mod tests {
531531 use super :: * ;
532- use crate :: arrow:: array:: { Array , Int64Array , StringArray } ;
532+ use crate :: arrow:: array:: { Array , Int32Array , Int64Array , StringArray } ;
533533 use crate :: arrow:: datatypes:: { Fields , Schema } ;
534534 use crate :: expressions:: column_name;
535+ use crate :: arrow:: buffer:: NullBuffer ;
536+
535537
536538 #[ test]
537539 fn test_collect_stats_single_batch ( ) {
@@ -1165,4 +1167,74 @@ mod tests {
11651167 assert ! ( max_values. column_by_name( "id" ) . is_some( ) ) ;
11661168 assert ! ( max_values. column_by_name( "list_col" ) . is_none( ) ) ;
11671169 }
1170+
1171+ #[ test]
1172+ fn test_collect_stats_struct_with_nulls_at_struct_level ( ) {
1173+ // Schema: { my_struct: { a: int32, b: int32 (nullable) } }
1174+ // Test both struct-level nulls and field-level nulls
1175+ let child_fields = Fields :: from ( vec ! [
1176+ Field :: new( "a" , DataType :: Int32 , false ) ,
1177+ Field :: new( "b" , DataType :: Int32 , true ) , // Now nullable
1178+ ] ) ;
1179+
1180+ let a_values = Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ;
1181+ // b has field-level nulls at rows 0 and 2
1182+ let b_values = Int32Array :: from ( vec ! [ None , Some ( 20 ) , None , Some ( 40 ) ] ) ;
1183+
1184+ // Nulls at struct level: [false, true, true, false]
1185+ // This means rows 1 and 2 have null structs (entire struct is null)
1186+ let nulls = NullBuffer :: from ( vec ! [ false , true , true , false ] ) ;
1187+
1188+ let struct_array = StructArray :: new (
1189+ child_fields. clone ( ) ,
1190+ vec ! [ Arc :: new( a_values) , Arc :: new( b_values) ] ,
1191+ Some ( nulls) ,
1192+ ) ;
1193+
1194+ let schema = Schema :: new ( vec ! [ Field :: new(
1195+ "my_struct" ,
1196+ DataType :: Struct ( child_fields) ,
1197+ true ,
1198+ ) ] ) ;
1199+
1200+ let batch = RecordBatch :: try_new ( Arc :: new ( schema) , vec ! [ Arc :: new( struct_array) ] ) . unwrap ( ) ;
1201+
1202+ let stats = collect_stats ( & batch, & [ column_name ! ( "my_struct" ) ] ) . unwrap ( ) ;
1203+
1204+ // Check null counts for child fields
1205+ // Is it expected? column 'a' has no field-level nulls, and rows 0 and 2 are null at struct level
1206+ assert_eq ! ( get_stat:: <Int64Type >( & stats, "nullCount" , "my_struct" , "a" ) , 0 ) ;
1207+ assert_eq ! ( get_stat:: <Int64Type >( & stats, "nullCount" , "my_struct" , "b" ) , 2 ) ;
1208+
1209+ // Check minValues
1210+ assert_eq ! ( get_stat:: <Int32Type >( & stats, "minValues" , "my_struct" , "a" ) , 1 ) ;
1211+ // Is it expected? It's marked with NULL for 20 at higher level. Shouldn't be 40?
1212+ assert_eq ! ( get_stat:: <Int32Type >( & stats, "minValues" , "my_struct" , "b" ) , 20 ) ;
1213+
1214+ // Check maxValues
1215+ assert_eq ! ( get_stat:: <Int32Type >( & stats, "maxValues" , "my_struct" , "a" ) , 4 ) ;
1216+ assert_eq ! ( get_stat:: <Int32Type >( & stats, "maxValues" , "my_struct" , "b" ) , 40 ) ;
1217+ }
1218+
1219+ // Generic helper to extract and downcast nested columns from stats
1220+ fn get_stat < T > ( stats : & StructArray , stat_name : & str , struct_name : & str , field_name : & str ) -> T :: Native
1221+ where
1222+ T : crate :: arrow:: datatypes:: ArrowPrimitiveType ,
1223+ {
1224+ stats
1225+ . column_by_name ( stat_name)
1226+ . unwrap ( )
1227+ . as_any ( )
1228+ . downcast_ref :: < StructArray > ( )
1229+ . unwrap ( )
1230+ . column_by_name ( struct_name)
1231+ . unwrap ( )
1232+ . as_any ( )
1233+ . downcast_ref :: < StructArray > ( )
1234+ . unwrap ( )
1235+ . column_by_name ( field_name)
1236+ . unwrap ( )
1237+ . as_primitive :: < T > ( )
1238+ . value ( 0 )
1239+ }
11681240}
0 commit comments