@@ -529,7 +529,8 @@ pub(crate) fn collect_stats(
529529#[ cfg( test) ]
530530mod tests {
531531 use super :: * ;
532- use crate :: arrow:: array:: { Array , Int64Array , StringArray } ;
532+ use crate :: arrow:: array:: { Array , Int32Array , Int64Array , StringArray } ;
533+ use crate :: arrow:: buffer:: NullBuffer ;
533534 use crate :: arrow:: datatypes:: { Fields , Schema } ;
534535 use crate :: expressions:: column_name;
535536
@@ -1165,4 +1166,97 @@ mod tests {
11651166 assert ! ( max_values. column_by_name( "id" ) . is_some( ) ) ;
11661167 assert ! ( max_values. column_by_name( "list_col" ) . is_none( ) ) ;
11671168 }
1169+
1170+ #[ test]
1171+ fn test_collect_stats_struct_with_nulls_at_struct_level ( ) {
1172+ // Schema: { my_struct: { a: int32, b: int32 (nullable) } }
1173+ // Test both struct-level nulls and field-level nulls
1174+ let child_fields = Fields :: from ( vec ! [
1175+ Field :: new( "a" , DataType :: Int32 , false ) ,
1176+ Field :: new( "b" , DataType :: Int32 , true ) , // Now nullable
1177+ ] ) ;
1178+
1179+ let a_values = Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ;
1180+ // b has field-level nulls at rows 0 and 2
1181+ let b_values = Int32Array :: from ( vec ! [ None , Some ( 20 ) , None , Some ( 40 ) ] ) ;
1182+
1183+ // Nulls at struct level: [false, true, true, false]
1184+ // This means rows 1 and 2 have null structs (entire struct is null)
1185+ let nulls = NullBuffer :: from ( vec ! [ false , true , true , false ] ) ;
1186+
1187+ let struct_array = StructArray :: new (
1188+ child_fields. clone ( ) ,
1189+ vec ! [ Arc :: new( a_values) , Arc :: new( b_values) ] ,
1190+ Some ( nulls) ,
1191+ ) ;
1192+
1193+ let schema = Schema :: new ( vec ! [ Field :: new(
1194+ "my_struct" ,
1195+ DataType :: Struct ( child_fields) ,
1196+ true ,
1197+ ) ] ) ;
1198+
1199+ let batch = RecordBatch :: try_new ( Arc :: new ( schema) , vec ! [ Arc :: new( struct_array) ] ) . unwrap ( ) ;
1200+
1201+ let stats = collect_stats ( & batch, & [ column_name ! ( "my_struct" ) ] ) . unwrap ( ) ;
1202+
1203+ // Check null counts for child fields
1204+ // Is it expected? column 'a' has no field-level nulls, and rows 0 and 2 are null at struct level
1205+ assert_eq ! (
1206+ get_stat:: <Int64Type >( & stats, "nullCount" , "my_struct" , "a" ) ,
1207+ 0
1208+ ) ;
1209+ assert_eq ! (
1210+ get_stat:: <Int64Type >( & stats, "nullCount" , "my_struct" , "b" ) ,
1211+ 2
1212+ ) ;
1213+
1214+ // Check minValues
1215+ assert_eq ! (
1216+ get_stat:: <Int32Type >( & stats, "minValues" , "my_struct" , "a" ) ,
1217+ 1
1218+ ) ;
1219+ // Is it expected? It's marked with NULL for 20 at higher level. Shouldn't be 40?
1220+ assert_eq ! (
1221+ get_stat:: <Int32Type >( & stats, "minValues" , "my_struct" , "b" ) ,
1222+ 20
1223+ ) ;
1224+
1225+ // Check maxValues
1226+ assert_eq ! (
1227+ get_stat:: <Int32Type >( & stats, "maxValues" , "my_struct" , "a" ) ,
1228+ 4
1229+ ) ;
1230+ assert_eq ! (
1231+ get_stat:: <Int32Type >( & stats, "maxValues" , "my_struct" , "b" ) ,
1232+ 40
1233+ ) ;
1234+ }
1235+
1236+ // Generic helper to extract and downcast nested columns from stats
1237+ fn get_stat < T > (
1238+ stats : & StructArray ,
1239+ stat_name : & str ,
1240+ struct_name : & str ,
1241+ field_name : & str ,
1242+ ) -> T :: Native
1243+ where
1244+ T : crate :: arrow:: datatypes:: ArrowPrimitiveType ,
1245+ {
1246+ stats
1247+ . column_by_name ( stat_name)
1248+ . unwrap ( )
1249+ . as_any ( )
1250+ . downcast_ref :: < StructArray > ( )
1251+ . unwrap ( )
1252+ . column_by_name ( struct_name)
1253+ . unwrap ( )
1254+ . as_any ( )
1255+ . downcast_ref :: < StructArray > ( )
1256+ . unwrap ( )
1257+ . column_by_name ( field_name)
1258+ . unwrap ( )
1259+ . as_primitive :: < T > ( )
1260+ . value ( 0 )
1261+ }
11681262}
0 commit comments