Skip to content

Commit faef9e3

Browse files
committed
test
1 parent 8b4ac09 commit faef9e3

1 file changed

Lines changed: 95 additions & 1 deletion

File tree

kernel/src/engine/default/stats.rs

Lines changed: 95 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,8 @@ pub(crate) fn collect_stats(
529529
#[cfg(test)]
530530
mod tests {
531531
use super::*;
532-
use crate::arrow::array::{Array, Int64Array, StringArray};
532+
use crate::arrow::array::{Array, Int32Array, Int64Array, StringArray};
533+
use crate::arrow::buffer::NullBuffer;
533534
use crate::arrow::datatypes::{Fields, Schema};
534535
use crate::expressions::column_name;
535536

@@ -1165,4 +1166,97 @@ mod tests {
11651166
assert!(max_values.column_by_name("id").is_some());
11661167
assert!(max_values.column_by_name("list_col").is_none());
11671168
}
1169+
1170+
// Pins the nullCount / minValues / maxValues that this test expects `collect_stats` to
// report for the children of a nullable struct column when some parent struct rows are
// themselves null.
#[test]
1171+
fn test_collect_stats_struct_with_nulls_at_struct_level() {
1172+
// Schema: { my_struct: { a: int32 (non-nullable), b: int32 (nullable) } }
1173+
// Exercises both struct-level nulls and field-level nulls
1174+
let child_fields = Fields::from(vec![
1175+
Field::new("a", DataType::Int32, false),
1176+
Field::new("b", DataType::Int32, true), // nullable, unlike "a"
1177+
]);
1178+
1179+
let a_values = Int32Array::from(vec![1, 2, 3, 4]);
1180+
// b has field-level nulls at rows 0 and 2
1181+
let b_values = Int32Array::from(vec![None, Some(20), None, Some(40)]);
1182+
1183+
// Struct-level validity mask: [false, true, true, false]
1184+
// NOTE(review): arrow's NullBuffer encodes validity (true = non-null), so this marks rows 0 and 3 as null structs and leaves rows 1 and 2 valid
1185+
let nulls = NullBuffer::from(vec![false, true, true, false]);
1186+
1187+
let struct_array = StructArray::new(
1188+
child_fields.clone(),
1189+
vec![Arc::new(a_values), Arc::new(b_values)],
1190+
Some(nulls),
1191+
);
1192+
1193+
let schema = Schema::new(vec![Field::new(
1194+
"my_struct",
1195+
DataType::Struct(child_fields),
1196+
true,
1197+
)]);
1198+
1199+
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(struct_array)]).unwrap();
1200+
1201+
let stats = collect_stats(&batch, &[column_name!("my_struct")]).unwrap();
1202+
1203+
// Check null counts for child fields
1204+
// Open question: 'a' has no field-level nulls and 'b' has two, yet rows 0 and 3 are also null at the struct level. The expected counts below (0 and 2) only reflect field-level nulls - confirm that ignoring struct-level nulls is intended.
1205+
assert_eq!(
1206+
get_stat::<Int64Type>(&stats, "nullCount", "my_struct", "a"),
1207+
0
1208+
);
1209+
assert_eq!(
1210+
get_stat::<Int64Type>(&stats, "nullCount", "my_struct", "b"),
1211+
2
1212+
);
1213+
1214+
// Check minValues
1215+
assert_eq!(
1216+
get_stat::<Int32Type>(&stats, "minValues", "my_struct", "a"),
1217+
1
1218+
);
1219+
// NOTE(review): 20 sits at row 1, whose struct is valid, so 20 is the minimum of 'b' whether or not struct-level nulls are masked. The values that live under null structs (rows 0 and 3) are a=1, a=4 and b=40; the assertions on those expect struct-level nulls to be ignored - confirm that is intended.
1220+
assert_eq!(
1221+
get_stat::<Int32Type>(&stats, "minValues", "my_struct", "b"),
1222+
20
1223+
);
1224+
1225+
// Check maxValues
1226+
assert_eq!(
1227+
get_stat::<Int32Type>(&stats, "maxValues", "my_struct", "a"),
1228+
4
1229+
);
1230+
assert_eq!(
1231+
get_stat::<Int32Type>(&stats, "maxValues", "my_struct", "b"),
1232+
40
1233+
);
1234+
}
1235+
1236+
// Generic test helper: walks `stats[stat_name][struct_name][field_name]` and returns the
// leaf column's value at row 0 as `T::Native`.
//
// `stat_name` selects the top-level stats column (the tests pass "nullCount",
// "minValues" or "maxValues"), `struct_name` the nested struct column inside it, and
// `field_name` the leaf field inside that struct.
//
// Panics (via `unwrap`) if any of the three columns is missing or if either of the
// first two levels is not a `StructArray`.
// NOTE(review): `as_primitive::<T>()` presumably also panics when the leaf is not a
// primitive array of type `T` - confirm against the arrow `AsArray` docs.
1237+
fn get_stat<T>(
1238+
stats: &StructArray,
1239+
stat_name: &str,
1240+
struct_name: &str,
1241+
field_name: &str,
1242+
) -> T::Native
1243+
where
1244+
T: crate::arrow::datatypes::ArrowPrimitiveType,
1245+
{
1246+
stats
1247+
.column_by_name(stat_name)
1248+
.unwrap()
1249+
.as_any()
1250+
.downcast_ref::<StructArray>()
1251+
.unwrap()
1252+
.column_by_name(struct_name)
1253+
.unwrap()
1254+
.as_any()
1255+
.downcast_ref::<StructArray>()
1256+
.unwrap()
1257+
.column_by_name(field_name)
1258+
.unwrap()
1259+
.as_primitive::<T>()
1260+
// Only row 0 is read; any further rows in the stats column are not inspected.
.value(0)
1261+
}
11681262
}

0 commit comments

Comments
 (0)