Skip to content

Commit 423894f

Browse files
committed
test
1 parent 8b4ac09 commit 423894f

1 file changed

Lines changed: 73 additions & 1 deletion

File tree

kernel/src/engine/default/stats.rs

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,9 +529,11 @@ pub(crate) fn collect_stats(
529529
#[cfg(test)]
530530
mod tests {
531531
use super::*;
532-
use crate::arrow::array::{Array, Int64Array, StringArray};
532+
use crate::arrow::array::{Array, Int32Array, Int64Array, StringArray};
533533
use crate::arrow::datatypes::{Fields, Schema};
534534
use crate::expressions::column_name;
535+
use crate::arrow::buffer::NullBuffer;
536+
535537

536538
#[test]
537539
fn test_collect_stats_single_batch() {
@@ -1165,4 +1167,74 @@ mod tests {
11651167
assert!(max_values.column_by_name("id").is_some());
11661168
assert!(max_values.column_by_name("list_col").is_none());
11671169
}
1170+
1171+
#[test]
1172+
fn test_collect_stats_struct_with_nulls_at_struct_level() {
1173+
// Schema: { my_struct: { a: int32 (non-nullable), b: int32 (nullable) } }, with `my_struct` itself nullable.
1174+
// Collects stats over a struct column that has both struct-level and field-level nulls, then checks the nullCount/minValues/maxValues reported for its children.
1175+
let child_fields = Fields::from(vec![
1176+
Field::new("a", DataType::Int32, false),
1177+
Field::new("b", DataType::Int32, true), // nullable
1178+
]);
1179+
1180+
let a_values = Int32Array::from(vec![1, 2, 3, 4]);
1181+
// b has field-level nulls at rows 0 and 2
1182+
let b_values = Int32Array::from(vec![None, Some(20), None, Some(40)]);
1183+
1184+
// Struct-level validity mask: [false, true, true, false]. In Arrow's NullBuffer, `true` marks a valid slot and `false` a null one.
1185+
// So rows 0 and 3 are null at the struct level, while rows 1 and 2 hold valid structs.
1186+
let nulls = NullBuffer::from(vec![false, true, true, false]);
1187+
1188+
let struct_array = StructArray::new(
1189+
child_fields.clone(),
1190+
vec![Arc::new(a_values), Arc::new(b_values)],
1191+
Some(nulls),
1192+
);
1193+
1194+
let schema = Schema::new(vec![Field::new(
1195+
"my_struct",
1196+
DataType::Struct(child_fields),
1197+
true,
1198+
)]);
1199+
1200+
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(struct_array)]).unwrap();
1201+
1202+
let stats = collect_stats(&batch, &[column_name!("my_struct")]).unwrap();
1203+
1204+
// Check null counts for child fields
1205+
// NOTE(review): the expected counts match field-level nulls only (a: none; b: rows 0 and 2), even though rows 0 and 3 are null at the struct level. Confirm this is intended.
1206+
assert_eq!(get_stat::<Int64Type>(&stats, "nullCount", "my_struct", "a"), 0);
1207+
assert_eq!(get_stat::<Int64Type>(&stats, "nullCount", "my_struct", "b"), 2);
1208+
1209+
// Check minValues. NOTE(review): a's expected min of 1 comes from row 0, which is null at the struct level.
1210+
assert_eq!(get_stat::<Int32Type>(&stats, "minValues", "my_struct", "a"), 1);
1211+
// b's non-null values are 20 (row 1, valid struct) and 40 (row 3, null struct), so 20 is the min whether or not struct-level nulls are applied.
1212+
assert_eq!(get_stat::<Int32Type>(&stats, "minValues", "my_struct", "b"), 20);
1213+
1214+
// Check maxValues. NOTE(review): both expected maxima (4 and 40) come from row 3, which is null at the struct level. Confirm struct-level nulls are meant to be ignored here.
1215+
assert_eq!(get_stat::<Int32Type>(&stats, "maxValues", "my_struct", "a"), 4);
1216+
assert_eq!(get_stat::<Int32Type>(&stats, "maxValues", "my_struct", "b"), 40);
1217+
}
1218+
1219+
// Generic test helper: reads a single leaf statistic out of the nested stats struct.
// Walks `stats[stat_name][struct_name][field_name]`, downcasting the two outer levels to
// `StructArray` and viewing the leaf as a primitive array of type `T`, then returns the
// leaf's value at index 0 as `T::Native`.
// Panics (via `unwrap`) if any of the three columns is missing or if either outer column
// is not a `StructArray`.
// NOTE(review): index 0 is read without a validity check, so a null stat would not be
// detected here; `as_primitive::<T>()` presumably panics on a type mismatch — confirm.
1220+
fn get_stat<T>(stats: &StructArray, stat_name: &str, struct_name: &str, field_name: &str) -> T::Native
1221+
where
1222+
T: crate::arrow::datatypes::ArrowPrimitiveType,
1223+
{
1224+
stats
1225+
.column_by_name(stat_name)
1226+
.unwrap()
1227+
.as_any()
1228+
.downcast_ref::<StructArray>()
1229+
.unwrap()
1230+
.column_by_name(struct_name)
1231+
.unwrap()
1232+
.as_any()
1233+
.downcast_ref::<StructArray>()
1234+
.unwrap()
1235+
.column_by_name(field_name)
1236+
.unwrap()
1237+
.as_primitive::<T>()
1238+
.value(0)
1239+
}
11681240
}

0 commit comments

Comments
 (0)