Skip to content

Commit ed25cc2

Browse files
alamb, fwojciec, claude
authored
[branch-53] Fix FilterExec converting Absent column stats to Exact(NULL) (#20391) (#20892)
Part of #19692. Closes #20388 on branch-53. This PR backports #20391 from @fwojciec to the branch-53 line. Co-authored-by: Filip Wojciechowski <fwojciec@gmail.com>; Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 19e9c06 commit ed25cc2

1 file changed

Lines changed: 54 additions & 5 deletions

File tree

datafusion/physical-plan/src/filter.rs

Lines changed: 54 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -757,6 +757,21 @@ impl EmbeddedProjection for FilterExec {
757757
}
758758
}
759759

760+
/// Converts an interval bound to a [`Precision`] value. NULL bounds (which
761+
/// represent "unbounded" in the interval type) map to [`Precision::Absent`].
/// A non-NULL bound becomes [`Precision::Exact`] when `is_exact` is true and
/// [`Precision::Inexact`] otherwise. The NULL check runs first, so a NULL
/// bound is never reported as exact even if `is_exact` is true.
762+
fn interval_bound_to_precision(
763+
bound: ScalarValue,
764+
is_exact: bool,
765+
) -> Precision<ScalarValue> {
766+
if bound.is_null() {
767+
Precision::Absent
768+
} else if is_exact {
769+
Precision::Exact(bound)
770+
} else {
771+
Precision::Inexact(bound)
772+
}
773+
}
774+
760775
/// This function ensures that all bounds in the `ExprBoundaries` vector are
761776
/// converted to closed bounds. If a lower/upper bound is initially open, it
762777
/// is adjusted by using the next/previous value for its data type to convert
@@ -795,11 +810,9 @@ fn collect_new_statistics(
795810
};
796811
};
797812
let (lower, upper) = interval.into_bounds();
798-
let (min_value, max_value) = if lower.eq(&upper) {
799-
(Precision::Exact(lower), Precision::Exact(upper))
800-
} else {
801-
(Precision::Inexact(lower), Precision::Inexact(upper))
802-
};
813+
let is_exact = !lower.is_null() && !upper.is_null() && lower == upper;
814+
let min_value = interval_bound_to_precision(lower, is_exact);
815+
let max_value = interval_bound_to_precision(upper, is_exact);
803816
ColumnStatistics {
804817
null_count: input_column_stats[idx].null_count.to_inexact(),
805818
max_value,
@@ -2141,4 +2154,40 @@ mod tests {
21412154

21422155
Ok(())
21432156
}
2157+
2158+
/// Columns with Absent min/max statistics should remain Absent after
2159+
/// FilterExec.
/// The input reports default column statistics for both columns and the
/// predicate (`a = 42`) only references column `a`; the assertions check
/// that column `b` (index 1) still has Absent min/max after filtering.
2160+
#[tokio::test]
2161+
async fn test_filter_statistics_absent_columns_stay_absent() -> Result<()> {
2162+
let schema = Schema::new(vec![
2163+
Field::new("a", DataType::Int32, false),
2164+
Field::new("b", DataType::Int32, false),
2165+
]);
2166+
let input = Arc::new(StatisticsExec::new(
2167+
Statistics {
2168+
num_rows: Precision::Inexact(1000),
2169+
total_byte_size: Precision::Absent,
2170+
column_statistics: vec![
2171+
ColumnStatistics::default(),
2172+
ColumnStatistics::default(),
2173+
],
2174+
},
2175+
schema.clone(),
2176+
));
2177+
2178+
let predicate = Arc::new(BinaryExpr::new(
2179+
Arc::new(Column::new("a", 0)),
2180+
Operator::Eq,
2181+
Arc::new(Literal::new(ScalarValue::Int32(Some(42)))),
2182+
));
2183+
let filter: Arc<dyn ExecutionPlan> =
2184+
Arc::new(FilterExec::try_new(predicate, input)?);
2185+
2186+
let statistics = filter.partition_statistics(None)?;
2187+
let col_b_stats = &statistics.column_statistics[1];
2188+
assert_eq!(col_b_stats.min_value, Precision::Absent);
2189+
assert_eq!(col_b_stats.max_value, Precision::Absent);
2190+
2191+
Ok(())
2192+
}
21442193
}

0 commit comments

Comments
 (0)