Skip to content

Commit 0e23ef1

Browse files
committed
Revert NDV propagation through projections
Remove the single-column expression heuristic for preserving the number of distinct values (NDV) in projections, as it is too broad to be correct. Expression-level statistics propagation will be addressed in a follow-up design.
1 parent 089b44a commit 0e23ef1

File tree

1 file changed

+5
-22
lines changed

1 file changed

+5
-22
lines changed

datafusion/physical-expr/src/projection.rs

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -711,25 +711,9 @@ impl ProjectionExprs {
711711
}
712712
}
713713
} else {
714-
// TODO: expressions should compute their own statistics
715-
//
716-
// For now, try to preserve NDV if the expression references a
717-
// single column (as a conservative upper bound).
718-
// More accurate NDV propagation would require tracking injectivity
719-
// of functions (e.g., `a + 1` preserves NDV exactly, `ABS(a)` may
720-
// reduce it, `a % 10` bounds it to 10)
721-
let columns = collect_columns(expr);
722-
if columns.len() == 1 {
723-
let col_idx = columns.iter().next().unwrap().index();
724-
ColumnStatistics {
725-
distinct_count: stats.column_statistics[col_idx]
726-
.distinct_count
727-
.to_inexact(),
728-
..ColumnStatistics::new_unknown()
729-
}
730-
} else {
731-
ColumnStatistics::new_unknown()
732-
}
714+
// TODO stats: estimate more statistics from expressions
715+
// (expressions should compute their statistics themselves)
716+
ColumnStatistics::new_unknown()
733717
};
734718
column_statistics.push(col_stats);
735719
}
@@ -2734,11 +2718,10 @@ pub(crate) mod tests {
27342718
// Should have 2 column statistics
27352719
assert_eq!(output_stats.column_statistics.len(), 2);
27362720

2737-
// First column (expression `col0 + 1`) preserves NDV from the single
2738-
// referenced column as a conservative upper bound (marked Inexact)
2721+
// First column (expression) should have unknown statistics
27392722
assert_eq!(
27402723
output_stats.column_statistics[0].distinct_count,
2741-
Precision::Inexact(5)
2724+
Precision::Absent
27422725
);
27432726
assert_eq!(
27442727
output_stats.column_statistics[0].max_value,

0 commit comments

Comments
 (0)