@@ -211,6 +211,16 @@ impl Precision<ScalarValue> {
211211 }
212212 }
213213
214+ fn cast_scalar_to_sum_type ( value : & ScalarValue ) -> Result < ScalarValue > {
215+ let source_type = value. data_type ( ) ;
216+ let target_type = Self :: sum_data_type ( & source_type) ;
217+ if source_type == target_type {
218+ Ok ( value. clone ( ) )
219+ } else {
220+ value. cast_to ( & target_type)
221+ }
222+ }
223+
214224 /// Calculates the sum of two (possibly inexact) [`ScalarValue`] values,
215225 /// conservatively propagating exactness information. If one of the input
216226 /// values is [`Precision::Absent`], the result is `Absent` too.
@@ -241,39 +251,21 @@ impl Precision<ScalarValue> {
241251 /// This narrows overflow risk when `sum_value` statistics are merged:
242252 /// `Int8/Int16/Int32 -> Int64` and `UInt8/UInt16/UInt32 -> UInt64`.
243253 pub fn cast_to_sum_type ( & self ) -> Precision < ScalarValue > {
244- match self {
245- Precision :: Exact ( value) => {
246- let source_type = value. data_type ( ) ;
247- let target_type = Self :: sum_data_type ( & source_type) ;
248- if source_type == target_type {
249- Precision :: Exact ( value. clone ( ) )
250- } else {
251- value
252- . cast_to ( & target_type)
253- . map ( Precision :: Exact )
254- . unwrap_or ( Precision :: Absent )
255- }
256- }
257- Precision :: Inexact ( value) => {
258- let source_type = value. data_type ( ) ;
259- let target_type = Self :: sum_data_type ( & source_type) ;
260- if source_type == target_type {
261- Precision :: Inexact ( value. clone ( ) )
262- } else {
263- value
264- . cast_to ( & target_type)
265- . map ( Precision :: Inexact )
266- . unwrap_or ( Precision :: Absent )
267- }
268- }
269- Precision :: Absent => Precision :: Absent ,
254+ match ( self . is_exact ( ) , self . get_value ( ) ) {
255+ ( Some ( true ) , Some ( value) ) => Self :: cast_scalar_to_sum_type ( value)
256+ . map ( Precision :: Exact )
257+ . unwrap_or ( Precision :: Absent ) ,
258+ ( Some ( false ) , Some ( value) ) => Self :: cast_scalar_to_sum_type ( value)
259+ . map ( Precision :: Inexact )
260+ . unwrap_or ( Precision :: Absent ) ,
261+ ( _, _) => Precision :: Absent ,
270262 }
271263 }
272264
273265 /// SUM-style addition with integer widening to match SQL `SUM` return
274266 /// types for smaller integral inputs.
275267 pub fn add_for_sum ( & self , other : & Precision < ScalarValue > ) -> Precision < ScalarValue > {
276- self . cast_to_sum_type ( ) . add ( & other. cast_to_sum_type ( ) )
268+ precision_add ( & self . cast_to_sum_type ( ) , & other. cast_to_sum_type ( ) )
277269 }
278270
279271 /// Calculates the difference of two (possibly inexact) [`ScalarValue`] values,
@@ -926,7 +918,15 @@ pub struct ColumnStatistics {
926918 pub max_value : Precision < ScalarValue > ,
927919 /// Minimum value of column
928920 pub min_value : Precision < ScalarValue > ,
929- /// Sum value of a column
921+ /// Sum value of a column.
922+ ///
923+ /// For integral columns, values should be kept in SUM-compatible widened
924+ /// types (`Int8/Int16/Int32 -> Int64`, `UInt8/UInt16/UInt32 -> UInt64`) to
925+ /// reduce overflow risk during statistics propagation.
926+ ///
927+ /// Callers should prefer [`ColumnStatistics::with_sum_value`] for setting
928+ /// this field and [`Precision<ScalarValue>::add_for_sum`] /
929+ /// [`Precision<ScalarValue>::cast_to_sum_type`] for sum arithmetic.
930930 pub sum_value : Precision < ScalarValue > ,
931931 /// Number of distinct values
932932 pub distinct_count : Precision < usize > ,
@@ -991,7 +991,19 @@ impl ColumnStatistics {
991991
992992 /// Set the sum value
993993 pub fn with_sum_value ( mut self , sum_value : Precision < ScalarValue > ) -> Self {
994- self . sum_value = sum_value;
994+ self . sum_value = match sum_value {
995+ Precision :: Exact ( value) => {
996+ Precision :: < ScalarValue > :: cast_scalar_to_sum_type ( & value)
997+ . map ( Precision :: Exact )
998+ . unwrap_or ( Precision :: Absent )
999+ }
1000+ Precision :: Inexact ( value) => {
1001+ Precision :: < ScalarValue > :: cast_scalar_to_sum_type ( & value)
1002+ . map ( Precision :: Inexact )
1003+ . unwrap_or ( Precision :: Absent )
1004+ }
1005+ Precision :: Absent => Precision :: Absent ,
1006+ } ;
9951007 self
9961008 }
9971009
@@ -2085,6 +2097,16 @@ mod tests {
20852097 assert_eq ! ( col_stats. byte_size, Precision :: Exact ( 8192 ) ) ;
20862098 }
20872099
2100+ #[ test]
2101+ fn test_with_sum_value_builder_widens_small_integers ( ) {
2102+ let col_stats = ColumnStatistics :: new_unknown ( )
2103+ . with_sum_value ( Precision :: Exact ( ScalarValue :: UInt32 ( Some ( 123 ) ) ) ) ;
2104+ assert_eq ! (
2105+ col_stats. sum_value,
2106+ Precision :: Exact ( ScalarValue :: UInt64 ( Some ( 123 ) ) )
2107+ ) ;
2108+ }
2109+
20882110 #[ test]
20892111 fn test_with_fetch_scales_byte_size ( ) {
20902112 // Test that byte_size is scaled by the row ratio in with_fetch
@@ -2232,7 +2254,7 @@ mod tests {
22322254 ) ;
22332255 assert_eq ! (
22342256 col1_stats. sum_value,
2235- Precision :: Exact ( ScalarValue :: Int32 ( Some ( 1100 ) ) )
2257+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( 1100 ) ) )
22362258 ) ;
22372259
22382260 let col2_stats = & summary_stats. column_statistics [ 1 ] ;
@@ -2247,7 +2269,7 @@ mod tests {
22472269 ) ;
22482270 assert_eq ! (
22492271 col2_stats. sum_value,
2250- Precision :: Exact ( ScalarValue :: Int32 ( Some ( 2200 ) ) )
2272+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( 2200 ) ) )
22512273 ) ;
22522274 }
22532275
@@ -2596,7 +2618,7 @@ mod tests {
25962618 ) ;
25972619 assert_eq ! (
25982620 col_stats. sum_value,
2599- Precision :: Inexact ( ScalarValue :: Int32 ( Some ( 1500 ) ) )
2621+ Precision :: Inexact ( ScalarValue :: Int64 ( Some ( 1500 ) ) )
26002622 ) ;
26012623 }
26022624}
0 commit comments