@@ -203,6 +203,14 @@ impl Precision<usize> {
203203}
204204
205205impl Precision < ScalarValue > {
206+ fn sum_data_type ( data_type : & DataType ) -> DataType {
207+ match data_type {
208+ DataType :: Int8 | DataType :: Int16 | DataType :: Int32 => DataType :: Int64 ,
209+ DataType :: UInt8 | DataType :: UInt16 | DataType :: UInt32 => DataType :: UInt64 ,
210+ _ => data_type. clone ( ) ,
211+ }
212+ }
213+
206214 /// Calculates the sum of two (possibly inexact) [`ScalarValue`] values,
207215 /// conservatively propagating exactness information. If one of the input
208216 /// values is [`Precision::Absent`], the result is `Absent` too.
@@ -228,6 +236,46 @@ impl Precision<ScalarValue> {
228236 }
229237 }
230238
239+ /// Casts integer values to the wider SQL `SUM` return type.
240+ ///
241+ /// This narrows overflow risk when `sum_value` statistics are merged:
242+ /// `Int8/Int16/Int32 -> Int64` and `UInt8/UInt16/UInt32 -> UInt64`.
243+ pub fn cast_to_sum_type ( & self ) -> Precision < ScalarValue > {
244+ match self {
245+ Precision :: Exact ( value) => {
246+ let source_type = value. data_type ( ) ;
247+ let target_type = Self :: sum_data_type ( & source_type) ;
248+ if source_type == target_type {
249+ Precision :: Exact ( value. clone ( ) )
250+ } else {
251+ value
252+ . cast_to ( & target_type)
253+ . map ( Precision :: Exact )
254+ . unwrap_or ( Precision :: Absent )
255+ }
256+ }
257+ Precision :: Inexact ( value) => {
258+ let source_type = value. data_type ( ) ;
259+ let target_type = Self :: sum_data_type ( & source_type) ;
260+ if source_type == target_type {
261+ Precision :: Inexact ( value. clone ( ) )
262+ } else {
263+ value
264+ . cast_to ( & target_type)
265+ . map ( Precision :: Inexact )
266+ . unwrap_or ( Precision :: Absent )
267+ }
268+ }
269+ Precision :: Absent => Precision :: Absent ,
270+ }
271+ }
272+
273+ /// SUM-style addition with integer widening to match SQL `SUM` return
274+ /// types for smaller integral inputs.
275+ pub fn add_for_sum ( & self , other : & Precision < ScalarValue > ) -> Precision < ScalarValue > {
276+ self . cast_to_sum_type ( ) . add ( & other. cast_to_sum_type ( ) )
277+ }
278+
231279 /// Calculates the difference of two (possibly inexact) [`ScalarValue`] values,
232280 /// conservatively propagating exactness information. If one of the input
233281 /// values is [`Precision::Absent`], the result is `Absent` too.
@@ -620,7 +668,7 @@ impl Statistics {
620668 /// assert_eq!(merged.column_statistics[0].max_value,
621669 /// Precision::Exact(ScalarValue::from(200)));
622670 /// assert_eq!(merged.column_statistics[0].sum_value,
623- /// Precision::Exact(ScalarValue::from( 1500)));
671+ /// Precision::Exact(ScalarValue::Int64(Some( 1500) )));
624672 /// ```
625673 pub fn try_merge_iter < ' a , I > ( items : I , schema : & Schema ) -> Result < Statistics >
626674 where
@@ -664,7 +712,7 @@ impl Statistics {
664712 null_count : cs. null_count ,
665713 max_value : cs. max_value . clone ( ) ,
666714 min_value : cs. min_value . clone ( ) ,
667- sum_value : cs. sum_value . clone ( ) ,
715+ sum_value : cs. sum_value . cast_to_sum_type ( ) ,
668716 distinct_count : cs. distinct_count ,
669717 byte_size : cs. byte_size ,
670718 } )
@@ -693,7 +741,8 @@ impl Statistics {
693741 } ;
694742 col_stats. min_value = col_stats. min_value . min ( & item_cs. min_value ) ;
695743 col_stats. max_value = col_stats. max_value . max ( & item_cs. max_value ) ;
696- precision_add ( & mut col_stats. sum_value , & item_cs. sum_value ) ;
744+ let item_sum_value = item_cs. sum_value . cast_to_sum_type ( ) ;
745+ precision_add ( & mut col_stats. sum_value , & item_sum_value) ;
697746 col_stats. byte_size = col_stats. byte_size . add ( & item_cs. byte_size ) ;
698747 }
699748 }
@@ -1095,6 +1144,45 @@ mod tests {
10951144 assert_eq ! ( precision. add( & Precision :: Absent ) , Precision :: Absent ) ;
10961145 }
10971146
#[test]
fn test_add_for_sum_scalar_integer_widening() {
    let lhs = Precision::Exact(ScalarValue::Int32(Some(42)));
    let addend = ScalarValue::Int32(Some(23));
    let widened_total = ScalarValue::Int64(Some(65));

    // Exact + Exact: the result is exact and carried as Int64.
    let exact_sum = lhs.add_for_sum(&Precision::Exact(addend.clone()));
    assert_eq!(exact_sum, Precision::Exact(widened_total.clone()));

    // Exact + Inexact: the result is inexact and carried as Int64.
    let inexact_sum = lhs.add_for_sum(&Precision::Inexact(addend));
    assert_eq!(inexact_sum, Precision::Inexact(widened_total));
}
1160+
#[test]
fn test_add_for_sum_prevents_int32_overflow() {
    let max_i32 = Precision::Exact(ScalarValue::Int32(Some(i32::MAX)));
    let one = Precision::Exact(ScalarValue::Int32(Some(1)));

    // One past `i32::MAX`, which is only representable in the widened type.
    let expected_total = i64::from(i32::MAX) + 1;

    assert_eq!(
        max_i32.add_for_sum(&one),
        Precision::Exact(ScalarValue::Int64(Some(expected_total))),
    );
}
1171+
#[test]
fn test_add_for_sum_scalar_unsigned_integer_widening() {
    let lhs = Precision::Exact(ScalarValue::UInt32(Some(42)));
    let addend = ScalarValue::UInt32(Some(23));
    let widened_total = ScalarValue::UInt64(Some(65));

    // Exact + Exact: the result is exact and carried as UInt64.
    let exact_sum = lhs.add_for_sum(&Precision::Exact(addend.clone()));
    assert_eq!(exact_sum, Precision::Exact(widened_total.clone()));

    // Exact + Inexact: the result is inexact and carried as UInt64.
    let inexact_sum = lhs.add_for_sum(&Precision::Inexact(addend));
    assert_eq!(inexact_sum, Precision::Inexact(widened_total));
}
1185+
10981186 #[ test]
10991187 fn test_sub ( ) {
11001188 let precision1 = Precision :: Exact ( 42 ) ;
@@ -1340,7 +1428,7 @@ mod tests {
13401428 ) ;
13411429 assert_eq ! (
13421430 col1_stats. sum_value,
1343- Precision :: Exact ( ScalarValue :: Int32 ( Some ( 1100 ) ) )
1431+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( 1100 ) ) )
13441432 ) ; // 500 + 600
13451433
13461434 let col2_stats = & summary_stats. column_statistics [ 1 ] ;
@@ -1355,7 +1443,7 @@ mod tests {
13551443 ) ;
13561444 assert_eq ! (
13571445 col2_stats. sum_value,
1358- Precision :: Exact ( ScalarValue :: Int32 ( Some ( 2200 ) ) )
1446+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( 2200 ) ) )
13591447 ) ; // 1000 + 1200
13601448 }
13611449
0 commit comments