From 1be68a43cacc093b4d290109a155c56194630a4c Mon Sep 17 00:00:00 2001 From: Sean Doherty Date: Sun, 17 May 2026 08:01:40 -0500 Subject: [PATCH] Guard to_timestamp decimal overflow --- .../functions/src/datetime/to_timestamp.rs | 72 +++++++++++++++---- .../test_files/datetime/timestamps.slt | 6 ++ 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 405f6ff3c7b13..2514910cbceaf 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -30,7 +30,7 @@ use arrow::datatypes::{ TimestampNanosecondType, TimestampSecondType, }; use datafusion_common::config::ConfigOptions; -use datafusion_common::{Result, ScalarType, ScalarValue, exec_err}; +use datafusion_common::{Result, ScalarType, ScalarValue, exec_datafusion_err, exec_err}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, @@ -332,14 +332,31 @@ impl_to_timestamp_constructors!(ToTimestampMillisFunc); impl_to_timestamp_constructors!(ToTimestampMicrosFunc); impl_to_timestamp_constructors!(ToTimestampNanosFunc); -fn decimal_to_nanoseconds(value: i128, scale: i8) -> i64 { +fn decimal_to_nanoseconds(value: i128, scale: i8) -> Result<i64> { let nanos_exponent = 9_i16 - scale as i16; + let power = 10_i128 + .checked_pow(nanos_exponent.unsigned_abs() as u32) + .ok_or_else(|| { + exec_datafusion_err!( + "Decimal value {value} with scale {scale} overflows timestamp nanoseconds" + ) + })?; + let timestamp_nanos = if nanos_exponent >= 0 { - value * 10_i128.pow(nanos_exponent as u32) + value.checked_mul(power).ok_or_else(|| { + exec_datafusion_err!( + "Decimal value {value} with scale {scale} overflows timestamp nanoseconds" + ) + })?
 } else { - value / 10_i128.pow(nanos_exponent.unsigned_abs() as u32) + value / power }; - timestamp_nanos as i64 + + i64::try_from(timestamp_nanos).map_err(|_| { + exec_datafusion_err!( + "Decimal value {value} with scale {scale} overflows timestamp nanoseconds" + ) + }) } fn decimal128_to_timestamp_nanos( @@ -348,7 +365,7 @@ fn decimal128_to_timestamp_nanos( ) -> Result<ColumnarValue> { match arg { ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), _, scale)) => { - let timestamp_nanos = decimal_to_nanoseconds(*value, *scale); + let timestamp_nanos = decimal_to_nanoseconds(*value, *scale)?; Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( Some(timestamp_nanos), tz, @@ -362,8 +379,8 @@ fn decimal128_to_timestamp_nanos( let scale = decimal_arr.scale(); let result: TimestampNanosecondArray = decimal_arr .iter() - .map(|v| v.map(|val| decimal_to_nanoseconds(val, scale))) - .collect(); + .map(|v| v.map(|val| decimal_to_nanoseconds(val, scale)).transpose()) + .collect::<Result<_>>()?; let result = result.with_timezone_opt(tz); Ok(ColumnarValue::Array(Arc::new(result))) } @@ -947,6 +964,37 @@ mod tests { Ok(()) } + #[test] + fn to_timestamp_decimal128_overflow_returns_error() { + let value = "99999999999999999999999999999999999999" + .parse::<i128>() + .unwrap(); + let err = decimal128_to_timestamp_nanos( + &ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), 38, 0)), + None, + ) + .unwrap_err() + .to_string(); + + assert_contains!(err, "overflows timestamp nanoseconds"); + } + + #[test] + fn to_timestamp_decimal128_array_overflow_returns_error() { + let value = "99999999999999999999999999999999999999" + .parse::<i128>() + .unwrap(); + let array = Decimal128Array::from(vec![Some(value)]) + .with_precision_and_scale(38, 0) + .unwrap(); + let err = + decimal128_to_timestamp_nanos(&ColumnarValue::Array(Arc::new(array)), None) + .unwrap_err() + .to_string(); + + assert_contains!(err, "overflows timestamp nanoseconds"); + } + #[test] fn to_timestamp_with_formats_arrays_and_nulls() -> 
Result<()> { // ensure that arrow array implementation is wired up and handles nulls correctly @@ -1830,19 +1878,19 @@ mod tests { #[test] fn test_decimal_to_nanoseconds_negative_scale() { // scale -2: internal value 5 represents 5 * 10^2 = 500 seconds - let nanos = decimal_to_nanoseconds(5, -2); + let nanos = decimal_to_nanoseconds(5, -2).unwrap(); assert_eq!(nanos, 500_000_000_000); // 500 seconds in nanoseconds // scale -1: internal value 10 represents 10 * 10^1 = 100 seconds - let nanos = decimal_to_nanoseconds(10, -1); + let nanos = decimal_to_nanoseconds(10, -1).unwrap(); assert_eq!(nanos, 100_000_000_000); // scale 0: internal value 5 represents 5 seconds - let nanos = decimal_to_nanoseconds(5, 0); + let nanos = decimal_to_nanoseconds(5, 0).unwrap(); assert_eq!(nanos, 5_000_000_000); // scale 3: internal value 1500 represents 1.5 seconds - let nanos = decimal_to_nanoseconds(1500, 3); + let nanos = decimal_to_nanoseconds(1500, 3).unwrap(); assert_eq!(nanos, 1_500_000_000); } } diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index d6e50f560aaf0..e045abc0f2cb6 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -595,6 +595,12 @@ SELECT to_timestamp(arrow_cast(123456789.123456789, 'Decimal128(18,9)')) as c1, ---- 1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789 +# Regression test for https://github.com/apache/datafusion/issues/22213 +query error .*overflows timestamp nanoseconds +SELECT to_timestamp( + arrow_cast('99999999999999999999999999999999999999', 'Decimal128(38,0)') +); + # from_unixtime