fix sqrt(-1.0::float8) should error, not return NaN (apache#22308)

xiedeyantu · Copilot · web-flow · commit 7bcb61332830 · 2026-05-25T02:09:17.000Z
## Which issue does this PR close?

- Closes apache#22260.

## Rationale for this change

DataFusion previously returned `NaN` for `sqrt` on negative floating-point inputs, for example `sqrt((-1.0)::float8)`. This differs from PostgreSQL semantics, which raise an error for the square root of a negative number.

This change makes `sqrt` return an execution error for out-of-domain negative inputs so its behavior is closer to PostgreSQL and avoids silently producing `NaN` for invalid inputs.

## What changes are included in this PR?

- Updated the unary math UDF helper to support an optional validator callback for runtime input validation.
- Switched `sqrt` to use a named validator helper instead of an inline predicate and error-string arguments.
- Added runtime validation for `sqrt` so negative inputs now raise `cannot take square root of a negative number`.
- Updated sqllogictests for `sqrt`:
  - negative literal inputs now expect an error
  - negative column inputs now expect an error
  - positive column coverage was retained using in-domain inputs

## Are these changes tested?

Yes. The change is covered by existing SQL logic tests and targeted validation runs:

- `cargo test -p datafusion-functions sqrt`
- `cargo test -p datafusion-sqllogictest --test sqllogictests scalar`

## Are there any user-facing changes?

Yes. `sqrt` now raises an execution error for negative inputs instead of returning `NaN`. This changes user-visible query behavior to better align with PostgreSQL semantics.

---------

Co-authored-by: Copilot <copilot@github.com>
diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs
@@ -210,6 +210,17 @@ macro_rules! downcast_arg {
 /// $GET_DOC: the function to get the documentation of the UDF
 macro_rules! make_math_unary_udf {
     ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
+        make_math_unary_udf!(
+            $UDF,
+            $NAME,
+            $UNARY_FUNC,
+            $OUTPUT_ORDERING,
+            $EVALUATE_BOUNDS,
+            $GET_DOC,
+            None::<fn(f64) -> Result<()>>
+        );
+    };
+    ($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr, $VALIDATOR:expr) => {
         $crate::make_udf_function!($NAME::$UDF, $NAME);
 
         mod $NAME {
@@ -218,6 +229,7 @@ macro_rules! make_math_unary_udf {
 
             use arrow::array::{ArrayRef, AsArray};
             use arrow::datatypes::{DataType, Float32Type, Float64Type};
+            use arrow::error::ArrowError;
             use datafusion_common::{Result, exec_err};
             use datafusion_expr::interval_arithmetic::Interval;
             use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
@@ -280,16 +292,38 @@ macro_rules! make_math_unary_udf {
                 ) -> Result<ColumnarValue> {
                     let args = ColumnarValue::values_to_arrays(&args.args)?;
                     let arr: ArrayRef = match args[0].data_type() {
-                        DataType::Float64 => Arc::new(
-                            args[0]
+                        DataType::Float64 => {
+                            let values = args[0]
                                 .as_primitive::<Float64Type>()
-                                .unary::<_, Float64Type>(|x: f64| f64::$UNARY_FUNC(x)),
-                        ) as ArrayRef,
-                        DataType::Float32 => Arc::new(
-                            args[0]
+                                .try_unary::<_, Float64Type, _>(
+                                |x: f64| -> std::result::Result<f64, ArrowError> {
+                                    if let Some(validate) = $VALIDATOR {
+                                        validate(x).map_err(|error| {
+                                            ArrowError::ComputeError(error.to_string())
+                                        })?;
+                                    }
+
+                                    Ok(f64::$UNARY_FUNC(x))
+                                },
+                            )?;
+                            Arc::new(values) as ArrayRef
+                        }
+                        DataType::Float32 => {
+                            let values = args[0]
                                 .as_primitive::<Float32Type>()
-                                .unary::<_, Float32Type>(|x: f32| f32::$UNARY_FUNC(x)),
-                        ) as ArrayRef,
+                                .try_unary::<_, Float32Type, _>(
+                                |x: f32| -> std::result::Result<f32, ArrowError> {
+                                    if let Some(validate) = $VALIDATOR {
+                                        validate(x as f64).map_err(|error| {
+                                            ArrowError::ComputeError(error.to_string())
+                                        })?;
+                                    }
+
+                                    Ok(f32::$UNARY_FUNC(x))
+                                },
+                            )?;
+                            Arc::new(values) as ArrayRef
+                        }
                         other => {
                             return exec_err!(
                                 "Unsupported data type {other:?} for function {}",
diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs
@@ -18,6 +18,7 @@
 //! "math" DataFusion functions
 
 use crate::math::monotonicity::*;
+use datafusion_common::{Result, exec_err};
 use datafusion_expr::ScalarUDF;
 use std::sync::Arc;
 
@@ -42,6 +43,14 @@ pub mod round;
 pub mod signum;
 pub mod trunc;
 
+fn validate_sqrt_input(value: f64) -> Result<()> {
+    if value < 0.0 {
+        exec_err!("cannot take square root of a negative number")
+    } else {
+        Ok(())
+    }
+}
+
 // Create UDFs
 make_udf_function!(abs::AbsFunc, abs);
 make_math_unary_udf!(
@@ -208,7 +217,8 @@ make_math_unary_udf!(
     sqrt,
     super::sqrt_order,
     super::bounds::sqrt_bounds,
-    super::get_sqrt_doc
+    super::get_sqrt_doc,
+    Some(super::validate_sqrt_input)
 );
 make_math_unary_udf!(
     TanFunc,
diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1107,12 +1107,16 @@ NULL
 
 # sqrt with columns (round is needed to normalize the outputs of different operating systems)
 query RRR rowsort
-select round(sqrt(a), 5), round(sqrt(b), 5), round(sqrt(c), 5) from signed_integers;
+select round(sqrt(abs(a)), 5), round(sqrt(abs(b)), 5), round(sqrt(abs(c)), 5) from signed_integers;
 ----
-1.41421 NaN 11.09054
+1 10 23.81176
+1.41421 31.62278 11.09054
+1.73205 100 31.27299
 2 NULL NULL
-NaN 10 NaN
-NaN 100 NaN
+
+# sqrt with negative column values should error
+query error cannot take square root of a negative number
+select round(sqrt(a), 5), round(sqrt(b), 5), round(sqrt(c), 5) from signed_integers;
 
 # sqrt scalar fraction
 query RR rowsort
@@ -1128,10 +1132,12 @@ select sqrt(cast(10e8 as double));
 
 
 # sqrt scalar negative
-query R rowsort
+query error cannot take square root of a negative number
 select sqrt(-1);
-----
-NaN
+
+# sqrt scalar negative float8
+query error cannot take square root of a negative number
+select sqrt((-1.0)::float8);
 
 ## tan