diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index aec87ec5ff853..1e86ae695e593 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1039,7 +1039,6 @@ pub fn binary_numeric_coercion( pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - // Prefer decimal data type over floating point for comparison operation match (lhs_type, rhs_type) { // Same decimal types (lhs_type, rhs_type) @@ -1059,7 +1058,19 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(lhs_type.clone()), + ( + Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), + Float16 | Float32 | Float64, + ) => Some(rhs_type.clone()), + // Decimal + integer types: promote to Decimal. (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), _) => { get_common_decimal_type(lhs_type, rhs_type) } diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs b/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs index f8bff3ca90ecf..132d3fcee272c 100644 --- a/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs +++ b/datafusion/expr-common/src/type_coercion/binary/tests/comparison.rs @@ -31,13 +31,14 @@ fn test_decimal_binary_comparison_coercion() -> Result<()> { DataType::Decimal128(20, 8), DataType::Null, ]; + // Float types win over Decimal because Decimal cannot represent NaN/±Inf. 
let result_types = [ DataType::Decimal128(20, 3), DataType::Decimal128(20, 3), DataType::Decimal128(20, 3), DataType::Decimal128(23, 3), - DataType::Decimal128(24, 7), - DataType::Decimal128(32, 15), + DataType::Float32, + DataType::Float64, DataType::Decimal128(38, 10), DataType::Decimal128(25, 8), DataType::Decimal128(20, 3), @@ -460,11 +461,13 @@ fn test_type_coercion_compare() -> Result<()> { Operator::Lt, DataType::Decimal128(22, 2) ); + // Float wins over Decimal in comparison coercion: Decimal cannot + // represent NaN/±Inf, so the common super-type must be the float. test_coercion_binary_rule!( DataType::Float64, DataType::Decimal128(10, 3), Operator::Gt, - DataType::Decimal128(30, 15) + DataType::Float64 ); test_coercion_binary_rule!( DataType::Int64, diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index 5faf801c84652..38aaae88065e2 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -598,6 +598,25 @@ select arrow_typeof(null <= a), null <= a from (values (1.1::decimal)) as t(a); ---- Boolean NULL +# Float wins over Decimal in comparison coercion (issue #14272). +# Decimal cannot represent NaN/±Inf, so the common super-type must be the +# float — otherwise comparing a Decimal against +Inf would error trying to +# cast +Inf into the Decimal range. 
+query B +select '1'::decimal(10,0) = arrow_cast('inf', 'Float64'); +---- +false + +query B +select '1'::decimal(10,0) < arrow_cast('inf', 'Float64'); +---- +true + +query B +select arrow_cast('NaN', 'Float64') = '1'::decimal(10,0); +---- +false + query R select try_cast(1234567 as decimal(7,3)); ---- diff --git a/datafusion/sqllogictest/test_files/operator.slt b/datafusion/sqllogictest/test_files/operator.slt index e50fa721c8850..c594386656e4e 100644 --- a/datafusion/sqllogictest/test_files/operator.slt +++ b/datafusion/sqllogictest/test_files/operator.slt @@ -299,22 +299,23 @@ physical_plan 01)FilterExec: int64@3 < -5 AND CAST(uint64@7 AS Decimal128(20, 0)) < Some(-5),20,0 AND float64@9 < -5 AND decimal@10 < Some(-500),5,2 02)--DataSourceExec: partitions=1, partition_sizes=[1] -## < decimal (expect casts for integers to float) +## < decimal (expect casts for integers and decimal to float, since Float wins +## over Decimal in comparison coercion — Decimal cannot hold NaN/±Inf) query TT EXPLAIN SELECT * FROM numeric_types WHERE int64 < 5.1 AND uint64 < 5.1 AND float64 < 5.1 AND decimal < 5.1; ---- physical_plan -01)FilterExec: CAST(int64@3 AS Float64) < 5.1 AND CAST(uint64@7 AS Float64) < 5.1 AND float64@9 < 5.1 AND decimal@10 < Some(510),5,2 +01)FilterExec: CAST(int64@3 AS Float64) < 5.1 AND CAST(uint64@7 AS Float64) < 5.1 AND float64@9 < 5.1 AND CAST(decimal@10 AS Float64) < 5.1 02)--DataSourceExec: partitions=1, partition_sizes=[1] -## < negative decimal (expect casts for integers to float) +## < negative decimal (same Float-wins behavior as above) query TT EXPLAIN SELECT * FROM numeric_types WHERE int64 < -5.1 AND uint64 < -5.1 AND float64 < -5.1 AND decimal < -5.1; ---- physical_plan -01)FilterExec: CAST(int64@3 AS Float64) < -5.1 AND CAST(uint64@7 AS Float64) < -5.1 AND float64@9 < -5.1 AND decimal@10 < Some(-510),5,2 +01)FilterExec: CAST(int64@3 AS Float64) < -5.1 AND CAST(uint64@7 AS Float64) < -5.1 AND float64@9 < -5.1 AND CAST(decimal@10 AS Float64) < 
-5.1 02)--DataSourceExec: partitions=1, partition_sizes=[1] @@ -338,22 +339,23 @@ physical_plan 01)FilterExec: int64@3 = -5 AND CAST(uint64@7 AS Decimal128(20, 0)) = Some(-5),20,0 AND float64@9 = -5 AND decimal@10 = Some(-500),5,2 02)--DataSourceExec: partitions=1, partition_sizes=[1] -## = decimal (expect casts for integers to float) +## = decimal (expect casts for integers and decimal to float, since Float wins +## over Decimal in comparison coercion — Decimal cannot hold NaN/±Inf) query TT EXPLAIN SELECT * FROM numeric_types WHERE int64 = 5.1 AND uint64 = 5.1 AND float64 = 5.1 AND decimal = 5.1; ---- physical_plan -01)FilterExec: CAST(int64@3 AS Float64) = 5.1 AND CAST(uint64@7 AS Float64) = 5.1 AND float64@9 = 5.1 AND decimal@10 = Some(510),5,2 +01)FilterExec: CAST(int64@3 AS Float64) = 5.1 AND CAST(uint64@7 AS Float64) = 5.1 AND float64@9 = 5.1 AND CAST(decimal@10 AS Float64) = 5.1 02)--DataSourceExec: partitions=1, partition_sizes=[1] -## = negative decimal (expect casts for integers to float) +## = negative decimal (same Float-wins behavior as above) query TT EXPLAIN SELECT * FROM numeric_types WHERE int64 = -5.1 AND uint64 = -5.1 AND float64 = -5.1 AND decimal = -5.1; ---- physical_plan -01)FilterExec: CAST(int64@3 AS Float64) = -5.1 AND CAST(uint64@7 AS Float64) = -5.1 AND float64@9 = -5.1 AND decimal@10 = Some(-510),5,2 +01)FilterExec: CAST(int64@3 AS Float64) = -5.1 AND CAST(uint64@7 AS Float64) = -5.1 AND float64@9 = -5.1 AND CAST(decimal@10 AS Float64) = -5.1 02)--DataSourceExec: partitions=1, partition_sizes=[1] diff --git a/docs/source/library-user-guide/upgrading/55.0.0.md b/docs/source/library-user-guide/upgrading/55.0.0.md new file mode 100644 index 0000000000000..2877ddac0eaa9 --- /dev/null +++ b/docs/source/library-user-guide/upgrading/55.0.0.md @@ -0,0 +1,66 @@ + + +# Upgrade Guides + +## DataFusion 55.0.0 + +**Note:** DataFusion `55.0.0` has not been released yet. 
The information provided +in this section pertains to features and changes that have already been merged +to the main branch and are awaiting release in this version. + +### Decimal / floating-point coercion now picks the floating-point type + +Previously, any context that needed a common type for a `Decimal` and a +floating-point value (`Float16`, `Float32`, or `Float64`) chose the decimal +type. This produced errors for legitimate floating-point inputs that have +no decimal representation: + +```sql +-- Before: errored with "Cast error: Cannot cast to Decimal128(...). Overflowing on inf" +SELECT '1'::decimal(10,0) = arrow_cast('inf', 'Float64'); +``` + +DataFusion now coerces the decimal side to the floating-point type instead. +Decimal types cannot represent `NaN`, `±Infinity`, or values outside their +precision/scale range, so the floating-point type is the only common type to +which every input can be cast. This also matches the behavior of PostgreSQL, +DuckDB, and the existing rule for arithmetic operators in DataFusion. + +**Migration guide:** + +Most queries become more correct with no source change required: queries that +previously errored (comparisons against `NaN`, `±Inf`, or out-of-range values) +now succeed, and expressions that were previously coerced through +`Decimal128(30, 15)` are now evaluated in the floating-point type. + +The one behavior to be aware of: a decimal value with more than ~15–17 +significant digits will lose precision when cast to `Float64`. If your query +needs to preserve full decimal precision, cast the float operand to the +decimal type explicitly, or use a decimal literal instead of a float-typed +value: + +```sql +-- If high-precision decimal comparison matters, force the float into the +-- decimal domain explicitly: +SELECT * FROM t WHERE big_decimal_col > CAST(my_float AS DECIMAL(38, 10)); +``` + +See [#14272](https://github.com/apache/datafusion/issues/14272) for the +original report and discussion. 
diff --git a/docs/source/library-user-guide/upgrading/index.rst b/docs/source/library-user-guide/upgrading/index.rst index 1ed5eca2a5d2a..51c7f1413172b 100644 --- a/docs/source/library-user-guide/upgrading/index.rst +++ b/docs/source/library-user-guide/upgrading/index.rst @@ -21,6 +21,7 @@ Upgrade Guides .. toctree:: :maxdepth: 1 + DataFusion 55.0.0 <55.0.0> DataFusion 54.0.0 <54.0.0> DataFusion 53.0.0 <53.0.0> DataFusion 52.0.0 <52.0.0>