Skip to content

Commit 72de105

Browse files
committed
feat: Support IEEE 754 for SQL ops
1 parent beb8750 commit 72de105

3 files changed

Lines changed: 66 additions & 1 deletion

File tree

datafusion/common/src/utils/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,24 +1395,55 @@ fn fsl_values_row_number(list_size: i32, array_len: usize) -> Result<Int32Array>
13951395
/// semantics, which treats `-0.0` and `+0.0` as distinct. SQL semantics
13961396
/// (PostgreSQL / IEEE 754 equality) require them to compare equal, so
13971397
/// callers normalize before invoking those kernels.
1398+
///
1399+
/// The common case — no `-0.0` present — is allocation-free: a single
1400+
/// read-only scan of the underlying values buffer (`any`, which stops at
1401+
/// the first `-0.0` bit pattern) decides whether to take the rewriting path.
1402+
/// Only arrays whose values buffer contains `-0.0` pay for a new buffer.
13981403
pub fn normalize_float_zero(array: &ArrayRef) -> ArrayRef {
13991404
use arrow::array::{Float16Array, Float32Array, Float64Array};
14001405
use arrow::datatypes::{Float16Type, Float32Type, Float64Type};
1406+
// -0.0 is the only value whose bit pattern is the sign bit alone, so an
1407+
// exact bit-pattern comparison (not float `==`) reliably gates the rewrite.
1408+
const NEG_ZERO_F16_BITS: u16 = half::f16::NEG_ZERO.to_bits();
1409+
const NEG_ZERO_F32_BITS: u32 = (-0.0_f32).to_bits();
1410+
const NEG_ZERO_F64_BITS: u64 = (-0.0_f64).to_bits();
14011411
match array.data_type() {
14021412
DataType::Float32 => {
14031413
let arr: &Float32Array = array.as_primitive::<Float32Type>();
1414+
if !arr
1415+
.values()
1416+
.iter()
1417+
.any(|v| v.to_bits() == NEG_ZERO_F32_BITS)
1418+
{
1419+
return Arc::clone(array);
1420+
}
14041421
let normalized: Float32Array =
14051422
arr.unary(|v| if v.to_bits() << 1 == 0 { 0.0_f32 } else { v });
14061423
Arc::new(normalized)
14071424
}
14081425
DataType::Float64 => {
14091426
let arr: &Float64Array = array.as_primitive::<Float64Type>();
1427+
if !arr
1428+
.values()
1429+
.iter()
1430+
.any(|v| v.to_bits() == NEG_ZERO_F64_BITS)
1431+
{
1432+
return Arc::clone(array);
1433+
}
14101434
let normalized: Float64Array =
14111435
arr.unary(|v| if v.to_bits() << 1 == 0 { 0.0_f64 } else { v });
14121436
Arc::new(normalized)
14131437
}
14141438
DataType::Float16 => {
14151439
let arr: &Float16Array = array.as_primitive::<Float16Type>();
1440+
if !arr
1441+
.values()
1442+
.iter()
1443+
.any(|v| v.to_bits() == NEG_ZERO_F16_BITS)
1444+
{
1445+
return Arc::clone(array);
1446+
}
14161447
let normalized: Float16Array = arr.unary(|v| {
14171448
if v.to_bits() << 1 == 0 {
14181449
half::f16::from_bits(0)

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2262,7 +2262,16 @@ impl JoinKeyComparator {
22622262
.zip(right_arrays.iter())
22632263
.zip(sort_options.iter())
22642264
.map(|((l, r), opts)| {
2265-
let inner = make_comparator(l.as_ref(), r.as_ref(), *opts)?;
2265+
// `make_comparator` uses IEEE 754 totalOrder for floats and
2266+
// treats `-0.0` / `+0.0` as distinct. Normalize float arrays
2267+
// so SMJ / piecewise-merge equi-keys honor SQL equality;
2268+
// no-op (Arc::clone) for non-floats and for float arrays
2269+
// that contain no `-0.0`. `normalize_float_zero` preserves
2270+
// null positions, so the original null masks below remain
2271+
// valid.
2272+
let l_norm = normalize_float_zero(l);
2273+
let r_norm = normalize_float_zero(r);
2274+
let inner = make_comparator(l_norm.as_ref(), r_norm.as_ref(), *opts)?;
22662275
if null_equality == NullEquality::NullEqualsNothing {
22672276
let ln = l.logical_nulls().filter(|n| n.null_count() > 0);
22682277
let rn = r.logical_nulls().filter(|n| n.null_count() > 0);

datafusion/sqllogictest/test_files/negative_zero.slt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,28 @@ FROM (SELECT 0.0 AS a) t1
204204
JOIN (SELECT -0.0 AS b) t2 ON t1.a = t2.b;
205205
----
206206
0 0
207+
208+
# Sort-merge join must also match +0.0 against -0.0. SMJ builds equi-key
209+
# matchers via `JoinKeyComparator`, which calls Arrow's `make_comparator`
210+
# (IEEE 754 totalOrder); without normalization, +0.0 and -0.0 compare
211+
# as unequal, so the join misses the match.
212+
statement ok
213+
set datafusion.optimizer.prefer_hash_join = false;
214+
215+
query RR
216+
SELECT t1.a, t2.b
217+
FROM (SELECT 0.0 AS a) t1
218+
JOIN (SELECT -0.0 AS b) t2 ON t1.a = t2.b;
219+
----
220+
0 0
221+
222+
# Float32 SMJ equi-join.
223+
query RR
224+
SELECT t1.a, t2.b
225+
FROM (SELECT arrow_cast(0.0, 'Float32') AS a) t1
226+
JOIN (SELECT arrow_cast(-0.0, 'Float32') AS b) t2 ON t1.a = t2.b;
227+
----
228+
0 0
229+
230+
statement ok
231+
reset datafusion.optimizer.prefer_hash_join;

0 commit comments

Comments
 (0)