Skip to content

Commit d3064ea

Browse files
alambadriangb
authored andcommitted
Revert "[Minor]: unify ANY/ALL planning and align ANY NULL semantics with PG (#21743)"
This reverts commit 4bff17e.
1 parent c8b784a commit d3064ea

3 files changed

Lines changed: 90 additions & 104 deletions

File tree

datafusion/sql/src/expr/mod.rs

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -621,12 +621,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
621621
_ => {
622622
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
623623
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
624-
plan_quantified_op(
625-
&left_expr,
626-
&right_expr,
627-
&compare_op,
628-
SetQuantifier::Any,
629-
)
624+
plan_any_op(left_expr, right_expr, &compare_op)
630625
}
631626
},
632627
SQLExpr::AllOp {
@@ -645,12 +640,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
645640
_ => {
646641
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
647642
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
648-
plan_quantified_op(
649-
&left_expr,
650-
&right_expr,
651-
&compare_op,
652-
SetQuantifier::All,
653-
)
643+
plan_all_op(&left_expr, &right_expr, &compare_op)
654644
}
655645
},
656646
#[expect(deprecated)]
@@ -1259,20 +1249,73 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
12591249
}
12601250
}
12611251

1262-
/// Plans `needle <compare_op> ANY/ALL(haystack)` with proper SQL NULL semantics.
1252+
/// Builds a CASE expression that handles NULL semantics for `x <op> ANY(arr)`:
1253+
///
1254+
/// ```text
1255+
/// CASE
1256+
/// WHEN <min_or_max>(arr) IS NOT NULL THEN <comparison>
1257+
/// WHEN arr IS NOT NULL THEN FALSE -- empty or all-null array
1258+
/// ELSE NULL -- NULL array
1259+
/// END
1260+
/// ```
1261+
fn any_op_with_null_handling(bound: Expr, comparison: Expr, arr: Expr) -> Result<Expr> {
1262+
when(bound.is_not_null(), comparison)
1263+
.when(arr.is_not_null(), lit(false))
1264+
.otherwise(lit(ScalarValue::Boolean(None)))
1265+
}
1266+
1267+
/// Plans a `<left> <op> ANY(<right>)` expression for non-subquery operands.
1268+
fn plan_any_op(
1269+
left_expr: Expr,
1270+
right_expr: Expr,
1271+
compare_op: &BinaryOperator,
1272+
) -> Result<Expr> {
1273+
match compare_op {
1274+
BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)),
1275+
BinaryOperator::NotEq => {
1276+
let min = array_min(right_expr.clone());
1277+
let max = array_max(right_expr.clone());
1278+
// NOT EQ is true when either bound differs from left
1279+
let comparison = min
1280+
.not_eq(left_expr.clone())
1281+
.or(max.clone().not_eq(left_expr));
1282+
any_op_with_null_handling(max, comparison, right_expr)
1283+
}
1284+
BinaryOperator::Gt => {
1285+
let min = array_min(right_expr.clone());
1286+
any_op_with_null_handling(min.clone(), min.lt(left_expr), right_expr)
1287+
}
1288+
BinaryOperator::Lt => {
1289+
let max = array_max(right_expr.clone());
1290+
any_op_with_null_handling(max.clone(), max.gt(left_expr), right_expr)
1291+
}
1292+
BinaryOperator::GtEq => {
1293+
let min = array_min(right_expr.clone());
1294+
any_op_with_null_handling(min.clone(), min.lt_eq(left_expr), right_expr)
1295+
}
1296+
BinaryOperator::LtEq => {
1297+
let max = array_max(right_expr.clone());
1298+
any_op_with_null_handling(max.clone(), max.gt_eq(left_expr), right_expr)
1299+
}
1300+
_ => plan_err!(
1301+
"Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1302+
),
1303+
}
1304+
}
1305+
1306+
/// Plans `needle <compare_op> ALL(haystack)` with proper SQL NULL semantics.
12631307
///
12641308
/// CASE/WHEN structure:
12651309
/// WHEN arr IS NULL → NULL
1266-
/// WHEN empty → vacuous_result (ANY:false, ALL:true)
1310+
/// WHEN empty → TRUE
12671311
/// WHEN lhs IS NULL → NULL
1268-
/// WHEN decisive_condition → decisive_result (ANY:true match found, ALL:false violation found)
1312+
/// WHEN decisive_condition → FALSE
12691313
/// WHEN has_nulls → NULL
1270-
/// ELSE → vacuous_result
1271-
fn plan_quantified_op(
1314+
/// ELSE → TRUE
1315+
fn plan_all_op(
12721316
needle: &Expr,
12731317
haystack: &Expr,
12741318
compare_op: &BinaryOperator,
1275-
quantifier: SetQuantifier,
12761319
) -> Result<Expr> {
12771320
let null_arr_check = haystack.clone().is_null();
12781321
let empty_check = cardinality(haystack.clone()).eq(lit(0u64));
@@ -1282,61 +1325,40 @@ fn plan_quantified_op(
12821325
let has_nulls =
12831326
array_position(haystack.clone(), lit(ScalarValue::Null), lit(1i64)).is_not_null();
12841327

1285-
let decisive_condition = match (compare_op, quantifier) {
1286-
(BinaryOperator::Eq, SetQuantifier::Any)
1287-
| (BinaryOperator::NotEq, SetQuantifier::All) => {
1288-
array_has(haystack.clone(), needle.clone())
1289-
}
1290-
(BinaryOperator::Eq, SetQuantifier::All)
1291-
| (BinaryOperator::NotEq, SetQuantifier::Any) => {
1328+
let decisive_condition = match compare_op {
1329+
BinaryOperator::NotEq => array_has(haystack.clone(), needle.clone()),
1330+
BinaryOperator::Eq => {
12921331
let all_equal = array_min(haystack.clone())
12931332
.eq(needle.clone())
12941333
.and(array_max(haystack.clone()).eq(needle.clone()));
12951334
Expr::Not(Box::new(all_equal))
12961335
}
1297-
(BinaryOperator::Gt, SetQuantifier::Any) => {
1298-
needle.clone().gt(array_min(haystack.clone()))
1299-
}
1300-
(BinaryOperator::Gt, SetQuantifier::All) => {
1336+
BinaryOperator::Gt => {
13011337
Expr::Not(Box::new(needle.clone().gt(array_max(haystack.clone()))))
13021338
}
1303-
(BinaryOperator::Lt, SetQuantifier::Any) => {
1304-
needle.clone().lt(array_max(haystack.clone()))
1305-
}
1306-
(BinaryOperator::Lt, SetQuantifier::All) => {
1339+
BinaryOperator::Lt => {
13071340
Expr::Not(Box::new(needle.clone().lt(array_min(haystack.clone()))))
13081341
}
1309-
(BinaryOperator::GtEq, SetQuantifier::Any) => {
1310-
needle.clone().gt_eq(array_min(haystack.clone()))
1311-
}
1312-
(BinaryOperator::GtEq, SetQuantifier::All) => {
1342+
BinaryOperator::GtEq => {
13131343
Expr::Not(Box::new(needle.clone().gt_eq(array_max(haystack.clone()))))
13141344
}
1315-
(BinaryOperator::LtEq, SetQuantifier::Any) => {
1316-
needle.clone().lt_eq(array_max(haystack.clone()))
1317-
}
1318-
(BinaryOperator::LtEq, SetQuantifier::All) => {
1345+
BinaryOperator::LtEq => {
13191346
Expr::Not(Box::new(needle.clone().lt_eq(array_min(haystack.clone()))))
13201347
}
13211348
_ => {
13221349
return plan_err!(
1323-
"Unsupported {quantifier}Op: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1350+
"Unsupported AllOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
13241351
);
13251352
}
13261353
};
13271354

1328-
let (vacuous_result, decisive_result) = match quantifier {
1329-
SetQuantifier::Any => (false, true),
1330-
SetQuantifier::All => (true, false),
1331-
};
1332-
13331355
let null_bool = lit(ScalarValue::Boolean(None));
13341356
when(null_arr_check, null_bool.clone())
1335-
.when(empty_check, lit(vacuous_result))
1357+
.when(empty_check, lit(true))
13361358
.when(null_lhs_check, null_bool.clone())
1337-
.when(decisive_condition, lit(decisive_result))
1359+
.when(decisive_condition, lit(false))
13381360
.when(has_nulls, null_bool)
1339-
.otherwise(lit(vacuous_result))
1361+
.otherwise(lit(true))
13401362
}
13411363

13421364
#[cfg(test)]

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ fn roundtrip_statement_postgres_any_array_expr() -> Result<(), DataFusionError>
370370
sql: "select left from array where 1 = any(left);",
371371
parser_dialect: GenericDialect {},
372372
unparser_dialect: UnparserPostgreSqlDialect {},
373-
expected: @r#"SELECT "array"."left" FROM "array" WHERE CASE WHEN "array"."left" IS NULL THEN NULL WHEN (cardinality("array"."left") = 0) THEN false WHEN 1 IS NULL THEN NULL WHEN 1 = ANY("array"."left") THEN true WHEN array_position("array"."left", NULL, 1) IS NOT NULL THEN NULL ELSE false END"#,
373+
expected: @r#"SELECT "array"."left" FROM "array" WHERE 1 = ANY("array"."left")"#,
374374
);
375375
Ok(())
376376
}

datafusion/sqllogictest/test_files/array/array_has.slt

Lines changed: 17 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -517,18 +517,16 @@ logical_plan
517517
03)----SubqueryAlias: test
518518
04)------SubqueryAlias: t
519519
05)--------Projection:
520-
06)----------Filter: __common_expr_3 IS NULL AND Boolean(NULL) OR __common_expr_3 IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) IS NOT DISTINCT FROM Boolean(true) AND __common_expr_3 IS NOT NULL
521-
07)------------Projection: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) AS __common_expr_3
522-
08)--------------TableScan: generate_series() projection=[value]
520+
06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")])
521+
07)------------TableScan: generate_series() projection=[value]
523522
physical_plan
524523
01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
525524
02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
526525
03)----CoalescePartitionsExec
527526
04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
528-
05)--------FilterExec: __common_expr_3@0 IS NULL AND NULL OR __common_expr_3@0 IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]) IS NOT DISTINCT FROM true AND __common_expr_3@0 IS NOT NULL, projection=[]
529-
06)----------ProjectionExec: expr=[substr(md5(CAST(value@0 AS Utf8View)), 1, 32) as __common_expr_3]
530-
07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
531-
08)--------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
527+
05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[]
528+
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
529+
07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
532530

533531
query I
534532
with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i))
@@ -756,26 +754,26 @@ select 5 <= any(make_array());
756754
false
757755

758756
# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition
759-
# These return NULL because NULLs leave the result indeterminate
757+
# These return false (NULLs are skipped by array_min/array_max)
760758
query B
761759
select 5 > any(make_array(6, NULL));
762760
----
763-
NULL
761+
false
764762

765763
query B
766764
select 5 < any(make_array(3, NULL));
767765
----
768-
NULL
766+
false
769767

770768
query B
771769
select 5 >= any(make_array(6, NULL));
772770
----
773-
NULL
771+
false
774772

775773
query B
776774
select 5 <= any(make_array(3, NULL));
777775
----
778-
NULL
776+
false
779777

780778
# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition
781779
query B
@@ -806,38 +804,33 @@ true
806804
query B
807805
select 5 <> any(make_array(5, NULL));
808806
----
809-
NULL
807+
false
810808

811-
# All-NULL array: all operators should return NULL (unknown comparison)
809+
# All-NULL array: all operators should return false
812810
query B
813811
select 5 > any(make_array(NULL::INT, NULL::INT));
814812
----
815-
NULL
813+
false
816814

817815
query B
818816
select 5 < any(make_array(NULL::INT, NULL::INT));
819817
----
820-
NULL
818+
false
821819

822820
query B
823821
select 5 >= any(make_array(NULL::INT, NULL::INT));
824822
----
825-
NULL
823+
false
826824

827825
query B
828826
select 5 <= any(make_array(NULL::INT, NULL::INT));
829827
----
830-
NULL
828+
false
831829

832830
query B
833831
select 5 <> any(make_array(NULL::INT, NULL::INT));
834832
----
835-
NULL
836-
837-
query B
838-
select 5 = any(make_array(NULL::INT, NULL::INT));
839-
----
840-
NULL
833+
false
841834

842835
# NULL left operand: should return NULL for non-empty arrays
843836
query B
@@ -897,35 +890,6 @@ select 5 <> any(NULL::INT[]);
897890
----
898891
NULL
899892

900-
query B
901-
select 5 = any(NULL::INT[]);
902-
----
903-
NULL
904-
905-
# NULL = ANY with non-empty array
906-
query B
907-
select NULL = any(make_array(1, 2, 3));
908-
----
909-
NULL
910-
911-
# = ANY with no match, no NULLs
912-
query B
913-
select 5 = any(make_array(1, 2, 3));
914-
----
915-
false
916-
917-
# = ANY with mixed NULL (satisfying) returns TRUE
918-
query B
919-
select 5 = any(make_array(5, NULL));
920-
----
921-
true
922-
923-
# = ANY with mixed NULL (non-satisfying): NULLs leave result indeterminate
924-
query B
925-
select 5 = any(make_array(1, 2, NULL));
926-
----
927-
NULL
928-
929893
statement ok
930894
DROP TABLE any_op_test;
931895

0 commit comments

Comments
 (0)