Skip to content

Commit 49876bf

Browse files
authored
Revert "[Minor]: unify ANY/ALL planning and align ANY NULL semantics with PG (#21743)" (#22345)
## Which issue does this PR close? - Closes #22073 ## Rationale for this change As @cetra3 describes on #22073, in #21743 @berkaysynnada made a seemingly small change to align to postgres semantics that had much larger consequences I think many users (for example @cetra3) will treat this change as a regression, and thus I think this issue would block the next datafusion release - #21080 We tried an alternate fix but it appears to also raise some subtle questions from @Jefffrey - #22102 (comment) Thus I think it is safest to revert the initial change and work out the better longer term fix before releasing ## What changes are included in this PR? - Revert #21743 / 4bff17e until we can have a beter ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent 5876a1a commit 49876bf

3 files changed

Lines changed: 90 additions & 104 deletions

File tree

datafusion/sql/src/expr/mod.rs

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -621,12 +621,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
621621
_ => {
622622
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
623623
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
624-
plan_quantified_op(
625-
&left_expr,
626-
&right_expr,
627-
&compare_op,
628-
SetQuantifier::Any,
629-
)
624+
plan_any_op(left_expr, right_expr, &compare_op)
630625
}
631626
},
632627
SQLExpr::AllOp {
@@ -645,12 +640,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
645640
_ => {
646641
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
647642
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
648-
plan_quantified_op(
649-
&left_expr,
650-
&right_expr,
651-
&compare_op,
652-
SetQuantifier::All,
653-
)
643+
plan_all_op(&left_expr, &right_expr, &compare_op)
654644
}
655645
},
656646
#[expect(deprecated)]
@@ -1259,20 +1249,73 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
12591249
}
12601250
}
12611251

1262-
/// Plans `needle <compare_op> ANY/ALL(haystack)` with proper SQL NULL semantics.
1252+
/// Builds a CASE expression that handles NULL semantics for `x <op> ANY(arr)`:
1253+
///
1254+
/// ```text
1255+
/// CASE
1256+
/// WHEN <min_or_max>(arr) IS NOT NULL THEN <comparison>
1257+
/// WHEN arr IS NOT NULL THEN FALSE -- empty or all-null array
1258+
/// ELSE NULL -- NULL array
1259+
/// END
1260+
/// ```
1261+
fn any_op_with_null_handling(bound: Expr, comparison: Expr, arr: Expr) -> Result<Expr> {
1262+
when(bound.is_not_null(), comparison)
1263+
.when(arr.is_not_null(), lit(false))
1264+
.otherwise(lit(ScalarValue::Boolean(None)))
1265+
}
1266+
1267+
/// Plans a `<left> <op> ANY(<right>)` expression for non-subquery operands.
1268+
fn plan_any_op(
1269+
left_expr: Expr,
1270+
right_expr: Expr,
1271+
compare_op: &BinaryOperator,
1272+
) -> Result<Expr> {
1273+
match compare_op {
1274+
BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)),
1275+
BinaryOperator::NotEq => {
1276+
let min = array_min(right_expr.clone());
1277+
let max = array_max(right_expr.clone());
1278+
// NOT EQ is true when either bound differs from left
1279+
let comparison = min
1280+
.not_eq(left_expr.clone())
1281+
.or(max.clone().not_eq(left_expr));
1282+
any_op_with_null_handling(max, comparison, right_expr)
1283+
}
1284+
BinaryOperator::Gt => {
1285+
let min = array_min(right_expr.clone());
1286+
any_op_with_null_handling(min.clone(), min.lt(left_expr), right_expr)
1287+
}
1288+
BinaryOperator::Lt => {
1289+
let max = array_max(right_expr.clone());
1290+
any_op_with_null_handling(max.clone(), max.gt(left_expr), right_expr)
1291+
}
1292+
BinaryOperator::GtEq => {
1293+
let min = array_min(right_expr.clone());
1294+
any_op_with_null_handling(min.clone(), min.lt_eq(left_expr), right_expr)
1295+
}
1296+
BinaryOperator::LtEq => {
1297+
let max = array_max(right_expr.clone());
1298+
any_op_with_null_handling(max.clone(), max.gt_eq(left_expr), right_expr)
1299+
}
1300+
_ => plan_err!(
1301+
"Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1302+
),
1303+
}
1304+
}
1305+
1306+
/// Plans `needle <compare_op> ALL(haystack)` with proper SQL NULL semantics.
12631307
///
12641308
/// CASE/WHEN structure:
12651309
/// WHEN arr IS NULL → NULL
1266-
/// WHEN empty → vacuous_result (ANY:false, ALL:true)
1310+
/// WHEN empty → TRUE
12671311
/// WHEN lhs IS NULL → NULL
1268-
/// WHEN decisive_condition → decisive_result (ANY:true match found, ALL:false violation found)
1312+
/// WHEN decisive_condition → FALSE
12691313
/// WHEN has_nulls → NULL
1270-
/// ELSE → vacuous_result
1271-
fn plan_quantified_op(
1314+
/// ELSE → TRUE
1315+
fn plan_all_op(
12721316
needle: &Expr,
12731317
haystack: &Expr,
12741318
compare_op: &BinaryOperator,
1275-
quantifier: SetQuantifier,
12761319
) -> Result<Expr> {
12771320
let null_arr_check = haystack.clone().is_null();
12781321
let empty_check = cardinality(haystack.clone()).eq(lit(0u64));
@@ -1282,61 +1325,40 @@ fn plan_quantified_op(
12821325
let has_nulls =
12831326
array_position(haystack.clone(), lit(ScalarValue::Null), lit(1i64)).is_not_null();
12841327

1285-
let decisive_condition = match (compare_op, quantifier) {
1286-
(BinaryOperator::Eq, SetQuantifier::Any)
1287-
| (BinaryOperator::NotEq, SetQuantifier::All) => {
1288-
array_has(haystack.clone(), needle.clone())
1289-
}
1290-
(BinaryOperator::Eq, SetQuantifier::All)
1291-
| (BinaryOperator::NotEq, SetQuantifier::Any) => {
1328+
let decisive_condition = match compare_op {
1329+
BinaryOperator::NotEq => array_has(haystack.clone(), needle.clone()),
1330+
BinaryOperator::Eq => {
12921331
let all_equal = array_min(haystack.clone())
12931332
.eq(needle.clone())
12941333
.and(array_max(haystack.clone()).eq(needle.clone()));
12951334
Expr::Not(Box::new(all_equal))
12961335
}
1297-
(BinaryOperator::Gt, SetQuantifier::Any) => {
1298-
needle.clone().gt(array_min(haystack.clone()))
1299-
}
1300-
(BinaryOperator::Gt, SetQuantifier::All) => {
1336+
BinaryOperator::Gt => {
13011337
Expr::Not(Box::new(needle.clone().gt(array_max(haystack.clone()))))
13021338
}
1303-
(BinaryOperator::Lt, SetQuantifier::Any) => {
1304-
needle.clone().lt(array_max(haystack.clone()))
1305-
}
1306-
(BinaryOperator::Lt, SetQuantifier::All) => {
1339+
BinaryOperator::Lt => {
13071340
Expr::Not(Box::new(needle.clone().lt(array_min(haystack.clone()))))
13081341
}
1309-
(BinaryOperator::GtEq, SetQuantifier::Any) => {
1310-
needle.clone().gt_eq(array_min(haystack.clone()))
1311-
}
1312-
(BinaryOperator::GtEq, SetQuantifier::All) => {
1342+
BinaryOperator::GtEq => {
13131343
Expr::Not(Box::new(needle.clone().gt_eq(array_max(haystack.clone()))))
13141344
}
1315-
(BinaryOperator::LtEq, SetQuantifier::Any) => {
1316-
needle.clone().lt_eq(array_max(haystack.clone()))
1317-
}
1318-
(BinaryOperator::LtEq, SetQuantifier::All) => {
1345+
BinaryOperator::LtEq => {
13191346
Expr::Not(Box::new(needle.clone().lt_eq(array_min(haystack.clone()))))
13201347
}
13211348
_ => {
13221349
return plan_err!(
1323-
"Unsupported {quantifier}Op: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1350+
"Unsupported AllOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
13241351
);
13251352
}
13261353
};
13271354

1328-
let (vacuous_result, decisive_result) = match quantifier {
1329-
SetQuantifier::Any => (false, true),
1330-
SetQuantifier::All => (true, false),
1331-
};
1332-
13331355
let null_bool = lit(ScalarValue::Boolean(None));
13341356
when(null_arr_check, null_bool.clone())
1335-
.when(empty_check, lit(vacuous_result))
1357+
.when(empty_check, lit(true))
13361358
.when(null_lhs_check, null_bool.clone())
1337-
.when(decisive_condition, lit(decisive_result))
1359+
.when(decisive_condition, lit(false))
13381360
.when(has_nulls, null_bool)
1339-
.otherwise(lit(vacuous_result))
1361+
.otherwise(lit(true))
13401362
}
13411363

13421364
#[cfg(test)]

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ fn roundtrip_statement_postgres_any_array_expr() -> Result<(), DataFusionError>
370370
sql: "select left from array where 1 = any(left);",
371371
parser_dialect: GenericDialect {},
372372
unparser_dialect: UnparserPostgreSqlDialect {},
373-
expected: @r#"SELECT "array"."left" FROM "array" WHERE CASE WHEN "array"."left" IS NULL THEN NULL WHEN (cardinality("array"."left") = 0) THEN false WHEN 1 IS NULL THEN NULL WHEN 1 = ANY("array"."left") THEN true WHEN array_position("array"."left", NULL, 1) IS NOT NULL THEN NULL ELSE false END"#,
373+
expected: @r#"SELECT "array"."left" FROM "array" WHERE 1 = ANY("array"."left")"#,
374374
);
375375
Ok(())
376376
}

datafusion/sqllogictest/test_files/array/array_has.slt

Lines changed: 17 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -517,18 +517,16 @@ logical_plan
517517
03)----SubqueryAlias: test
518518
04)------SubqueryAlias: t
519519
05)--------Projection:
520-
06)----------Filter: __common_expr_3 IS NULL AND Boolean(NULL) OR __common_expr_3 IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) IS NOT DISTINCT FROM Boolean(true) AND __common_expr_3 IS NOT NULL
521-
07)------------Projection: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) AS __common_expr_3
522-
08)--------------TableScan: generate_series() projection=[value]
520+
06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")])
521+
07)------------TableScan: generate_series() projection=[value]
523522
physical_plan
524523
01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
525524
02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
526525
03)----CoalescePartitionsExec
527526
04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
528-
05)--------FilterExec: __common_expr_3@0 IS NULL AND NULL OR __common_expr_3@0 IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]) IS NOT DISTINCT FROM true AND __common_expr_3@0 IS NOT NULL, projection=[]
529-
06)----------ProjectionExec: expr=[substr(md5(CAST(value@0 AS Utf8View)), 1, 32) as __common_expr_3]
530-
07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
531-
08)--------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
527+
05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[]
528+
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
529+
07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
532530

533531
query I
534532
with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i))
@@ -756,26 +754,26 @@ select 5 <= any(make_array());
756754
false
757755

758756
# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition
759-
# These return NULL because NULLs leave the result indeterminate
757+
# These return false (NULLs are skipped by array_min/array_max)
760758
query B
761759
select 5 > any(make_array(6, NULL));
762760
----
763-
NULL
761+
false
764762

765763
query B
766764
select 5 < any(make_array(3, NULL));
767765
----
768-
NULL
766+
false
769767

770768
query B
771769
select 5 >= any(make_array(6, NULL));
772770
----
773-
NULL
771+
false
774772

775773
query B
776774
select 5 <= any(make_array(3, NULL));
777775
----
778-
NULL
776+
false
779777

780778
# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition
781779
query B
@@ -806,38 +804,33 @@ true
806804
query B
807805
select 5 <> any(make_array(5, NULL));
808806
----
809-
NULL
807+
false
810808

811-
# All-NULL array: all operators should return NULL (unknown comparison)
809+
# All-NULL array: all operators should return false
812810
query B
813811
select 5 > any(make_array(NULL::INT, NULL::INT));
814812
----
815-
NULL
813+
false
816814

817815
query B
818816
select 5 < any(make_array(NULL::INT, NULL::INT));
819817
----
820-
NULL
818+
false
821819

822820
query B
823821
select 5 >= any(make_array(NULL::INT, NULL::INT));
824822
----
825-
NULL
823+
false
826824

827825
query B
828826
select 5 <= any(make_array(NULL::INT, NULL::INT));
829827
----
830-
NULL
828+
false
831829

832830
query B
833831
select 5 <> any(make_array(NULL::INT, NULL::INT));
834832
----
835-
NULL
836-
837-
query B
838-
select 5 = any(make_array(NULL::INT, NULL::INT));
839-
----
840-
NULL
833+
false
841834

842835
# NULL left operand: should return NULL for non-empty arrays
843836
query B
@@ -897,35 +890,6 @@ select 5 <> any(NULL::INT[]);
897890
----
898891
NULL
899892

900-
query B
901-
select 5 = any(NULL::INT[]);
902-
----
903-
NULL
904-
905-
# NULL = ANY with non-empty array
906-
query B
907-
select NULL = any(make_array(1, 2, 3));
908-
----
909-
NULL
910-
911-
# = ANY with no match, no NULLs
912-
query B
913-
select 5 = any(make_array(1, 2, 3));
914-
----
915-
false
916-
917-
# = ANY with mixed NULL (satisfying) returns TRUE
918-
query B
919-
select 5 = any(make_array(5, NULL));
920-
----
921-
true
922-
923-
# = ANY with mixed NULL (non-satisfying): NULLs leave result indeterminate
924-
query B
925-
select 5 = any(make_array(1, 2, NULL));
926-
----
927-
NULL
928-
929893
statement ok
930894
DROP TABLE any_op_test;
931895

0 commit comments

Comments
 (0)