Skip to content

Commit be273d6

Browse files
authored
Support '>', '<', '>=', '<=', '<>' in any operator (#20830)
## Which issue does this PR close? - Closes #2548. ## Rationale for this change The ANY operator only supports equality checks. ## What changes are included in this PR? Adds support for the other comparison operators ('<>', '>', '<', '>=', '<=') and adds tests. ## Are these changes tested? Added slt tests for this, and they all pass. ## Are there any user-facing changes? Yes, users can now use the ANY operator with these additional comparison operators.
1 parent 9885f4b commit be273d6

File tree

2 files changed

+297
-15
lines changed

2 files changed

+297
-15
lines changed

datafusion/sql/src/expr/mod.rs

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ use datafusion_expr::expr::SetQuantifier;
3636
use datafusion_expr::expr::{InList, WildcardOptions};
3737
use datafusion_expr::{
3838
Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal,
39-
Operator, TryCast, lit,
39+
Operator, TryCast, lit, when,
4040
};
4141

4242
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
43-
use datafusion_functions_nested::expr_fn::array_has;
43+
use datafusion_functions_nested::expr_fn::{array_has, array_max, array_min};
4444

4545
mod binary_op;
4646
mod function;
@@ -608,17 +608,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
608608
planner_context,
609609
),
610610
_ => {
611-
if compare_op != BinaryOperator::Eq {
612-
plan_err!(
613-
"Unsupported AnyOp: '{compare_op}', only '=' is supported"
614-
)
615-
} else {
616-
let left_expr =
617-
self.sql_to_expr(*left, schema, planner_context)?;
618-
let right_expr =
619-
self.sql_to_expr(*right, schema, planner_context)?;
620-
Ok(array_has(right_expr, left_expr))
621-
}
611+
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
612+
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
613+
plan_any_op(left_expr, right_expr, &compare_op)
622614
}
623615
},
624616
SQLExpr::AllOp {
@@ -1242,6 +1234,60 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
12421234
}
12431235
}
12441236

1237+
/// Builds a `CASE` expression that gives `x <op> ANY(arr)` its NULL semantics; `bound` is expected to be `array_min(arr)` or `array_max(arr)` and `comparison` is that bound compared against `x`:
1238+
///
1239+
/// ```text
1240+
/// CASE
1241+
/// WHEN <min_or_max>(arr) IS NOT NULL THEN <comparison>
1242+
/// WHEN arr IS NOT NULL THEN FALSE -- empty or all-null array
1243+
/// ELSE NULL -- NULL array
1244+
/// END
1245+
/// ```
1246+
fn any_op_with_null_handling(bound: Expr, comparison: Expr, arr: Expr) -> Result<Expr> {
1247+
when(bound.is_not_null(), comparison) // a bound exists: the comparison decides (still NULL when `x` is NULL)
1248+
.when(arr.is_not_null(), lit(false)) // array is present but has no non-NULL element to compare against
1249+
.otherwise(lit(ScalarValue::Boolean(None))) // the array itself is NULL, so the result is a NULL boolean
1250+
}
1251+
1252+
/// Plans a `<left> <op> ANY(<right>)` expression for non-subquery operands; `=` becomes `array_has`, the other comparisons are rewritten against `array_min`/`array_max` of `<right>`, and any other operator is a planning error.
1253+
fn plan_any_op(
1254+
left_expr: Expr,
1255+
right_expr: Expr,
1256+
compare_op: &BinaryOperator,
1257+
) -> Result<Expr> {
1258+
match compare_op {
1259+
BinaryOperator::Eq => Ok(array_has(right_expr, left_expr)), // `=` ANY is planned as array_has(right, left)
1260+
BinaryOperator::NotEq => {
1261+
let min = array_min(right_expr.clone());
1262+
let max = array_max(right_expr.clone());
1263+
// `<>` ANY holds when some element differs from `left`: if both the min and the max equal `left`, every non-NULL element equals it
1264+
let comparison = min
1265+
.not_eq(left_expr.clone())
1266+
.or(max.clone().not_eq(left_expr));
1267+
any_op_with_null_handling(max, comparison, right_expr)
1268+
}
1269+
BinaryOperator::Gt => {
1270+
let min = array_min(right_expr.clone());
1271+
any_op_with_null_handling(min.clone(), min.lt(left_expr), right_expr) // left > ANY(arr) <=> array_min(arr) < left
1272+
}
1273+
BinaryOperator::Lt => {
1274+
let max = array_max(right_expr.clone());
1275+
any_op_with_null_handling(max.clone(), max.gt(left_expr), right_expr) // left < ANY(arr) <=> array_max(arr) > left
1276+
}
1277+
BinaryOperator::GtEq => {
1278+
let min = array_min(right_expr.clone());
1279+
any_op_with_null_handling(min.clone(), min.lt_eq(left_expr), right_expr) // left >= ANY(arr) <=> array_min(arr) <= left
1280+
}
1281+
BinaryOperator::LtEq => {
1282+
let max = array_max(right_expr.clone());
1283+
any_op_with_null_handling(max.clone(), max.gt_eq(left_expr), right_expr) // left <= ANY(arr) <=> array_max(arr) >= left
1284+
}
1285+
_ => plan_err!(
1286+
"Unsupported AnyOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1287+
),
1288+
}
1289+
}
1290+
12451291
#[cfg(test)]
12461292
mod tests {
12471293
use std::collections::HashMap;

datafusion/sqllogictest/test_files/array.slt

Lines changed: 238 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7101,8 +7101,244 @@ select count(*) from arrays where 'X'=any(column3);
71017101
----
71027102
0
71037103

7104-
query error DataFusion error: Error during planning: Unsupported AnyOp: '>', only '=' is supported
7105-
select count(*) from arrays where 'X'>any(column3);
7104+
# any operator with comparison operators
7105+
# Use inline arrays so the test data is visible and the needle (5)
7106+
# falls within the range of some arrays but not others.
7107+
statement ok
7108+
CREATE TABLE any_op_test AS VALUES
7109+
(1, make_array(1, 2, 3)),
7110+
(2, make_array(4, 5, 6)),
7111+
(3, make_array(7, 8, 9)),
7112+
(4, make_array(3, 5, 7));
7113+
7114+
# 5 > ANY(arr): true when array_min < 5
7115+
# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓
7116+
query I?
7117+
select column1, column2 from any_op_test where 5 > any(column2) order by column1;
7118+
----
7119+
1 [1, 2, 3]
7120+
2 [4, 5, 6]
7121+
4 [3, 5, 7]
7122+
7123+
# 5 >= ANY(arr): true when array_min <= 5
7124+
# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓
7125+
query I?
7126+
select column1, column2 from any_op_test where 5 >= any(column2) order by column1;
7127+
----
7128+
1 [1, 2, 3]
7129+
2 [4, 5, 6]
7130+
4 [3, 5, 7]
7131+
7132+
# 5 < ANY(arr): true when array_max > 5
7133+
# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓
7134+
query I?
7135+
select column1, column2 from any_op_test where 5 < any(column2) order by column1;
7136+
----
7137+
2 [4, 5, 6]
7138+
3 [7, 8, 9]
7139+
4 [3, 5, 7]
7140+
7141+
# 5 <= ANY(arr): true when array_max >= 5
7142+
# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓
7143+
query I?
7144+
select column1, column2 from any_op_test where 5 <= any(column2) order by column1;
7145+
----
7146+
2 [4, 5, 6]
7147+
3 [7, 8, 9]
7148+
4 [3, 5, 7]
7149+
7150+
# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5
7151+
# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓
7152+
query I?
7153+
select column1, column2 from any_op_test where 5 <> any(column2) order by column1;
7154+
----
7155+
1 [1, 2, 3]
7156+
2 [4, 5, 6]
7157+
3 [7, 8, 9]
7158+
4 [3, 5, 7]
7159+
7160+
# For a single-element array where the element equals the needle, <> should return false
7161+
query B
7162+
select 5 <> any(make_array(5));
7163+
----
7164+
false
7165+
7166+
# For a uniform array [5,5,5], <> should also return false
7167+
query B
7168+
select 5 <> any(make_array(5, 5, 5));
7169+
----
7170+
false
7171+
7172+
# Empty array: all operators should return false (no elements satisfy the condition)
7173+
query B
7174+
select 5 = any(make_array());
7175+
----
7176+
false
7177+
7178+
query B
7179+
select 5 <> any(make_array());
7180+
----
7181+
false
7182+
7183+
query B
7184+
select 5 > any(make_array());
7185+
----
7186+
false
7187+
7188+
query B
7189+
select 5 < any(make_array());
7190+
----
7191+
false
7192+
7193+
query B
7194+
select 5 >= any(make_array());
7195+
----
7196+
false
7197+
7198+
query B
7199+
select 5 <= any(make_array());
7200+
----
7201+
false
7202+
7203+
# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition
7204+
# These return false (NULLs are skipped by array_min/array_max)
7205+
query B
7206+
select 5 > any(make_array(6, NULL));
7207+
----
7208+
false
7209+
7210+
query B
7211+
select 5 < any(make_array(3, NULL));
7212+
----
7213+
false
7214+
7215+
query B
7216+
select 5 >= any(make_array(6, NULL));
7217+
----
7218+
false
7219+
7220+
query B
7221+
select 5 <= any(make_array(3, NULL));
7222+
----
7223+
false
7224+
7225+
# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition
7226+
query B
7227+
select 5 > any(make_array(3, NULL));
7228+
----
7229+
true
7230+
7231+
query B
7232+
select 5 < any(make_array(6, NULL));
7233+
----
7234+
true
7235+
7236+
query B
7237+
select 5 >= any(make_array(5, NULL));
7238+
----
7239+
true
7240+
7241+
query B
7242+
select 5 <= any(make_array(5, NULL));
7243+
----
7244+
true
7245+
7246+
query B
7247+
select 5 <> any(make_array(3, NULL));
7248+
----
7249+
true
7250+
7251+
query B
7252+
select 5 <> any(make_array(5, NULL));
7253+
----
7254+
false
7255+
7256+
# All-NULL array: all operators should return false
7257+
query B
7258+
select 5 > any(make_array(NULL::INT, NULL::INT));
7259+
----
7260+
false
7261+
7262+
query B
7263+
select 5 < any(make_array(NULL::INT, NULL::INT));
7264+
----
7265+
false
7266+
7267+
query B
7268+
select 5 >= any(make_array(NULL::INT, NULL::INT));
7269+
----
7270+
false
7271+
7272+
query B
7273+
select 5 <= any(make_array(NULL::INT, NULL::INT));
7274+
----
7275+
false
7276+
7277+
query B
7278+
select 5 <> any(make_array(NULL::INT, NULL::INT));
7279+
----
7280+
false
7281+
7282+
# NULL left operand: should return NULL for non-empty arrays
7283+
query B
7284+
select NULL > any(make_array(1, 2, 3));
7285+
----
7286+
NULL
7287+
7288+
query B
7289+
select NULL < any(make_array(1, 2, 3));
7290+
----
7291+
NULL
7292+
7293+
query B
7294+
select NULL >= any(make_array(1, 2, 3));
7295+
----
7296+
NULL
7297+
7298+
query B
7299+
select NULL <= any(make_array(1, 2, 3));
7300+
----
7301+
NULL
7302+
7303+
query B
7304+
select NULL <> any(make_array(1, 2, 3));
7305+
----
7306+
NULL
7307+
7308+
# NULL left operand with empty array: should return false
7309+
query B
7310+
select NULL > any(make_array());
7311+
----
7312+
false
7313+
7314+
# NULL array: should return NULL
7315+
query B
7316+
select 5 > any(NULL::INT[]);
7317+
----
7318+
NULL
7319+
7320+
query B
7321+
select 5 < any(NULL::INT[]);
7322+
----
7323+
NULL
7324+
7325+
query B
7326+
select 5 >= any(NULL::INT[]);
7327+
----
7328+
NULL
7329+
7330+
query B
7331+
select 5 <= any(NULL::INT[]);
7332+
----
7333+
NULL
7334+
7335+
query B
7336+
select 5 <> any(NULL::INT[]);
7337+
----
7338+
NULL
7339+
7340+
statement ok
7341+
DROP TABLE any_op_test;
71067342

71077343
## array_distinct
71087344

0 commit comments

Comments
 (0)