Skip to content

Commit cab75e0

Browse files
authored
feat: warn on NULL equality predicates (#22948)
## Which issue does this PR close? - Closes #14434. ## Rationale for this change SQL comparisons such as `expr = NULL` and `expr <> NULL` evaluate to `NULL` under SQL three-valued logic, not to `true` or `false`. When those comparisons appear in predicate contexts such as `WHERE`, `JOIN ON`, or `HAVING`, they are almost always a user mistake where `IS NULL` or `IS NOT NULL` was intended. This PR emits non-fatal diagnostic warnings for those cases so callers that surface `Diagnostic` information can help users find and fix the query without changing planning success or query semantics. ## What changes are included in this PR? - Adds warning collection to `SqlToRel`, with `SqlToRel::take_warnings()` to drain non-fatal planning diagnostics after planning. - Adds predicate-scoped detection for `= NULL` and `<> NULL` comparisons. - Wires detection into `WHERE`, `JOIN ON`, and `HAVING` planning paths. - Recursively checks predicate expression structure such as nested `AND` / `OR` binary predicates and `CASE WHEN` conditions, without warning for projection-only expressions like `SELECT col = NULL`. - Creates `Diagnostic::new_warning` entries with a primary span on the comparison expression and help pointing at the `NULL` literal when spans are available. ## Are these changes tested? Yes. Added regression coverage in `datafusion/sql/tests/cases/diagnostic.rs` for: - `WHERE col = NULL` - `WHERE NULL = col` - `WHERE col <> NULL` - `JOIN ... ON col = NULL` - `HAVING ... = NULL` - nested `CASE WHEN col = NULL` inside a predicate - no warning for `IS NULL` - no warning for projection-only `SELECT col = NULL` - multiple warnings in one predicate Validation run locally: ```shell cargo fmt --all cargo test -p datafusion-sql --test sql_integration diagnostic cargo test -p datafusion-sql cargo clippy -p datafusion-sql --all-targets --all-features -- -D warnings cargo clippy --all-targets --all-features -- -D warnings git diff main..HEAD --check ``` ## Are there any user-facing changes? Yes, but non-breaking. SQL planning can now collect warning diagnostics for likely mistaken `NULL` equality predicates. Consumers can retrieve them with `SqlToRel::take_warnings()` and decide how to present them. Planning still succeeds and query semantics are unchanged.
1 parent 933297b commit cab75e0

5 files changed

Lines changed: 342 additions & 7 deletions

File tree

datafusion/sql/src/expr/mod.rs

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,23 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::ops::ControlFlow;
19+
1820
use arrow::datatypes::{DataType, TimeUnit};
1921
use datafusion_expr::planner::{
2022
PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
2123
};
2224
use sqlparser::ast::{
2325
AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind,
2426
DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr,
25-
ExprWithAlias as SQLExprWithAlias, JsonPath, MapEntry, StructField, Subscript,
26-
TrimWhereField, TypedString, Value, ValueWithSpan,
27+
ExprWithAlias as SQLExprWithAlias, JsonPath, MapEntry, Spanned, StructField,
28+
Subscript, TrimWhereField, TypedString, Value, ValueWithSpan,
2729
};
30+
use sqlparser::ast::{Query, Visit, Visitor};
2831

2932
use datafusion_common::{
30-
DFSchema, Result, ScalarValue, internal_datafusion_err, internal_err, not_impl_err,
31-
plan_err,
33+
DFSchema, Diagnostic, Result, ScalarValue, Span, internal_datafusion_err,
34+
internal_err, not_impl_err, plan_err,
3235
};
3336

3437
use datafusion_expr::expr::ScalarFunction;
@@ -54,7 +57,86 @@ mod substring;
5457
mod unary_op;
5558
mod value;
5659

60+
fn null_value_span(expr: &SQLExpr) -> Option<Option<Span>> {
61+
if let SQLExpr::Value(ValueWithSpan {
62+
value: Value::Null,
63+
span,
64+
}) = expr
65+
{
66+
Some(Span::try_from_sqlparser_span(*span))
67+
} else {
68+
None
69+
}
70+
}
71+
72+
fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
73+
let SQLExpr::BinaryOp { left, op, right } = expr else {
74+
return None;
75+
};
76+
77+
let null_span = null_value_span(left).or_else(|| null_value_span(right))?;
78+
79+
let (message, help) = match op {
80+
BinaryOperator::Eq => (
81+
"comparison with NULL using `=` always evaluates to NULL",
82+
"use `IS NULL` to check for NULL values",
83+
),
84+
BinaryOperator::NotEq => (
85+
"comparison with NULL using `<>` always evaluates to NULL",
86+
"use `IS NOT NULL` to check for non-NULL values",
87+
),
88+
_ => return None,
89+
};
90+
91+
Some(
92+
Diagnostic::new_warning(message, Span::try_from_sqlparser_span(expr.span()))
93+
.with_help(help, null_span),
94+
)
95+
}
96+
97+
struct NullEqualityPredicateVisitor<'a, 'b, S: ContextProvider> {
98+
sql_to_rel: &'a SqlToRel<'b, S>,
99+
subquery_depth: usize,
100+
}
101+
102+
impl<'a, 'b, S: ContextProvider> NullEqualityPredicateVisitor<'a, 'b, S> {
103+
fn new(sql_to_rel: &'a SqlToRel<'b, S>) -> Self {
104+
Self {
105+
sql_to_rel,
106+
subquery_depth: 0,
107+
}
108+
}
109+
}
110+
111+
impl<S: ContextProvider> Visitor for NullEqualityPredicateVisitor<'_, '_, S> {
112+
type Break = ();
113+
114+
fn pre_visit_query(&mut self, _query: &Query) -> ControlFlow<Self::Break> {
115+
self.subquery_depth += 1;
116+
ControlFlow::Continue(())
117+
}
118+
119+
fn post_visit_query(&mut self, _query: &Query) -> ControlFlow<Self::Break> {
120+
self.subquery_depth -= 1;
121+
ControlFlow::Continue(())
122+
}
123+
124+
fn pre_visit_expr(&mut self, expr: &SQLExpr) -> ControlFlow<Self::Break> {
125+
if self.subquery_depth == 0
126+
&& let Some(warning) = null_equality_warning(expr)
127+
{
128+
self.sql_to_rel.add_warning(warning);
129+
}
130+
ControlFlow::Continue(())
131+
}
132+
}
133+
57134
impl<S: ContextProvider> SqlToRel<'_, S> {
135+
pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
136+
let mut visitor = NullEqualityPredicateVisitor::new(self);
137+
let _ = predicate.visit(&mut visitor);
138+
}
139+
58140
pub(crate) fn sql_expr_to_logical_expr_with_alias(
59141
&self,
60142
sql: SQLExprWithAlias,

datafusion/sql/src/planner.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
1919
use std::collections::HashMap;
2020
use std::str::FromStr;
21-
use std::sync::Arc;
21+
use std::sync::{Arc, Mutex};
2222
use std::vec;
2323

2424
use crate::utils::make_decimal_type;
@@ -455,6 +455,7 @@ pub struct SqlToRel<'a, S: ContextProvider> {
455455
pub(crate) context_provider: &'a S,
456456
pub(crate) options: ParserOptions,
457457
pub(crate) ident_normalizer: IdentNormalizer,
458+
warnings: Mutex<Vec<Diagnostic>>,
458459
}
459460

460461
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -477,9 +478,27 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
477478
context_provider,
478479
options,
479480
ident_normalizer: IdentNormalizer::new(ident_normalize),
481+
warnings: Mutex::new(vec![]),
480482
}
481483
}
482484

485+
pub(crate) fn add_warning(&self, warning: Diagnostic) {
486+
self.warnings
487+
.lock()
488+
.expect("warning diagnostic lock poisoned")
489+
.push(warning);
490+
}
491+
492+
/// Drain and return non-fatal warnings collected during SQL planning.
493+
pub fn take_warnings(&self) -> Vec<Diagnostic> {
494+
std::mem::take(
495+
&mut self
496+
.warnings
497+
.lock()
498+
.expect("warning diagnostic lock poisoned"),
499+
)
500+
}
501+
483502
pub fn build_schema(&self, columns: Vec<SQLColumnDef>) -> Result<Schema> {
484503
let mut fields = Vec::with_capacity(columns.len());
485504

datafusion/sql/src/relation/join.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
122122
JoinConstraint::On(sql_expr) => {
123123
let join_schema = left.schema().join(right.schema())?;
124124
// parse ON expression
125+
self.warn_on_null_equality_predicate(&sql_expr);
125126
let expr = self.sql_to_expr(sql_expr, &join_schema, planner_context)?;
126127
LogicalPlanBuilder::from(left)
127128
.join_on(right, join_type, Some(expr))?

datafusion/sql/src/select.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
198198
let having_expr_opt = select
199199
.having
200200
.map::<Result<Expr>, _>(|having_expr| {
201+
self.warn_on_null_equality_predicate(&having_expr);
201202
let having_expr = self.sql_expr_to_logical_expr(
202203
having_expr,
203204
&combined_schema,
@@ -865,6 +866,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
865866
Some(predicate_expr) => {
866867
let fallback_schemas = plan.fallback_normalize_schemas();
867868

869+
self.warn_on_null_equality_predicate(&predicate_expr);
868870
let filter_expr =
869871
self.sql_to_expr(predicate_expr, plan.schema(), planner_context)?;
870872

0 commit comments

Comments
 (0)