Skip to content

Commit 847a8fa

Browse files
committed
Restore syntactic null-restriction fast path
Reinstate the deleted fast path for is_restrict_null_predicate in null_restriction.rs. Implement a two-stage evaluation process: return early false for mixed-reference predicates, perform syntactic evaluation for supported join-key-only predicates, and ensure authoritative fallback is applied only when necessary.
1 parent bacb0e3 commit 847a8fa

File tree

1 file changed

+261
-0
lines changed

1 file changed

+261
-0
lines changed
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Syntactic null-restriction evaluator used by optimizer fast paths.
19+
20+
use std::collections::HashSet;
21+
22+
use datafusion_common::{Column, ScalarValue};
23+
use datafusion_expr::{BinaryExpr, Expr, Operator};
24+
25+
/// Abstract result of evaluating an expression after the join columns have
/// been replaced by SQL NULL.
///
/// The evaluator only tracks as much as it needs to decide null-restriction:
/// whether the result is NULL, a definite boolean, or merely "not NULL".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NullSubstitutionValue {
    /// The expression evaluates to SQL NULL.
    Null,
    /// The expression is known not to be NULL; nothing else is known about it.
    NonNull,
    /// The expression evaluates to this definite boolean under SQL
    /// three-valued logic.
    Boolean(bool),
}
34+
35+
/// Returns `true` when every column in `column_refs` is also present in
/// `allowed_columns`.
///
/// An empty `column_refs` set yields `true`.
pub(super) fn all_columns_allowed(
    column_refs: &HashSet<&Column>,
    allowed_columns: &HashSet<&Column>,
) -> bool {
    for column in column_refs {
        // Bail out on the first reference that is not in the allowed set.
        if !allowed_columns.contains(*column) {
            return false;
        }
    }
    true
}
43+
44+
pub(super) fn syntactic_restrict_null_predicate(
45+
predicate: &Expr,
46+
join_cols: &HashSet<&Column>,
47+
) -> Option<bool> {
48+
match syntactic_null_substitution_value(predicate, join_cols) {
49+
Some(NullSubstitutionValue::Boolean(value)) => Some(!value),
50+
Some(NullSubstitutionValue::Null) => Some(true),
51+
Some(NullSubstitutionValue::NonNull) | None => None,
52+
}
53+
}
54+
55+
fn not(value: Option<NullSubstitutionValue>) -> Option<NullSubstitutionValue> {
56+
match value {
57+
Some(NullSubstitutionValue::Boolean(value)) => {
58+
Some(NullSubstitutionValue::Boolean(!value))
59+
}
60+
Some(NullSubstitutionValue::Null) => Some(NullSubstitutionValue::Null),
61+
Some(NullSubstitutionValue::NonNull) | None => None,
62+
}
63+
}
64+
65+
/// Combines two abstract operands with `AND` or `OR`.
///
/// `when_short_circuit` is the boolean that decides the result on its own:
/// `false` for `AND`, `true` for `OR`. Its negation is the identity element,
/// which leaves the other operand unchanged.
///
/// Returns `None` when the combined value cannot be determined.
fn binary_boolean_value(
    left: Option<NullSubstitutionValue>,
    right: Option<NullSubstitutionValue>,
    when_short_circuit: bool,
) -> Option<NullSubstitutionValue> {
    let dominant = Some(NullSubstitutionValue::Boolean(when_short_circuit));

    // A dominant operand on either side decides the result outright.
    if left == dominant || right == dominant {
        return dominant;
    }

    // Past this point any `Boolean(_)` operand must be the identity element,
    // because the dominant boolean has already been handled above.
    match (left, right) {
        // identity <op> x == x, and x <op> identity == x. The left-hand
        // alternative is tried first, so two identities yield the right one.
        (Some(NullSubstitutionValue::Boolean(_)), other)
        | (other, Some(NullSubstitutionValue::Boolean(_))) => other,
        (Some(NullSubstitutionValue::Null), Some(NullSubstitutionValue::Null)) => {
            Some(NullSubstitutionValue::Null)
        }
        // At least one side is undetermined or merely non-null.
        _ => None,
    }
}
93+
94+
fn null_check_value(
95+
value: Option<NullSubstitutionValue>,
96+
is_not_null: bool,
97+
) -> Option<NullSubstitutionValue> {
98+
match value {
99+
Some(NullSubstitutionValue::Null) => {
100+
Some(NullSubstitutionValue::Boolean(!is_not_null))
101+
}
102+
Some(NullSubstitutionValue::NonNull | NullSubstitutionValue::Boolean(_)) => {
103+
Some(NullSubstitutionValue::Boolean(is_not_null))
104+
}
105+
None => None,
106+
}
107+
}
108+
109+
fn null_if_contains_null(
110+
values: impl IntoIterator<Item = Option<NullSubstitutionValue>>,
111+
) -> Option<NullSubstitutionValue> {
112+
values
113+
.into_iter()
114+
.any(|value| matches!(value, Some(NullSubstitutionValue::Null)))
115+
.then_some(NullSubstitutionValue::Null)
116+
}
117+
118+
fn strict_null_only(
119+
value: Option<NullSubstitutionValue>,
120+
) -> Option<NullSubstitutionValue> {
121+
value.filter(|value| matches!(value, NullSubstitutionValue::Null))
122+
}
123+
124+
fn syntactic_null_substitution_value(
125+
expr: &Expr,
126+
join_cols: &HashSet<&Column>,
127+
) -> Option<NullSubstitutionValue> {
128+
match expr {
129+
Expr::Alias(alias) => {
130+
syntactic_null_substitution_value(alias.expr.as_ref(), join_cols)
131+
}
132+
Expr::Column(column) => join_cols
133+
.contains(column)
134+
.then_some(NullSubstitutionValue::Null),
135+
Expr::Literal(value, _) => Some(scalar_to_null_substitution_value(value)),
136+
Expr::BinaryExpr(binary_expr) => syntactic_binary_value(binary_expr, join_cols),
137+
Expr::Not(expr) => {
138+
not(syntactic_null_substitution_value(expr.as_ref(), join_cols))
139+
}
140+
Expr::IsNull(expr) => null_check_value(
141+
syntactic_null_substitution_value(expr.as_ref(), join_cols),
142+
false,
143+
),
144+
Expr::IsNotNull(expr) => null_check_value(
145+
syntactic_null_substitution_value(expr.as_ref(), join_cols),
146+
true,
147+
),
148+
Expr::Between(between) => null_if_contains_null([
149+
syntactic_null_substitution_value(between.expr.as_ref(), join_cols),
150+
syntactic_null_substitution_value(between.low.as_ref(), join_cols),
151+
syntactic_null_substitution_value(between.high.as_ref(), join_cols),
152+
]),
153+
Expr::Cast(cast) => strict_null_only(syntactic_null_substitution_value(
154+
cast.expr.as_ref(),
155+
join_cols,
156+
)),
157+
Expr::TryCast(try_cast) => strict_null_only(syntactic_null_substitution_value(
158+
try_cast.expr.as_ref(),
159+
join_cols,
160+
)),
161+
Expr::Negative(expr) => {
162+
strict_null_only(syntactic_null_substitution_value(expr.as_ref(), join_cols))
163+
}
164+
Expr::Like(like) | Expr::SimilarTo(like) => null_if_contains_null([
165+
syntactic_null_substitution_value(like.expr.as_ref(), join_cols),
166+
syntactic_null_substitution_value(like.pattern.as_ref(), join_cols),
167+
]),
168+
Expr::Exists { .. }
169+
| Expr::InList(_)
170+
| Expr::InSubquery(_)
171+
| Expr::SetComparison(_)
172+
| Expr::ScalarSubquery(_)
173+
| Expr::OuterReferenceColumn(_, _)
174+
| Expr::Placeholder(_)
175+
| Expr::ScalarVariable(_, _)
176+
| Expr::Unnest(_)
177+
| Expr::GroupingSet(_)
178+
| Expr::WindowFunction(_)
179+
| Expr::ScalarFunction(_)
180+
| Expr::Case(_)
181+
| Expr::IsTrue(_)
182+
| Expr::IsFalse(_)
183+
| Expr::IsUnknown(_)
184+
| Expr::IsNotTrue(_)
185+
| Expr::IsNotFalse(_)
186+
| Expr::IsNotUnknown(_) => None,
187+
Expr::AggregateFunction(_) => None,
188+
#[expect(deprecated)]
189+
Expr::Wildcard { .. } => None,
190+
}
191+
}
192+
193+
fn scalar_to_null_substitution_value(value: &ScalarValue) -> NullSubstitutionValue {
194+
match value {
195+
_ if value.is_null() => NullSubstitutionValue::Null,
196+
ScalarValue::Boolean(Some(value)) => NullSubstitutionValue::Boolean(*value),
197+
_ => NullSubstitutionValue::NonNull,
198+
}
199+
}
200+
201+
fn is_strict_null_binary_op(op: Operator) -> bool {
202+
matches!(
203+
op,
204+
Operator::Eq
205+
| Operator::NotEq
206+
| Operator::Lt
207+
| Operator::LtEq
208+
| Operator::Gt
209+
| Operator::GtEq
210+
| Operator::Plus
211+
| Operator::Minus
212+
| Operator::Multiply
213+
| Operator::Divide
214+
| Operator::Modulo
215+
| Operator::RegexMatch
216+
| Operator::RegexIMatch
217+
| Operator::RegexNotMatch
218+
| Operator::RegexNotIMatch
219+
| Operator::LikeMatch
220+
| Operator::ILikeMatch
221+
| Operator::NotLikeMatch
222+
| Operator::NotILikeMatch
223+
| Operator::BitwiseAnd
224+
| Operator::BitwiseOr
225+
| Operator::BitwiseXor
226+
| Operator::BitwiseShiftRight
227+
| Operator::BitwiseShiftLeft
228+
| Operator::StringConcat
229+
| Operator::AtArrow
230+
| Operator::ArrowAt
231+
| Operator::Arrow
232+
| Operator::LongArrow
233+
| Operator::HashArrow
234+
| Operator::HashLongArrow
235+
| Operator::AtAt
236+
| Operator::IntegerDivide
237+
| Operator::HashMinus
238+
| Operator::AtQuestion
239+
| Operator::Question
240+
| Operator::QuestionAnd
241+
| Operator::QuestionPipe
242+
| Operator::Colon
243+
)
244+
}
245+
246+
fn syntactic_binary_value(
247+
binary_expr: &BinaryExpr,
248+
join_cols: &HashSet<&Column>,
249+
) -> Option<NullSubstitutionValue> {
250+
let left = syntactic_null_substitution_value(binary_expr.left.as_ref(), join_cols);
251+
let right = syntactic_null_substitution_value(binary_expr.right.as_ref(), join_cols);
252+
253+
match binary_expr.op {
254+
Operator::And => binary_boolean_value(left, right, false),
255+
Operator::Or => binary_boolean_value(left, right, true),
256+
Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => None,
257+
op => is_strict_null_binary_op(op)
258+
.then(|| null_if_contains_null([left, right]))
259+
.flatten(),
260+
}
261+
}

0 commit comments

Comments
 (0)