|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +//! Syntactic null-restriction evaluator used by optimizer fast paths. |
| 19 | +
|
| 20 | +use std::collections::HashSet; |
| 21 | + |
| 22 | +use datafusion_common::{Column, ScalarValue}; |
| 23 | +use datafusion_expr::{BinaryExpr, Expr, Operator}; |
| 24 | + |
/// Result of syntactically evaluating an expression once the join columns
/// have been replaced by SQL `NULL`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum NullSubstitutionValue {
    /// The expression evaluates to SQL `NULL`.
    Null,
    /// The expression is definitely not `NULL`; nothing else is known about it.
    NonNull,
    /// The expression has this definite boolean value under SQL
    /// three-valued logic.
    Boolean(bool),
}
| 34 | + |
| 35 | +pub(super) fn all_columns_allowed( |
| 36 | + column_refs: &HashSet<&Column>, |
| 37 | + allowed_columns: &HashSet<&Column>, |
| 38 | +) -> bool { |
| 39 | + column_refs |
| 40 | + .iter() |
| 41 | + .all(|column| allowed_columns.contains(*column)) |
| 42 | +} |
| 43 | + |
| 44 | +pub(super) fn syntactic_restrict_null_predicate( |
| 45 | + predicate: &Expr, |
| 46 | + join_cols: &HashSet<&Column>, |
| 47 | +) -> Option<bool> { |
| 48 | + match syntactic_null_substitution_value(predicate, join_cols) { |
| 49 | + Some(NullSubstitutionValue::Boolean(value)) => Some(!value), |
| 50 | + Some(NullSubstitutionValue::Null) => Some(true), |
| 51 | + Some(NullSubstitutionValue::NonNull) | None => None, |
| 52 | + } |
| 53 | +} |
| 54 | + |
| 55 | +fn not(value: Option<NullSubstitutionValue>) -> Option<NullSubstitutionValue> { |
| 56 | + match value { |
| 57 | + Some(NullSubstitutionValue::Boolean(value)) => { |
| 58 | + Some(NullSubstitutionValue::Boolean(!value)) |
| 59 | + } |
| 60 | + Some(NullSubstitutionValue::Null) => Some(NullSubstitutionValue::Null), |
| 61 | + Some(NullSubstitutionValue::NonNull) | None => None, |
| 62 | + } |
| 63 | +} |
| 64 | + |
| 65 | +fn binary_boolean_value( |
| 66 | + left: Option<NullSubstitutionValue>, |
| 67 | + right: Option<NullSubstitutionValue>, |
| 68 | + when_short_circuit: bool, |
| 69 | +) -> Option<NullSubstitutionValue> { |
| 70 | + let short_circuit = Some(NullSubstitutionValue::Boolean(when_short_circuit)); |
| 71 | + let identity = Some(NullSubstitutionValue::Boolean(!when_short_circuit)); |
| 72 | + |
| 73 | + if left == short_circuit || right == short_circuit { |
| 74 | + return short_circuit; |
| 75 | + } |
| 76 | + |
| 77 | + match (left, right) { |
| 78 | + (value, other) if value == identity => other, |
| 79 | + (other, value) if value == identity => other, |
| 80 | + (Some(NullSubstitutionValue::Null), Some(NullSubstitutionValue::Null)) => { |
| 81 | + Some(NullSubstitutionValue::Null) |
| 82 | + } |
| 83 | + (Some(NullSubstitutionValue::NonNull), _) |
| 84 | + | (_, Some(NullSubstitutionValue::NonNull)) |
| 85 | + | (None, _) |
| 86 | + | (_, None) => None, |
| 87 | + (left, right) => { |
| 88 | + debug_assert_eq!(left, right); |
| 89 | + left |
| 90 | + } |
| 91 | + } |
| 92 | +} |
| 93 | + |
| 94 | +fn null_check_value( |
| 95 | + value: Option<NullSubstitutionValue>, |
| 96 | + is_not_null: bool, |
| 97 | +) -> Option<NullSubstitutionValue> { |
| 98 | + match value { |
| 99 | + Some(NullSubstitutionValue::Null) => { |
| 100 | + Some(NullSubstitutionValue::Boolean(!is_not_null)) |
| 101 | + } |
| 102 | + Some(NullSubstitutionValue::NonNull | NullSubstitutionValue::Boolean(_)) => { |
| 103 | + Some(NullSubstitutionValue::Boolean(is_not_null)) |
| 104 | + } |
| 105 | + None => None, |
| 106 | + } |
| 107 | +} |
| 108 | + |
| 109 | +fn null_if_contains_null( |
| 110 | + values: impl IntoIterator<Item = Option<NullSubstitutionValue>>, |
| 111 | +) -> Option<NullSubstitutionValue> { |
| 112 | + values |
| 113 | + .into_iter() |
| 114 | + .any(|value| matches!(value, Some(NullSubstitutionValue::Null))) |
| 115 | + .then_some(NullSubstitutionValue::Null) |
| 116 | +} |
| 117 | + |
| 118 | +fn strict_null_only( |
| 119 | + value: Option<NullSubstitutionValue>, |
| 120 | +) -> Option<NullSubstitutionValue> { |
| 121 | + value.filter(|value| matches!(value, NullSubstitutionValue::Null)) |
| 122 | +} |
| 123 | + |
| 124 | +fn syntactic_null_substitution_value( |
| 125 | + expr: &Expr, |
| 126 | + join_cols: &HashSet<&Column>, |
| 127 | +) -> Option<NullSubstitutionValue> { |
| 128 | + match expr { |
| 129 | + Expr::Alias(alias) => { |
| 130 | + syntactic_null_substitution_value(alias.expr.as_ref(), join_cols) |
| 131 | + } |
| 132 | + Expr::Column(column) => join_cols |
| 133 | + .contains(column) |
| 134 | + .then_some(NullSubstitutionValue::Null), |
| 135 | + Expr::Literal(value, _) => Some(scalar_to_null_substitution_value(value)), |
| 136 | + Expr::BinaryExpr(binary_expr) => syntactic_binary_value(binary_expr, join_cols), |
| 137 | + Expr::Not(expr) => { |
| 138 | + not(syntactic_null_substitution_value(expr.as_ref(), join_cols)) |
| 139 | + } |
| 140 | + Expr::IsNull(expr) => null_check_value( |
| 141 | + syntactic_null_substitution_value(expr.as_ref(), join_cols), |
| 142 | + false, |
| 143 | + ), |
| 144 | + Expr::IsNotNull(expr) => null_check_value( |
| 145 | + syntactic_null_substitution_value(expr.as_ref(), join_cols), |
| 146 | + true, |
| 147 | + ), |
| 148 | + Expr::Between(between) => null_if_contains_null([ |
| 149 | + syntactic_null_substitution_value(between.expr.as_ref(), join_cols), |
| 150 | + syntactic_null_substitution_value(between.low.as_ref(), join_cols), |
| 151 | + syntactic_null_substitution_value(between.high.as_ref(), join_cols), |
| 152 | + ]), |
| 153 | + Expr::Cast(cast) => strict_null_only(syntactic_null_substitution_value( |
| 154 | + cast.expr.as_ref(), |
| 155 | + join_cols, |
| 156 | + )), |
| 157 | + Expr::TryCast(try_cast) => strict_null_only(syntactic_null_substitution_value( |
| 158 | + try_cast.expr.as_ref(), |
| 159 | + join_cols, |
| 160 | + )), |
| 161 | + Expr::Negative(expr) => { |
| 162 | + strict_null_only(syntactic_null_substitution_value(expr.as_ref(), join_cols)) |
| 163 | + } |
| 164 | + Expr::Like(like) | Expr::SimilarTo(like) => null_if_contains_null([ |
| 165 | + syntactic_null_substitution_value(like.expr.as_ref(), join_cols), |
| 166 | + syntactic_null_substitution_value(like.pattern.as_ref(), join_cols), |
| 167 | + ]), |
| 168 | + Expr::Exists { .. } |
| 169 | + | Expr::InList(_) |
| 170 | + | Expr::InSubquery(_) |
| 171 | + | Expr::SetComparison(_) |
| 172 | + | Expr::ScalarSubquery(_) |
| 173 | + | Expr::OuterReferenceColumn(_, _) |
| 174 | + | Expr::Placeholder(_) |
| 175 | + | Expr::ScalarVariable(_, _) |
| 176 | + | Expr::Unnest(_) |
| 177 | + | Expr::GroupingSet(_) |
| 178 | + | Expr::WindowFunction(_) |
| 179 | + | Expr::ScalarFunction(_) |
| 180 | + | Expr::Case(_) |
| 181 | + | Expr::IsTrue(_) |
| 182 | + | Expr::IsFalse(_) |
| 183 | + | Expr::IsUnknown(_) |
| 184 | + | Expr::IsNotTrue(_) |
| 185 | + | Expr::IsNotFalse(_) |
| 186 | + | Expr::IsNotUnknown(_) => None, |
| 187 | + Expr::AggregateFunction(_) => None, |
| 188 | + #[expect(deprecated)] |
| 189 | + Expr::Wildcard { .. } => None, |
| 190 | + } |
| 191 | +} |
| 192 | + |
| 193 | +fn scalar_to_null_substitution_value(value: &ScalarValue) -> NullSubstitutionValue { |
| 194 | + match value { |
| 195 | + _ if value.is_null() => NullSubstitutionValue::Null, |
| 196 | + ScalarValue::Boolean(Some(value)) => NullSubstitutionValue::Boolean(*value), |
| 197 | + _ => NullSubstitutionValue::NonNull, |
| 198 | + } |
| 199 | +} |
| 200 | + |
| 201 | +fn is_strict_null_binary_op(op: Operator) -> bool { |
| 202 | + matches!( |
| 203 | + op, |
| 204 | + Operator::Eq |
| 205 | + | Operator::NotEq |
| 206 | + | Operator::Lt |
| 207 | + | Operator::LtEq |
| 208 | + | Operator::Gt |
| 209 | + | Operator::GtEq |
| 210 | + | Operator::Plus |
| 211 | + | Operator::Minus |
| 212 | + | Operator::Multiply |
| 213 | + | Operator::Divide |
| 214 | + | Operator::Modulo |
| 215 | + | Operator::RegexMatch |
| 216 | + | Operator::RegexIMatch |
| 217 | + | Operator::RegexNotMatch |
| 218 | + | Operator::RegexNotIMatch |
| 219 | + | Operator::LikeMatch |
| 220 | + | Operator::ILikeMatch |
| 221 | + | Operator::NotLikeMatch |
| 222 | + | Operator::NotILikeMatch |
| 223 | + | Operator::BitwiseAnd |
| 224 | + | Operator::BitwiseOr |
| 225 | + | Operator::BitwiseXor |
| 226 | + | Operator::BitwiseShiftRight |
| 227 | + | Operator::BitwiseShiftLeft |
| 228 | + | Operator::StringConcat |
| 229 | + | Operator::AtArrow |
| 230 | + | Operator::ArrowAt |
| 231 | + | Operator::Arrow |
| 232 | + | Operator::LongArrow |
| 233 | + | Operator::HashArrow |
| 234 | + | Operator::HashLongArrow |
| 235 | + | Operator::AtAt |
| 236 | + | Operator::IntegerDivide |
| 237 | + | Operator::HashMinus |
| 238 | + | Operator::AtQuestion |
| 239 | + | Operator::Question |
| 240 | + | Operator::QuestionAnd |
| 241 | + | Operator::QuestionPipe |
| 242 | + | Operator::Colon |
| 243 | + ) |
| 244 | +} |
| 245 | + |
| 246 | +fn syntactic_binary_value( |
| 247 | + binary_expr: &BinaryExpr, |
| 248 | + join_cols: &HashSet<&Column>, |
| 249 | +) -> Option<NullSubstitutionValue> { |
| 250 | + let left = syntactic_null_substitution_value(binary_expr.left.as_ref(), join_cols); |
| 251 | + let right = syntactic_null_substitution_value(binary_expr.right.as_ref(), join_cols); |
| 252 | + |
| 253 | + match binary_expr.op { |
| 254 | + Operator::And => binary_boolean_value(left, right, false), |
| 255 | + Operator::Or => binary_boolean_value(left, right, true), |
| 256 | + Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => None, |
| 257 | + op => is_strict_null_binary_op(op) |
| 258 | + .then(|| null_if_contains_null([left, right])) |
| 259 | + .flatten(), |
| 260 | + } |
| 261 | +} |
0 commit comments