Skip to content

Commit 0645326

Browse files
committed
Use EqualityNullBehavior instead of boolean for logical plan
1 parent 33a32d4 commit 0645326

22 files changed

Lines changed: 314 additions & 78 deletions

File tree

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
/// Describes how `null` values are treated when evaluating equality (for example, while joining).
19+
///
20+
/// # Order
21+
///
22+
/// The derived [`PartialOrd`] follows the declaration order of the variants and represents the
23+
/// "restrictiveness" of the behavior: the more restrictive a behavior is, the fewer elements are considered to be equal to `null`.
24+
/// [`EqualityNullBehavior::NullEqualsNothing`] represents the most restrictive behavior and compares as the smallest value.
25+
///
26+
/// This mirrors the old order with booleans (`false < true`), as `false` indicated that `null != null`.
27+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
28+
pub enum EqualityNullBehavior {
29+
/// Null is *not* equal to null while joining (`null != null`); replaces the former `false`.
30+
NullEqualsNothing,
31+
/// Null is equal to null while joining (`null == null`); replaces the former `true`.
32+
NullEqualsNull,
33+
}

datafusion/common/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub mod config;
4141
pub mod cse;
4242
pub mod diagnostic;
4343
pub mod display;
44+
mod equality_null_behavior;
4445
pub mod error;
4546
pub mod file_options;
4647
pub mod format;
@@ -64,6 +65,7 @@ pub use dfschema::{
6465
qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema,
6566
};
6667
pub use diagnostic::Diagnostic;
68+
pub use equality_null_behavior::EqualityNullBehavior;
6769
pub use error::{
6870
field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError,
6971
SharedResult,

datafusion/core/src/physical_planner.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ use datafusion_common::tree_node::{
6767
};
6868
use datafusion_common::{
6969
exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema,
70-
ScalarValue,
70+
EqualityNullBehavior, ScalarValue,
7171
};
7272
use datafusion_datasource::memory::MemorySourceConfig;
7373
use datafusion_expr::dml::{CopyTo, InsertOp};
@@ -895,11 +895,12 @@ impl DefaultPhysicalPlanner {
895895
on: keys,
896896
filter,
897897
join_type,
898-
null_equals_null,
898+
equality_null_behavior,
899899
schema: join_schema,
900900
..
901901
}) => {
902-
let null_equals_null = *null_equals_null;
902+
let null_equals_null =
903+
*equality_null_behavior == EqualityNullBehavior::NullEqualsNull;
903904

904905
let [physical_left, physical_right] = children.two()?;
905906

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ use datafusion_common::file_options::file_type::FileType;
5656
use datafusion_common::{
5757
exec_err, get_target_functional_dependencies, internal_err, not_impl_err,
5858
plan_datafusion_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef,
59-
DataFusionError, Result, ScalarValue, TableReference, ToDFSchema, UnnestOptions,
59+
DataFusionError, EqualityNullBehavior, Result, ScalarValue, TableReference,
60+
ToDFSchema, UnnestOptions,
6061
};
6162
use datafusion_expr_common::type_coercion::binary::type_union_resolution;
6263

@@ -903,7 +904,13 @@ impl LogicalPlanBuilder {
903904
join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>),
904905
filter: Option<Expr>,
905906
) -> Result<Self> {
906-
self.join_detailed(right, join_type, join_keys, filter, false)
907+
self.join_detailed(
908+
right,
909+
join_type,
910+
join_keys,
911+
filter,
912+
EqualityNullBehavior::NullEqualsNothing,
913+
)
907914
}
908915

909916
/// Apply a join using the specified expressions.
@@ -959,7 +966,7 @@ impl LogicalPlanBuilder {
959966
join_type,
960967
(Vec::<Column>::new(), Vec::<Column>::new()),
961968
filter,
962-
false,
969+
EqualityNullBehavior::NullEqualsNothing,
963970
)
964971
}
965972

@@ -996,7 +1003,7 @@ impl LogicalPlanBuilder {
9961003
join_type: JoinType,
9971004
join_keys: (Vec<impl Into<Column>>, Vec<impl Into<Column>>),
9981005
filter: Option<Expr>,
999-
null_equals_null: bool,
1006+
equality_null_behavior: EqualityNullBehavior,
10001007
) -> Result<Self> {
10011008
if join_keys.0.len() != join_keys.1.len() {
10021009
return plan_err!("left_keys and right_keys were not the same length");
@@ -1113,7 +1120,7 @@ impl LogicalPlanBuilder {
11131120
join_type,
11141121
join_constraint: JoinConstraint::On,
11151122
schema: DFSchemaRef::new(join_schema),
1116-
null_equals_null,
1123+
equality_null_behavior,
11171124
})))
11181125
}
11191126

@@ -1186,7 +1193,7 @@ impl LogicalPlanBuilder {
11861193
filters,
11871194
join_type,
11881195
JoinConstraint::Using,
1189-
false,
1196+
EqualityNullBehavior::NullEqualsNothing,
11901197
)?;
11911198

11921199
Ok(Self::new(LogicalPlan::Join(join)))
@@ -1202,7 +1209,7 @@ impl LogicalPlanBuilder {
12021209
None,
12031210
JoinType::Inner,
12041211
JoinConstraint::On,
1205-
false,
1212+
EqualityNullBehavior::NullEqualsNothing,
12061213
)?;
12071214

12081215
Ok(Self::new(LogicalPlan::Join(join)))
@@ -1340,12 +1347,24 @@ impl LogicalPlanBuilder {
13401347
.unzip();
13411348
if is_all {
13421349
LogicalPlanBuilder::from(left_plan)
1343-
.join_detailed(right_plan, join_type, join_keys, None, true)?
1350+
.join_detailed(
1351+
right_plan,
1352+
join_type,
1353+
join_keys,
1354+
None,
1355+
EqualityNullBehavior::NullEqualsNull,
1356+
)?
13441357
.build()
13451358
} else {
13461359
LogicalPlanBuilder::from(left_plan)
13471360
.distinct()?
1348-
.join_detailed(right_plan, join_type, join_keys, None, true)?
1361+
.join_detailed(
1362+
right_plan,
1363+
join_type,
1364+
join_keys,
1365+
None,
1366+
EqualityNullBehavior::NullEqualsNull,
1367+
)?
13491368
.build()
13501369
}
13511370
}
@@ -1423,7 +1442,7 @@ impl LogicalPlanBuilder {
14231442
filter,
14241443
join_type,
14251444
JoinConstraint::On,
1426-
false,
1445+
EqualityNullBehavior::NullEqualsNothing,
14271446
)?;
14281447

14291448
Ok(Self::new(LogicalPlan::Join(join)))

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ use datafusion_common::tree_node::{
5555
};
5656
use datafusion_common::{
5757
aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints,
58-
DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence,
59-
FunctionalDependencies, ParamValues, Result, ScalarValue, Spans, TableReference,
60-
UnnestOptions,
58+
DFSchema, DFSchemaRef, DataFusionError, Dependency, EqualityNullBehavior,
59+
FunctionalDependence, FunctionalDependencies, ParamValues, Result, ScalarValue,
60+
Spans, TableReference, UnnestOptions,
6161
};
6262
use indexmap::IndexSet;
6363

@@ -655,7 +655,7 @@ impl LogicalPlan {
655655
join_constraint,
656656
on,
657657
schema: _,
658-
null_equals_null,
658+
equality_null_behavior: null_equals_null,
659659
}) => {
660660
let schema =
661661
build_join_schema(left.schema(), right.schema(), &join_type)?;
@@ -676,7 +676,7 @@ impl LogicalPlan {
676676
on: new_on,
677677
filter,
678678
schema: DFSchemaRef::new(schema),
679-
null_equals_null,
679+
equality_null_behavior: null_equals_null,
680680
}))
681681
}
682682
LogicalPlan::Subquery(_) => Ok(self),
@@ -894,7 +894,7 @@ impl LogicalPlan {
894894
join_type,
895895
join_constraint,
896896
on,
897-
null_equals_null,
897+
equality_null_behavior: null_equals_null,
898898
..
899899
}) => {
900900
let (left, right) = self.only_two_inputs(inputs)?;
@@ -933,7 +933,7 @@ impl LogicalPlan {
933933
on: new_on,
934934
filter: filter_expr,
935935
schema: DFSchemaRef::new(schema),
936-
null_equals_null: *null_equals_null,
936+
equality_null_behavior: *null_equals_null,
937937
}))
938938
}
939939
LogicalPlan::Subquery(Subquery {
@@ -3704,8 +3704,8 @@ pub struct Join {
37043704
pub join_constraint: JoinConstraint,
37053705
/// The output schema, containing fields from the left and right inputs
37063706
pub schema: DFSchemaRef,
3707-
/// If null_equals_null is true, null == null else null != null
3708-
pub null_equals_null: bool,
3707+
/// The null handling behavior for equalities
3708+
pub equality_null_behavior: EqualityNullBehavior,
37093709
}
37103710

37113711
impl Join {
@@ -3734,7 +3734,7 @@ impl Join {
37343734
filter: Option<Expr>,
37353735
join_type: JoinType,
37363736
join_constraint: JoinConstraint,
3737-
null_equals_null: bool,
3737+
equality_null_behavior: EqualityNullBehavior,
37383738
) -> Result<Self> {
37393739
let join_schema = build_join_schema(left.schema(), right.schema(), &join_type)?;
37403740

@@ -3746,7 +3746,7 @@ impl Join {
37463746
join_type,
37473747
join_constraint,
37483748
schema: Arc::new(join_schema),
3749-
null_equals_null,
3749+
equality_null_behavior,
37503750
})
37513751
}
37523752

@@ -3779,7 +3779,7 @@ impl Join {
37793779
join_type: original_join.join_type,
37803780
join_constraint: original_join.join_constraint,
37813781
schema: Arc::new(join_schema),
3782-
null_equals_null: original_join.null_equals_null,
3782+
equality_null_behavior: original_join.equality_null_behavior,
37833783
})
37843784
}
37853785
}
@@ -3801,8 +3801,8 @@ impl PartialOrd for Join {
38013801
pub join_type: &'a JoinType,
38023802
/// Join constraint
38033803
pub join_constraint: &'a JoinConstraint,
3804-
/// If null_equals_null is true, null == null else null != null
3805-
pub null_equals_null: &'a bool,
3804+
/// The null handling behavior for equalities
3805+
pub equality_null_behavior: &'a EqualityNullBehavior,
38063806
}
38073807
let comparable_self = ComparableJoin {
38083808
left: &self.left,
@@ -3811,7 +3811,7 @@ impl PartialOrd for Join {
38113811
filter: &self.filter,
38123812
join_type: &self.join_type,
38133813
join_constraint: &self.join_constraint,
3814-
null_equals_null: &self.null_equals_null,
3814+
equality_null_behavior: &self.equality_null_behavior,
38153815
};
38163816
let comparable_other = ComparableJoin {
38173817
left: &other.left,
@@ -3820,7 +3820,7 @@ impl PartialOrd for Join {
38203820
filter: &other.filter,
38213821
join_type: &other.join_type,
38223822
join_constraint: &other.join_constraint,
3823-
null_equals_null: &other.null_equals_null,
3823+
equality_null_behavior: &other.equality_null_behavior,
38243824
};
38253825
comparable_self.partial_cmp(&comparable_other)
38263826
}
@@ -4891,7 +4891,7 @@ mod tests {
48914891
join_type: JoinType::Inner,
48924892
join_constraint: JoinConstraint::On,
48934893
schema: Arc::new(left_schema.join(&right_schema)?),
4894-
null_equals_null: false,
4894+
equality_null_behavior: EqualityNullBehavior::NullEqualsNothing,
48954895
}))
48964896
}
48974897

@@ -5002,7 +5002,7 @@ mod tests {
50025002
Some(col("t1.b").gt(col("t2.b"))),
50035003
join_type,
50045004
JoinConstraint::On,
5005-
false,
5005+
EqualityNullBehavior::NullEqualsNothing,
50065006
)?;
50075007

50085008
match join_type {
@@ -5112,7 +5112,10 @@ mod tests {
51125112
assert_eq!(join.filter, Some(col("t1.b").gt(col("t2.b"))));
51135113
assert_eq!(join.join_type, join_type);
51145114
assert_eq!(join.join_constraint, JoinConstraint::On);
5115-
assert!(!join.null_equals_null);
5115+
assert_eq!(
5116+
join.equality_null_behavior,
5117+
EqualityNullBehavior::NullEqualsNothing
5118+
);
51165119
}
51175120

51185121
Ok(())
@@ -5147,7 +5150,7 @@ mod tests {
51475150
None,
51485151
JoinType::Inner,
51495152
JoinConstraint::Using,
5150-
false,
5153+
EqualityNullBehavior::NullEqualsNothing,
51515154
)?;
51525155

51535156
let fields = join.schema.fields();
@@ -5198,7 +5201,7 @@ mod tests {
51985201
Some(col("t1.value").lt(col("t2.value"))), // Non-equi filter condition
51995202
JoinType::Inner,
52005203
JoinConstraint::On,
5201-
false,
5204+
EqualityNullBehavior::NullEqualsNothing,
52025205
)?;
52035206

52045207
let fields = join.schema.fields();
@@ -5247,10 +5250,13 @@ mod tests {
52475250
None,
52485251
JoinType::Inner,
52495252
JoinConstraint::On,
5250-
true,
5253+
EqualityNullBehavior::NullEqualsNull,
52515254
)?;
52525255

5253-
assert!(join.null_equals_null);
5256+
assert_eq!(
5257+
join.equality_null_behavior,
5258+
EqualityNullBehavior::NullEqualsNull
5259+
);
52545260
}
52555261

52565262
Ok(())
@@ -5289,7 +5295,7 @@ mod tests {
52895295
Some(col("t1.value").gt(lit(5.0))),
52905296
join_type,
52915297
JoinConstraint::On,
5292-
false,
5298+
EqualityNullBehavior::NullEqualsNothing,
52935299
)?;
52945300

52955301
let fields = join.schema.fields();
@@ -5328,7 +5334,7 @@ mod tests {
53285334
None,
53295335
JoinType::Inner,
53305336
JoinConstraint::Using,
5331-
false,
5337+
EqualityNullBehavior::NullEqualsNothing,
53325338
)?;
53335339

53345340
assert_eq!(

0 commit comments

Comments
 (0)