Skip to content

Commit 4b1901f

Browse files
SubhamSinghalSubham Singhalblaginin
authored
Eliminate outer joins with empty relations via null-padded projection (apache#21321)
## Which issue does this PR close? - Closes apache#21320 ### Rationale for this change When one side of a LEFT/RIGHT/FULL outer join is an EmptyRelation, the current PropagateEmptyRelation optimizer rule leaves the join untouched. This means the engine still builds a hash table for the empty side, probes every row from the non-empty side, finds zero matches, and pads NULLs — all wasted work. The TODO at lines 76-80 of propagate_empty_relation.rs explicitly called out this gap: ``` // TODO: For LeftOut/Full Join, if the right side is empty, the Join can be eliminated // with a Projection with left side columns + right side columns replaced with null values. // For RightOut/Full Join, if the left side is empty, the Join can be eliminated // with a Projection with right side columns + left side columns replaced with null values. ``` ### What changes are included in this PR? Extends the PropagateEmptyRelation rule to handle 4 previously unoptimized cases by replacing the join with a Projection that null-pads the empty side's columns: ### Are these changes tested? Yes. 4 new unit tests added: ### Are there any user-facing changes? No API changes. --------- Co-authored-by: Subham Singhal <subhamsinghal@Subhams-MacBook-Air.local> Co-authored-by: Dmitrii Blaginin <dmitrii@blaginin.me>
1 parent 4aed81a commit 4b1901f

File tree

3 files changed

+354
-11
lines changed

3 files changed

+354
-11
lines changed

datafusion/optimizer/src/propagate_empty_relation.rs

Lines changed: 197 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ use std::sync::Arc;
2121

2222
use datafusion_common::JoinType;
2323
use datafusion_common::tree_node::Transformed;
24-
use datafusion_common::{Result, plan_err};
24+
use datafusion_common::{Column, DFSchemaRef, Result, ScalarValue, plan_err};
2525
use datafusion_expr::logical_plan::LogicalPlan;
26-
use datafusion_expr::{EmptyRelation, Projection, Union};
26+
use datafusion_expr::{EmptyRelation, Expr, Projection, Union, cast, lit};
2727

2828
use crate::optimizer::ApplyOrder;
2929
use crate::{OptimizerConfig, OptimizerRule};
@@ -73,12 +73,8 @@ impl OptimizerRule for PropagateEmptyRelation {
7373
Ok(Transformed::no(plan))
7474
}
7575
LogicalPlan::Join(ref join) => {
76-
// TODO: For Join, more join type need to be careful:
77-
// For LeftOut/Full Join, if the right side is empty, the Join can be eliminated with a Projection with left side
78-
// columns + right side columns replaced with null values.
79-
// For RightOut/Full Join, if the left side is empty, the Join can be eliminated with a Projection with right side
80-
// columns + left side columns replaced with null values.
8176
let (left_empty, right_empty) = binary_plan_children_is_empty(&plan)?;
77+
let left_field_count = join.left.schema().fields().len();
8278

8379
match join.join_type {
8480
// For Full Join, only both sides are empty, the Join result is empty.
@@ -88,6 +84,24 @@ impl OptimizerRule for PropagateEmptyRelation {
8884
schema: Arc::clone(&join.schema),
8985
}),
9086
)),
87+
// For Full Join, if one side is empty, replace with a
88+
// Projection that null-pads the empty side's columns.
89+
JoinType::Full if right_empty => {
90+
Ok(Transformed::yes(build_null_padded_projection(
91+
Arc::clone(&join.left),
92+
&join.schema,
93+
left_field_count,
94+
true,
95+
)?))
96+
}
97+
JoinType::Full if left_empty => {
98+
Ok(Transformed::yes(build_null_padded_projection(
99+
Arc::clone(&join.right),
100+
&join.schema,
101+
left_field_count,
102+
false,
103+
)?))
104+
}
91105
JoinType::Inner if left_empty || right_empty => Ok(Transformed::yes(
92106
LogicalPlan::EmptyRelation(EmptyRelation {
93107
produce_one_row: false,
@@ -100,12 +114,32 @@ impl OptimizerRule for PropagateEmptyRelation {
100114
schema: Arc::clone(&join.schema),
101115
}),
102116
)),
117+
// Left Join with empty right: all left rows survive
118+
// with NULLs for right columns.
119+
JoinType::Left if right_empty => {
120+
Ok(Transformed::yes(build_null_padded_projection(
121+
Arc::clone(&join.left),
122+
&join.schema,
123+
left_field_count,
124+
true,
125+
)?))
126+
}
103127
JoinType::Right if right_empty => Ok(Transformed::yes(
104128
LogicalPlan::EmptyRelation(EmptyRelation {
105129
produce_one_row: false,
106130
schema: Arc::clone(&join.schema),
107131
}),
108132
)),
133+
// Right Join with empty left: all right rows survive
134+
// with NULLs for left columns.
135+
JoinType::Right if left_empty => {
136+
Ok(Transformed::yes(build_null_padded_projection(
137+
Arc::clone(&join.right),
138+
&join.schema,
139+
left_field_count,
140+
false,
141+
)?))
142+
}
109143
JoinType::LeftSemi if left_empty || right_empty => Ok(
110144
Transformed::yes(LogicalPlan::EmptyRelation(EmptyRelation {
111145
produce_one_row: false,
@@ -230,6 +264,57 @@ fn empty_child(plan: &LogicalPlan) -> Result<Option<LogicalPlan>> {
230264
}
231265
}
232266

267+
/// Builds a Projection that replaces one side of an outer join with NULL literals.
268+
///
269+
/// When one side of an outer join is an `EmptyRelation`, the join can be eliminated
270+
/// by projecting the surviving side's columns as-is and replacing the empty side's
271+
/// columns with `CAST(NULL AS <type>)`.
272+
///
273+
/// The join schema is used as the projection's output schema to preserve nullability
274+
/// guarantees (important for FULL JOIN where the surviving side's columns are marked
275+
/// nullable in the join schema even if they aren't in the source schema).
276+
///
277+
/// # Example
278+
///
279+
/// For a `LEFT JOIN` where the right side is empty:
280+
/// ```text
281+
/// Left Join (orders.id = returns.order_id) Projection(orders.id, orders.amount,
282+
/// ├── TableScan: orders => CAST(NULL AS Int64) AS order_id,
283+
/// └── EmptyRelation CAST(NULL AS Utf8) AS reason)
284+
/// └── TableScan: orders
285+
/// ```
286+
fn build_null_padded_projection(
287+
surviving_plan: Arc<LogicalPlan>,
288+
join_schema: &DFSchemaRef,
289+
left_field_count: usize,
290+
empty_side_is_right: bool,
291+
) -> Result<LogicalPlan> {
292+
let exprs = join_schema
293+
.iter()
294+
.enumerate()
295+
.map(|(i, (qualifier, field))| {
296+
let on_empty_side = if empty_side_is_right {
297+
i >= left_field_count
298+
} else {
299+
i < left_field_count
300+
};
301+
302+
if on_empty_side {
303+
cast(lit(ScalarValue::Null), field.data_type().clone())
304+
.alias_qualified(qualifier.cloned(), field.name())
305+
} else {
306+
Expr::Column(Column::new(qualifier.cloned(), field.name()))
307+
}
308+
})
309+
.collect::<Vec<_>>();
310+
311+
Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
312+
exprs,
313+
surviving_plan,
314+
Arc::clone(join_schema),
315+
)?))
316+
}
317+
233318
#[cfg(test)]
234319
mod tests {
235320

@@ -570,6 +655,111 @@ mod tests {
570655
assert_empty_left_empty_right_lp(true, false, JoinType::RightAnti, false)
571656
}
572657

658+
#[test]
659+
fn test_left_join_right_empty_null_pad() -> Result<()> {
660+
let left =
661+
LogicalPlanBuilder::from(test_table_scan_with_name("left")?).build()?;
662+
let right_empty = LogicalPlanBuilder::from(test_table_scan_with_name("right")?)
663+
.filter(lit(false))?
664+
.build()?;
665+
666+
let plan = LogicalPlanBuilder::from(left)
667+
.join_using(
668+
right_empty,
669+
JoinType::Left,
670+
vec![Column::from_name("a".to_string())],
671+
)?
672+
.build()?;
673+
674+
let expected = "Projection: left.a, left.b, left.c, CAST(NULL AS UInt32) AS a, CAST(NULL AS UInt32) AS b, CAST(NULL AS UInt32) AS c\n TableScan: left";
675+
assert_together_optimized_plan(plan, expected, true)
676+
}
677+
678+
#[test]
679+
fn test_right_join_left_empty_null_pad() -> Result<()> {
680+
let left_empty = LogicalPlanBuilder::from(test_table_scan_with_name("left")?)
681+
.filter(lit(false))?
682+
.build()?;
683+
let right =
684+
LogicalPlanBuilder::from(test_table_scan_with_name("right")?).build()?;
685+
686+
let plan = LogicalPlanBuilder::from(left_empty)
687+
.join_using(
688+
right,
689+
JoinType::Right,
690+
vec![Column::from_name("a".to_string())],
691+
)?
692+
.build()?;
693+
694+
let expected = "Projection: CAST(NULL AS UInt32) AS a, CAST(NULL AS UInt32) AS b, CAST(NULL AS UInt32) AS c, right.a, right.b, right.c\n TableScan: right";
695+
assert_together_optimized_plan(plan, expected, true)
696+
}
697+
698+
#[test]
699+
fn test_full_join_right_empty_null_pad() -> Result<()> {
700+
let left =
701+
LogicalPlanBuilder::from(test_table_scan_with_name("left")?).build()?;
702+
let right_empty = LogicalPlanBuilder::from(test_table_scan_with_name("right")?)
703+
.filter(lit(false))?
704+
.build()?;
705+
706+
let plan = LogicalPlanBuilder::from(left)
707+
.join_using(
708+
right_empty,
709+
JoinType::Full,
710+
vec![Column::from_name("a".to_string())],
711+
)?
712+
.build()?;
713+
714+
let expected = "Projection: left.a, left.b, left.c, CAST(NULL AS UInt32) AS a, CAST(NULL AS UInt32) AS b, CAST(NULL AS UInt32) AS c\n TableScan: left";
715+
assert_together_optimized_plan(plan, expected, true)
716+
}
717+
718+
#[test]
719+
fn test_full_join_left_empty_null_pad() -> Result<()> {
720+
let left_empty = LogicalPlanBuilder::from(test_table_scan_with_name("left")?)
721+
.filter(lit(false))?
722+
.build()?;
723+
let right =
724+
LogicalPlanBuilder::from(test_table_scan_with_name("right")?).build()?;
725+
726+
let plan = LogicalPlanBuilder::from(left_empty)
727+
.join_using(
728+
right,
729+
JoinType::Full,
730+
vec![Column::from_name("a".to_string())],
731+
)?
732+
.build()?;
733+
734+
let expected = "Projection: CAST(NULL AS UInt32) AS a, CAST(NULL AS UInt32) AS b, CAST(NULL AS UInt32) AS c, right.a, right.b, right.c\n TableScan: right";
735+
assert_together_optimized_plan(plan, expected, true)
736+
}
737+
738+
#[test]
739+
fn test_left_join_complex_on_right_empty_null_pad() -> Result<()> {
740+
let left =
741+
LogicalPlanBuilder::from(test_table_scan_with_name("left")?).build()?;
742+
let right_empty = LogicalPlanBuilder::from(test_table_scan_with_name("right")?)
743+
.filter(lit(false))?
744+
.build()?;
745+
746+
// Complex ON condition: left.a = right.a AND left.b > right.b
747+
let plan = LogicalPlanBuilder::from(left)
748+
.join(
749+
right_empty,
750+
JoinType::Left,
751+
(
752+
vec![Column::from_name("a".to_string())],
753+
vec![Column::from_name("a".to_string())],
754+
),
755+
Some(col("left.b").gt(col("right.b"))),
756+
)?
757+
.build()?;
758+
759+
let expected = "Projection: left.a, left.b, left.c, CAST(NULL AS UInt32) AS a, CAST(NULL AS UInt32) AS b, CAST(NULL AS UInt32) AS c\n TableScan: left";
760+
assert_together_optimized_plan(plan, expected, true)
761+
}
762+
573763
#[test]
574764
fn test_empty_with_non_empty() -> Result<()> {
575765
let table_scan = test_table_scan()?;

0 commit comments

Comments
 (0)