Skip to content

Commit 2195484

Browse files
committed
fix join swap
1 parent 6c1f600 commit 2195484

1 file changed

Lines changed: 62 additions & 10 deletions

File tree

datafusion/optimizer/src/reorder_join/left_deep_join_plan.rs

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -618,23 +618,75 @@ impl<'graph> PrecedenceTreeNode<'graph> {
618618
)
619619
})?;
620620

621-
// Determine if the join order was swapped compared to the original edge
622-
// by checking if the left expressions' columns come from the current_plan's schema.
623-
// If the left expressions belong to current_plan, no swap needed.
624-
// If they belong to next_plan, the join is swapped.
621+
// Determine if the join order was swapped compared to the original edge.
622+
// We use a two-tier approach:
623+
// 1. Check if left/right expressions belong to current/next schemas (handles most cases)
624+
// 2. If ambiguous (e.g., self-joins), use relation qualifiers as a tiebreaker
625625
let current_schema = current_plan.schema();
626+
let next_schema = next_plan.schema();
627+
626628
let join_order_swapped = if !edge.join.on.is_empty() {
627-
// Check the first join condition's left expression
628-
let (left_expr, _) = &edge.join.on[0];
629+
// Extract columns from the first join condition
630+
let (left_expr, right_expr) = &edge.join.on[0];
629631
let left_columns = left_expr.column_refs();
632+
let right_columns = right_expr.column_refs();
630633

631-
// If the left expression's columns are NOT in current_plan's schema,
632-
// then the join order is swapped
633-
!datafusion_expr::utils::check_all_columns_from_schema(
634+
// Tier 1: Check which schema each expression belongs to
635+
let left_in_current = datafusion_expr::utils::check_all_columns_from_schema(
634636
&left_columns,
635637
current_schema.as_ref(),
636638
)
637-
.unwrap_or(false)
639+
.unwrap_or(false);
640+
641+
let right_in_next = datafusion_expr::utils::check_all_columns_from_schema(
642+
&right_columns,
643+
next_schema.as_ref(),
644+
)
645+
.unwrap_or(false);
646+
647+
let left_in_next = datafusion_expr::utils::check_all_columns_from_schema(
648+
&left_columns,
649+
next_schema.as_ref(),
650+
)
651+
.unwrap_or(false);
652+
653+
let right_in_current = datafusion_expr::utils::check_all_columns_from_schema(
654+
&right_columns,
655+
current_schema.as_ref(),
656+
)
657+
.unwrap_or(false);
658+
659+
// Determine swap based on where columns are found
660+
if left_in_current && right_in_next && !left_in_next {
661+
// Clear case: left belongs to current, right belongs to next → no swap
662+
false
663+
} else if left_in_next && right_in_current && !left_in_current {
664+
// Clear case: left belongs to next, right belongs to current → swap
665+
true
666+
} else {
667+
// Tier 2: Ambiguous case (columns exist in both schemas, e.g., self-joins)
668+
// Use relation qualifiers as a tiebreaker if available
669+
let left_has_relation = left_columns.iter().any(|c| c.relation.is_some());
670+
let right_has_relation = right_columns.iter().any(|c| c.relation.is_some());
671+
672+
if left_has_relation || right_has_relation {
673+
// Check if qualified left columns match the next_plan's schema
674+
// If they do, it means the join is swapped
675+
left_columns.iter().any(|col| {
676+
if let Some(relation) = &col.relation {
677+
// Simple heuristic: check if any field in next_schema has this qualifier
678+
next_schema.iter().any(|(qualifier, _field)| {
679+
qualifier.map(|q| q == relation).unwrap_or(false)
680+
})
681+
} else {
682+
false
683+
}
684+
})
685+
} else {
686+
// No qualifiers available, default to no swap (preserve original order)
687+
false
688+
}
689+
}
638690
} else {
639691
// If there are no join conditions, we can't determine swap status
640692
// This shouldn't happen in practice for equi-joins

0 commit comments

Comments
 (0)