Skip to content

Commit b549e0d

Browse files
authored
refactor(binder): make flatten_plan return explicit derived-column mappings (#19696)
refactor: make flatten_plan return explicit derived-column mappings
1 parent cc83532 commit b549e0d

9 files changed

Lines changed: 731 additions & 326 deletions

File tree

src/query/sql/src/planner/binder/bind_table_reference/bind_join.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ impl Binder {
394394
// If there are outer columns in right child, then the join is a correlated lateral join
395395
let opt_ctx = OptimizerContext::new(self.ctx.clone(), self.metadata.clone());
396396
let mut decorrelator = SubqueryDecorrelatorOptimizer::new(opt_ctx, Some(self.clone()));
397-
right_child = decorrelator.flatten_plan(
397+
let (flatten_plan, derived_columns) = decorrelator.flatten_plan(
398398
&left_child,
399399
&right_child,
400400
&right_prop.outer_columns,
@@ -403,10 +403,12 @@ impl Binder {
403403
},
404404
false,
405405
)?;
406+
right_child = flatten_plan;
406407
let original_num_conditions = left_conditions.len();
407408
decorrelator.add_equi_conditions(
408409
None,
409410
&right_prop.outer_columns,
411+
&derived_columns,
410412
&mut right_conditions,
411413
&mut left_conditions,
412414
)?;

src/query/sql/src/planner/optimizer/optimizers/operator/decorrelate/decorrelate.rs

Lines changed: 55 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use databend_common_expression::types::DataType;
2828
use databend_common_expression::types::NumberScalar;
2929
use databend_common_functions::BUILTIN_FUNCTIONS;
3030

31+
use super::DerivedColumnScope;
3132
use crate::ColumnSet;
3233
use crate::binder::ColumnBindingBuilder;
3334
use crate::binder::JoinPredicate;
@@ -244,17 +245,17 @@ impl SubqueryDecorrelatorOptimizer {
244245
Ok(Some(result))
245246
}
246247

247-
pub fn try_decorrelate_subquery(
248+
pub(crate) fn try_decorrelate_subquery(
248249
&mut self,
249250
outer: &SExpr,
250251
subquery: &SubqueryExpr,
251252
flatten_info: &mut FlattenInfo,
252253
is_conjunctive_predicate: bool,
253-
) -> Result<(SExpr, UnnestResult)> {
254+
) -> Result<(SExpr, UnnestResult, DerivedColumnScope)> {
254255
match subquery.typ {
255256
SubqueryType::Scalar => {
256257
let correlated_columns = &subquery.outer_columns;
257-
let flatten_plan = self.flatten_plan(
258+
let (flatten_plan, derived_columns) = self.flatten_plan(
258259
outer,
259260
&subquery.subquery,
260261
correlated_columns,
@@ -267,6 +268,7 @@ impl SubqueryDecorrelatorOptimizer {
267268
self.add_equi_conditions(
268269
subquery.span,
269270
correlated_columns,
271+
&derived_columns,
270272
&mut right_conditions,
271273
&mut left_conditions,
272274
)?;
@@ -304,16 +306,20 @@ impl SubqueryDecorrelatorOptimizer {
304306
Arc::new(outer.clone()),
305307
Arc::new(flatten_plan),
306308
);
307-
Ok((s_expr, UnnestResult::SingleJoin))
309+
Ok((s_expr, UnnestResult::SingleJoin, derived_columns))
308310
}
309311
SubqueryType::Exists | SubqueryType::NotExists => {
310-
if is_conjunctive_predicate {
311-
if let Some(result) = self.try_decorrelate_simple_subquery(outer, subquery)? {
312-
return Ok((result, UnnestResult::SimpleJoin { output_index: None }));
313-
}
312+
if is_conjunctive_predicate
313+
&& let Some(result) = self.try_decorrelate_simple_subquery(outer, subquery)?
314+
{
315+
return Ok((
316+
result,
317+
UnnestResult::SimpleJoin { output_index: None },
318+
Default::default(),
319+
));
314320
}
315321
let correlated_columns = &subquery.outer_columns;
316-
let flatten_plan = self.flatten_plan(
322+
let (flatten_plan, derived_columns) = self.flatten_plan(
317323
outer,
318324
&subquery.subquery,
319325
correlated_columns,
@@ -326,6 +332,7 @@ impl SubqueryDecorrelatorOptimizer {
326332
self.add_equi_conditions(
327333
subquery.span,
328334
correlated_columns,
335+
&derived_columns,
329336
&mut left_conditions,
330337
&mut right_conditions,
331338
)?;
@@ -368,11 +375,15 @@ impl SubqueryDecorrelatorOptimizer {
368375
Arc::new(outer.clone()),
369376
Arc::new(flatten_plan),
370377
);
371-
Ok((s_expr, UnnestResult::MarkJoin { marker_index }))
378+
Ok((
379+
s_expr,
380+
UnnestResult::MarkJoin { marker_index },
381+
derived_columns,
382+
))
372383
}
373384
SubqueryType::Any => {
374385
let correlated_columns = &subquery.outer_columns;
375-
let flatten_plan = self.flatten_plan(
386+
let (flatten_plan, derived_columns) = self.flatten_plan(
376387
outer,
377388
&subquery.subquery,
378389
correlated_columns,
@@ -384,6 +395,7 @@ impl SubqueryDecorrelatorOptimizer {
384395
self.add_equi_conditions(
385396
subquery.span,
386397
correlated_columns,
398+
&derived_columns,
387399
&mut left_conditions,
388400
&mut right_conditions,
389401
)?;
@@ -453,16 +465,18 @@ impl SubqueryDecorrelatorOptimizer {
453465
Arc::new(flatten_plan),
454466
),
455467
UnnestResult::MarkJoin { marker_index },
468+
derived_columns,
456469
))
457470
}
458471
_ => unreachable!(),
459472
}
460473
}
461474

462-
pub fn add_equi_conditions(
475+
pub(crate) fn add_equi_conditions(
463476
&self,
464477
span: Span,
465478
correlated_columns: &ColumnSet,
479+
derived_columns: &DerivedColumnScope,
466480
left_conditions: &mut Vec<ScalarExpr>,
467481
right_conditions: &mut Vec<ScalarExpr>,
468482
) -> Result<()> {
@@ -482,15 +496,15 @@ impl SubqueryDecorrelatorOptimizer {
482496
.table_index(column_entry.table_index())
483497
.build(),
484498
});
485-
let Some(derive_column) = self.derived_columns.get(&correlated_column) else {
499+
let Some(derive_column) = derived_columns.resolve(correlated_column) else {
486500
continue;
487501
};
488-
let column_entry = metadata.column(*derive_column);
502+
let column_entry = metadata.column(derive_column);
489503
let left_column = ScalarExpr::BoundColumnRef(BoundColumnRef {
490504
span,
491505
column: ColumnBindingBuilder::new(
492506
column_entry.name(),
493-
*derive_column,
507+
derive_column,
494508
Box::from(column_entry.data_type()),
495509
Visibility::Visible,
496510
)
@@ -507,43 +521,40 @@ impl SubqueryDecorrelatorOptimizer {
507521
// If correlated_columns only occur in equi-conditions, such as `where t1.a = t.a and t1.b = t.b`(t1 is outer table)
508522
// Then we won't join outer and inner table.
509523
pub(crate) fn join_outer_inner_table(
510-
&mut self,
524+
&self,
511525
filter: &Filter,
512526
correlated_columns: &ColumnSet,
513-
) -> Result<bool> {
514-
Ok(!filter.predicates.iter().all(|predicate| {
527+
) -> Result<(bool, DerivedColumnScope)> {
528+
let mut derived_columns = DerivedColumnScope::default();
529+
let can_reuse_inner_columns = filter.predicates.iter().all(|predicate| {
515530
if predicate
516531
.used_columns()
517532
.iter()
518-
.any(|column| correlated_columns.contains(column))
533+
.all(|column| !correlated_columns.contains(column))
519534
{
520-
if let ScalarExpr::FunctionCall(func) = predicate {
521-
if func.func_name == "eq" {
522-
if let (
523-
ScalarExpr::BoundColumnRef(left),
524-
ScalarExpr::BoundColumnRef(right),
525-
) = (&func.arguments[0], &func.arguments[1])
526-
{
527-
if correlated_columns.contains(&left.column.index)
528-
&& !correlated_columns.contains(&right.column.index)
529-
{
530-
self.derived_columns
531-
.insert(left.column.index, right.column.index);
532-
}
533-
if !correlated_columns.contains(&left.column.index)
534-
&& correlated_columns.contains(&right.column.index)
535-
{
536-
self.derived_columns
537-
.insert(right.column.index, left.column.index);
538-
}
539-
return true;
540-
}
541-
}
535+
return true;
536+
}
537+
if let ScalarExpr::FunctionCall(func) = predicate
538+
&& func.func_name == "eq"
539+
&& let (ScalarExpr::BoundColumnRef(left), ScalarExpr::BoundColumnRef(right)) =
540+
(&func.arguments[0], &func.arguments[1])
541+
{
542+
if correlated_columns.contains(&left.column.index)
543+
&& !correlated_columns.contains(&right.column.index)
544+
{
545+
derived_columns.record(left.column.index, right.column.index);
546+
}
547+
if !correlated_columns.contains(&left.column.index)
548+
&& correlated_columns.contains(&right.column.index)
549+
{
550+
derived_columns.record(right.column.index, left.column.index);
542551
}
543-
return false;
552+
true
553+
} else {
554+
false
544555
}
545-
true
546-
}))
556+
});
557+
Ok((!can_reuse_inner_columns, derived_columns))
547558
}
548559

549560
// Try folding the subquery into a constant value expression,

0 commit comments

Comments
 (0)