Skip to content

Commit 80adf8b

Browse files
committed
Add configurable UNION DISTINCT to FILTER rewrite optimization
1 parent 9885f4b commit 80adf8b

File tree

6 files changed

+521
-0
lines changed

6 files changed

+521
-0
lines changed

datafusion/common/src/config.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,37 @@ config_namespace! {
11681168
/// closer to the leaf table scans, and push those projections down
11691169
/// towards the leaf nodes.
11701170
pub enable_leaf_expression_pushdown: bool, default = true
1171+
1172+
/// When set to true, the logical optimizer will rewrite `UNION DISTINCT`
1173+
/// branches that read from the same source and differ only by filter predicates
1174+
/// into a single branch whose filter is the `OR` of the branch predicates.
1175+
///
1176+
/// This optimization is conservative: it only applies when the `UNION DISTINCT`
1177+
/// branches share the same source and compatible wrapper nodes such as identical
1178+
/// projections or aliases.
1179+
///
1180+
/// Example:
1181+
///
1182+
/// Disabled (`false`, default):
1183+
/// ```text
1184+
/// Distinct:
1185+
///   Union
1186+
///     Projection: test.col_int32 AS a, test.col_uint32 AS b
1187+
///       Filter: test.col_int32 = Int32(1)
1188+
///         TableScan: test
1189+
///     Projection: test.col_int32 AS a, test.col_uint32 AS b
1190+
///       Filter: test.col_uint32 = UInt32(5)
1191+
///         TableScan: test
1192+
/// ```
1193+
///
1194+
/// Enabled (`true`):
1195+
/// ```text
1196+
/// Distinct:
1197+
///   Projection: test.col_int32 AS a, test.col_uint32 AS b
1198+
///     Filter: test.col_int32 = Int32(1) OR test.col_uint32 = UInt32(5)
1199+
///       TableScan: test
1200+
/// ```
1201+
pub enable_unions_to_filter: bool, default = false
11711202
}
11721203
}
11731204

datafusion/optimizer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ pub mod rewrite_set_comparison;
7070
pub mod scalar_subquery_to_join;
7171
pub mod simplify_expressions;
7272
pub mod single_distinct_to_groupby;
73+
pub mod unions_to_filter;
7374
pub mod utils;
7475

7576
#[cfg(test)]

datafusion/optimizer/src/optimizer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ use crate::rewrite_set_comparison::RewriteSetComparison;
5656
use crate::scalar_subquery_to_join::ScalarSubqueryToJoin;
5757
use crate::simplify_expressions::SimplifyExpressions;
5858
use crate::single_distinct_to_groupby::SingleDistinctToGroupBy;
59+
use crate::unions_to_filter::UnionsToFilter;
5960
use crate::utils::log_plan;
6061

6162
/// Transforms one [`LogicalPlan`] into another which computes the same results,
@@ -280,6 +281,7 @@ impl Optimizer {
280281
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
281282
Arc::new(RewriteSetComparison::new()),
282283
Arc::new(OptimizeUnions::new()),
284+
Arc::new(UnionsToFilter::new()),
283285
Arc::new(SimplifyExpressions::new()),
284286
Arc::new(ReplaceDistinctWithAggregate::new()),
285287
Arc::new(EliminateJoin::new()),

0 commit comments

Comments
 (0)