1818//! Rewrites `UNION DISTINCT` branches that differ only by filter predicates
1919//! into a single filtered branch plus `DISTINCT`.
2020
21- use crate :: optimizer:: ApplyOrder ;
2221use crate :: { OptimizerConfig , OptimizerRule } ;
2322use datafusion_common:: Result ;
24- use datafusion_common:: tree_node:: { Transformed , TreeNode } ;
23+ use datafusion_common:: tree_node:: { Transformed , TreeNode , TreeNodeRewriter } ;
2524use datafusion_expr:: expr_rewriter:: coerce_plan_expr_for_schema;
2625use datafusion_expr:: logical_plan:: builder:: LogicalPlanBuilder ;
2726use datafusion_expr:: utils:: disjunction;
@@ -47,10 +46,6 @@ impl OptimizerRule for UnionsToFilter {
4746 "unions_to_filter"
4847 }
4948
50- fn apply_order ( & self ) -> Option < ApplyOrder > {
51- Some ( ApplyOrder :: BottomUp )
52- }
53-
5449 fn supports_rewrite ( & self ) -> bool { // Unconditionally reports `true`; presumably opts this rule into the `rewrite` entry point — confirm against the `OptimizerRule` trait.
5550 true
5651 }
@@ -64,6 +59,24 @@ impl OptimizerRule for UnionsToFilter {
6459 return Ok ( Transformed :: no ( plan) ) ;
6560 }
6661
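62+ // Fast pre-check: if `exists` finds no Distinct::All node in the plan tree we can
63+ // skip the expensive bottom-up rewrite_with_subqueries traversal entirely.
64+ // This matters for large UNION ALL plans (e.g. TPC-DS Q4) where the rule
65+ // can never fire and the traversal overhead is otherwise measurable. NOTE(review): `exists` presumably walks only plan inputs and not subquery plans embedded in expressions, unlike rewrite_with_subqueries — confirm that a Distinct::All nested only inside a subquery is not skipped by this early return.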
66+ if !plan. exists ( |p| Ok ( matches ! ( p, LogicalPlan :: Distinct ( Distinct :: All ( _) ) ) ) ) ? {
67+ return Ok ( Transformed :: no ( plan) ) ;
68+ }
69+
70+ plan. rewrite_with_subqueries ( & mut UnionsToFilterRewriter )
71+ }
72+ }
73+
74+ struct UnionsToFilterRewriter ; // Field-less type passed to `rewrite_with_subqueries`; it implements `TreeNodeRewriter` over `LogicalPlan`.
75+
76+ impl TreeNodeRewriter for UnionsToFilterRewriter {
77+ type Node = LogicalPlan ;
78+
79+ fn f_up ( & mut self , plan : LogicalPlan ) -> Result < Transformed < LogicalPlan > > {
6780 match plan {
6881 LogicalPlan :: Distinct ( Distinct :: All ( input) ) => {
6982 let inner = Arc :: unwrap_or_clone ( input) ;
@@ -370,6 +383,10 @@ mod tests {
370383 struct VolatileTestUdf ;
371384
372385 impl ScalarUDFImpl for VolatileTestUdf {
386+ fn as_any ( & self ) -> & dyn std:: any:: Any { // Exposes this value as `&dyn Any` by returning `self` unchanged.
387+ self
388+ }
389+
373390 fn name ( & self ) -> & str { // Returns the fixed name string for this test UDF implementation.
374391 "volatile_test"
375392 }
0 commit comments