@@ -19,7 +19,8 @@ use std::fmt::Debug;
1919use std:: sync:: Arc ;
2020
2121use crate :: utils:: {
22- add_sort_above, is_sort, is_sort_preserving_merge, is_union, is_window,
22+ add_sort_above_with_distribution, is_sort, is_sort_preserving_merge, is_union,
23+ is_window,
2324} ;
2425
2526use arrow:: datatypes:: SchemaRef ;
@@ -29,7 +30,7 @@ use datafusion_expr::JoinType;
2930use datafusion_physical_expr:: expressions:: Column ;
3031use datafusion_physical_expr:: utils:: collect_columns;
3132use datafusion_physical_expr:: {
32- EquivalenceProperties , add_offset_to_physical_sort_exprs,
33+ Distribution , EquivalenceProperties , add_offset_to_physical_sort_exprs,
3334} ;
3435use datafusion_physical_expr_common:: sort_expr:: {
3536 LexOrdering , LexRequirement , OrderingRequirements , PhysicalSortExpr ,
@@ -55,23 +56,46 @@ use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};
5556/// of the parent node as its data.
5657///
5758/// [`EnforceSorting`]: crate::enforce_sorting::EnforceSorting
58- #[ derive( Default , Clone , Debug ) ]
59+ #[ derive( Clone , Debug ) ]
5960pub struct ParentRequirements {
    /// Ordering requirement handed down to this node; `None` when no ordering
    /// is being pushed down to it.
6061 ordering_requirement : Option < OrderingRequirements > ,
    /// Optional fetch value carried along with the requirement.
6162 fetch : Option < usize > ,
63+ /// The distribution required by whatever consumer will sit above any
64+ /// `SortExec` we materialise here. It is passed to
65+ /// `add_sort_above_with_distribution` when a sort is added, so that a sort over
66+ /// a multi-partition input can be wrapped in a `SortPreservingMergeExec` when
67+ /// the consumer needs a single partition.
68+ distribution_requirement : Distribution ,
69+ }
70+
71+ impl Default for ParentRequirements {
    // `Default` is written out by hand now that the struct's derive list no
    // longer includes it. NOTE(review): presumably `Distribution` has no
    // `Default` impl, which would rule out the derive — confirm.
    /// Returns requirements with no ordering requirement, no fetch, and
    /// `Distribution::UnspecifiedDistribution` as the distribution requirement.
72+ fn default ( ) -> Self {
73+ Self {
74+ ordering_requirement : None ,
75+ fetch : None ,
76+ distribution_requirement : Distribution :: UnspecifiedDistribution ,
77+ }
78+ }
6279}
6380
6481pub type SortPushDown = PlanContext < ParentRequirements > ;
6582
6683/// Assigns the ordering and distribution requirements of the root node to its children.
///
/// Each child's `data` is overwritten with the plan's `required_input_ordering()`
/// and `required_input_distribution()` entries for that child position, plus the
/// child's own fetch value.
6784pub fn assign_initial_requirements ( sort_push_down : & mut SortPushDown ) {
6885 let reqs = sort_push_down. plan . required_input_ordering ( ) ;
69- for ( child, requirement) in sort_push_down. children . iter_mut ( ) . zip ( reqs) {
86+ let dists = sort_push_down. plan . required_input_distribution ( ) ;
87+ for ( idx, ( child, requirement) ) in
88+ sort_push_down. children . iter_mut ( ) . zip ( reqs) . enumerate ( )
89+ {
7090 child. data = ParentRequirements {
7191 ordering_requirement : requirement,
7292 // Seed the fetch with the child's own fetch value; no parent fetch
7393 // is consulted at this point.
7494 fetch : child. plan . fetch ( ) ,
    // Fall back to `UnspecifiedDistribution` if the plan reports no
    // distribution entry for this child index.
95+ distribution_requirement : dists
96+ . get ( idx)
97+ . cloned ( )
98+ . unwrap_or ( Distribution :: UnspecifiedDistribution ) ,
7599 } ;
76100 }
77101}
@@ -92,11 +116,34 @@ fn min_fetch(f1: Option<usize>, f2: Option<usize>) -> Option<usize> {
92116 }
93117}
94118
119+ /// Returns the stricter of two distribution requirements when propagating
120+ /// `parent_distribution` down through pass-through operators.
121+ ///
122+ /// `SinglePartition` is the strictest requirement we care about for the
123+ /// purposes of inserting `SortPreservingMergeExec` above a partition-
124+ /// preserving `SortExec`. If either side requests it, we keep that.
///
/// Precedence is `SinglePartition`, then `HashPartitioned`, then
/// `UnspecifiedDistribution`. When both sides are `HashPartitioned`, `a` is
/// returned because its arm is matched first.
125+ fn stronger_distribution ( a : & Distribution , b : & Distribution ) -> Distribution {
126+ match ( a, b) {
    // Either side asking for a single partition wins outright.
127+ ( Distribution :: SinglePartition , _) | ( _, Distribution :: SinglePartition ) => {
128+ Distribution :: SinglePartition
129+ }
    // Otherwise keep a hash-partitioning requirement, preferring `a`'s.
130+ ( Distribution :: HashPartitioned ( _) , _) => a. clone ( ) ,
131+ ( _, Distribution :: HashPartitioned ( _) ) => b. clone ( ) ,
    // Every remaining combination collapses to an unspecified distribution.
132+ _ => Distribution :: UnspecifiedDistribution ,
133+ }
134+ }
135+
95136fn pushdown_sorts_helper (
96137 mut sort_push_down : SortPushDown ,
97138) -> Result < Transformed < SortPushDown > > {
98139 let plan = sort_push_down. plan ;
99140 let parent_fetch = sort_push_down. data . fetch ;
141+ // The distribution required by whatever sits above any new sort we add
142+ // here. When this node is a SortExec we are about to remove or replace,
143+ // the new sort takes the removed sort's slot, so its consumer is the
144+ // grandparent — i.e. the same distribution requirement that flowed into
145+ // this call.
146+ let parent_distribution = sort_push_down. data . distribution_requirement . clone ( ) ;
100147
101148 let Some ( parent_requirement) = sort_push_down. data . ordering_requirement . clone ( )
102149 else {
@@ -116,11 +163,28 @@ fn pushdown_sorts_helper(
116163 sort_push_down. data . fetch = fetch;
117164 sort_push_down. data . ordering_requirement =
118165 Some ( OrderingRequirements :: from ( sort_ordering) ) ;
166+ // The new context now sits where the SortExec was; preserve the
167+ // grandparent's distribution requirement so a subsequent
168+ // `add_sort_above_with_distribution` call knows whether to wrap in `SortPreservingMergeExec`.
169+ sort_push_down. data . distribution_requirement = parent_distribution;
119170 // Recursive call to helper, so it doesn't transform_down and miss
120171 // the new node (previous child of sort):
121172 return pushdown_sorts_helper ( sort_push_down) ;
122173 }
123174 sort_push_down. plan = plan;
175+ // No ordering is being pushed down here, so only use the node's own
176+ // distribution requirement. Do NOT propagate parent_distribution
177+ // through partition-merging nodes (e.g. SortPreservingMergeExec):
178+ // those nodes already satisfy SinglePartition themselves, so the
179+ // children below them should not be forced to also produce a single
180+ // partition.
181+ let dists = sort_push_down. plan . required_input_distribution ( ) ;
182+ for ( idx, child) in sort_push_down. children . iter_mut ( ) . enumerate ( ) {
183+ child. data . distribution_requirement = dists
184+ . get ( idx)
185+ . cloned ( )
186+ . unwrap_or ( Distribution :: UnspecifiedDistribution ) ;
187+ }
124188 return Ok ( Transformed :: no ( sort_push_down) ) ;
125189 } ;
126190
@@ -149,16 +213,21 @@ fn pushdown_sorts_helper(
149213 // The sort was imposing a different ordering than the one being
150214 // pushed down. Replace it with a sort that matches the pushed-down
151215 // ordering, and continue the pushdown.
152- // Add back the sort:
153- sort_push_down = add_sort_above (
216+ // Add back the sort. The new sort sits where the old one did, so
217+ // its consumer is the grandparent and we must respect that
218+ // distribution requirement (otherwise a multi-partition input
219+ // produces preserve_partitioning=true with no SPM above).
220+ sort_push_down = add_sort_above_with_distribution (
154221 sort_push_down,
155222 parent_requirement. into_single ( ) ,
156223 parent_fetch,
224+ & parent_distribution,
157225 ) ;
158226 // Update pushdown requirements:
159227 sort_push_down. children [ 0 ] . data = ParentRequirements {
160228 ordering_requirement : Some ( OrderingRequirements :: from ( sort_ordering) ) ,
161229 fetch : sort_fetch,
230+ distribution_requirement : Distribution :: UnspecifiedDistribution ,
162231 } ;
163232 return Ok ( Transformed :: yes ( sort_push_down) ) ;
164233 } else {
@@ -174,6 +243,10 @@ fn pushdown_sorts_helper(
174243 } else {
175244 Some ( parent_requirement)
176245 } ;
246+ // The sort was removed; carry the grandparent's distribution
247+ // requirement so any sort we materialise deeper down still
248+ // satisfies it.
249+ sort_push_down. data . distribution_requirement = parent_distribution;
177250 // Recursive call to helper, so it doesn't transform_down and miss
178251 // the new node (previous child of sort):
179252 return pushdown_sorts_helper ( sort_push_down) ;
@@ -184,10 +257,35 @@ fn pushdown_sorts_helper(
184257 if satisfy_parent {
185258 // For non-sort operators which satisfy ordering:
186259 let reqs = sort_push_down. plan . required_input_ordering ( ) ;
260+ let dists = sort_push_down. plan . required_input_distribution ( ) ;
261+
262+ // If this node already produces a single partition it has absorbed any
263+ // SinglePartition requirement from the consumer above. Don't push
264+ // that requirement down into children that live below the merge point.
265+ let effective_parent_dist =
266+ if sort_push_down. plan . output_partitioning ( ) . partition_count ( ) == 1 {
267+ Distribution :: UnspecifiedDistribution
268+ } else {
269+ parent_distribution. clone ( )
270+ } ;
187271
188- for ( child, order) in sort_push_down. children . iter_mut ( ) . zip ( reqs) {
272+ for ( idx, ( child, order) ) in
273+ sort_push_down. children . iter_mut ( ) . zip ( reqs) . enumerate ( )
274+ {
189275 child. data . ordering_requirement = order;
190276 child. data . fetch = min_fetch ( parent_fetch, child. data . fetch ) ;
277+ // Any sort we materialise inside this child subtree must still
278+ // satisfy the strongest distribution requirement we've seen on
279+ // the way down. Pass-through operators (Projection, Filter, etc.)
280+ // don't change partitioning, so a `SinglePartition` requirement
281+ // from a higher consumer must propagate, not get reset to this
282+ // node's own (often `UnspecifiedDistribution`) input requirement.
283+ child. data . distribution_requirement = stronger_distribution (
284+ & effective_parent_dist,
285+ dists
286+ . get ( idx)
287+ . unwrap_or ( & Distribution :: UnspecifiedDistribution ) ,
288+ ) ;
191289 }
192290 } else if let Some ( adjusted) = pushdown_requirement_to_children (
193291 & sort_push_down. plan ,
@@ -197,17 +295,29 @@ fn pushdown_sorts_helper(
197295 // For operators that can take a sort pushdown, continue with updated
198296 // requirements:
199297 let current_fetch = sort_push_down. plan . fetch ( ) ;
200- for ( child, order) in sort_push_down. children . iter_mut ( ) . zip ( adjusted) {
298+ let dists = sort_push_down. plan . required_input_distribution ( ) ;
299+ for ( idx, ( child, order) ) in
300+ sort_push_down. children . iter_mut ( ) . zip ( adjusted) . enumerate ( )
301+ {
201302 child. data . ordering_requirement = order;
202303 child. data . fetch = min_fetch ( current_fetch, parent_fetch) ;
304+ child. data . distribution_requirement = stronger_distribution (
305+ & parent_distribution,
306+ dists
307+ . get ( idx)
308+ . unwrap_or ( & Distribution :: UnspecifiedDistribution ) ,
309+ ) ;
203310 }
204311 sort_push_down. data . ordering_requirement = None ;
205312 } else {
206- // Can not push down requirements, add new `SortExec`:
207- sort_push_down = add_sort_above (
313+ // Can not push down requirements, add new `SortExec`. The new sort sits
314+ // between this node and its parent, so its consumer's distribution
315+ // requirement is the one carried in `parent_distribution`.
316+ sort_push_down = add_sort_above_with_distribution (
208317 sort_push_down,
209318 parent_requirement. into_single ( ) ,
210319 parent_fetch,
320+ & parent_distribution,
211321 ) ;
212322 assign_initial_requirements ( & mut sort_push_down) ;
213323 }
0 commit comments