@@ -19,30 +19,56 @@ use std::sync::Arc;
1919
2020use crate :: file_scan_config:: FileScanConfig ;
2121use crate :: file_stream:: scan_state:: ScanState ;
22+ use crate :: file_stream:: work_source:: { SharedWorkSource , WorkSource } ;
2223use crate :: morsel:: { FileOpenerMorselizer , Morselizer } ;
2324use datafusion_common:: { Result , internal_err} ;
2425use datafusion_physical_plan:: metrics:: { BaselineMetrics , ExecutionPlanMetricsSet } ;
2526
2627use super :: { FileOpener , FileStream , FileStreamMetrics , FileStreamState , OnError } ;
2728
29+ /// Whether this stream may reorder work across sibling `FileStream`s.
30+ enum Reorderable {
31+ /// This stream may reorder work, optionally using a shared file queue.
32+ Yes {
33+ shared_work_source : Option < SharedWorkSource > ,
34+ } ,
35+ /// This stream may not reorder work.
36+ No ,
37+ }
38+
2839/// Builder for constructing a [`FileStream`].
2940pub struct FileStreamBuilder < ' a > {
3041 config : & ' a FileScanConfig ,
3142 partition : Option < usize > ,
3243 morselizer : Option < Box < dyn Morselizer > > ,
3344 metrics : Option < & ' a ExecutionPlanMetricsSet > ,
3445 on_error : OnError ,
46+ reorderable : Reorderable ,
3547}
3648
3749impl < ' a > FileStreamBuilder < ' a > {
3850 /// Create a new builder.
3951 pub fn new ( config : & ' a FileScanConfig ) -> Self {
52+ let reorderable = if config. preserve_order || config. partitioned_by_file_group {
53+ Reorderable :: No
54+ } else {
55+ Reorderable :: Yes {
56+ shared_work_source : Some (
57+ config
58+ . shared_work_source
59+ . get_or_init ( SharedWorkSource :: new)
60+ . clone ( ) ,
61+ ) ,
62+ }
63+ } ;
64+
4065 Self {
4166 config,
4267 partition : None ,
4368 morselizer : None ,
4469 metrics : None ,
4570 on_error : OnError :: Fail ,
71+ reorderable,
4672 }
4773 }
4874
@@ -88,6 +114,7 @@ impl<'a> FileStreamBuilder<'a> {
88114 morselizer,
89115 metrics,
90116 on_error,
117+ reorderable,
91118 } = self ;
92119
93120 let Some ( partition) = partition else {
@@ -105,10 +132,23 @@ impl<'a> FileStreamBuilder<'a> {
105132 "FileStreamBuilder invalid partition index: {partition}"
106133 ) ;
107134 } ;
135+ let files = file_group. into_inner ( ) ;
136+ let work_source = match reorderable {
137+ Reorderable :: Yes { shared_work_source } => {
138+ if let Some ( shared) = shared_work_source {
139+ shared. register_stream ( ) ;
140+ shared. push_files ( files) ;
141+ WorkSource :: Shared ( shared)
142+ } else {
143+ WorkSource :: Local ( files. into ( ) )
144+ }
145+ }
146+ Reorderable :: No => WorkSource :: Local ( files. into ( ) ) ,
147+ } ;
108148
109149 let file_stream_metrics = FileStreamMetrics :: new ( metrics, partition) ;
110150 let scan_state = Box :: new ( ScanState :: new (
111- file_group . into_inner ( ) ,
151+ work_source ,
112152 config. limit ,
113153 morselizer,
114154 on_error,
0 commit comments