@@ -19,31 +19,57 @@ use std::sync::Arc;
1919
2020use crate :: file_scan_config:: FileScanConfig ;
2121use crate :: file_stream:: scan_state:: ScanState ;
22+ use crate :: file_stream:: work_source:: { SharedWorkSource , WorkSource } ;
2223use crate :: morsel:: { FileOpenerMorselizer , Morselizer } ;
2324use datafusion_common:: { Result , internal_err} ;
2425use datafusion_physical_plan:: metrics:: { BaselineMetrics , ExecutionPlanMetricsSet } ;
2526
2627use super :: metrics:: FileStreamMetrics ;
2728use super :: { FileOpener , FileStream , FileStreamState , OnError } ;
2829
30+ /// Whether this stream may reorder work across sibling `FileStream`s.
31+ enum Reorderable {
32+ /// This stream may reorder work, optionally using a shared file queue.
33+ Yes {
34+ shared_work_source : Option < SharedWorkSource > ,
35+ } ,
36+ /// This stream may not reorder work.
37+ No ,
38+ }
39+
2940/// Builder for constructing a [`FileStream`].
3041pub struct FileStreamBuilder < ' a > {
3142 config : & ' a FileScanConfig ,
3243 partition : Option < usize > ,
3344 morselizer : Option < Box < dyn Morselizer > > ,
3445 metrics : Option < & ' a ExecutionPlanMetricsSet > ,
3546 on_error : OnError ,
47+ reorderable : Reorderable ,
3648}
3749
3850impl < ' a > FileStreamBuilder < ' a > {
3951 /// Create a new builder.
4052 pub fn new ( config : & ' a FileScanConfig ) -> Self {
53+ let reorderable = if config. preserve_order || config. partitioned_by_file_group {
54+ Reorderable :: No
55+ } else {
56+ Reorderable :: Yes {
57+ shared_work_source : Some (
58+ config
59+ . shared_work_source
60+ . get_or_init ( SharedWorkSource :: new)
61+ . clone ( ) ,
62+ ) ,
63+ }
64+ } ;
65+
4166 Self {
4267 config,
4368 partition : None ,
4469 morselizer : None ,
4570 metrics : None ,
4671 on_error : OnError :: Fail ,
72+ reorderable,
4773 }
4874 }
4975
@@ -89,6 +115,7 @@ impl<'a> FileStreamBuilder<'a> {
89115 morselizer,
90116 metrics,
91117 on_error,
118+ reorderable,
92119 } = self ;
93120
94121 let Some ( partition) = partition else {
@@ -106,10 +133,23 @@ impl<'a> FileStreamBuilder<'a> {
106133 "FileStreamBuilder invalid partition index: {partition}"
107134 ) ;
108135 } ;
136+ let files = file_group. into_inner ( ) ;
137+ let work_source = match reorderable {
138+ Reorderable :: Yes { shared_work_source } => {
139+ if let Some ( shared) = shared_work_source {
140+ shared. register_stream ( ) ;
141+ shared. push_files ( files) ;
142+ WorkSource :: Shared ( shared)
143+ } else {
144+ WorkSource :: Local ( files. into ( ) )
145+ }
146+ }
147+ Reorderable :: No => WorkSource :: Local ( files. into ( ) ) ,
148+ } ;
109149
110150 let file_stream_metrics = FileStreamMetrics :: new ( metrics, partition) ;
111151 let scan_state = Box :: new ( ScanState :: new (
112- file_group . into_inner ( ) ,
152+ work_source ,
113153 config. limit ,
114154 morselizer,
115155 on_error,
0 commit comments