11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4- use std:: cmp ;
4+ use std:: collections :: BTreeSet ;
55use std:: ops:: Range ;
66use std:: sync:: Arc ;
7+ use std:: { cmp, iter} ;
78
89use futures:: future:: BoxFuture ;
910use itertools:: Itertools ;
@@ -326,7 +327,7 @@ pub struct RepeatedScan<A: 'static + Send> {
326327 /// The selection mask to apply to the selected row range.
327328 selection : Selection ,
328329 /// The natural split points (row offsets) of the file; each pair of adjacent points bounds one split.
329- splits : Vec < Range < u64 > > ,
330+ splits : BTreeSet < u64 > ,
330331 /// The number of splits to make progress on concurrently **per-thread**.
331332 concurrency : usize ,
332333 /// Function to apply to each [`ArrayRef`] within the spawned split tasks.
@@ -342,27 +343,38 @@ impl<A: 'static + Send> RepeatedScan<A> {
342343 & self ,
343344 row_range : Option < Range < u64 > > ,
344345 ) -> VortexResult < Vec < BoxFuture < ' static , VortexResult < Option < A > > > > > {
345- let row_range = intersect_ranges ( self . row_range . as_ref ( ) , row_range) ;
346-
347346 let ctx = Arc :: new ( TaskContext {
348- row_range,
349347 selection : self . selection . clone ( ) ,
350348 filter : self . filter . clone ( ) . map ( |f| Arc :: new ( FilterExpr :: new ( f) ) ) ,
351349 reader : self . layout_reader . clone ( ) ,
352350 projection : self . projection . clone ( ) ,
353351 mapper : self . map_fn . clone ( ) ,
354352 } ) ;
355353
354+ let row_range = intersect_ranges ( self . row_range . as_ref ( ) , row_range) ;
355+ let splits_iter: Box < dyn Iterator < Item = _ > > = match row_range {
356+ None => Box :: new ( self . splits . iter ( ) . copied ( ) ) ,
357+ Some ( range) => {
358+ if range. start > range. end {
359+ return Ok ( Vec :: new ( ) ) ;
360+ }
361+ Box :: new (
362+ iter:: once ( range. start )
363+ . chain ( self . splits . range ( range. clone ( ) ) . copied ( ) )
364+ . chain ( iter:: once ( range. end ) ) ,
365+ )
366+ }
367+ } ;
368+
356369 // Create a task that executes the full scan pipeline for each split.
357370 let mut limit = self . limit ;
358- let split_tasks = self
359- . splits
360- . iter ( )
361- . filter_map ( |split_range| {
362- if limit. is_some_and ( |l| l == 0 ) {
371+ let split_tasks = splits_iter
372+ . tuple_windows ( )
373+ . filter_map ( |( start, end) | {
374+ if limit. is_some_and ( |l| l == 0 ) || start >= end {
363375 None
364376 } else {
365- Some ( split_exec ( ctx. clone ( ) , split_range . clone ( ) , limit. as_mut ( ) ) )
377+ Some ( split_exec ( ctx. clone ( ) , start..end , limit. as_mut ( ) ) )
366378 }
367379 } )
368380 . try_collect ( ) ?;
0 commit comments