@@ -17,20 +17,21 @@ use vortex::error::VortexExpect;
1717use vortex:: error:: VortexResult ;
1818
1919use crate :: convert:: ToDuckDBScalar ;
20+ use crate :: duckdb:: ReusableDict ;
2021use crate :: duckdb:: SelectionVector ;
2122use crate :: duckdb:: VectorRef ;
2223use crate :: exporter:: ColumnExporter ;
2324use crate :: exporter:: cache:: ConversionCache ;
25+ use crate :: exporter:: cached_values_dict;
2426use crate :: exporter:: canonical;
25- use crate :: exporter:: new_array_exporter_with_flatten;
2627
27- /// We export run-end arrays to a DuckDB dictionary vector, using a selection vector to
28- /// repeat the values in the run-end array .
28+ /// Exports run-end arrays as a DuckDB dictionary vector. The run values are held in a
29+ /// `ReusableDict` built with the exporter; `export()` fills a `SelectionVector` with run indices.
2930struct RunEndExporter < E : IntegerPType > {
3031 ends : PrimitiveArray , // run ends, executed to a `PrimitiveArray` when the exporter is built
3132 ends_type : PhantomData < E > , // marker for the integer type `E` matched from `ends.ptype()`
3233 values : ArrayRef , // the run values child of the run-end array
33- values_exporter : Box < dyn ColumnExporter > ,
34+ values_dict : ReusableDict , // from `cached_values_dict`; handed to `reuse_dictionary` in `export()`
3435 run_end_offset : usize , // NOTE(review): presumably the logical offset into the run-end array; confirm
3536}
3637
@@ -50,16 +51,14 @@ pub(crate) fn new_exporter_with_flatten(
5051 let ends = array. ends ( ) . clone ( ) ;
5152 let values = array. values ( ) . clone ( ) ;
5253 let ends = ends. execute :: < PrimitiveArray > ( ctx) ?;
53- // REE exports values in run-index space, not outer row space. Materialize the dictionary
54- // payload so chunked physical boundaries in the values child cannot constrain row batches.
55- let values_exporter = new_array_exporter_with_flatten ( values. clone ( ) , cache, ctx, true ) ?;
54+ let values_dict = cached_values_dict ( values. clone ( ) , cache, ctx) ?;
5655
5756 match_each_integer_ptype ! ( ends. ptype( ) , |E | {
5857 Ok ( Box :: new( RunEndExporter {
5958 ends,
6059 ends_type: PhantomData :: <E >,
6160 values,
62- values_exporter ,
61+ values_dict ,
6362 run_end_offset: offset,
6463 } ) )
6564 } )
@@ -88,10 +87,7 @@ impl<E: IntegerPType> ColumnExporter for RunEndExporter<E> {
8887
8988 // Find the final run in case we can short-circuit and return a constant vector.
9089 let end_run_idx = ends_slice
91- . search_sorted (
92- & offset. add ( E :: from_usize ( len) . vortex_expect ( "len out of bounds" ) ) ,
93- SearchSortedSide :: Right ,
94- ) ?
90+ . search_sorted ( & end_offset, SearchSortedSide :: Right ) ?
9591 . to_ends_index ( ends_slice. len ( ) ) ;
9692
9793 if start_run_idx == end_run_idx {
@@ -113,29 +109,16 @@ impl<E: IntegerPType> ColumnExporter for RunEndExporter<E> {
113109 . to_usize ( )
114110 . vortex_expect ( "run_len is usize" ) ;
115111
116- // Push the runs into the selection vector.
117- sel_vec_slice[ ..run_len] . fill ( u32:: try_from ( run_idx) . vortex_expect ( "sel_idx is u32" ) ) ;
112+ let global_run_idx =
113+ u32:: try_from ( start_run_idx + run_idx) . vortex_expect ( "run index exceeds u32" ) ;
114+ sel_vec_slice[ ..run_len] . fill ( global_run_idx) ;
118115 sel_vec_slice = & mut sel_vec_slice[ run_len..] ;
119116
120117 offset = next_end;
121118 }
122- assert ! (
123- sel_vec_slice. is_empty( ) ,
124- "Selection vector not completely filled"
125- ) ;
126-
127- // The values in the selection vector are the run indices, so we can find the number of
128- // values we referenced by looking at the last index of the selection vector.
129- let values_len = * unsafe { sel_vec. as_slice_mut ( len) }
130- . last ( )
131- . vortex_expect ( "non-empty" )
132- + 1 ;
133-
134- // Export the run-end values into the vector, and then turn it into a dictionary vector.
135- self . values_exporter
136- . export ( start_run_idx, values_len as usize , vector, ctx) ?;
137- vector. dictionary ( vector, values_len as usize , & sel_vec, len as _ ) ;
119+ debug_assert ! ( sel_vec_slice. is_empty( ) ) ;
138120
121+ vector. reuse_dictionary ( & self . values_dict , & sel_vec) ;
139122 Ok ( ( ) )
140123 }
141124}
0 commit comments