33
44use std:: fmt:: Debug ;
55use std:: hash:: Hash ;
6-
6+ use std:: sync:: Arc ;
7+
8+ use arrow_array:: ArrayRef as ArrowArrayRef ;
9+ use arrow_array:: RunArray ;
10+ use arrow_array:: cast:: AsArray ;
11+ use arrow_array:: types:: * ;
12+ use arrow_buffer:: ArrowNativeType ;
13+ use arrow_schema:: DataType ;
714use vortex_array:: ArrayEq ;
815use vortex_array:: ArrayHash ;
916use vortex_array:: ArrayRef ;
@@ -17,6 +24,7 @@ use vortex_array::ProstMetadata;
1724use vortex_array:: SerializeMetadata ;
1825use vortex_array:: arrays:: Primitive ;
1926use vortex_array:: arrays:: VarBinViewArray ;
27+ use vortex_array:: arrow:: ArrowArrayExecutor ;
2028use vortex_array:: buffer:: BufferHandle ;
2129use vortex_array:: dtype:: DType ;
2230use vortex_array:: dtype:: Nullability ;
@@ -203,6 +211,31 @@ impl VTable for RunEnd {
203211 PARENT_KERNELS . execute ( array, parent, child_idx, ctx)
204212 }
205213
214+ fn to_arrow_array (
215+ array : & RunEndArray ,
216+ data_type : & DataType ,
217+ ctx : & mut ExecutionCtx ,
218+ ) -> VortexResult < Option < ArrowArrayRef > > {
219+ let DataType :: RunEndEncoded ( ends_field, values_field) = data_type else {
220+ return Ok ( None ) ;
221+ } ;
222+ let arrow_ends = array
223+ . ends ( )
224+ . clone ( )
225+ . execute_arrow ( Some ( ends_field. data_type ( ) ) , ctx) ?;
226+ let arrow_values = array
227+ . values ( )
228+ . clone ( )
229+ . execute_arrow ( Some ( values_field. data_type ( ) ) , ctx) ?;
230+ Ok ( Some ( build_run_array (
231+ & arrow_ends,
232+ & arrow_values,
233+ ends_field. data_type ( ) ,
234+ array. offset ( ) ,
235+ array. len ( ) ,
236+ ) ?) )
237+ }
238+
206239 fn execute ( array : & Self :: Array , ctx : & mut ExecutionCtx ) -> VortexResult < ExecutionStep > {
207240 run_end_canonicalize ( array, ctx) . map ( ExecutionStep :: Done )
208241 }
@@ -503,6 +536,59 @@ pub(super) fn run_end_canonicalize(
503536 } )
504537}
505538
539+ fn build_run_array (
540+ ends : & ArrowArrayRef ,
541+ values : & ArrowArrayRef ,
542+ ends_type : & DataType ,
543+ offset : usize ,
544+ length : usize ,
545+ ) -> VortexResult < ArrowArrayRef > {
546+ match ends_type {
547+ DataType :: Int16 => build_run_array_typed :: < Int16Type > ( ends, values, offset, length) ,
548+ DataType :: Int32 => build_run_array_typed :: < Int32Type > ( ends, values, offset, length) ,
549+ DataType :: Int64 => build_run_array_typed :: < Int64Type > ( ends, values, offset, length) ,
550+ _ => vortex_bail ! ( "Unsupported run-end index type: {:?}" , ends_type) ,
551+ }
552+ }
553+
554+ fn build_run_array_typed < R : RunEndIndexType > (
555+ ends : & ArrowArrayRef ,
556+ values : & ArrowArrayRef ,
557+ offset : usize ,
558+ length : usize ,
559+ ) -> VortexResult < ArrowArrayRef >
560+ where
561+ R :: Native : std:: ops:: Sub < Output = R :: Native > + Ord ,
562+ {
563+ let offset_native = R :: Native :: from_usize ( offset) . ok_or_else ( || {
564+ vortex_error:: vortex_err!( "Offset {offset} exceeds run-end index capacity" )
565+ } ) ?;
566+ let length_native = R :: Native :: from_usize ( length) . ok_or_else ( || {
567+ vortex_error:: vortex_err!( "Length {length} exceeds run-end index capacity" )
568+ } ) ?;
569+
570+ let ends_prim = ends. as_primitive :: < R > ( ) ;
571+ if offset == 0 && ends_prim. values ( ) . last ( ) == Some ( & length_native) {
572+ return Ok ( Arc :: new ( RunArray :: < R > :: try_new ( ends_prim, values) ?) as ArrowArrayRef ) ;
573+ }
574+
575+ // Trim to only include runs covering the [offset, offset+length) range.
576+ let num_runs = ( ends_prim
577+ . values ( )
578+ . partition_point ( |& e| e - offset_native < length_native)
579+ + 1 )
580+ . min ( ends_prim. len ( ) ) ;
581+
582+ let trimmed_ends = ends. slice ( 0 , num_runs) ;
583+ let trimmed_values = values. slice ( 0 , num_runs) ;
584+
585+ let adjusted = trimmed_ends
586+ . as_primitive :: < R > ( )
587+ . unary ( |end| ( end - offset_native) . min ( length_native) ) ;
588+
589+ Ok ( Arc :: new ( RunArray :: < R > :: try_new ( & adjusted, & trimmed_values) ?) as ArrowArrayRef )
590+ }
591+
506592#[ cfg( test) ]
507593mod tests {
508594 use vortex_array:: IntoArray ;
0 commit comments