@@ -14,10 +14,12 @@ use std::sync::OnceLock;
1414
1515use futures:: FutureExt ;
1616use vortex_array:: EmptyMetadata ;
17+ use vortex_array:: Executable ;
1718use vortex_array:: MaskFuture ;
18- use vortex_array:: arrays:: Primitive ;
19- use vortex_array:: arrays:: Struct ;
20- use vortex_array:: arrays:: VarBinView ;
19+ use vortex_array:: VortexSessionExecute ;
20+ use vortex_array:: arrays:: PrimitiveArray ;
21+ use vortex_array:: arrays:: StructArray ;
22+ use vortex_array:: arrays:: VarBinViewArray ;
2123use vortex_array:: arrays:: struct_:: StructArrayExt ;
2224use vortex_array:: dtype:: DType ;
2325use vortex_array:: dtype:: FieldName ;
@@ -102,6 +104,53 @@ impl ArrayTreeLayout {
102104 Nullability :: NonNullable ,
103105 )
104106 }
107+
108+ /// Build a [`LayoutReaderContext`] that overlays `ctx` with a source-injecting builder
109+ /// override for this layout's [`ArrayTreeFlat`] descendants.
110+ ///
111+ /// The returned context, when used to construct a reader on a descendant layout, will
112+ /// satisfy `ArrayTreeFlat`'s requirement for an injected [`ArrayTreesSource`]. Used by:
113+ /// - The normal [`crate::VTable::new_reader`] dispatch on `ArrayTreeLayout` (production path).
114+ /// - Tools that construct readers at arbitrary points in the layout tree (explorers,
115+ /// debuggers) — they should walk from the root to the target node, calling this method
116+ /// for each `ArrayTreeLayout` ancestor on the path so the accumulated ctx carries the
117+ /// right override when the leaf is finally constructed.
118+ pub fn derive_reader_ctx (
119+ & self ,
120+ name : & str ,
121+ segment_source : Arc < dyn SegmentSource > ,
122+ session : & VortexSession ,
123+ ctx : & LayoutReaderContext ,
124+ ) -> VortexResult < LayoutReaderContext > {
125+ // Construct the array_trees auxiliary reader using the unmodified incoming context —
126+ // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides.
127+ let array_trees_child = self
128+ . children
129+ . child ( 1 , & Self :: array_trees_dtype ( ) ) ?;
130+ let trees_reader = array_trees_child. new_reader_in_ctx (
131+ Arc :: from ( format ! ( "{name}/array_trees" ) ) ,
132+ segment_source,
133+ session,
134+ ctx,
135+ ) ?;
136+ let source = Arc :: new ( ArrayTreesSource :: new ( trees_reader, session. clone ( ) ) ) ;
137+
138+ Ok ( ctx. with_override (
139+ ArrayTreeFlat :: id ( & ArrayTreeFlatLayoutEncoding ) ,
140+ Arc :: new ( move |layout, name, segs, sess, _ctx| {
141+ let atf = layout
142+ . as_opt :: < ArrayTreeFlat > ( )
143+ . vortex_expect ( "ArrayTreeFlat override applied to wrong layout encoding" ) ;
144+ Ok ( Arc :: new ( ArrayTreeFlatReader :: new (
145+ atf. clone ( ) ,
146+ name,
147+ segs,
148+ sess. clone ( ) ,
149+ Arc :: clone ( & source) ,
150+ ) ) )
151+ } ) ,
152+ ) )
153+ }
105154}
106155
107156impl VTable for ArrayTree {
@@ -160,36 +209,8 @@ impl VTable for ArrayTree {
160209 session : & VortexSession ,
161210 ctx : & LayoutReaderContext ,
162211 ) -> VortexResult < LayoutReaderRef > {
163- // Construct the array_trees auxiliary reader using the unmodified incoming context —
164- // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides.
165- let array_trees_child = Self :: child ( layout, 1 ) ?;
166- let trees_reader = array_trees_child. new_reader_in_ctx (
167- Arc :: from ( format ! ( "{name}/array_trees" ) ) ,
168- Arc :: clone ( & segment_source) ,
169- session,
170- ctx,
171- ) ?;
172- let source = Arc :: new ( ArrayTreesSource :: new ( trees_reader) ) ;
173-
174- // Derive a context that intercepts ArrayTreeFlat construction with our source-injecting
175- // builder. The data subtree (and any nested layouts within it) sees this context, so
176- // any ArrayTreeFlat descendant — no matter how deep — gets the source.
177- let derived_ctx = ctx. with_override (
178- ArrayTreeFlat :: id ( & ArrayTreeFlatLayoutEncoding ) ,
179- Arc :: new ( move |layout, name, segs, sess, _ctx| {
180- let atf = layout
181- . as_opt :: < ArrayTreeFlat > ( )
182- . vortex_expect ( "ArrayTreeFlat override applied to wrong layout encoding" ) ;
183- Ok ( Arc :: new ( ArrayTreeFlatReader :: new (
184- atf. clone ( ) ,
185- name,
186- segs,
187- sess. clone ( ) ,
188- Arc :: clone ( & source) ,
189- ) ) )
190- } ) ,
191- ) ;
192-
212+ let derived_ctx =
213+ layout. derive_reader_ctx ( & name, Arc :: clone ( & segment_source) , session, ctx) ?;
193214 let data_child = Self :: child ( layout, 0 ) ?;
194215 let data_reader = data_child. new_reader_in_ctx (
195216 Arc :: clone ( & name) ,
@@ -234,6 +255,10 @@ impl VTable for ArrayTree {
234255/// shared across all leaves of the parent [`ArrayTreeLayout`] via a `OnceLock`-cached future.
235256pub struct ArrayTreesSource {
236257 reader : LayoutReaderRef ,
258+ /// Session used to construct execution contexts when canonicalizing the array_trees
259+ /// struct (its fields may be in compressed encodings depending on how the writer's
260+ /// `array_trees_strategy` is configured).
261+ session : VortexSession ,
237262 /// Lazily initialized shared future for the segment-keyed lookup map.
238263 map : OnceLock < SharedSegmentMapFuture > ,
239264}
@@ -253,10 +278,11 @@ impl std::fmt::Debug for ArrayTreesSource {
253278}
254279
255280impl ArrayTreesSource {
256- /// Creates a new source backed by the given array_trees reader.
257- pub fn new ( reader : LayoutReaderRef ) -> Self {
281+ /// Creates a new source backed by the given array_trees reader and session .
282+ pub fn new ( reader : LayoutReaderRef , session : VortexSession ) -> Self {
258283 Self {
259284 reader,
285+ session,
260286 map : OnceLock :: new ( ) ,
261287 }
262288 }
@@ -286,6 +312,7 @@ impl ArrayTreesSource {
286312 . get_or_init ( || {
287313 let row_count = self . reader . row_count ( ) ;
288314 let reader = Arc :: clone ( & self . reader ) ;
315+ let session = self . session . clone ( ) ;
289316 async move {
290317 let array = reader
291318 . projection_evaluation (
@@ -299,7 +326,10 @@ impl ArrayTreesSource {
299326 . map_err ( Arc :: new) ?
300327 . await
301328 . map_err ( Arc :: new) ?;
302- build_segment_map ( array) . map ( Arc :: new) . map_err ( Arc :: new)
329+ let mut ctx = session. create_execution_ctx ( ) ;
330+ build_segment_map ( array, & mut ctx)
331+ . map ( Arc :: new)
332+ . map_err ( Arc :: new)
303333 }
304334 . boxed ( )
305335 . shared ( )
@@ -309,30 +339,30 @@ impl ArrayTreesSource {
309339}
310340
311341/// Decode the array_trees struct array into a `HashMap<SegmentId, ByteBuffer>`.
342+ ///
343+ /// The struct array's columns may be in compressed encodings (bitpacked `segment_id`, dict
344+ /// `compact_tree`, etc.) when read from a file whose array-trees strategy applies compression,
345+ /// so we canonicalize each field via [`Executable::execute`] before downcasting to the
346+ /// concrete typed array.
312347fn build_segment_map (
313348 array : vortex_array:: ArrayRef ,
349+ ctx : & mut vortex_array:: ExecutionCtx ,
314350) -> VortexResult < HashMap < SegmentId , ByteBuffer > > {
315- let struct_array = array
316- . try_downcast :: < Struct > ( )
317- . map_err ( |_| vortex_err ! ( "array_trees is not a Struct array" ) ) ?;
351+ let struct_array = StructArray :: execute ( array, ctx) ?;
318352
319353 let segment_ids_field = struct_array
320354 . unmasked_field_by_name_opt ( "segment_id" )
321- . ok_or_else ( || vortex_err ! ( "array_trees missing 'segment_id' field" ) ) ?;
355+ . ok_or_else ( || vortex_err ! ( "array_trees missing 'segment_id' field" ) ) ?
356+ . clone ( ) ;
322357 let trees_field = struct_array
323358 . unmasked_field_by_name_opt ( "compact_tree" )
324- . ok_or_else ( || vortex_err ! ( "array_trees missing 'compact_tree' field" ) ) ?;
359+ . ok_or_else ( || vortex_err ! ( "array_trees missing 'compact_tree' field" ) ) ?
360+ . clone ( ) ;
325361
326- let segment_ids = segment_ids_field
327- . clone ( )
328- . try_downcast :: < Primitive > ( )
329- . map_err ( |_| vortex_err ! ( "array_trees 'segment_id' field is not Primitive" ) ) ?;
362+ let segment_ids = PrimitiveArray :: execute ( segment_ids_field, ctx) ?;
330363 let segment_ids = segment_ids. as_slice :: < u32 > ( ) ;
331364
332- let trees = trees_field
333- . clone ( )
334- . try_downcast :: < VarBinView > ( )
335- . map_err ( |_| vortex_err ! ( "array_trees 'compact_tree' field is not a VarBinView" ) ) ?;
365+ let trees = VarBinViewArray :: execute ( trees_field, ctx) ?;
336366
337367 let mut map = HashMap :: with_capacity ( segment_ids. len ( ) ) ;
338368 for ( idx, & seg) in segment_ids. iter ( ) . enumerate ( ) {
0 commit comments