Skip to content

Commit a5f6e71

Browse files
committed
vortex web to work with array tree layouts
Signed-off-by: Onur Satici <onur@spiraldb.com>
1 parent 0ad3689 commit a5f6e71

6 files changed

Lines changed: 201 additions & 82 deletions

File tree

vortex-layout/src/layouts/array_tree/flat.rs

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,10 @@ pub struct ArrayTreeFlatLayoutEncoding;
4040
/// At read time, this layout's reader looks up its compact tree in a shared
4141
/// [`super::ArrayTreesSource`] using its own [`SegmentId`]. Construction requires that an
4242
/// ancestor [`super::ArrayTreeLayout`] has registered a reader-builder override against
43-
/// this encoding's ID — this layout has no useful default reader.
43+
/// this encoding's ID — this layout has no useful default reader. Tools that need to
44+
/// construct readers at arbitrary points in the layout tree (explorers, debuggers) should
45+
/// use [`super::ArrayTreeLayout::derive_reader_ctx`] to build a context that registers the
46+
/// override before descending to the leaf.
4447
#[derive(Clone, Debug)]
4548
pub struct ArrayTreeFlatLayout {
4649
inner: FlatLayout,
@@ -109,10 +112,12 @@ impl VTable for ArrayTreeFlat {
109112
// ArrayTreeFlatLayout has no useful default reader. It exists to be intercepted by an
110113
// ancestor ArrayTreeLayout that registers a reader-builder override carrying the
111114
// shared ArrayTreesSource. If the dispatcher reached this method, no such ancestor
112-
// was present in the layout tree.
115+
// was present in the layout tree — see `ArrayTreeLayout::derive_reader_ctx` for the
116+
// helper that tools should call when starting reader construction below the root.
113117
vortex_bail!(
114118
"ArrayTreeFlatLayout requires an ancestor ArrayTreeLayout to register a reader \
115-
builder override; this layout cannot be read on its own"
119+
builder override; call ArrayTreeLayout::derive_reader_ctx on each ArrayTreeLayout \
120+
ancestor before constructing a reader for this layout"
116121
)
117122
}
118123

vortex-layout/src/layouts/array_tree/mod.rs

Lines changed: 79 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@ use std::sync::OnceLock;
1414

1515
use futures::FutureExt;
1616
use vortex_array::EmptyMetadata;
17+
use vortex_array::Executable;
1718
use vortex_array::MaskFuture;
18-
use vortex_array::arrays::Primitive;
19-
use vortex_array::arrays::Struct;
20-
use vortex_array::arrays::VarBinView;
19+
use vortex_array::VortexSessionExecute;
20+
use vortex_array::arrays::PrimitiveArray;
21+
use vortex_array::arrays::StructArray;
22+
use vortex_array::arrays::VarBinViewArray;
2123
use vortex_array::arrays::struct_::StructArrayExt;
2224
use vortex_array::dtype::DType;
2325
use vortex_array::dtype::FieldName;
@@ -102,6 +104,53 @@ impl ArrayTreeLayout {
102104
Nullability::NonNullable,
103105
)
104106
}
107+
108+
/// Build a [`LayoutReaderContext`] that overlays `ctx` with a source-injecting builder
109+
/// override for this layout's [`ArrayTreeFlat`] descendants.
110+
///
111+
/// The returned context, when used to construct a reader on a descendant layout, will
112+
/// satisfy `ArrayTreeFlat`'s requirement for an injected [`ArrayTreesSource`]. Used by:
113+
/// - The normal [`crate::VTable::new_reader`] dispatch on `ArrayTreeLayout` (production path).
114+
/// - Tools that construct readers at arbitrary points in the layout tree (explorers,
115+
/// debuggers) — they should walk from the root to the target node, calling this method
116+
/// for each `ArrayTreeLayout` ancestor on the path so the accumulated ctx carries the
117+
/// right override when the leaf is finally constructed.
118+
pub fn derive_reader_ctx(
119+
&self,
120+
name: &str,
121+
segment_source: Arc<dyn SegmentSource>,
122+
session: &VortexSession,
123+
ctx: &LayoutReaderContext,
124+
) -> VortexResult<LayoutReaderContext> {
125+
// Construct the array_trees auxiliary reader using the unmodified incoming context —
126+
// the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides.
127+
let array_trees_child = self
128+
.children
129+
.child(1, &Self::array_trees_dtype())?;
130+
let trees_reader = array_trees_child.new_reader_in_ctx(
131+
Arc::from(format!("{name}/array_trees")),
132+
segment_source,
133+
session,
134+
ctx,
135+
)?;
136+
let source = Arc::new(ArrayTreesSource::new(trees_reader, session.clone()));
137+
138+
Ok(ctx.with_override(
139+
ArrayTreeFlat::id(&ArrayTreeFlatLayoutEncoding),
140+
Arc::new(move |layout, name, segs, sess, _ctx| {
141+
let atf = layout
142+
.as_opt::<ArrayTreeFlat>()
143+
.vortex_expect("ArrayTreeFlat override applied to wrong layout encoding");
144+
Ok(Arc::new(ArrayTreeFlatReader::new(
145+
atf.clone(),
146+
name,
147+
segs,
148+
sess.clone(),
149+
Arc::clone(&source),
150+
)))
151+
}),
152+
))
153+
}
105154
}
106155

107156
impl VTable for ArrayTree {
@@ -160,36 +209,8 @@ impl VTable for ArrayTree {
160209
session: &VortexSession,
161210
ctx: &LayoutReaderContext,
162211
) -> VortexResult<LayoutReaderRef> {
163-
// Construct the array_trees auxiliary reader using the unmodified incoming context —
164-
// the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides.
165-
let array_trees_child = Self::child(layout, 1)?;
166-
let trees_reader = array_trees_child.new_reader_in_ctx(
167-
Arc::from(format!("{name}/array_trees")),
168-
Arc::clone(&segment_source),
169-
session,
170-
ctx,
171-
)?;
172-
let source = Arc::new(ArrayTreesSource::new(trees_reader));
173-
174-
// Derive a context that intercepts ArrayTreeFlat construction with our source-injecting
175-
// builder. The data subtree (and any nested layouts within it) sees this context, so
176-
// any ArrayTreeFlat descendant — no matter how deep — gets the source.
177-
let derived_ctx = ctx.with_override(
178-
ArrayTreeFlat::id(&ArrayTreeFlatLayoutEncoding),
179-
Arc::new(move |layout, name, segs, sess, _ctx| {
180-
let atf = layout
181-
.as_opt::<ArrayTreeFlat>()
182-
.vortex_expect("ArrayTreeFlat override applied to wrong layout encoding");
183-
Ok(Arc::new(ArrayTreeFlatReader::new(
184-
atf.clone(),
185-
name,
186-
segs,
187-
sess.clone(),
188-
Arc::clone(&source),
189-
)))
190-
}),
191-
);
192-
212+
let derived_ctx =
213+
layout.derive_reader_ctx(&name, Arc::clone(&segment_source), session, ctx)?;
193214
let data_child = Self::child(layout, 0)?;
194215
let data_reader = data_child.new_reader_in_ctx(
195216
Arc::clone(&name),
@@ -234,6 +255,10 @@ impl VTable for ArrayTree {
234255
/// shared across all leaves of the parent [`ArrayTreeLayout`] via a `OnceLock`-cached future.
235256
pub struct ArrayTreesSource {
236257
reader: LayoutReaderRef,
258+
/// Session used to construct execution contexts when canonicalizing the array_trees
259+
/// struct (its fields may be in compressed encodings depending on how the writer's
260+
/// `array_trees_strategy` is configured).
261+
session: VortexSession,
237262
/// Lazily initialized shared future for the segment-keyed lookup map.
238263
map: OnceLock<SharedSegmentMapFuture>,
239264
}
@@ -253,10 +278,11 @@ impl std::fmt::Debug for ArrayTreesSource {
253278
}
254279

255280
impl ArrayTreesSource {
256-
/// Creates a new source backed by the given array_trees reader.
257-
pub fn new(reader: LayoutReaderRef) -> Self {
281+
/// Creates a new source backed by the given array_trees reader and session.
282+
pub fn new(reader: LayoutReaderRef, session: VortexSession) -> Self {
258283
Self {
259284
reader,
285+
session,
260286
map: OnceLock::new(),
261287
}
262288
}
@@ -286,6 +312,7 @@ impl ArrayTreesSource {
286312
.get_or_init(|| {
287313
let row_count = self.reader.row_count();
288314
let reader = Arc::clone(&self.reader);
315+
let session = self.session.clone();
289316
async move {
290317
let array = reader
291318
.projection_evaluation(
@@ -299,7 +326,10 @@ impl ArrayTreesSource {
299326
.map_err(Arc::new)?
300327
.await
301328
.map_err(Arc::new)?;
302-
build_segment_map(array).map(Arc::new).map_err(Arc::new)
329+
let mut ctx = session.create_execution_ctx();
330+
build_segment_map(array, &mut ctx)
331+
.map(Arc::new)
332+
.map_err(Arc::new)
303333
}
304334
.boxed()
305335
.shared()
@@ -309,30 +339,30 @@ impl ArrayTreesSource {
309339
}
310340

311341
/// Decode the array_trees struct array into a `HashMap<SegmentId, ByteBuffer>`.
342+
///
343+
/// The struct array's columns may be in compressed encodings (bitpacked `segment_id`, dict
344+
/// `compact_tree`, etc.) when read from a file whose array-trees strategy applies compression,
345+
/// so we canonicalize each field via [`Executable::execute`] before downcasting to the
346+
/// concrete typed array.
312347
fn build_segment_map(
313348
array: vortex_array::ArrayRef,
349+
ctx: &mut vortex_array::ExecutionCtx,
314350
) -> VortexResult<HashMap<SegmentId, ByteBuffer>> {
315-
let struct_array = array
316-
.try_downcast::<Struct>()
317-
.map_err(|_| vortex_err!("array_trees is not a Struct array"))?;
351+
let struct_array = StructArray::execute(array, ctx)?;
318352

319353
let segment_ids_field = struct_array
320354
.unmasked_field_by_name_opt("segment_id")
321-
.ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))?;
355+
.ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))?
356+
.clone();
322357
let trees_field = struct_array
323358
.unmasked_field_by_name_opt("compact_tree")
324-
.ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))?;
359+
.ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))?
360+
.clone();
325361

326-
let segment_ids = segment_ids_field
327-
.clone()
328-
.try_downcast::<Primitive>()
329-
.map_err(|_| vortex_err!("array_trees 'segment_id' field is not Primitive"))?;
362+
let segment_ids = PrimitiveArray::execute(segment_ids_field, ctx)?;
330363
let segment_ids = segment_ids.as_slice::<u32>();
331364

332-
let trees = trees_field
333-
.clone()
334-
.try_downcast::<VarBinView>()
335-
.map_err(|_| vortex_err!("array_trees 'compact_tree' field is not a VarBinView"))?;
365+
let trees = VarBinViewArray::execute(trees_field, ctx)?;
336366

337367
let mut map = HashMap::with_capacity(segment_ids.len());
338368
for (idx, &seg) in segment_ids.iter().enumerate() {

0 commit comments

Comments
 (0)