Skip to content

Commit 20d8b2e

Browse files
committed
tests, fix wasm swallowed error
Signed-off-by: Onur Satici <onur@spiraldb.com>
1 parent a5f6e71 commit 20d8b2e

4 files changed

Lines changed: 131 additions & 60 deletions

File tree

vortex-file/src/tests.rs

Lines changed: 89 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,76 +1910,50 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> {
19101910

19111911
check_zoned_ordering(root.as_ref(), segment_specs);
19121912

1913-
// Additionally: all zone map segments across all columns should appear after
1914-
// all data segments across all columns. Array tree segments (if present) appear
1915-
// between data and zones.
1913+
// Additionally: every zone-map segment across all columns must appear after every data
1914+
// segment across all columns. This holds even with cross-column interleaving inside the
1915+
// metadata phase because the root writer splits the sequence universe — chunk data uses
1916+
// IDs derived from `ptr` and all metadata (zones, etc.) derives from `eof`, with
1917+
// `ptr < eof` so all data segments globally precede all metadata segments.
1918+
//
1919+
// Per-column `data < array_trees < zones` ordering for the new `ArrayTreeLayout` is
1920+
// covered separately by the array-tree-specific ordering tests.
19161921
let mut all_data_offsets = Vec::new();
1917-
let mut all_array_tree_offsets = Vec::new();
19181922
let mut all_zones_offsets = Vec::new();
19191923

19201924
fn collect_all_zoned(
19211925
layout: &dyn Layout,
19221926
segment_specs: &[SegmentSpec],
19231927
all_data: &mut Vec<u64>,
1924-
all_array_trees: &mut Vec<u64>,
19251928
all_zones: &mut Vec<u64>,
19261929
) {
19271930
if layout.encoding_id().as_ref() == "vortex.stats" {
1928-
// child 0 = data (may contain array_tree layouts), child 1 = zones
1929-
let data_child = layout.child(0).unwrap();
1930-
// If the data child is an array_tree layout, split its segments.
1931-
if data_child.encoding_id().as_ref() == "vortex.array_tree" {
1932-
// child 0 = actual data, child 1 = array_trees auxiliary
1933-
all_data.extend(collect_segment_offsets(
1934-
data_child.child(0).unwrap().as_ref(),
1935-
segment_specs,
1936-
));
1937-
all_array_trees.extend(collect_segment_offsets(
1938-
data_child.child(1).unwrap().as_ref(),
1939-
segment_specs,
1940-
));
1941-
} else {
1942-
all_data.extend(collect_segment_offsets(data_child.as_ref(), segment_specs));
1943-
}
1931+
all_data.extend(collect_segment_offsets(
1932+
layout.child(0).unwrap().as_ref(),
1933+
segment_specs,
1934+
));
19441935
all_zones.extend(collect_segment_offsets(
19451936
layout.child(1).unwrap().as_ref(),
19461937
segment_specs,
19471938
));
19481939
return;
19491940
}
19501941
for child in layout.children().unwrap() {
1951-
collect_all_zoned(
1952-
child.as_ref(),
1953-
segment_specs,
1954-
all_data,
1955-
all_array_trees,
1956-
all_zones,
1957-
);
1942+
collect_all_zoned(child.as_ref(), segment_specs, all_data, all_zones);
19581943
}
19591944
}
19601945

19611946
collect_all_zoned(
19621947
root.as_ref(),
19631948
segment_specs,
19641949
&mut all_data_offsets,
1965-
&mut all_array_tree_offsets,
19661950
&mut all_zones_offsets,
19671951
);
19681952

1969-
// The root writer splits the sequence universe into two: data chunks use IDs from `ptr`
1970-
// and all metadata (array trees, zones) derive from `eof`. Since ptr < eof, all data
1971-
// segments are globally before all metadata segments.
1972-
//
1973-
// Within the eof universe, per-column ordering guarantees array_trees < zones within
1974-
// each column, but cross-column interleaving means we cannot assert
1975-
// all_array_trees < all_zones globally.
1976-
let mut all_metadata_offsets = all_array_tree_offsets;
1977-
all_metadata_offsets.extend(&all_zones_offsets);
1978-
19791953
assert_offsets_ordered(
19801954
&all_data_offsets,
1981-
&all_metadata_offsets,
1982-
"global: all data segments should come before all metadata segments (array trees + zone maps)",
1955+
&all_zones_offsets,
1956+
"global: all data segments should come before all zone-map segments",
19831957
);
19841958

19851959
Ok(())
@@ -2146,3 +2120,77 @@ async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> {
21462120

21472121
Ok(())
21482122
}
2123+
2124+
#[tokio::test]
2125+
#[cfg_attr(miri, ignore)]
2126+
async fn test_roundtrip_array_tree_layout() -> VortexResult<()> {
2127+
// End-to-end coverage of the new ArrayTreeLayout: write a multi-column struct with
2128+
// `with_array_tree(true)`, then read it back through the override-registration path and
2129+
// assert the data matches. Exercises:
2130+
// - ArrayTreeCollectorStrategy collecting compact trees via the side-channel sink
2131+
// - ArrayTreeLayout::new_reader deriving a ctx with the source-injecting override
2132+
// - ArrayTreeFlatReader looking up its compact tree from ArrayTreesSource by segment_id
2133+
// - SerializedArray::from_flatbuffer_and_segment decoding the data segment
2134+
let mut ctx = SESSION.create_execution_ctx();
2135+
2136+
let n = 10_000;
2137+
let strings_in: Vec<&str> = (0..n).map(|i| ["alpha", "beta", "gamma"][i % 3]).collect();
2138+
let strings = VarBinArray::from(strings_in.clone()).into_array();
2139+
let numbers_in: Vec<i32> = (0..n as i32).collect();
2140+
let numbers = PrimitiveArray::from_iter(numbers_in.iter().copied()).into_array();
2141+
2142+
let st = StructArray::from_fields(&[("strings", strings), ("numbers", numbers)])?.into_array();
2143+
let dtype = st.dtype().clone();
2144+
2145+
let mut buf = ByteBufferMut::empty();
2146+
let strategy = crate::WriteStrategyBuilder::default()
2147+
.with_array_tree(true)
2148+
.build();
2149+
SESSION
2150+
.write_options()
2151+
.with_strategy(strategy)
2152+
.write(&mut buf, st.to_array_stream())
2153+
.await?;
2154+
2155+
// Sanity-check that we actually wrote ArrayTreeLayout nodes — otherwise the test would
2156+
// silently pass on the default code path.
2157+
let file = SESSION.open_options().open_buffer(buf)?;
2158+
fn has_array_tree(layout: &dyn Layout) -> bool {
2159+
if layout.encoding_id().as_ref() == "vortex.array_tree" {
2160+
return true;
2161+
}
2162+
layout
2163+
.children()
2164+
.map(|cs| cs.iter().any(|c| has_array_tree(c.as_ref())))
2165+
.unwrap_or(false)
2166+
}
2167+
assert!(
2168+
has_array_tree(file.footer().layout().as_ref()),
2169+
"test expected ArrayTreeLayout in the written file"
2170+
);
2171+
2172+
// Read back and assert structure + data round-trip cleanly.
2173+
let result = file.scan()?.into_array_stream()?.read_all().await?;
2174+
assert_eq!(result.len(), n);
2175+
assert_eq!(result.dtype(), &dtype);
2176+
2177+
let struct_array = result.execute::<StructArray>(&mut ctx)?;
2178+
2179+
let read_numbers = struct_array.unmasked_field_by_name("numbers").cloned()?;
2180+
let expected_numbers = PrimitiveArray::from_iter(numbers_in.iter().copied()).into_array();
2181+
assert_arrays_eq!(read_numbers, expected_numbers);
2182+
2183+
let read_strings = struct_array
2184+
.unmasked_field_by_name("strings")
2185+
.cloned()?
2186+
.execute::<VarBinViewArray>(&mut ctx)?
2187+
.with_iterator(|iter| {
2188+
iter.map(|s| s.map(|st| unsafe { String::from_utf8_unchecked(st.to_vec()) }))
2189+
.collect::<Vec<_>>()
2190+
});
2191+
let expected_strings: Vec<Option<String>> =
2192+
strings_in.iter().map(|s| Some((*s).to_string())).collect();
2193+
assert_eq!(read_strings, expected_strings);
2194+
2195+
Ok(())
2196+
}

vortex-layout/src/display.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ mod tests {
262262

263263
/// Test display_tree for a struct layout (fallback rendering, no inline array_tree).
264264
#[test]
265-
fn test_display_tree_inline_array_tree() {
265+
fn test_display_tree_struct_layout_fallback() {
266266
block_on(|handle| async move {
267267
let session = SESSION.clone().with_handle(handle);
268268
let ctx = ArrayContext::empty();

vortex-layout/src/layouts/array_tree/mod.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,7 @@ impl ArrayTreeLayout {
124124
) -> VortexResult<LayoutReaderContext> {
125125
// Construct the array_trees auxiliary reader using the unmodified incoming context —
126126
// the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides.
127-
let array_trees_child = self
128-
.children
129-
.child(1, &Self::array_trees_dtype())?;
127+
let array_trees_child = self.children.child(1, &Self::array_trees_dtype())?;
130128
let trees_reader = array_trees_child.new_reader_in_ctx(
131129
Arc::from(format!("{name}/array_trees")),
132130
segment_source,

vortex-web/crate/src/wasm.rs

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ impl VortexFileHandle {
316316
node_id,
317317
Some((&segment_source, &self.session)),
318318
)
319+
.map_err(|e| JsValue::from_str(&e.to_string()))?
319320
.ok_or_else(|| JsValue::from_str(&format!("Layout node not found: {node_id}")))?;
320321

321322
let reader = layout
@@ -709,7 +710,12 @@ fn build_array_encoding_tree_from_array(
709710
/// IDs match the format: "root.field_name.chunked.[0]" where each segment
710711
/// corresponds to a `LayoutChildType::name()`.
711712
fn find_layout_by_id(root: &LayoutRef, node_id: &str) -> Option<LayoutRef> {
712-
find_layout_with_ctx(root, node_id, None).map(|(layout, _)| layout)
713+
match find_layout_with_ctx(root, node_id, None) {
714+
Ok(Some((layout, _))) => Some(layout),
715+
Ok(None) => None,
716+
// No derivation requested → derive_reader_ctx is never called, so this can't fail.
717+
Err(_) => None,
718+
}
713719
}
714720

715721
/// Like [`find_layout_by_id`], but also builds a [`LayoutReaderContext`] that registers a
@@ -726,36 +732,55 @@ fn find_layout_by_id(root: &LayoutRef, node_id: &str) -> Option<LayoutRef> {
726732
/// Pass `Some((segment_source, session))` to actually derive the context. When passed
727733
/// `None`, the function still navigates the tree and returns an empty context — useful for
728734
/// callers that only need the layout reference.
735+
///
736+
/// Return shape:
737+
/// - `Ok(Some((layout, ctx)))` — target found.
738+
/// - `Ok(None)` — `node_id` does not resolve to a layout in the tree (bad path).
739+
/// - `Err(e)` — `derive_reader_ctx` failed on some ancestor (e.g. the trees segment is
740+
/// unreadable). Surfaces the real cause instead of silently producing a ctx that would
741+
/// make the leaf bail with "missing override".
729742
fn find_layout_with_ctx(
730743
root: &LayoutRef,
731744
node_id: &str,
732-
derive: Option<(&Arc<dyn vortex::layout::segments::SegmentSource>, &VortexSession)>,
733-
) -> Option<(LayoutRef, LayoutReaderContext)> {
745+
derive: Option<(
746+
&Arc<dyn vortex::layout::segments::SegmentSource>,
747+
&VortexSession,
748+
)>,
749+
) -> VortexResult<Option<(LayoutRef, LayoutReaderContext)>> {
734750
let segments: Vec<&str> = node_id.split('.').collect();
735751
if segments.is_empty() || segments[0] != "root" {
736-
return None;
752+
return Ok(None);
737753
}
738754

739755
let mut current = root.clone();
740756
let mut current_id = String::from("root");
741757
let mut ctx = LayoutReaderContext::new();
742758

743-
let maybe_derive = |layout: &LayoutRef, name: &str, ctx: &LayoutReaderContext| {
759+
// For every layout we walk through, if it's an ArrayTree and we have the deps to derive,
760+
// overlay its source-injecting override onto the ctx; any other layout is a passthrough.
761+
let try_derive = |layout: &LayoutRef,
762+
name: &str,
763+
ctx: &LayoutReaderContext|
764+
-> VortexResult<LayoutReaderContext> {
744765
let Some((segment_source, session)) = derive else {
745-
return Some(ctx.clone());
766+
return Ok(ctx.clone());
746767
};
747-
layout.as_opt::<ArrayTree>().and_then(|atl| {
748-
atl.derive_reader_ctx(name, Arc::clone(segment_source), session, ctx).ok()
749-
}).or_else(|| Some(ctx.clone()))
768+
match layout.as_opt::<ArrayTree>() {
769+
Some(atl) => atl.derive_reader_ctx(name, Arc::clone(segment_source), session, ctx),
770+
None => Ok(ctx.clone()),
771+
}
750772
};
751773

752-
ctx = maybe_derive(&current, &current_id, &ctx)?;
774+
ctx = try_derive(&current, &current_id, &ctx)?;
753775
if segments.len() == 1 {
754-
return Some((current, ctx));
776+
return Ok(Some((current, ctx)));
755777
}
756778

757779
for seg in &segments[1..] {
758-
let children = current.children().ok()?;
780+
let children = match current.children() {
781+
Ok(c) => c,
782+
Err(_) => return Ok(None),
783+
};
759784
let mut found = None;
760785
for (i, child) in children.into_iter().enumerate() {
761786
let name = current.child_type(i).name();
@@ -764,13 +789,13 @@ fn find_layout_with_ctx(
764789
break;
765790
}
766791
}
767-
let child = found?;
792+
let Some(child) = found else { return Ok(None) };
768793
current = child;
769794
current_id.push('.');
770795
current_id.push_str(seg);
771-
ctx = maybe_derive(&current, &current_id, &ctx)?;
796+
ctx = try_derive(&current, &current_id, &ctx)?;
772797
}
773-
Some((current, ctx))
798+
Ok(Some((current, ctx)))
774799
}
775800

776801
/// Downgrade Arrow `*View` types to their non-view equivalents so the JS

0 commit comments

Comments
 (0)