Skip to content

Commit 4668b6e

Browse files
authored
fix(layout): don't panic collecting an empty stream (#8472)
## Summary `CollectStrategy` collects its whole input into a single chunk for a child that requires exactly one chunk. When the input stream is empty, e.g. writing a zero-row table with a nullable struct column whose validity substream is empty, no chunk supplied a sequence id and it panicked with `must have visited at least one chunk`. `CollectStrategy` now yields nothing on empty input, and `FlatLayoutStrategy` returns an empty `ChunkedLayout` instead of requiring a single chunk, so no segment is written for an empty array. The issue mentions the fuzzer's `assume()` guard could be dropped once this is fixed. I left it in place here: reading a nullable struct nested in a struct back is a separate bug (#8348), so the round-trip only works once both are fixed. Closes: #8347 ## Testing Added a regression test in `vortex-file/tests/test_write_table.rs` that writes a zero-row nullable struct column; it panicked before this change and now writes and reads back as zero rows. The issue's Python repro (`vx.io.write` of an empty `struct` column) no longer panics. `cargo nextest run -p vortex-layout -p vortex-file` passes (177 tests, including the segment-ordering tests). `fmt --all` + `clippy --all-targets --all-features` clean. --- I'm Korean, so sorry if any wording reads a little awkward. --------- Signed-off-by: Han Damin <miniex@daminstudio.net>
1 parent f793584 commit 4668b6e

3 files changed

Lines changed: 58 additions & 3 deletions

File tree

vortex-file/tests/test_write_table.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,46 @@ async fn test_dict_listview_validity_roundtrip() {
166166
vortex_array::assert_arrays_eq!(data, chunk, &mut SESSION.create_execution_ctx());
167167
assert!(stream.next().await.is_none(), "expected a single chunk");
168168
}
169+
170+
/// Regression test: writing a zero-row table with a nullable struct column used to
171+
/// panic in `CollectStrategy` ("must have visited at least one chunk").
172+
#[tokio::test]
173+
async fn test_write_empty_nullable_struct_column() {
174+
let inner = StructArray::new(
175+
FieldNames::from(["a"]),
176+
vec![PrimitiveArray::from_iter(Vec::<i32>::new()).into_array()],
177+
0,
178+
Validity::AllValid,
179+
)
180+
.into_array();
181+
182+
let data = StructArray::new(
183+
FieldNames::from(["c0"]),
184+
vec![inner],
185+
0,
186+
Validity::NonNullable,
187+
)
188+
.into_array();
189+
190+
let mut bytes = Vec::new();
191+
SESSION
192+
.write_options()
193+
.write(&mut bytes, data.to_array_stream())
194+
.await
195+
.expect("writing an empty nullable struct column should not panic");
196+
197+
let bytes = ByteBuffer::from(bytes);
198+
let vxf = SESSION.open_options().open_buffer(bytes).expect("open");
199+
let stream = vxf
200+
.scan()
201+
.expect("scan")
202+
.into_stream()
203+
.expect("into_stream");
204+
pin_mut!(stream);
205+
206+
let mut rows = 0usize;
207+
while let Some(next) = stream.next().await {
208+
rows += next.expect("read back should succeed").len();
209+
}
210+
assert_eq!(rows, 0);
211+
}

vortex-layout/src/layouts/collect.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use futures::pin_mut;
1010
use vortex_array::ArrayContext;
1111
use vortex_array::IntoArray;
1212
use vortex_array::arrays::ChunkedArray;
13-
use vortex_error::VortexExpect;
1413
use vortex_error::VortexResult;
1514
use vortex_session::VortexSession;
1615

@@ -61,8 +60,13 @@ impl LayoutStrategy for CollectStrategy {
6160
chunks.push(chunk);
6261
}
6362

63+
// an empty input yields no chunk; the child layout handles it.
64+
let Some(sequence_id) = latest_sequence_id else {
65+
return;
66+
};
67+
6468
let collected = ChunkedArray::try_new(chunks, _dtype)?.into_array();
65-
yield (latest_sequence_id.vortex_expect("must have visited at least one chunk"), collected);
69+
yield (sequence_id, collected);
6670
};
6771

6872
let adapted = Box::pin(SequentialStreamAdapter::new(dtype, collected_stream));

vortex-layout/src/layouts/flat/writer.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ use vortex_utils::aliases::hash_set::HashSet;
2929
use crate::IntoLayout;
3030
use crate::LayoutRef;
3131
use crate::LayoutStrategy;
32+
use crate::children::OwnedLayoutChildren;
33+
use crate::layouts::chunked::ChunkedLayout;
3234
use crate::layouts::flat::FlatLayout;
3335
use crate::layouts::flat::flat_layout_inline_array_node;
3436
use crate::segments::SegmentSinkRef;
@@ -104,7 +106,13 @@ impl LayoutStrategy for FlatLayoutStrategy {
104106
) -> VortexResult<LayoutRef> {
105107
let ctx = ctx.clone();
106108
let Some(chunk) = stream.next().await else {
107-
vortex_bail!("flat layout needs a single chunk");
109+
// an empty input has no segment to write.
110+
return Ok(ChunkedLayout::new(
111+
0,
112+
stream.dtype().clone(),
113+
OwnedLayoutChildren::layout_children(vec![]),
114+
)
115+
.into_layout());
108116
};
109117
let (sequence_id, chunk) = chunk?;
110118

0 commit comments

Comments
 (0)