Skip to content

Commit 9567467

Browse files
authored
feat(array): push struct validity into children (#8589)
## Summary

`push_validity_into_children` masks each field with the struct's top-level validity, so a row that is null at the struct level becomes null in every field (`{a: 1, b: 2}, NULL` -> `{a: 1, b: 2}, {a: NULL, b: NULL}`), mirroring Arrow's `StructArray::flatten`. When `remove_struct_validity` is set, the top-level validity is dropped and the result is non-nullable; otherwise the top-level validity is kept. A struct with no top-level nulls takes a fast path that leaves its fields unmasked, so only the top-level nullability can change.

Each field is masked via a `mask` expression (per @gatesn's note on the issue, not the eager `compute::mask` of #5826).

Open question: should this be a `StructArray` method, or a standalone mask expression in the new operator world?

Closes: #3859

## Benchmark

For reference (not committed), compared with hand-rolling the same masking without the fast path: with no top-level nulls the fast path is ~5-7x faster (0.26us vs 1.2us at 4 fields, 0.65us vs 4.5us at 16 fields); with nulls the two are equal (~1.7us / ~6.3us), so the method adds no overhead.

## Testing

`cargo nextest run -p vortex-array` passes (drops/preserves validity, intersecting field-level nulls, all-invalid, no-nulls fast path); `fmt --all` and `clippy --all-targets --all-features` are clean.

---

I'm Korean, so sorry if any wording reads a little awkward.

Signed-off-by: Han Damin <miniex@daminstudio.net>
1 parent 97f49ed commit 9567467

2 files changed

Lines changed: 210 additions & 0 deletions

File tree

vortex-array/src/arrays/struct_/array.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use crate::array::child_to_validity;
2222
use crate::array::validity_to_child;
2323
use crate::arrays::ChunkedArray;
2424
use crate::arrays::Struct;
25+
use crate::builtins::ArrayBuiltins;
2526
use crate::dtype::DType;
2627
use crate::dtype::FieldName;
2728
use crate::dtype::FieldNames;
@@ -525,4 +526,38 @@ impl Array<Struct> {
525526
// the correct length and dtype harmony.
526527
Ok(unsafe { Array::<Struct>::new_unchecked(field_arrays, struct_fields, len, validity) })
527528
}
529+
530+
/// Push the struct's top-level validity into each field, so a row null at the struct level
531+
/// becomes null in every field.
532+
///
533+
/// If `remove_struct_validity` is set the result is non-nullable; otherwise it keeps its
534+
/// top-level validity.
///
/// Each field is combined with the struct validity through `mask`; nulls a field already
/// carries are expected to be kept rather than overwritten (this is what
/// `test_push_validity_into_children_intersects_field_validity` asserts).
///
/// When the struct validity reports `definitely_no_nulls()`, the fields are passed through
/// unmasked and only the top-level validity of the result may differ from `self`.
///
/// # Errors
///
/// Propagates any error returned while masking a field or by `Self::try_new`.
535+
pub fn push_validity_into_children(&self, remove_struct_validity: bool) -> VortexResult<Self> {
536+
let struct_validity = self.struct_validity();
537+
538+
// Top-level validity of the result: dropped to `NonNullable`, or carried over as-is.
let new_validity = if remove_struct_validity {
539+
Validity::NonNullable
540+
} else {
541+
struct_validity.clone()
542+
};
543+
544+
// Nothing to push down: rebuild from the unmasked fields and only apply `new_validity`.
545+
if struct_validity.definitely_no_nulls() {
546+
return Self::try_new(
547+
self.names().clone(),
548+
self.unmasked_fields(),
549+
self.len(),
550+
new_validity,
551+
);
552+
}
553+
554+
// Null each field where the struct row is null.
555+
// The struct validity, converted to an array of `self.len()` rows, is the mask shared by
// every field.
let mask = struct_validity.to_array(self.len());
556+
let fields = self
557+
.iter_unmasked_fields()
558+
.map(|field| field.clone().mask(mask.clone()))
559+
// Stop at the first field whose masking fails and return that error.
.collect::<VortexResult<Vec<_>>>()?;
560+
561+
Self::try_new(self.names().clone(), fields, self.len(), new_validity)
562+
}
528563
}

vortex-array/src/arrays/struct_/tests.rs

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,178 @@ fn test_uncompressed_size_in_bytes() -> VortexResult<()> {
172172
assert_eq!(uncompressed_size, Some(4000));
173173
Ok(())
174174
}
175+
176+
/// With `remove_struct_validity = true`, a struct that is null at row 1 must come back
/// non-nullable, with both fields null at row 1 instead.
#[test]
177+
fn test_push_validity_into_children_drops_struct_validity() -> VortexResult<()> {
178+
let mut ctx = array_session().create_execution_ctx();
179+
let struct_array = StructArray::try_new(
180+
FieldNames::from(["a", "b"]),
181+
vec![
182+
buffer![1i32, 2, 3].into_array(),
183+
buffer![10i32, 20, 30].into_array(),
184+
],
185+
3,
186+
Validity::from_iter([true, false, true]),
187+
)?;
188+
189+
let pushed = struct_array.push_validity_into_children(true)?;
190+
191+
// The struct is now non-nullable; the row-1 null lives in every field instead.
192+
let expected = StructArray::try_new(
193+
FieldNames::from(["a", "b"]),
194+
vec![
195+
PrimitiveArray::new(
196+
buffer![1i32, 2, 3],
197+
Validity::from_iter([true, false, true]),
198+
)
199+
.into_array(),
200+
PrimitiveArray::new(
201+
buffer![10i32, 20, 30],
202+
Validity::from_iter([true, false, true]),
203+
)
204+
.into_array(),
205+
],
206+
3,
207+
Validity::NonNullable,
208+
)?;
209+
210+
assert!(!pushed.dtype().is_nullable());
211+
assert_arrays_eq!(pushed, expected, &mut ctx);
212+
Ok(())
213+
}
214+
215+
/// With `remove_struct_validity = false`, the struct must stay nullable with its original
/// validity while both fields additionally become null at row 1.
#[test]
216+
fn test_push_validity_into_children_preserves_struct_validity() -> VortexResult<()> {
217+
let mut ctx = array_session().create_execution_ctx();
218+
let struct_array = StructArray::try_new(
219+
FieldNames::from(["a", "b"]),
220+
vec![
221+
buffer![1i32, 2, 3].into_array(),
222+
buffer![10i32, 20, 30].into_array(),
223+
],
224+
3,
225+
Validity::from_iter([true, false, true]),
226+
)?;
227+
228+
let pushed = struct_array.push_validity_into_children(false)?;
229+
230+
// The null now exists both at the struct level and in every field.
231+
let expected = StructArray::try_new(
232+
FieldNames::from(["a", "b"]),
233+
vec![
234+
PrimitiveArray::new(
235+
buffer![1i32, 2, 3],
236+
Validity::from_iter([true, false, true]),
237+
)
238+
.into_array(),
239+
PrimitiveArray::new(
240+
buffer![10i32, 20, 30],
241+
Validity::from_iter([true, false, true]),
242+
)
243+
.into_array(),
244+
],
245+
3,
246+
Validity::from_iter([true, false, true]),
247+
)?;
248+
249+
assert!(pushed.dtype().is_nullable());
250+
assert_arrays_eq!(pushed, expected, &mut ctx);
251+
Ok(())
252+
}
253+
254+
/// Nulls already present in the fields must be combined with the struct-level null, not
/// replaced by it.
#[test]
255+
fn test_push_validity_into_children_intersects_field_validity() -> VortexResult<()> {
256+
let mut ctx = array_session().create_execution_ctx();
257+
258+
// Fields carry their own nulls (a at row 1, b at row 2) and the struct is null at row 1,
259+
// so pushing intersects both levels rather than overwriting the fields.
260+
let struct_array = StructArray::try_new(
261+
FieldNames::from(["a", "b"]),
262+
vec![
263+
PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(),
264+
PrimitiveArray::from_option_iter([Some(10i64), Some(20), None]).into_array(),
265+
],
266+
3,
267+
Validity::from_iter([true, false, true]),
268+
)?;
269+
270+
let pushed = struct_array.push_validity_into_children(true)?;
271+
272+
// a: null at row 1; b: null at rows 1 and 2.
273+
let expected = StructArray::try_new(
274+
FieldNames::from(["a", "b"]),
275+
vec![
276+
PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(),
277+
PrimitiveArray::from_option_iter([Some(10i64), None, None]).into_array(),
278+
],
279+
3,
280+
Validity::NonNullable,
281+
)?;
282+
283+
assert_arrays_eq!(pushed, expected, &mut ctx);
284+
Ok(())
285+
}
286+
287+
/// A struct with `Validity::AllInvalid` must turn every field all-null, while the result
/// itself is non-nullable because `remove_struct_validity` is `true`.
#[test]
288+
fn test_push_validity_into_children_all_invalid() -> VortexResult<()> {
289+
let mut ctx = array_session().create_execution_ctx();
290+
let struct_array = StructArray::try_new(
291+
FieldNames::from(["a", "b"]),
292+
vec![
293+
buffer![1i32, 2, 3].into_array(),
294+
buffer![10i32, 20, 30].into_array(),
295+
],
296+
3,
297+
Validity::AllInvalid,
298+
)?;
299+
300+
let pushed = struct_array.push_validity_into_children(true)?;
301+
302+
// Every row is null at the struct level, so every field becomes all-null.
303+
let expected = StructArray::try_new(
304+
FieldNames::from(["a", "b"]),
305+
vec![
306+
PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllInvalid).into_array(),
307+
PrimitiveArray::new(buffer![10i32, 20, 30], Validity::AllInvalid).into_array(),
308+
],
309+
3,
310+
Validity::NonNullable,
311+
)?;
312+
313+
assert_arrays_eq!(pushed, expected, &mut ctx);
314+
Ok(())
315+
}
316+
317+
/// With `Validity::AllValid` nothing is masked: dropping the validity yields a non-nullable
/// struct over the same fields, and preserving it yields an array equal to the input.
#[test]
318+
fn test_push_validity_into_children_no_nulls() -> VortexResult<()> {
319+
let mut ctx = array_session().create_execution_ctx();
320+
321+
// No nulls: the fields are untouched, only the top-level nullability changes.
322+
let struct_array = StructArray::try_new(
323+
FieldNames::from(["a", "b"]),
324+
vec![
325+
buffer![1i32, 2, 3].into_array(),
326+
buffer![10i32, 20, 30].into_array(),
327+
],
328+
3,
329+
Validity::AllValid,
330+
)?;
331+
332+
// Case 1: drop the top-level validity.
let dropped = struct_array.push_validity_into_children(true)?;
333+
let expected = StructArray::try_new(
334+
FieldNames::from(["a", "b"]),
335+
vec![
336+
buffer![1i32, 2, 3].into_array(),
337+
buffer![10i32, 20, 30].into_array(),
338+
],
339+
3,
340+
Validity::NonNullable,
341+
)?;
342+
assert!(!dropped.dtype().is_nullable());
343+
assert_arrays_eq!(dropped, expected, &mut ctx);
344+
345+
// Case 2: keep the top-level validity; the result must equal the input.
let preserved = struct_array.push_validity_into_children(false)?;
346+
assert!(preserved.dtype().is_nullable());
347+
assert_arrays_eq!(preserved, struct_array, &mut ctx);
348+
Ok(())
349+
}

0 commit comments

Comments
 (0)