Skip to content

Commit bf95b6f

Browse files
authored
Fix StructArray / StructData after migration (#7305)
Fixes a decompression regression on wide struct datasets by removing StructData metadata duplication and deriving struct metadata from the outer cached dtype/len. --------- Signed-off-by: Nicholas Gates <nick@nickgates.com>
1 parent 4b3d712 commit bf95b6f

File tree

15 files changed

+101
-303
lines changed

15 files changed

+101
-303
lines changed

Cargo.lock

Lines changed: 7 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/src/array/filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ pub fn filter_canonical_array(array: &ArrayRef, filter: &[bool]) -> VortexResult
100100

101101
StructArray::try_new_with_dtype(
102102
filtered_children,
103-
struct_array.struct_fields(),
103+
struct_array.struct_fields().clone(),
104104
filter.iter().filter(|b| **b).map(|b| *b as usize).sum(),
105105
validity,
106106
)

fuzz/src/array/mask.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
122122
let new_validity = mask_validity(&array.validity()?, mask);
123123
StructArray::try_new_with_dtype(
124124
array.unmasked_fields(),
125-
array.struct_fields(),
125+
array.struct_fields().clone(),
126126
array.len(),
127127
new_validity,
128128
)

fuzz/src/array/slice.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ pub fn slice_canonical_array(
6969
.collect::<VortexResult<Vec<_>>>()?;
7070
StructArray::try_new_with_dtype(
7171
sliced_children,
72-
struct_array.struct_fields(),
72+
struct_array.struct_fields().clone(),
7373
stop - start,
7474
validity,
7575
)

vortex-array/public-api.lock

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4170,7 +4170,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
41704170

41714171
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
41724172

4173-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
4173+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
41744174

41754175
impl vortex_array::ValidityVTable<vortex_array::arrays::Struct> for vortex_array::arrays::Struct
41764176

@@ -4198,22 +4198,14 @@ pub fn vortex_array::arrays::Struct::zip(if_true: vortex_array::ArrayView<'_, vo
41984198

41994199
pub struct vortex_array::arrays::struct_::StructData
42004200

4201-
impl vortex_array::arrays::struct_::StructData
4202-
4203-
pub fn vortex_array::arrays::struct_::StructData::build(names: vortex_array::dtype::FieldNames, fields: impl core::convert::Into<alloc::sync::Arc<[vortex_array::ArrayRef]>>, length: usize, validity: vortex_array::validity::Validity) -> Self
4204-
4205-
pub fn vortex_array::arrays::struct_::StructData::names(&self) -> &vortex_array::dtype::FieldNames
4206-
4207-
pub fn vortex_array::arrays::struct_::StructData::new_fieldless_with_len(len: usize) -> Self
4208-
4209-
pub unsafe fn vortex_array::arrays::struct_::StructData::new_unchecked(dtype: vortex_array::dtype::StructFields, fieldless_len: core::option::Option<usize>) -> Self
4210-
4211-
pub fn vortex_array::arrays::struct_::StructData::validate(fields: &[vortex_array::ArrayRef], dtype: &vortex_array::dtype::StructFields, length: usize, validity: &vortex_array::validity::Validity) -> vortex_error::VortexResult<()>
4212-
42134201
impl core::clone::Clone for vortex_array::arrays::struct_::StructData
42144202

42154203
pub fn vortex_array::arrays::struct_::StructData::clone(&self) -> vortex_array::arrays::struct_::StructData
42164204

4205+
impl core::default::Default for vortex_array::arrays::struct_::StructData
4206+
4207+
pub fn vortex_array::arrays::struct_::StructData::default() -> vortex_array::arrays::struct_::StructData
4208+
42174209
impl core::fmt::Debug for vortex_array::arrays::struct_::StructData
42184210

42194211
pub fn vortex_array::arrays::struct_::StructData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
@@ -4242,7 +4234,7 @@ pub fn vortex_array::arrays::struct_::StructArrayExt::names(&self) -> &vortex_ar
42424234

42434235
pub fn vortex_array::arrays::struct_::StructArrayExt::nullability(&self) -> vortex_array::dtype::Nullability
42444236

4245-
pub fn vortex_array::arrays::struct_::StructArrayExt::struct_fields(&self) -> vortex_array::dtype::StructFields
4237+
pub fn vortex_array::arrays::struct_::StructArrayExt::struct_fields(&self) -> &vortex_array::dtype::StructFields
42464238

42474239
pub fn vortex_array::arrays::struct_::StructArrayExt::struct_validity(&self) -> vortex_array::validity::Validity
42484240

@@ -4262,7 +4254,7 @@ pub fn T::names(&self) -> &vortex_array::dtype::FieldNames
42624254

42634255
pub fn T::nullability(&self) -> vortex_array::dtype::Nullability
42644256

4265-
pub fn T::struct_fields(&self) -> vortex_array::dtype::StructFields
4257+
pub fn T::struct_fields(&self) -> &vortex_array::dtype::StructFields
42664258

42674259
pub fn T::struct_validity(&self) -> vortex_array::validity::Validity
42684260

@@ -6416,7 +6408,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
64166408

64176409
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
64186410

6419-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
6411+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
64206412

64216413
impl vortex_array::ValidityVTable<vortex_array::arrays::Struct> for vortex_array::arrays::Struct
64226414

@@ -19458,7 +19450,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
1945819450

1945919451
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
1946019452

19461-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
19453+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
1946219454

1946319455
impl vortex_array::VTable for vortex_array::arrays::VarBin
1946419456

@@ -20474,7 +20466,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
2047420466

2047520467
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2047620468

20477-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
20469+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2047820470

2047920471
impl vortex_array::VTable for vortex_array::arrays::VarBin
2048020472

@@ -23196,7 +23188,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
2319623188

2319723189
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2319823190

23199-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
23191+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2320023192

2320123193
impl vortex_array::VTable for vortex_array::arrays::VarBin
2320223194

@@ -24460,7 +24452,7 @@ pub fn vortex_array::arrays::Struct::serialize(_array: vortex_array::ArrayView<'
2446024452

2446124453
pub fn vortex_array::arrays::Struct::slot_name(array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2446224454

24463-
pub fn vortex_array::arrays::Struct::validate(&self, data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
24455+
pub fn vortex_array::arrays::Struct::validate(&self, _data: &vortex_array::arrays::struct_::StructData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2446424456

2446524457
impl vortex_array::VTable for vortex_array::arrays::VarBin
2446624458

vortex-array/src/arrays/filter/execute/struct_.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ pub fn filter_struct(array: &StructArray, mask: &Arc<MaskValues>) -> StructArray
3535
.map(|a| a.len())
3636
.unwrap_or_else(|| mask.true_count());
3737

38-
StructArray::try_new_with_dtype(fields, array.struct_fields(), length, filtered_validity)
39-
.vortex_expect("filtered StructArray fields have consistent lengths")
38+
StructArray::try_new_with_dtype(
39+
fields,
40+
array.struct_fields().clone(),
41+
length,
42+
filtered_validity,
43+
)
44+
.vortex_expect("filtered StructArray fields have consistent lengths")
4045
}
4146

4247
#[cfg(test)]

vortex-array/src/arrays/masked/execute.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ fn mask_validity_struct(
185185
let fields = array.unmasked_fields();
186186
let struct_fields = array.struct_fields();
187187
// SAFETY: We're only changing validity, not the data structure
188-
Ok(unsafe { StructArray::new_unchecked(fields, struct_fields, len, new_validity) })
188+
Ok(unsafe { StructArray::new_unchecked(fields, struct_fields.clone(), len, new_validity) })
189189
}
190190

191191
fn mask_validity_extension(

0 commit comments

Comments
 (0)