Skip to content

Commit 2a66e39

Browse files
committed
CR comments and better chunked variant handling
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 4c81d21 commit 2a66e39

9 files changed

Lines changed: 415 additions & 50 deletions

File tree

encodings/parquet-variant/src/array.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ use vortex_array::EmptyArrayData;
1414
use vortex_array::ExecutionCtx;
1515
use vortex_array::IntoArray;
1616
use vortex_array::TypedArrayRef;
17+
use vortex_array::arrays::ConstantArray;
1718
use vortex_array::arrays::List;
1819
use vortex_array::arrays::ListArray;
1920
use vortex_array::arrays::Struct;
2021
use vortex_array::arrays::StructArray;
21-
use vortex_array::arrays::VarBinViewArray;
2222
use vortex_array::arrays::VariantArray;
2323
use vortex_array::arrays::list::ListArrayExt;
2424
use vortex_array::arrays::struct_::StructArrayExt;
@@ -30,6 +30,7 @@ use vortex_array::dtype::DType;
3030
use vortex_array::dtype::FieldName;
3131
use vortex_array::dtype::FieldNames;
3232
use vortex_array::dtype::Nullability;
33+
use vortex_array::scalar::Scalar;
3334
use vortex_array::smallvec::smallvec;
3435
use vortex_array::validity::Validity;
3536
use vortex_array::vtable::child_to_validity;
@@ -39,6 +40,7 @@ use vortex_error::VortexExpect;
3940
use vortex_error::VortexResult;
4041

4142
use crate::ParquetVariant;
43+
use crate::ParquetVariantArray;
4244

4345
/// The validity bitmap indicating which elements are non-null.
4446
pub(crate) const VALIDITY_SLOT: usize = 0;
@@ -131,7 +133,7 @@ impl ParquetVariant {
131133
}
132134

133135
pub(crate) fn core_storage_without_typed_value(
134-
array: &Array<ParquetVariant>,
136+
array: &ParquetVariantArray,
135137
) -> VortexResult<ArrayRef> {
136138
// The spec requires at least one of `value`/`typed_value` to be present
137139
// (matching the Arrow canonical extension contract). When we lift `typed_value` out into
@@ -140,8 +142,11 @@ pub(crate) fn core_storage_without_typed_value(
140142
// can round-trip back through `to_arrow`.
141143
let value = array.value_array().cloned().or_else(|| {
142144
array.typed_value_array().map(|_| {
143-
VarBinViewArray::from_iter_nullable_bin(std::iter::repeat_n(None::<&[u8]>, array.len()))
144-
.into_array()
145+
ConstantArray::new(
146+
Scalar::null(DType::Binary(Nullability::Nullable)),
147+
array.len(),
148+
)
149+
.into_array()
145150
})
146151
});
147152

encodings/parquet-variant/src/kernel.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@ pub(crate) static PARENT_KERNELS: ParentKernelSet<ParquetVariant> = ParentKernel
4444
ParentKernelSet::lift(&FilterExecuteAdaptor(ParquetVariant)),
4545
ParentKernelSet::lift(&SliceExecuteAdaptor(ParquetVariant)),
4646
ParentKernelSet::lift(&TakeExecuteAdaptor(ParquetVariant)),
47-
ParentKernelSet::lift(&VariantGetExecute),
47+
ParentKernelSet::lift(&VariantGetKernel),
4848
]);
4949

5050
#[derive(Default, Debug)]
51-
struct VariantGetExecute;
51+
struct VariantGetKernel;
5252

53-
impl ExecuteParentKernel<ParquetVariant> for VariantGetExecute {
53+
impl ExecuteParentKernel<ParquetVariant> for VariantGetKernel {
5454
type Parent = ExactScalarFn<VariantGet>;
5555

5656
fn execute_parent(
@@ -875,10 +875,18 @@ mod tests {
875875
}
876876

877877
#[test]
878-
fn test_variant_get_canonical_shredded_rewrites_to_core_storage() -> VortexResult<()> {
878+
fn test_variant_get_canonicalized_parquet_uses_top_level_shredded() -> VortexResult<()> {
879879
let canonical = make_partially_shredded_object_array()?;
880-
let shredded = canonical
881-
.as_::<Variant>()
880+
let canonical_variant = canonical.as_::<Variant>();
881+
882+
let core_storage = canonical_variant
883+
.core_storage()
884+
.as_opt::<ParquetVariant>()
885+
.ok_or_else(|| vortex_err!("expected parquet variant core storage"))?;
886+
assert!(core_storage.typed_value_array().is_none());
887+
assert!(core_storage.value_array().is_some());
888+
889+
let shredded = canonical_variant
882890
.shredded()
883891
.ok_or_else(|| vortex_err!("expected canonical shredded child"))?
884892
.clone()

0 commit comments

Comments
 (0)