Skip to content

Commit 54d4015

Browse files
committed
More docs and cleanup
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 5447678 commit 54d4015

10 files changed

Lines changed: 277 additions & 732 deletions

File tree

PROMPT.md

Lines changed: 0 additions & 51 deletions
This file was deleted.

encodings/parquet-variant/src/array.rs

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ use vortex_array::builtins::ArrayBuiltins;
2929
use vortex_array::dtype::DType;
3030
use vortex_array::dtype::FieldName;
3131
use vortex_array::dtype::FieldNames;
32-
use vortex_array::dtype::Nullability;
3332
use vortex_array::smallvec::smallvec;
3433
use vortex_array::validity::Validity;
3534
use vortex_array::vtable::child_to_validity;
@@ -78,33 +77,29 @@ impl ParquetVariant {
7877
/// Converts an Arrow `parquet_variant_compute::VariantArray` into a Vortex `ArrayRef`
7978
/// wrapping `VariantArray(ParquetVariantArray(...))`.
8079
pub fn from_arrow_variant(arrow_variant: &ArrowVariantArray) -> VortexResult<ArrayRef> {
81-
Self::from_arrow_variant_impl(arrow_variant, None)
80+
Self::from_arrow_variant_impl(arrow_variant, false)
8281
}
8382

84-
pub(crate) fn from_arrow_variant_with_nullability(
83+
pub(crate) fn from_arrow_variant_nullable(
8584
arrow_variant: &ArrowVariantArray,
86-
nullability: Nullability,
8785
) -> VortexResult<ArrayRef> {
88-
Self::from_arrow_variant_impl(arrow_variant, Some(nullability))
86+
Self::from_arrow_variant_impl(arrow_variant, true)
8987
}
9088

9189
fn from_arrow_variant_impl(
9290
arrow_variant: &ArrowVariantArray,
93-
forced_nullability: Option<Nullability>,
91+
force_nullable: bool,
9492
) -> VortexResult<ArrayRef> {
9593
let storage = arrow_variant.inner();
96-
let value_nullable = storage
97-
.fields()
98-
.iter()
99-
.find(|field| field.name() == "value")
100-
.map(|field| field.is_nullable())
101-
.unwrap_or(false);
102-
let typed_value_nullable = storage
103-
.fields()
104-
.iter()
105-
.find(|field| field.name() == "typed_value")
106-
.map(|field| field.is_nullable())
107-
.unwrap_or(false);
94+
let mut value_nullable = false;
95+
let mut typed_value_nullable = false;
96+
for field in storage.fields() {
97+
match field.name().as_str() {
98+
"value" => value_nullable = field.is_nullable(),
99+
"typed_value" => typed_value_nullable = field.is_nullable(),
100+
_ => {}
101+
}
102+
}
108103
let validity = arrow_variant
109104
.nulls()
110105
.map(|nulls| {
@@ -114,9 +109,10 @@ impl ParquetVariant {
114109
Validity::from(BitBuffer::from(nulls.inner().clone()))
115110
}
116111
})
117-
.unwrap_or(match forced_nullability {
118-
Some(Nullability::Nullable) => Validity::AllValid,
119-
Some(Nullability::NonNullable) | None => Validity::NonNullable,
112+
.unwrap_or(if force_nullable {
113+
Validity::AllValid
114+
} else {
115+
Validity::NonNullable
120116
});
121117
let metadata =
122118
ArrayRef::from_arrow(arrow_variant.metadata_field() as &dyn ArrowArray, false)?;
@@ -144,6 +140,11 @@ impl ParquetVariant {
144140
pub(crate) fn core_storage_without_typed_value(
145141
array: &Array<ParquetVariant>,
146142
) -> VortexResult<ArrayRef> {
143+
// `ParquetVariant::validate` requires at least one of `value`/`typed_value` to be present
144+
// (matching the Arrow canonical extension contract). When we lift `typed_value` out into
145+
// the outer `VariantArray::shredded` slot and the original had no `value`, synthesize an
146+
// all-null `value` so the remaining `ParquetVariant` still satisfies that invariant and
147+
// can round-trip back through `to_arrow`.
147148
let value = array.value_array().cloned().or_else(|| {
148149
array.typed_value_array().map(|_| {
149150
VarBinViewArray::from_iter_nullable_bin(std::iter::repeat_n(None::<&[u8]>, array.len()))
@@ -280,28 +281,34 @@ fn inferred_shredded_field_validity(
280281
Ok(Validity::from_iter(validity))
281282
}
282283

284+
/// Accessors and Arrow conversion for Parquet Variant storage arrays.
283285
pub trait ParquetVariantArrayExt: TypedArrayRef<ParquetVariant> {
286+
/// Returns the non-nullable Parquet Variant metadata child.
284287
fn metadata_array(&self) -> &ArrayRef {
285288
self.as_ref().slots()[METADATA_SLOT]
286289
.as_ref()
287290
.vortex_expect("ParquetVariantArray metadata slot")
288291
}
289292

293+
/// Returns the outer row validity for the Variant values.
290294
fn validity(&self) -> Validity {
291295
child_to_validity(
292296
self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
293297
self.as_ref().dtype().nullability(),
294298
)
295299
}
296300

301+
/// Returns the optional raw Parquet Variant `value` child.
297302
fn value_array(&self) -> Option<&ArrayRef> {
298303
self.as_ref().slots()[VALUE_SLOT].as_ref()
299304
}
300305

306+
/// Returns the optional shredded Parquet Variant `typed_value` child.
301307
fn typed_value_array(&self) -> Option<&ArrayRef> {
302308
self.as_ref().slots()[TYPED_VALUE_SLOT].as_ref()
303309
}
304310

311+
/// Converts this storage array to Arrow's canonical Parquet Variant extension storage.
305312
fn to_arrow(&self, ctx: &mut ExecutionCtx) -> VortexResult<ArrowVariantArray> {
306313
let metadata = self.metadata_array();
307314
let len = metadata.len();
@@ -385,7 +392,7 @@ mod tests {
385392
let variant_view = vortex_arr
386393
.as_opt::<Variant>()
387394
.ok_or_else(|| vortex_err!("expected variant array"))?;
388-
let child = variant_view.child();
395+
let child = variant_view.core_storage();
389396
let inner = child
390397
.as_opt::<ParquetVariant>()
391398
.ok_or_else(|| vortex_err!("expected parquet variant child"))?;
@@ -488,7 +495,7 @@ mod tests {
488495
PrimitiveArray::from_option_iter([Some(10), None, Some(30)])
489496
);
490497
let inner = variant_arr
491-
.child()
498+
.core_storage()
492499
.as_opt::<ParquetVariant>()
493500
.ok_or_else(|| vortex_err!("expected parquet variant child"))?;
494501
assert!(inner.typed_value_array().is_none());

encodings/parquet-variant/src/kernel.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ use vortex_array::arrays::slice::SliceExecuteAdaptor;
2727
use vortex_array::arrays::slice::SliceKernel;
2828
use vortex_array::arrow::FromArrowArray;
2929
use vortex_array::dtype::DType;
30-
use vortex_array::dtype::Nullability;
3130
use vortex_array::kernel::ExecuteParentKernel;
3231
use vortex_array::kernel::ParentKernelSet;
3332
use vortex_array::scalar_fn::fns::variant_get::VariantGet;
@@ -72,18 +71,11 @@ impl ExecuteParentKernel<ParquetVariant> for VariantGetExecute {
7271
.with_as_type(to_arrow_as_type(parent.options.dtype())?);
7372

7473
let arrow_output = arrow_variant_get(&arrow_input, get_options)?;
75-
let output = if parent
76-
.options
77-
.dtype()
78-
.is_some_and(|dtype| !dtype.is_variant())
79-
{
80-
ArrayRef::from_arrow(arrow_output.as_ref(), true)?
81-
} else {
74+
let output = if parent.options.dtype().is_none_or(DType::is_variant) {
8275
let arrow_variant_output = ArrowVariantArray::try_new(arrow_output.as_ref())?;
83-
ParquetVariant::from_arrow_variant_with_nullability(
84-
&arrow_variant_output,
85-
Nullability::Nullable,
86-
)?
76+
ParquetVariant::from_arrow_variant_nullable(&arrow_variant_output)?
77+
} else {
78+
ArrayRef::from_arrow(arrow_output.as_ref(), true)?
8779
};
8880

8981
vortex_ensure_eq!(
@@ -100,7 +92,7 @@ fn to_parquet_variant_path(path: &VariantPath) -> VortexResult<PqVariantPath<'st
10092
.iter()
10193
.map(|element| match element {
10294
VariantPathElement::Field(name) => Ok(PqVariantPathElement::field(Cow::Owned(
103-
name.as_ref().to_string(),
95+
name.as_ref().to_owned(),
10496
))),
10597
VariantPathElement::Index(index) => {
10698
let index = usize::try_from(*index)

encodings/parquet-variant/src/operations.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,8 @@ mod tests {
531531
);
532532

533533
let variant_view = vortex_arr.as_opt::<Variant>().unwrap();
534-
let child = variant_view.child();
535-
let inner_pv = child.as_opt::<ParquetVariant>().unwrap();
534+
let storage = variant_view.core_storage();
535+
let inner_pv = storage.as_opt::<ParquetVariant>().unwrap();
536536
let mut ctx = LEGACY_SESSION.create_execution_ctx();
537537
let roundtripped = inner_pv.to_arrow(&mut ctx)?;
538538
assert_eq!(roundtripped.inner().null_count(), 2);

0 commit comments

Comments
 (0)