@@ -29,7 +29,6 @@ use vortex_array::builtins::ArrayBuiltins;
2929use vortex_array:: dtype:: DType ;
3030use vortex_array:: dtype:: FieldName ;
3131use vortex_array:: dtype:: FieldNames ;
32- use vortex_array:: dtype:: Nullability ;
3332use vortex_array:: smallvec:: smallvec;
3433use vortex_array:: validity:: Validity ;
3534use vortex_array:: vtable:: child_to_validity;
@@ -78,33 +77,29 @@ impl ParquetVariant {
7877 /// Converts an Arrow `parquet_variant_compute::VariantArray` into a Vortex `ArrayRef`
7978 /// wrapping `VariantArray(ParquetVariantArray(...))`.
8079 pub fn from_arrow_variant ( arrow_variant : & ArrowVariantArray ) -> VortexResult < ArrayRef > {
81- Self :: from_arrow_variant_impl ( arrow_variant, None )
80+ Self :: from_arrow_variant_impl ( arrow_variant, false )
8281 }
8382
84- pub ( crate ) fn from_arrow_variant_with_nullability (
83+ pub ( crate ) fn from_arrow_variant_nullable (
8584 arrow_variant : & ArrowVariantArray ,
86- nullability : Nullability ,
8785 ) -> VortexResult < ArrayRef > {
88- Self :: from_arrow_variant_impl ( arrow_variant, Some ( nullability ) )
86+ Self :: from_arrow_variant_impl ( arrow_variant, true )
8987 }
9088
9189 fn from_arrow_variant_impl (
9290 arrow_variant : & ArrowVariantArray ,
93- forced_nullability : Option < Nullability > ,
91+ force_nullable : bool ,
9492 ) -> VortexResult < ArrayRef > {
9593 let storage = arrow_variant. inner ( ) ;
96- let value_nullable = storage
97- . fields ( )
98- . iter ( )
99- . find ( |field| field. name ( ) == "value" )
100- . map ( |field| field. is_nullable ( ) )
101- . unwrap_or ( false ) ;
102- let typed_value_nullable = storage
103- . fields ( )
104- . iter ( )
105- . find ( |field| field. name ( ) == "typed_value" )
106- . map ( |field| field. is_nullable ( ) )
107- . unwrap_or ( false ) ;
94+ let mut value_nullable = false ;
95+ let mut typed_value_nullable = false ;
96+ for field in storage. fields ( ) {
97+ match field. name ( ) . as_str ( ) {
98+ "value" => value_nullable = field. is_nullable ( ) ,
99+ "typed_value" => typed_value_nullable = field. is_nullable ( ) ,
100+ _ => { }
101+ }
102+ }
108103 let validity = arrow_variant
109104 . nulls ( )
110105 . map ( |nulls| {
@@ -114,9 +109,10 @@ impl ParquetVariant {
114109 Validity :: from ( BitBuffer :: from ( nulls. inner ( ) . clone ( ) ) )
115110 }
116111 } )
117- . unwrap_or ( match forced_nullability {
118- Some ( Nullability :: Nullable ) => Validity :: AllValid ,
119- Some ( Nullability :: NonNullable ) | None => Validity :: NonNullable ,
112+ . unwrap_or ( if force_nullable {
113+ Validity :: AllValid
114+ } else {
115+ Validity :: NonNullable
120116 } ) ;
121117 let metadata =
122118 ArrayRef :: from_arrow ( arrow_variant. metadata_field ( ) as & dyn ArrowArray , false ) ?;
@@ -144,6 +140,11 @@ impl ParquetVariant {
144140pub ( crate ) fn core_storage_without_typed_value (
145141 array : & Array < ParquetVariant > ,
146142) -> VortexResult < ArrayRef > {
143+ // `ParquetVariant::validate` requires at least one of `value`/`typed_value` to be present
144+ // (matching the Arrow canonical extension contract). When we lift `typed_value` out into
145+ // the outer `VariantArray::shredded` slot and the original had no `value`, synthesize an
146+ // all-null `value` so the remaining `ParquetVariant` still satisfies that invariant and
147+ // can round-trip back through `to_arrow`.
147148 let value = array. value_array ( ) . cloned ( ) . or_else ( || {
148149 array. typed_value_array ( ) . map ( |_| {
149150 VarBinViewArray :: from_iter_nullable_bin ( std:: iter:: repeat_n ( None :: < & [ u8 ] > , array. len ( ) ) )
@@ -280,28 +281,34 @@ fn inferred_shredded_field_validity(
280281 Ok ( Validity :: from_iter ( validity) )
281282}
282283
284+ /// Accessors and Arrow conversion for Parquet Variant storage arrays.
283285pub trait ParquetVariantArrayExt : TypedArrayRef < ParquetVariant > {
286+ /// Returns the non-nullable Parquet Variant metadata child.
284287 fn metadata_array ( & self ) -> & ArrayRef {
285288 self . as_ref ( ) . slots ( ) [ METADATA_SLOT ]
286289 . as_ref ( )
287290 . vortex_expect ( "ParquetVariantArray metadata slot" )
288291 }
289292
293+ /// Returns the outer row validity for the Variant values.
290294 fn validity ( & self ) -> Validity {
291295 child_to_validity (
292296 self . as_ref ( ) . slots ( ) [ VALIDITY_SLOT ] . as_ref ( ) ,
293297 self . as_ref ( ) . dtype ( ) . nullability ( ) ,
294298 )
295299 }
296300
301+ /// Returns the optional raw Parquet Variant `value` child.
297302 fn value_array ( & self ) -> Option < & ArrayRef > {
298303 self . as_ref ( ) . slots ( ) [ VALUE_SLOT ] . as_ref ( )
299304 }
300305
306+ /// Returns the optional shredded Parquet Variant `typed_value` child.
301307 fn typed_value_array ( & self ) -> Option < & ArrayRef > {
302308 self . as_ref ( ) . slots ( ) [ TYPED_VALUE_SLOT ] . as_ref ( )
303309 }
304310
311+ /// Converts this storage array to Arrow's canonical Parquet Variant extension storage.
305312 fn to_arrow ( & self , ctx : & mut ExecutionCtx ) -> VortexResult < ArrowVariantArray > {
306313 let metadata = self . metadata_array ( ) ;
307314 let len = metadata. len ( ) ;
@@ -385,7 +392,7 @@ mod tests {
385392 let variant_view = vortex_arr
386393 . as_opt :: < Variant > ( )
387394 . ok_or_else ( || vortex_err ! ( "expected variant array" ) ) ?;
388- let child = variant_view. child ( ) ;
395+ let child = variant_view. core_storage ( ) ;
389396 let inner = child
390397 . as_opt :: < ParquetVariant > ( )
391398 . ok_or_else ( || vortex_err ! ( "expected parquet variant child" ) ) ?;
@@ -488,7 +495,7 @@ mod tests {
488495 PrimitiveArray :: from_option_iter( [ Some ( 10 ) , None , Some ( 30 ) ] )
489496 ) ;
490497 let inner = variant_arr
491- . child ( )
498+ . core_storage ( )
492499 . as_opt :: < ParquetVariant > ( )
493500 . ok_or_else ( || vortex_err ! ( "expected parquet variant child" ) ) ?;
494501 assert ! ( inner. typed_value_array( ) . is_none( ) ) ;
0 commit comments