2020use std:: hash:: Hasher ;
2121
2222use arrow_array:: Array as ArrowArray ;
23+ use parquet_variant:: Variant as ParquetVariant ;
2324use prost:: Message ;
2425use vortex_array:: ArrayEq ;
2526use vortex_array:: ArrayHash ;
@@ -38,14 +39,17 @@ use vortex_array::dtype::DType;
3839use vortex_array:: dtype:: Nullability ;
3940use vortex_array:: optimizer:: rules:: ArrayParentReduceRule ;
4041use vortex_array:: optimizer:: rules:: ParentRuleSet ;
42+ use vortex_array:: scalar:: Scalar ;
43+ use vortex_array:: scalar:: ScalarValue ;
44+ use vortex_array:: scalar:: VariantValue ;
4145use vortex_array:: scalar_fn:: fns:: variant_get:: VariantGet ;
4246use vortex_array:: serde:: ArrayChildren ;
4347use vortex_array:: stats:: ArrayStats ;
4448use vortex_array:: stats:: StatsSetRef ;
4549use vortex_array:: validity:: Validity ;
4650use vortex_array:: vtable;
4751use vortex_array:: vtable:: ArrayId ;
48- use vortex_array:: vtable:: NotSupported ;
52+ use vortex_array:: vtable:: OperationsVTable ;
4953use vortex_array:: vtable:: VTable ;
5054use vortex_array:: vtable:: ValidityVTable ;
5155use vortex_array:: vtable:: validity_nchildren;
@@ -237,7 +241,7 @@ impl ParquetVariantArray {
237241impl VTable for ParquetVariantVTable {
238242 type Array = ParquetVariantArray ;
239243 type Metadata = ParquetVariantMetadata ;
240- type OperationsVTable = NotSupported ;
244+ type OperationsVTable = Self ;
241245 type ValidityVTable = Self ;
242246
243247 fn id ( _array : & Self :: Array ) -> ArrayId {
@@ -488,6 +492,149 @@ impl VTable for ParquetVariantVTable {
488492 }
489493}
490494
495+ fn scalar_to_variant_value ( scalar : Scalar ) -> VortexResult < VariantValue > {
496+ if scalar. is_null ( ) {
497+ return Ok ( VariantValue :: Null ) ;
498+ }
499+
500+ Ok ( match scalar. dtype ( ) {
501+ DType :: Null => VariantValue :: Null ,
502+ DType :: Bool ( _) => VariantValue :: Bool ( scalar. as_bool ( ) . value ( ) . unwrap_or ( false ) ) ,
503+ DType :: Primitive ( ..) => VariantValue :: Primitive (
504+ * scalar
505+ . value ( )
506+ . vortex_expect ( "non-null primitive scalar must have a value" )
507+ . as_primitive ( ) ,
508+ ) ,
509+ DType :: Decimal ( ..) => VariantValue :: Decimal (
510+ * scalar
511+ . value ( )
512+ . vortex_expect ( "non-null decimal scalar must have a value" )
513+ . as_decimal ( ) ,
514+ ) ,
515+ DType :: Utf8 ( _) => VariantValue :: Utf8 (
516+ scalar
517+ . value ( )
518+ . vortex_expect ( "non-null utf8 scalar must have a value" )
519+ . as_utf8 ( )
520+ . clone ( ) ,
521+ ) ,
522+ DType :: Binary ( _) => VariantValue :: Binary (
523+ scalar
524+ . value ( )
525+ . vortex_expect ( "non-null binary scalar must have a value" )
526+ . as_binary ( )
527+ . clone ( ) ,
528+ ) ,
529+ DType :: List ( ..) | DType :: FixedSizeList ( ..) => VariantValue :: List (
530+ scalar
531+ . as_list ( )
532+ . elements ( )
533+ . unwrap_or_default ( )
534+ . into_iter ( )
535+ . map ( scalar_to_variant_value)
536+ . collect :: < VortexResult < Vec < _ > > > ( ) ?,
537+ ) ,
538+ DType :: Struct ( fields, _) => VariantValue :: Object (
539+ fields
540+ . names ( )
541+ . iter ( )
542+ . cloned ( )
543+ . zip (
544+ scalar
545+ . as_struct ( )
546+ . fields_iter ( )
547+ . vortex_expect ( "non-null struct scalar must have field values" ) ,
548+ )
549+ . map ( |( name, field) | Ok ( ( name. as_ref ( ) . into ( ) , scalar_to_variant_value ( field) ?) ) )
550+ . collect :: < VortexResult < Vec < _ > > > ( ) ?,
551+ ) ,
552+ DType :: Extension ( _) => VariantValue :: Utf8 ( scalar. to_string ( ) . into ( ) ) ,
553+ DType :: Variant => scalar
554+ . value ( )
555+ . vortex_expect ( "non-null variant scalar must have a value" )
556+ . as_variant ( )
557+ . clone ( ) ,
558+ } )
559+ }
560+
561+ fn parquet_variant_to_variant_value ( variant : ParquetVariant < ' _ , ' _ > ) -> VortexResult < VariantValue > {
562+ Ok ( match variant {
563+ ParquetVariant :: Null => VariantValue :: Null ,
564+ ParquetVariant :: Int8 ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
565+ ParquetVariant :: Int16 ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
566+ ParquetVariant :: Int32 ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
567+ ParquetVariant :: Int64 ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
568+ ParquetVariant :: Float ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
569+ ParquetVariant :: Double ( v) => VariantValue :: Primitive ( v. into ( ) ) ,
570+ ParquetVariant :: BooleanTrue => VariantValue :: Bool ( true ) ,
571+ ParquetVariant :: BooleanFalse => VariantValue :: Bool ( false ) ,
572+ ParquetVariant :: Decimal4 ( v) => VariantValue :: Decimal ( v. integer ( ) . into ( ) ) ,
573+ ParquetVariant :: Decimal8 ( v) => VariantValue :: Decimal ( v. integer ( ) . into ( ) ) ,
574+ ParquetVariant :: Decimal16 ( v) => VariantValue :: Decimal ( v. integer ( ) . into ( ) ) ,
575+ ParquetVariant :: Binary ( v) => VariantValue :: Binary ( v. to_vec ( ) . into ( ) ) ,
576+ ParquetVariant :: String ( v) => VariantValue :: Utf8 ( v. into ( ) ) ,
577+ ParquetVariant :: ShortString ( v) => VariantValue :: Utf8 ( v. as_str ( ) . into ( ) ) ,
578+ ParquetVariant :: Date ( v) => VariantValue :: Utf8 ( v. to_string ( ) . into ( ) ) ,
579+ ParquetVariant :: TimestampMicros ( v) => VariantValue :: Utf8 ( v. to_rfc3339 ( ) . into ( ) ) ,
580+ ParquetVariant :: TimestampNtzMicros ( v) => VariantValue :: Utf8 ( v. to_string ( ) . into ( ) ) ,
581+ ParquetVariant :: TimestampNanos ( v) => VariantValue :: Utf8 ( v. to_rfc3339 ( ) . into ( ) ) ,
582+ ParquetVariant :: TimestampNtzNanos ( v) => VariantValue :: Utf8 ( v. to_string ( ) . into ( ) ) ,
583+ ParquetVariant :: Time ( v) => VariantValue :: Utf8 ( v. to_string ( ) . into ( ) ) ,
584+ ParquetVariant :: Uuid ( v) => VariantValue :: Utf8 ( v. to_string ( ) . into ( ) ) ,
585+ ParquetVariant :: List ( values) => VariantValue :: List (
586+ values
587+ . iter ( )
588+ . map ( parquet_variant_to_variant_value)
589+ . collect :: < VortexResult < Vec < _ > > > ( ) ?,
590+ ) ,
591+ ParquetVariant :: Object ( values) => VariantValue :: Object (
592+ values
593+ . iter ( )
594+ . map ( |( name, value) | Ok ( ( name. into ( ) , parquet_variant_to_variant_value ( value) ?) ) )
595+ . collect :: < VortexResult < Vec < _ > > > ( ) ?,
596+ ) ,
597+ } )
598+ }
599+
600+ impl OperationsVTable < ParquetVariantVTable > for ParquetVariantVTable {
601+ fn scalar_at ( array : & ParquetVariantArray , index : usize ) -> VortexResult < Scalar > {
602+ if array. validity . is_null ( index) ? {
603+ return Ok ( Scalar :: null ( DType :: Variant ) ) ;
604+ }
605+
606+ let value = if let Some ( typed_value) = array. typed_value_array ( )
607+ && typed_value. is_valid ( index) ?
608+ {
609+ scalar_to_variant_value ( typed_value. scalar_at ( index) ?) ?
610+ } else if let Some ( value) = array. value_array ( )
611+ && value. is_valid ( index) ?
612+ {
613+ let metadata = array
614+ . metadata_array ( )
615+ . scalar_at ( index) ?
616+ . as_binary ( )
617+ . value ( )
618+ . cloned ( )
619+ . vortex_expect ( "non-null metadata row must have binary value" ) ;
620+ let value = value
621+ . scalar_at ( index) ?
622+ . as_binary ( )
623+ . value ( )
624+ . cloned ( )
625+ . vortex_expect ( "non-null value row must have binary value" ) ;
626+ parquet_variant_to_variant_value ( ParquetVariant :: try_new (
627+ metadata. as_ref ( ) ,
628+ value. as_ref ( ) ,
629+ ) ?) ?
630+ } else {
631+ VariantValue :: Null
632+ } ;
633+
634+ Scalar :: try_new ( DType :: Variant , Some ( ScalarValue :: Variant ( value) ) )
635+ }
636+ }
637+
491638const PARENT_RULES : ParentRuleSet < ParquetVariantVTable > =
492639 ParentRuleSet :: new ( & [ ParentRuleSet :: lift ( & ParquetVariantGetRule ) ] ) ;
493640
0 commit comments