1717
1818//! [`VariantArrayBuilder`] implementation
1919
20- use crate :: VariantArray ;
20+ use crate :: { VariantArray , shred_variant } ;
2121use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
2222use arrow_schema:: { ArrowError , DataType , Field , Fields } ;
2323use parquet_variant:: {
@@ -37,9 +37,6 @@ use std::sync::Arc;
3737/// This builder always creates a `VariantArray` using [`BinaryViewArray`] for both
3838/// the metadata and value fields.
3939///
40- /// # TODO
41- /// 1. Support shredding: <https://github.com/apache/arrow-rs/issues/7895>
42- ///
4340/// ## Example:
4441/// ```
4542/// # use arrow::array::Array;
@@ -82,6 +79,34 @@ use std::sync::Arc;
8279/// let value = variant_array.value(4);
8380/// assert_eq!(value, Variant::ShortString(ShortString::try_new("norm").unwrap()));
8481/// ```
82+ ///
83+ /// ## Shredded Example
84+ ///
85+ /// Use [`Self::build_shredded`] with [`ShreddedSchemaBuilder`](crate::ShreddedSchemaBuilder) to produce a
86+ /// shredded [`VariantArray`] where known fields are extracted into typed columns.
87+ ///
88+ /// ```
89+ /// # use arrow::array::Array;
90+ /// # use arrow_schema::DataType;
91+ /// # use parquet_variant::{Variant, VariantBuilderExt};
92+ /// # use parquet_variant_compute::{ShreddedSchemaBuilder, VariantArrayBuilder};
93+ /// let schema = ShreddedSchemaBuilder::default()
94+ /// .with_path("brand", &DataType::Utf8).unwrap()
95+ /// .with_path("price", &DataType::Float64).unwrap()
96+ /// .build();
97+ ///
98+ /// let mut builder = VariantArrayBuilder::new(3);
99+ /// builder.new_object().with_field("brand", "Apple").with_field("price", 999.0f64).finish();
100+ /// builder.new_object().with_field("brand", "Samsung").finish();
101+ /// builder.append_null();
102+ ///
103+ /// let arr = builder.build_shredded(&schema).unwrap();
104+ /// assert_eq!(arr.len(), 3);
105+ /// assert!(arr.typed_value_field().is_some());
106+ /// assert!(!arr.is_null(0));
107+ /// assert!(!arr.is_null(1));
108+ /// assert!(arr.is_null(2));
109+ /// ```
85110#[ derive( Debug ) ]
86111pub struct VariantArrayBuilder {
87112 /// Nulls
@@ -96,8 +121,7 @@ pub struct VariantArrayBuilder {
96121 value_offsets : Vec < usize > ,
97122 /// The fields of the final `StructArray`
98123 ///
99- /// TODO: 1) Add extension type metadata
100- /// TODO: 2) Add support for shredding
124+ /// TODO: Add extension type metadata
101125 fields : Fields ,
102126}
103127
@@ -117,7 +141,7 @@ impl VariantArrayBuilder {
117141 }
118142 }
119143
120- /// Build the final builder
144+ /// Build the final [`VariantArray`] (unshredded).
121145 pub fn build ( self ) -> VariantArray {
122146 let Self {
123147 mut nulls,
@@ -134,7 +158,6 @@ impl VariantArrayBuilder {
134158 let value_buffer = value_builder. into_inner ( ) ;
135159 let value_array = binary_view_array_from_buffers ( value_buffer, value_offsets) ;
136160
137- // The build the final struct array
138161 let inner = StructArray :: new (
139162 fields,
140163 vec ! [
@@ -148,6 +171,13 @@ impl VariantArrayBuilder {
148171 VariantArray :: try_new ( & inner) . expect ( "valid VariantArray by construction" )
149172 }
150173
174+ /// Build a shredded [`VariantArray`] using `as_type` as the shredding schema.
175+ /// Use [`ShreddedSchemaBuilder`] to construct `as_type` for struct schemas.
176+ /// Returns `Err` if `as_type` is not a valid variant shredding type.
177+ pub fn build_shredded ( self , as_type : & DataType ) -> Result < VariantArray , ArrowError > {
178+ shred_variant ( & self . build ( ) , as_type)
179+ }
180+
151181 /// Appends a null row to the builder.
152182 pub fn append_null ( & mut self ) {
153183 self . nulls . append_null ( ) ;
@@ -471,6 +501,7 @@ fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> Binar
471501#[ cfg( test) ]
472502mod test {
473503 use super :: * ;
504+ use crate :: ShreddedSchemaBuilder ;
474505 use arrow:: array:: Array ;
475506 use parquet_variant:: { ShortString , Variant } ;
476507
@@ -659,4 +690,190 @@ mod test {
659690 assert_eq ! ( array. value( 2 ) , array2. value( 2 ) . get_list_element( 0 ) . unwrap( ) ) ;
660691 assert_eq ! ( array. value( 2 ) , array2. value( 2 ) . get_list_element( 1 ) . unwrap( ) ) ;
661692 }
693+
#[test]
fn build_shredded_primitive_int64() {
    // Two Int64 rows followed by one null row, shredded as Int64.
    let mut builder = VariantArrayBuilder::new(3);
    for v in [42, 100] {
        builder.append_variant(Variant::Int64(v));
    }
    builder.append_null();

    let shredded = builder.build_shredded(&DataType::Int64).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 3);

    // Only the explicitly appended null row is null at the top level.
    for (row, expect_null) in [false, false, true].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
707+
#[test]
fn build_shredded_primitive_utf8() {
    // One string row and one null row, shredded as Utf8.
    let mut builder = VariantArrayBuilder::new(2);
    builder.append_variant(Variant::from("hello"));
    builder.append_null();

    let shredded = builder.build_shredded(&DataType::Utf8).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 2);

    for (row, expect_null) in [false, true].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
719+
#[test]
fn build_shredded_primitive_float64() {
    // `Variant::Double` carries an f64, which is what a Float64 shredding
    // schema targets (`Variant::Float` is the f32 variant). The literal is
    // chosen so it does not approximate a std constant, which would trip
    // clippy's `approx_constant` lint.
    let mut b = VariantArrayBuilder::new(2);
    b.append_variant(Variant::Double(1.5));
    b.append_null();

    let arr = b.build_shredded(&DataType::Float64).unwrap();
    assert!(arr.typed_value_field().is_some());
    assert_eq!(arr.len(), 2);

    // Same top-level null checks as the sibling primitive tests.
    assert!(!arr.is_null(0));
    assert!(arr.is_null(1));
}
729+
#[test]
fn build_shredded_primitive_bool() {
    // Both boolean variants, shredded as Boolean.
    let mut builder = VariantArrayBuilder::new(2);
    for flag in [Variant::BooleanTrue, Variant::BooleanFalse] {
        builder.append_variant(flag);
    }

    let shredded = builder.build_shredded(&DataType::Boolean).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 2);
}
739+
#[test]
fn build_shredded_type_mismatch_falls_back_to_value_column() {
    // Row 0: matches Int64 -> typed_value non-null, value null
    // Row 1: string, does not match -> value non-null, typed_value null
    let mut b = VariantArrayBuilder::new(2);
    b.append_variant(Variant::Int64(7));
    b.append_variant(Variant::from("not an int"));

    let arr = b.build_shredded(&DataType::Int64).unwrap();
    assert_eq!(arr.len(), 2);
    assert!(!arr.is_null(0));
    assert!(!arr.is_null(1));

    // Check the per-column layout described above, not just that the
    // columns exist.
    let typed_value = arr
        .typed_value_field()
        .expect("shredded array has a typed_value column");
    assert!(typed_value.is_valid(0));
    assert!(typed_value.is_null(1));

    let value = arr
        .value_field()
        .expect("shredded array has a value column");
    assert!(value.is_null(0));
    assert!(value.is_valid(1));

    // Whichever column holds a row, reading it back yields the original variant.
    assert_eq!(arr.value(0), Variant::Int64(7));
    assert_eq!(arr.value(1), Variant::from("not an int"));
}
753+
#[test]
fn build_shredded_struct_single_field() {
    // Struct schema with a single nullable Utf8 field, written out by hand.
    let brand_field = Field::new("brand", DataType::Utf8, true);
    let schema = DataType::Struct(Fields::from(vec![brand_field]));

    let mut builder = VariantArrayBuilder::new(3);
    for brand in ["Apple", "Samsung"] {
        builder.new_object().with_field("brand", brand).finish();
    }
    builder.append_null();

    let shredded = builder.build_shredded(&schema).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 3);

    for (row, expect_null) in [false, false, true].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
768+
#[test]
fn build_shredded_struct_multi_field() {
    // Two shredded paths; the second row only provides one of them.
    let with_name = ShreddedSchemaBuilder::default()
        .with_path("name", &DataType::Utf8)
        .unwrap();
    let schema = with_name
        .with_path("age", &DataType::Int32)
        .unwrap()
        .build();

    let mut builder = VariantArrayBuilder::new(2);
    builder
        .new_object()
        .with_field("name", "Alice")
        .with_field("age", 30i32)
        .finish();
    builder.new_object().with_field("name", "Bob").finish();

    let shredded = builder.build_shredded(&schema).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 2);
}
787+
#[test]
fn build_shredded_nested_struct() {
    // Both shredded paths live under the same nested `address` object.
    let with_city = ShreddedSchemaBuilder::default()
        .with_path("address.city", &DataType::Utf8)
        .unwrap();
    let schema = with_city
        .with_path("address.zip", &DataType::Utf8)
        .unwrap()
        .build();

    let mut builder = VariantArrayBuilder::new(2);
    {
        // The outer object borrows the builder, so keep it in its own scope.
        let mut outer = builder.new_object();
        outer
            .new_object("address")
            .with_field("city", "NYC")
            .with_field("zip", "10001")
            .finish();
        outer.finish();
    }
    builder.append_null();

    let shredded = builder.build_shredded(&schema).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 2);

    for (row, expect_null) in [false, true].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
812+
#[test]
fn build_shredded_list_of_int64() {
    // List schema whose nullable element field is Int64.
    let item_field = Field::new("item", DataType::Int64, true);
    let list_schema = DataType::List(Arc::new(item_field));

    let mut builder = VariantArrayBuilder::new(2);
    builder
        .new_list()
        .with_value(Variant::Int64(1))
        .with_value(Variant::Int64(2))
        .finish();
    builder.append_null();

    let shredded = builder.build_shredded(&list_schema).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 2);

    for (row, expect_null) in [false, true].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
830+
#[test]
fn build_shredded_extend_then_shred() {
    // Rows are supplied in one `extend` call: `None` becomes a null row and
    // the trailing string does not match the Int64 shredding type.
    let rows = [
        Some(Variant::Int64(1)),
        None,
        Some(Variant::Int64(3)),
        Some(Variant::from("oops")),
    ];
    let mut builder = VariantArrayBuilder::new(4);
    builder.extend(rows);

    let shredded = builder.build_shredded(&DataType::Int64).unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 4);

    for (row, expect_null) in [false, true, false, false].into_iter().enumerate() {
        assert_eq!(shredded.is_null(row), expect_null, "row {row}");
    }
}
848+
#[test]
fn build_shredded_all_nulls() {
    // An array made only of null rows still shreds; every row stays null.
    const ROWS: usize = 3;
    let mut builder = VariantArrayBuilder::new(ROWS);
    for _ in 0..ROWS {
        builder.append_null();
    }

    let shredded = builder.build_shredded(&DataType::Int64).unwrap();
    assert_eq!(shredded.len(), ROWS);
    for row in 0..ROWS {
        assert!(shredded.is_null(row));
    }
}
861+
#[test]
fn build_shredded_invalid_type_returns_err() {
    // FixedSizeBinary(17) is expected to be rejected as a shredding type.
    let mut builder = VariantArrayBuilder::new(1);
    builder.append_variant(Variant::Int64(1));

    let unsupported = DataType::FixedSizeBinary(17);
    assert!(builder.build_shredded(&unsupported).is_err());
}
869+
#[test]
fn build_shredded_uuid_fixed_size_binary_16() {
    // Sixteen bytes 0x00..=0x0f, appended through the `&[u8]` conversion.
    // NOTE(review): this presumably yields a binary variant rather than a
    // UUID variant, so the test only shows that FixedSizeBinary(16) is
    // accepted as a shredding type — confirm whether a real UUID value was
    // intended.
    let raw = (0u8..16).collect::<Vec<u8>>();
    let mut builder = VariantArrayBuilder::new(1);
    builder.append_variant(Variant::from(raw.as_slice()));

    let shredded = builder
        .build_shredded(&DataType::FixedSizeBinary(16))
        .unwrap();
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 1);
}
662879}
0 commit comments