Skip to content

Commit f52d0c9

Browse files
committed
feat: add VariantArrayBuilder::with_shredding
1 parent 7abb225 commit f52d0c9

1 file changed

Lines changed: 52 additions & 4 deletions

File tree

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
//! [`VariantArrayBuilder`] implementation
1919
20-
use crate::VariantArray;
20+
use crate::{VariantArray, shred_variant};
2121
use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
2222
use arrow_schema::{ArrowError, DataType, Field, Fields};
2323
use parquet_variant::{
@@ -96,9 +96,12 @@ pub struct VariantArrayBuilder {
9696
value_offsets: Vec<usize>,
9797
/// The fields of the final `StructArray`
9898
///
99-
/// TODO: 1) Add extension type metadata
100-
/// TODO: 2) Add support for shredding
99+
/// TODO: Add extension type metadata
101100
fields: Fields,
101+
/// Optional shredding schema. When set, [`build`](Self::build) returns a
102+
/// shredded `VariantArray` produced by [`shred_variant`](crate::shred_variant)
103+
/// against this Arrow `DataType` (used as the `typed_value` layout).
104+
shredding_schema: Option<DataType>,
102105
}
103106

104107
impl VariantArrayBuilder {
@@ -114,9 +117,21 @@ impl VariantArrayBuilder {
114117
value_builder: ValueBuilder::new(),
115118
value_offsets: Vec::with_capacity(row_capacity),
116119
fields: Fields::from(vec![metadata_field, value_field]),
120+
shredding_schema: None,
117121
}
118122
}
119123

124+
/// Configure this builder to produce a shredded [`VariantArray`].
125+
///
126+
/// Rows are still appended in the unshredded form via
127+
/// [`append_variant`](Self::append_variant); the shredding pass runs once at
128+
/// [`build`](Self::build) time by delegating to
129+
/// [`shred_variant`](crate::shred_variant).
130+
pub fn with_shredding(mut self, as_type: DataType) -> Self {
131+
self.shredding_schema = Some(as_type);
132+
self
133+
}
134+
120135
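/// Consume the builder and return the final [`VariantArray`].
///
/// If a shredding schema was configured, the array is passed through
/// [`shred_variant`](crate::shred_variant) before being returned.
///
/// # Panics
///
/// Panics if a shredding schema was configured and
/// [`shred_variant`](crate::shred_variant) returns an error.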
121136
pub fn build(self) -> VariantArray {
122137
let Self {
@@ -126,6 +141,7 @@ impl VariantArrayBuilder {
126141
value_builder,
127142
value_offsets,
128143
fields,
144+
shredding_schema,
129145
} = self;
130146

131147
let metadata_buffer = metadata_builder.into_inner();
@@ -145,7 +161,11 @@ impl VariantArrayBuilder {
145161
);
146162
// TODO add arrow extension type metadata
147163

148-
VariantArray::try_new(&inner).expect("valid VariantArray by construction")
164+
let unshredded = VariantArray::try_new(&inner).expect("valid VariantArray by construction");
165+
match shredding_schema {
166+
Some(as_type) => shred_variant(&unshredded, &as_type).expect("shred_variant failed"),
167+
None => unshredded,
168+
}
149169
}
150170

151171
/// Appends a null row to the builder.
@@ -659,4 +679,32 @@ mod test {
659679
assert_eq!(array.value(2), array2.value(2).get_list_element(0).unwrap());
660680
assert_eq!(array.value(2), array2.value(2).get_list_element(1).unwrap());
661681
}
682+
683+
#[test]
fn with_shredding_round_trip_primitive_long() {
    // Two Int64 rows followed by one null row, built with an Int64 shredding schema.
    let mut builder = VariantArrayBuilder::new(3).with_shredding(DataType::Int64);
    for value in [42, 100] {
        builder.append_variant(Variant::Int64(value));
    }
    builder.append_null();

    let shredded = builder.build();

    // The built array must expose a typed_value field and keep all three rows.
    let has_typed_value = shredded.typed_value_field().is_some();
    assert!(has_typed_value, "shredded array must have typed_value");
    assert_eq!(shredded.len(), 3);
}
697+
#[test]
fn with_shredding_produces_typed_value_column() {
    // Shredding schema: a struct with a single nullable Utf8 field named "brand".
    let brand_field = Field::new("brand", DataType::Utf8, true);
    let schema = DataType::Struct(vec![brand_field].into());

    // One object row carrying "brand", then one null row.
    let mut builder = VariantArrayBuilder::new(2).with_shredding(schema);
    builder
        .new_object()
        .with_field("brand", "Apple")
        .finish();
    builder.append_null();

    let shredded = builder.build();

    // The built array must expose a typed_value field and keep both rows.
    let has_typed_value = shredded.typed_value_field().is_some();
    assert!(has_typed_value, "shredded array must have typed_value");
    assert_eq!(shredded.len(), 2);
}
662710
}

0 commit comments

Comments
 (0)