Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 52 additions & 4 deletions parquet-variant-compute/src/variant_array_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! [`VariantArrayBuilder`] implementation

use crate::VariantArray;
use crate::{VariantArray, shred_variant};
use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
use arrow_schema::{ArrowError, DataType, Field, Fields};
use parquet_variant::{
Expand Down Expand Up @@ -96,9 +96,12 @@ pub struct VariantArrayBuilder {
value_offsets: Vec<usize>,
/// The fields of the final `StructArray`
///
/// TODO: 1) Add extension type metadata
/// TODO: 2) Add support for shredding
/// TODO: Add extension type metadata
fields: Fields,
/// Optional shredding schema. When set, [`build`](Self::build) returns a
/// shredded `VariantArray` produced by [`shred_variant`](crate::shred_variant)
/// against this Arrow `DataType` (used as the `typed_value` layout).
shredding_schema: Option<DataType>,
}

impl VariantArrayBuilder {
Expand All @@ -114,9 +117,21 @@ impl VariantArrayBuilder {
value_builder: ValueBuilder::new(),
value_offsets: Vec::with_capacity(row_capacity),
fields: Fields::from(vec![metadata_field, value_field]),
shredding_schema: None,
}
}

/// Configure this builder to produce a shredded [`VariantArray`].
///
/// Rows are still appended in the unshredded form via
/// [`append_variant`](Self::append_variant); the shredding pass runs once at
/// [`build`](Self::build) time by delegating to
/// [`shred_variant`](crate::shred_variant).
pub fn with_shredding(mut self, as_type: DataType) -> Self {
self.shredding_schema = Some(as_type);
self
}

/// Build the final builder
pub fn build(self) -> VariantArray {
let Self {
Expand All @@ -126,6 +141,7 @@ impl VariantArrayBuilder {
value_builder,
value_offsets,
fields,
shredding_schema,
} = self;

let metadata_buffer = metadata_builder.into_inner();
Expand All @@ -145,7 +161,11 @@ impl VariantArrayBuilder {
);
// TODO add arrow extension type metadata

VariantArray::try_new(&inner).expect("valid VariantArray by construction")
let unshredded = VariantArray::try_new(&inner).expect("valid VariantArray by construction");
match shredding_schema {
Some(as_type) => shred_variant(&unshredded, &as_type).expect("shred_variant failed"),
None => unshredded,
}
}

/// Appends a null row to the builder.
Expand Down Expand Up @@ -659,4 +679,32 @@ mod test {
assert_eq!(array.value(2), array2.value(2).get_list_element(0).unwrap());
assert_eq!(array.value(2), array2.value(2).get_list_element(1).unwrap());
}

#[test]
fn with_shredding_round_trip_primitive_long() {
let mut b = VariantArrayBuilder::new(3).with_shredding(DataType::Int64);
b.append_variant(Variant::Int64(42));
b.append_variant(Variant::Int64(100));
b.append_null();
let arr = b.build();
assert!(
arr.typed_value_field().is_some(),
"shredded array must have typed_value"
);
assert_eq!(arr.len(), 3);
}

#[test]
fn with_shredding_produces_typed_value_column() {
let typed = DataType::Struct(vec![Field::new("brand", DataType::Utf8, true)].into());
let mut b = VariantArrayBuilder::new(2).with_shredding(typed);
b.new_object().with_field("brand", "Apple").finish();
b.append_null();
let arr = b.build();
assert!(
arr.typed_value_field().is_some(),
"shredded array must have typed_value"
);
assert_eq!(arr.len(), 2);
}
}