diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f81d4966b42..a37f04b26fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -202,7 +202,7 @@ jobs: target: wasm32-unknown-unknown env: rustflags: "RUSTFLAGS='-A warnings --cfg getrandom_backend=\"wasm_js\"'" - args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest" + args: "--target wasm32-unknown-unknown --exclude vortex --exclude vortex-cuda --exclude vortex-cub --exclude vortex-nvcomp --exclude vortex-datafusion --exclude vortex-duckdb --exclude vortex-tui --exclude vortex-zstd --exclude vortex-test-e2e-cuda --exclude vortex-sqllogictest --exclude vortex-parquet-variant" steps: - uses: runs-on/action@v2 if: github.repository == 'vortex-data/vortex' diff --git a/Cargo.lock b/Cargo.lock index cbff57eb15b..1920758654d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6480,6 +6480,50 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet-variant" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" +dependencies = [ + "arrow-schema", + "chrono", + "half", + "indexmap", + "simdutf8", + "uuid", +] + +[[package]] +name = "parquet-variant-compute" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" +dependencies = [ + "arrow", + "arrow-schema", + "chrono", + "half", + "indexmap", + "parquet-variant", + "parquet-variant-json", + "uuid", +] + +[[package]] +name = "parquet-variant-json" +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" +dependencies = [ + "arrow-schema", + "base64", + "chrono", + "parquet-variant", + "serde_json", + "uuid", +] + [[package]] name = "paste" version = "1.0.15" @@ -10414,6 +10458,25 @@ dependencies = [ "vortex-cuda-macros", ] +[[package]] +name = "vortex-parquet-variant" +version = "0.1.0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "parquet-variant", + "parquet-variant-compute", + "prost 0.14.3", + "rstest", + "vortex-array", + "vortex-buffer", + "vortex-error", + "vortex-mask", + "vortex-proto", + "vortex-session", +] + [[package]] name = "vortex-pco" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 0d6853627a8..32ac44128ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ members = [ "benchmarks/duckdb-bench", "benchmarks/random-access-bench", "vortex-sqllogictest", + "encodings/parquet-variant", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" @@ -87,16 +88,16 @@ arbitrary = "1.3.2" arc-swap = "1.8" arcref = "0.2.0" arrayref = "0.3.7" -arrow-arith = "57.1" -arrow-array = "57.1" -arrow-buffer = "57.1" -arrow-cast = "57.1" -arrow-data = "57.1" -arrow-ipc = "57.1" -arrow-ord = "57.1" -arrow-schema = "57.1" -arrow-select = "57.1" -arrow-string = "57.1" +arrow-arith = "57.2" +arrow-array = "57.2" +arrow-buffer = "57.2" +arrow-cast = "57.2" +arrow-data = "57.2" +arrow-ipc = "57.2" +arrow-ord = "57.2" +arrow-schema = "57.2" +arrow-select = "57.2" +arrow-string = "57.2" async-fs = "2.2.0" async-lock = "3.4" async-stream = "0.3.6" @@ -182,7 +183,9 @@ opentelemetry = "0.31.0" opentelemetry-otlp = "0.31.0" opentelemetry_sdk = "0.31.0" parking_lot = { version = "0.12.3", features = ["nightly"] } -parquet = "57.1" +parquet = "57.2" +parquet-variant = "57.2" +parquet-variant-compute = "57.2" paste = "1.0.15" pco = "1.0.1" pin-project-lite = "0.2.15" diff --git a/encodings/parquet-variant/Cargo.toml b/encodings/parquet-variant/Cargo.toml new file mode 100644 index 00000000000..59f33cf9a07 --- /dev/null +++ b/encodings/parquet-variant/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "vortex-parquet-variant" +authors = { workspace = true } +categories = { workspace = true } +description = "Vortex Pco array" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +arrow-array = { workspace = true } +parquet-variant = { workspace = true } +parquet-variant-compute = { workspace = true } +prost = { workspace = true } +vortex-array = { workspace = true } +vortex-buffer = { workspace = true } +vortex-error = { workspace = true } +vortex-mask = { workspace = true } +vortex-proto = { workspace = true } +vortex-session = { workspace = true } + +[dev-dependencies] +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +parquet-variant = { workspace = true } +rstest = { workspace = true } +vortex-array = { workspace = true, features = ["_test-harness"] } + +[package.metadata.cargo-machete] +ignored = ["getrandom_v03"] diff --git a/encodings/parquet-variant/public-api.lock b/encodings/parquet-variant/public-api.lock new file mode 100644 index 00000000000..f83db5b8a30 --- /dev/null +++ b/encodings/parquet-variant/public-api.lock @@ -0,0 +1,129 @@ +pub mod vortex_parquet_variant + +pub struct vortex_parquet_variant::ParquetVariant + +impl vortex_parquet_variant::ParquetVariant + +pub const vortex_parquet_variant::ParquetVariant::ID: vortex_array::vtable::dyn_::ArrayId + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariant + +pub fn vortex_parquet_variant::ParquetVariant::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::vtable::VTable for vortex_parquet_variant::ParquetVariant + +pub type vortex_parquet_variant::ParquetVariant::Array = vortex_parquet_variant::ParquetVariantArray + +pub type vortex_parquet_variant::ParquetVariant::Metadata = vortex_parquet_variant::ParquetVariantMetadata + +pub type vortex_parquet_variant::ParquetVariant::OperationsVTable = vortex_parquet_variant::ParquetVariant + +pub type vortex_parquet_variant::ParquetVariant::ValidityVTable = vortex_parquet_variant::ParquetVariant + +pub fn vortex_parquet_variant::ParquetVariant::array_eq(array: &vortex_parquet_variant::ParquetVariantArray, other: &vortex_parquet_variant::ParquetVariantArray, precision: vortex_array::hash::Precision) -> bool + +pub fn vortex_parquet_variant::ParquetVariant::array_hash(array: &vortex_parquet_variant::ParquetVariantArray, state: &mut H, precision: vortex_array::hash::Precision) + +pub fn vortex_parquet_variant::ParquetVariant::buffer(_array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_parquet_variant::ParquetVariant::buffer_name(_array: &vortex_parquet_variant::ParquetVariantArray, _idx: usize) -> core::option::Option + +pub fn vortex_parquet_variant::ParquetVariant::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariant::child(array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> vortex_array::array::ArrayRef + +pub fn vortex_parquet_variant::ParquetVariant::child_name(array: &vortex_parquet_variant::ParquetVariantArray, idx: usize) -> alloc::string::String + +pub fn vortex_parquet_variant::ParquetVariant::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariant::dtype(array: &vortex_parquet_variant::ParquetVariantArray) -> &vortex_array::dtype::DType + +pub fn vortex_parquet_variant::ParquetVariant::execute(array: &Self::Array, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariant::id(_array: &Self::Array) -> vortex_array::vtable::dyn_::ArrayId + +pub fn vortex_parquet_variant::ParquetVariant::len(array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariant::metadata(array: &vortex_parquet_variant::ParquetVariantArray) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariant::nbuffers(_array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariant::nchildren(array: &vortex_parquet_variant::ParquetVariantArray) -> usize + +pub fn vortex_parquet_variant::ParquetVariant::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_parquet_variant::ParquetVariant::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_parquet_variant::ParquetVariant::stats(array: &vortex_parquet_variant::ParquetVariantArray) -> vortex_array::stats::array::StatsSetRef<'_> + +pub fn vortex_parquet_variant::ParquetVariant::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::operations::OperationsVTable for vortex_parquet_variant::ParquetVariant + +pub fn vortex_parquet_variant::ParquetVariant::scalar_at(array: &vortex_parquet_variant::ParquetVariantArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::validity::ValidityVTable for vortex_parquet_variant::ParquetVariant + +pub fn vortex_parquet_variant::ParquetVariant::validity(array: &vortex_parquet_variant::ParquetVariantArray) -> vortex_error::VortexResult + +pub struct vortex_parquet_variant::ParquetVariantArray + +impl vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::from_arrow_variant(arrow_variant: &parquet_variant_compute::variant_array::VariantArray) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantArray::metadata_array(&self) -> &vortex_array::array::ArrayRef + +pub fn vortex_parquet_variant::ParquetVariantArray::try_new(metadata: vortex_array::array::ArrayRef, value: core::option::Option, typed_value: core::option::Option) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantArray::try_new_with_validity(validity: vortex_array::validity::Validity, metadata: vortex_array::array::ArrayRef, value: core::option::Option, typed_value: core::option::Option) -> vortex_error::VortexResult + +pub fn vortex_parquet_variant::ParquetVariantArray::typed_value_array(&self) -> core::option::Option<&vortex_array::array::ArrayRef> + +pub fn vortex_parquet_variant::ParquetVariantArray::validity(&self) -> &vortex_array::validity::Validity + +pub fn vortex_parquet_variant::ParquetVariantArray::value_array(&self) -> core::option::Option<&vortex_array::array::ArrayRef> + +impl vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::to_array(&self) -> vortex_array::array::ArrayRef + +impl core::clone::Clone for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::clone(&self) -> vortex_parquet_variant::ParquetVariantArray + +impl core::convert::AsRef for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::as_ref(&self) -> &dyn vortex_array::array::DynArray + +impl core::convert::From for vortex_array::array::ArrayRef + +pub fn vortex_array::array::ArrayRef::from(value: vortex_parquet_variant::ParquetVariantArray) -> vortex_array::array::ArrayRef + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_parquet_variant::ParquetVariantArray + +pub type vortex_parquet_variant::ParquetVariantArray::Target = dyn vortex_array::array::DynArray + +pub fn vortex_parquet_variant::ParquetVariantArray::deref(&self) -> &Self::Target + +impl vortex_array::array::IntoArray for vortex_parquet_variant::ParquetVariantArray + +pub fn vortex_parquet_variant::ParquetVariantArray::into_array(self) -> vortex_array::array::ArrayRef + +pub struct vortex_parquet_variant::ParquetVariantMetadata + +pub vortex_parquet_variant::ParquetVariantMetadata::has_value: bool + +pub vortex_parquet_variant::ParquetVariantMetadata::typed_value_dtype: core::option::Option + +impl core::clone::Clone for vortex_parquet_variant::ParquetVariantMetadata + +pub fn vortex_parquet_variant::ParquetVariantMetadata::clone(&self) -> vortex_parquet_variant::ParquetVariantMetadata + +impl core::fmt::Debug for vortex_parquet_variant::ParquetVariantMetadata + +pub fn vortex_parquet_variant::ParquetVariantMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs new file mode 100644 index 00000000000..ce7a929defb --- /dev/null +++ b/encodings/parquet-variant/src/lib.rs @@ -0,0 +1,1007 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! This crate exposes a Vortex encoding that supports variant arrays, encoded as parquet's +//! [Variant encoding], in order to allow for zero-copy export to Arrow's new +//! [canonical extension type]. +//! +//! The encoding follows the Arrow Parquet Variant canonical extension type structure: +//! - `metadata` (binary, required): type information for arrays/objects, field names and offsets +//! - `value` (binary, optional): un-shredded serialized variant values +//! - `typed_value` (any type, optional): shredded column data with a known type +//! +//! At least one of `value` or `typed_value` must be present. The `typed_value` child supports +//! full recursive shredding — it can be a primitive type, a list (whose elements are variant +//! nodes with value/typed_value), or a struct (whose fields are variant nodes). +//! +//! [Variant encoding]: https://parquet.apache.org/docs/file-format/types/variantencoding/ +//! [canonical extension type]: https://arrow.apache.org/docs/format/CanonicalExtensions.html#parquet-variant + +use std::hash::Hasher; + +use arrow_array::Array as ArrowArray; +use parquet_variant::Variant as PqVariant; +use prost::Message; +use vortex_array::ArrayEq; +use vortex_array::ArrayHash; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::ExecutionStep; +use vortex_array::IntoArray; +use vortex_array::Precision; +use vortex_array::arrays::VariantArray; +use vortex_array::arrays::scalar_fn::ExactScalarFn; +use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::arrow::FromArrowArray; +use vortex_array::buffer::BufferHandle; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::optimizer::rules::ArrayParentReduceRule; +use vortex_array::optimizer::rules::ParentRuleSet; +use vortex_array::scalar::Scalar; +use vortex_array::scalar_fn::fns::variant_get::VariantGet; +use vortex_array::serde::ArrayChildren; +use vortex_array::stats::ArrayStats; +use vortex_array::stats::StatsSetRef; +use vortex_array::validity::Validity; +use vortex_array::vtable; +use vortex_array::vtable::ArrayId; +use vortex_array::vtable::OperationsVTable; +use vortex_array::vtable::VTable; +use vortex_array::vtable::ValidityVTable; +use vortex_array::vtable::validity_nchildren; +use vortex_array::vtable::validity_to_child; +use vortex_buffer::BitBuffer; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_error::vortex_panic; +use vortex_proto::dtype as pb; +use vortex_session::VortexSession; + +vtable!(ParquetVariant); + +#[derive(Debug)] +pub struct ParquetVariant; + +impl ParquetVariant { + pub const ID: ArrayId = ArrayId::new_ref("vortex.parquet.variant"); +} + +/// Serialized metadata for a [`ParquetVariantArray`]. +/// +/// Tracks which optional children are present so the array can be correctly +/// reconstructed during deserialization. +#[derive(Clone, Debug)] +pub struct ParquetVariantMetadata { + /// Whether the un-shredded `value` child is present. + pub has_value: bool, + /// DType of the shredded `typed_value`, if present. + /// + /// This is required to deserialize non-variant shredded children. + pub typed_value_dtype: Option, +} + +#[derive(Clone, prost::Message)] +struct ParquetVariantMetadataProto { + /// Whether the un-shredded `value` child is present. + #[prost(bool, tag = "1")] + pub has_value: bool, + /// DType of the shredded `typed_value`, if present. + #[prost(message, optional, tag = "2")] + pub typed_value_dtype: Option, +} + +/// An array encoding that stores variant data in the Parquet Variant binary format. +/// +/// Contains up to three children following the Arrow Parquet Variant canonical extension type: +/// - `metadata` (always present): binary array with variant type information +/// - `value` (optional): binary array with un-shredded serialized variant values +/// - `typed_value` (optional): array of any type with shredded column data +/// +/// At least one of `value` or `typed_value` must be present. +/// The `typed_value` supports full recursive shredding — it can be a primitive, list, or struct +/// where nested struct/list elements themselves contain value/typed_value children. +#[derive(Clone, Debug)] +pub struct ParquetVariantArray { + dtype: DType, + validity: Validity, + metadata: ArrayRef, + value: Option, + typed_value: Option, + stats_set: ArrayStats, +} + +impl ParquetVariantArray { + /// Creates a new ParquetVariantArray. + pub fn try_new( + metadata: ArrayRef, + value: Option, + typed_value: Option, + ) -> VortexResult { + Self::try_new_with_validity(Validity::AllValid, metadata, value, typed_value) + } + + /// Creates a new ParquetVariantArray with explicit parent validity. + pub fn try_new_with_validity( + validity: Validity, + metadata: ArrayRef, + value: Option, + typed_value: Option, + ) -> VortexResult { + vortex_ensure!( + value.is_some() || typed_value.is_some(), + "at least one of value or typed_value must be present" + ); + let len = metadata.len(); + if let Some(validity_len) = validity.maybe_len() { + vortex_ensure!( + validity_len == len, + "validity length must match metadata length" + ); + } + if let Some(ref v) = value { + vortex_ensure!(v.len() == len, "value length must match metadata length"); + } + if let Some(ref tv) = typed_value { + vortex_ensure!( + tv.len() == len, + "typed_value length must match metadata length" + ); + } + let nullability = match &validity { + Validity::NonNullable | Validity::AllValid => Nullability::NonNullable, + _ => Nullability::Nullable, + }; + Ok(Self { + dtype: DType::Variant(nullability), + validity, + metadata, + value, + typed_value, + stats_set: ArrayStats::default(), + }) + } + + /// Returns a reference to the metadata child array. + pub fn metadata_array(&self) -> &ArrayRef { + &self.metadata + } + + /// Returns a reference to the un-shredded value child array, if present. + pub fn value_array(&self) -> Option<&ArrayRef> { + self.value.as_ref() + } + + /// Returns a reference to the shredded typed_value child array, if present. + pub fn typed_value_array(&self) -> Option<&ArrayRef> { + self.typed_value.as_ref() + } + + /// Returns the parent row validity for the variant storage struct. + pub fn validity(&self) -> &Validity { + &self.validity + } + + /// Converts an Arrow `parquet_variant_compute::VariantArray` into a Vortex `ArrayRef` + /// wrapping `VariantArray(ParquetVariantArray(...))`. + pub fn from_arrow_variant( + arrow_variant: &parquet_variant_compute::VariantArray, + ) -> VortexResult { + let storage = arrow_variant.inner(); + let value_nullable = storage + .fields() + .iter() + .find(|field| field.name() == "value") + .map(|field| field.is_nullable()) + .unwrap_or(false); + let typed_value_nullable = storage + .fields() + .iter() + .find(|field| field.name() == "typed_value") + .map(|field| field.is_nullable()) + .unwrap_or(false); + let validity = arrow_variant + .nulls() + .map(|nulls| { + if nulls.null_count() == nulls.len() { + Validity::AllInvalid + } else { + Validity::from(BitBuffer::from(nulls.inner().clone())) + } + }) + .unwrap_or(Validity::AllValid); + let metadata = + ArrayRef::from_arrow(arrow_variant.metadata_field() as &dyn ArrowArray, false)?; + + let value = arrow_variant + .value_field() + .map(|v| ArrayRef::from_arrow(v as &dyn ArrowArray, value_nullable)) + .transpose()?; + + let typed_value = arrow_variant + .typed_value_field() + .map(|tv| ArrayRef::from_arrow(tv.as_ref(), typed_value_nullable)) + .transpose()?; + + let nullability = if matches!(validity, Validity::NonNullable | Validity::AllValid) { + Nullability::NonNullable + } else { + Nullability::Nullable + }; + let pv = + ParquetVariantArray::try_new_with_validity(validity, metadata, value, typed_value)?; + Ok(VariantArray::new(pv.into_array(), nullability).into_array()) + } + + fn nchildren(&self) -> usize { + validity_nchildren(&self.validity) + + 1 + + self.value.is_some() as usize + + self.typed_value.is_some() as usize + } +} + +impl VTable for ParquetVariant { + type Array = ParquetVariantArray; + type Metadata = ParquetVariantMetadata; + type OperationsVTable = Self; + type ValidityVTable = Self; + + fn id(_array: &Self::Array) -> ArrayId { + Self::ID + } + + fn len(array: &ParquetVariantArray) -> usize { + array.metadata.len() + } + + fn dtype(array: &ParquetVariantArray) -> &DType { + &array.dtype + } + + fn stats(array: &ParquetVariantArray) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &ParquetVariantArray, state: &mut H, precision: Precision) { + array.validity.array_hash(state, precision); + array.metadata.array_hash(state, precision); + if let Some(ref value) = array.value { + value.array_hash(state, precision); + } + if let Some(ref typed_value) = array.typed_value { + typed_value.array_hash(state, precision); + } + } + + fn array_eq( + array: &ParquetVariantArray, + other: &ParquetVariantArray, + precision: Precision, + ) -> bool { + if !array.validity.array_eq(&other.validity, precision) + || !array.metadata.array_eq(&other.metadata, precision) + { + return false; + } + match (&array.value, &other.value) { + (Some(a), Some(b)) => { + if !a.array_eq(b, precision) { + return false; + } + } + (None, None) => {} + _ => return false, + } + match (&array.typed_value, &other.typed_value) { + (Some(a), Some(b)) => a.array_eq(b, precision), + (None, None) => true, + _ => false, + } + } + + fn nbuffers(_array: &ParquetVariantArray) -> usize { + 0 + } + + fn buffer(_array: &ParquetVariantArray, idx: usize) -> BufferHandle { + vortex_panic!("ParquetVariantArray buffer index {idx} out of bounds") + } + + fn buffer_name(_array: &ParquetVariantArray, _idx: usize) -> Option { + None + } + + fn nchildren(array: &ParquetVariantArray) -> usize { + array.nchildren() + } + + fn child(array: &ParquetVariantArray, idx: usize) -> ArrayRef { + let vc = validity_nchildren(&array.validity); + if idx < vc { + validity_to_child(&array.validity, array.metadata.len()) + .vortex_expect("ParquetVariantArray validity child out of bounds") + } else { + match idx - vc { + 0 => array.metadata.clone(), + 1 if array.value.is_some() => array + .value + .clone() + .vortex_expect("ParquetVariantArray missing value child"), + 1 => array + .typed_value + .clone() + .vortex_expect("ParquetVariantArray missing typed_value child"), + 2 => array + .typed_value + .clone() + .vortex_expect("ParquetVariantArray missing typed_value child"), + _ => vortex_panic!("ParquetVariantArray child index {idx} out of bounds"), + } + } + } + + fn child_name(array: &ParquetVariantArray, idx: usize) -> String { + let vc = validity_nchildren(&array.validity); + match idx { + idx if idx < vc => "validity".to_string(), + idx => match idx - vc { + 0 => "metadata".to_string(), + 1 if array.value.is_some() => "value".to_string(), + 1 => "typed_value".to_string(), + 2 => "typed_value".to_string(), + _ => vortex_panic!("ParquetVariantArray child_name index {idx} out of bounds"), + }, + } + } + + fn metadata(array: &ParquetVariantArray) -> VortexResult { + Ok(ParquetVariantMetadata { + has_value: array.value.is_some(), + typed_value_dtype: array.typed_value.as_ref().map(|tv| tv.dtype().clone()), + }) + } + + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + let typed_value_dtype = metadata + .typed_value_dtype + .as_ref() + .map(|dtype| dtype.try_into()) + .transpose()?; + Ok(Some( + ParquetVariantMetadataProto { + has_value: metadata.has_value, + typed_value_dtype, + } + .encode_to_vec(), + )) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let proto = ParquetVariantMetadataProto::decode(bytes)?; + let typed_value_dtype = match proto.typed_value_dtype.as_ref() { + Some(dtype) => Some(DType::from_proto(dtype, _session)?), + None => None, + }; + Ok(ParquetVariantMetadata { + has_value: proto.has_value, + typed_value_dtype, + }) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + vortex_ensure!(matches!(dtype, DType::Variant(_)), "Expected Variant DType"); + let has_typed_value = metadata.typed_value_dtype.is_some(); + vortex_ensure!( + metadata.has_value || has_typed_value, + "At least one of value or typed_value must be present" + ); + + let expected_children = 1 + metadata.has_value as usize + has_typed_value as usize; + vortex_ensure!( + children.len() == expected_children || children.len() == expected_children + 1, + "Expected {} or {} children, got {}", + expected_children, + expected_children + 1, + children.len() + ); + + let (validity, mut child_idx) = if children.len() == expected_children { + (Validity::AllValid, 0) + } else { + (Validity::Array(children.get(0, &Validity::DTYPE, len)?), 1) + }; + let variant_metadata = + children.get(child_idx, &DType::Binary(Nullability::NonNullable), len)?; + child_idx += 1; + + let value = if metadata.has_value { + let v = children.get(child_idx, &DType::Binary(Nullability::NonNullable), len)?; + child_idx += 1; + Some(v) + } else { + None + }; + + let typed_value = if has_typed_value { + // typed_value can be any type — primitive, list, struct, etc. + let dtype = metadata + .typed_value_dtype + .clone() + .ok_or_else(|| vortex_err!("typed_value_dtype missing for typed_value child"))?; + let tv = children.get(child_idx, &dtype, len)?; + Some(tv) + } else { + None + }; + + ParquetVariantArray::try_new_with_validity(validity, variant_metadata, value, typed_value) + } + + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == array.nchildren(), + "ParquetVariantArray expects {} children, got {}", + array.nchildren(), + children.len() + ); + let mut iter = children.into_iter(); + if validity_nchildren(&array.validity) == 1 { + array.validity = Validity::Array( + iter.next() + .vortex_expect("ParquetVariantArray missing validity child"), + ); + } + array.metadata = iter + .next() + .vortex_expect("ParquetVariantArray missing metadata child"); + if array.value.is_some() { + array.value = Some( + iter.next() + .vortex_expect("ParquetVariantArray missing value child in with_children"), + ); + } + if array.typed_value.is_some() { + array.typed_value = + Some(iter.next().vortex_expect( + "ParquetVariantArray missing typed_value child in with_children", + )); + } + Ok(()) + } + + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + Ok(ExecutionStep::done(array.clone().into_array())) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +fn parquet_variant_to_scalar(variant: PqVariant<'_, '_>) -> VortexResult { + use vortex_array::dtype::DecimalDType; + use vortex_array::dtype::FieldNames; + use vortex_array::dtype::StructFields; + use vortex_array::scalar::ScalarValue; + + let nn = Nullability::NonNullable; + + Ok(match variant { + PqVariant::Null => Scalar::null(DType::Null), + PqVariant::Int8(v) => Scalar::primitive(v, nn), + PqVariant::Int16(v) => Scalar::primitive(v, nn), + PqVariant::Int32(v) => Scalar::primitive(v, nn), + PqVariant::Int64(v) => Scalar::primitive(v, nn), + PqVariant::Float(v) => Scalar::primitive(v, nn), + PqVariant::Double(v) => Scalar::primitive(v, nn), + PqVariant::BooleanTrue => Scalar::bool(true, nn), + PqVariant::BooleanFalse => Scalar::bool(false, nn), + PqVariant::Decimal4(v) => Scalar::decimal( + v.integer().into(), + DecimalDType::new(9, v.scale() as i8), + nn, + ), + PqVariant::Decimal8(v) => Scalar::decimal( + v.integer().into(), + DecimalDType::new(18, v.scale() as i8), + nn, + ), + PqVariant::Decimal16(v) => Scalar::decimal( + v.integer().into(), + DecimalDType::new(38, v.scale() as i8), + nn, + ), + PqVariant::Binary(v) => Scalar::binary(v.to_vec(), nn), + PqVariant::String(v) => Scalar::utf8(v, nn), + PqVariant::ShortString(v) => Scalar::utf8(v.as_str(), nn), + PqVariant::Date(v) => Scalar::utf8(v.to_string(), nn), + PqVariant::TimestampMicros(v) => Scalar::utf8(v.to_rfc3339(), nn), + PqVariant::TimestampNtzMicros(v) => Scalar::utf8(v.to_string(), nn), + PqVariant::TimestampNanos(v) => Scalar::utf8(v.to_rfc3339(), nn), + PqVariant::TimestampNtzNanos(v) => Scalar::utf8(v.to_string(), nn), + PqVariant::Time(v) => Scalar::utf8(v.to_string(), nn), + PqVariant::Uuid(v) => Scalar::utf8(v.to_string(), nn), + PqVariant::List(values) => { + let children = values + .iter() + .map(|v| parquet_variant_to_scalar(v).map(Scalar::variant)) + .collect::>>()?; + Scalar::list(DType::Variant(nn), children, nn) + } + PqVariant::Object(values) => { + let mut names = Vec::new(); + let mut dtypes = Vec::new(); + let mut field_values = Vec::new(); + for (name, value) in values.iter() { + names.push(vortex_array::dtype::FieldName::from(name)); + dtypes.push(DType::Variant(nn)); + field_values.push(Some(ScalarValue::Variant(Box::new( + parquet_variant_to_scalar(value)?, + )))); + } + let fields = StructFields::new(FieldNames::from(names), dtypes); + Scalar::try_new( + DType::Struct(fields, nn), + Some(ScalarValue::List(field_values)), + )? + } + }) +} + +impl OperationsVTable for ParquetVariant { + fn scalar_at(array: &ParquetVariantArray, index: usize) -> VortexResult { + if array.validity.is_null(index)? { + return Ok(Scalar::null(DType::Variant(Nullability::Nullable))); + } + + let inner = if let Some(typed_value) = array.typed_value_array() + && typed_value.is_valid(index)? + { + typed_value.scalar_at(index)? + } else if let Some(value) = array.value_array() + && value.is_valid(index)? + { + let metadata = array + .metadata_array() + .scalar_at(index)? + .as_binary() + .value() + .cloned() + .vortex_expect("non-null metadata row must have binary value"); + let value = value + .scalar_at(index)? + .as_binary() + .value() + .cloned() + .vortex_expect("non-null value row must have binary value"); + parquet_variant_to_scalar(PqVariant::try_new(metadata.as_ref(), value.as_ref())?)? + } else { + Scalar::null(DType::Null) + }; + + Ok(Scalar::variant(inner)) + } +} + +const PARENT_RULES: ParentRuleSet = + ParentRuleSet::new(&[ParentRuleSet::lift(&ParquetVariantGetRule)]); + +/// Rule to handle VariantGet on a ParquetVariantArray by returning the typed_value child. +#[derive(Debug)] +struct ParquetVariantGetRule; + +impl ArrayParentReduceRule for ParquetVariantGetRule { + type Parent = ExactScalarFn; + + fn reduce_parent( + &self, + array: &ParquetVariantArray, + parent: ScalarFnArrayView<'_, VariantGet>, + _child_idx: usize, + ) -> VortexResult> { + let options = parent.options; + if options.path().is_some_and(|p| !p.is_empty()) { + vortex_bail!("ParquetVariant VariantGet only supports empty path"); + } + let target_dtype = options.dtype().with_nullability(Nullability::Nullable); + match array.typed_value_array() { + Some(typed_value) + if typed_value.dtype().with_nullability(Nullability::Nullable) == target_dtype => + { + // The shredded typed_value matches the requested type. + // Cast to ensure nullability matches (VariantGet always returns nullable). + Ok(Some(typed_value.cast(target_dtype)?)) + } + _ => { + // No shredded data or type mismatch; cannot push down. + Ok(None) + } + } + } +} + +impl ValidityVTable for ParquetVariant { + fn validity(array: &ParquetVariantArray) -> VortexResult { + Ok(array.validity.clone()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow_array::Array; + use arrow_array::ArrayRef as ArrowArrayRef; + use arrow_array::Int32Array; + use arrow_array::StructArray; + use arrow_array::builder::BinaryViewBuilder; + use arrow_array::cast::AsArray; + use arrow_buffer::NullBuffer; + use arrow_schema::DataType; + use arrow_schema::Field; + use arrow_schema::Fields; + use parquet_variant::Variant as PqVariant; + use parquet_variant_compute::VariantArray as ArrowVariantArray; + use parquet_variant_compute::VariantArrayBuilder; + use vortex_array::ArrayContext; + use vortex_array::IntoArray; + use vortex_array::LEGACY_SESSION; + use vortex_array::Precision; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::VarBinViewArray; + use vortex_array::arrays::Variant; + use vortex_array::arrow::ArrowArrayExecutor; + use vortex_array::builtins::ArrayBuiltins; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_array::serde::ArrayParts; + use vortex_array::serde::SerializeOptions; + use vortex_array::session::ArraySessionExt; + use vortex_buffer::ByteBufferMut; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + use vortex_session::registry::ReadContext; + + use super::*; + + #[test] + fn test_from_arrow_variant_basic() -> VortexResult<()> { + let mut builder = VariantArrayBuilder::new(3); + builder.append_variant(PqVariant::from(42i32)); + builder.append_variant(PqVariant::from("hello")); + builder.append_variant(PqVariant::from(true)); + let arrow_variant = builder.build(); + + let vortex_arr = ParquetVariantArray::from_arrow_variant(&arrow_variant)?; + + assert_eq!(vortex_arr.len(), 3); + assert_eq!( + vortex_arr.dtype(), + &DType::Variant(Nullability::NonNullable) + ); + + Ok(()) + } + + #[test] + fn test_from_arrow_variant_with_shredded_typed_value() -> VortexResult<()> { + // Build the underlying StructArray with metadata + typed_value fields + let mut metadata_builder = BinaryViewBuilder::new(); + // Minimal variant metadata: version 1, no dictionary + let min_metadata = [1u8, 0]; + for _ in 0..3 { + metadata_builder.append_value(min_metadata); + } + let metadata = metadata_builder.finish(); + + let typed_value: ArrowArrayRef = Arc::new(Int32Array::from(vec![10, 20, 30])); + + let struct_fields: Fields = vec![ + Arc::new(Field::new("metadata", DataType::BinaryView, false)), + Arc::new(Field::new("typed_value", DataType::Int32, false)), + ] + .into(); + let struct_array = + StructArray::try_new(struct_fields, vec![Arc::new(metadata), typed_value], None) + .unwrap(); + + let arrow_variant = ArrowVariantArray::try_new(&struct_array).unwrap(); + + let vortex_arr = ParquetVariantArray::from_arrow_variant(&arrow_variant)?; + assert_eq!(vortex_arr.len(), 3); + assert_eq!( + vortex_arr.dtype(), + &DType::Variant(Nullability::NonNullable) + ); + + // Verify typed_value is present by downcasting through the layers + let variant_arr = vortex_arr.as_opt::().unwrap(); + let inner = variant_arr.child().as_opt::().unwrap(); + assert!(inner.typed_value_array().is_some()); + + Ok(()) + } + + #[test] + fn test_variant_get_pushdown_with_typed_value() -> VortexResult<()> { + // Create a ParquetVariantArray with shredded typed_value (i32 data) + let metadata = buffer![0u8, 1, 2].into_array(); + let typed_value = buffer![10i32, 20, 30].into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, None, Some(typed_value))?; + + // Wrap it in a VariantArray + let variant_array = VariantArray::new(pv_array.into_array(), Nullability::NonNullable); + + // Apply variant_get + let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); + let result = variant_array.into_array().variant_get(None, target_dtype)?; + + // The result should be the typed_value data, cast to nullable i32 + assert_eq!( + result.dtype(), + &DType::Primitive(PType::I32, Nullability::Nullable) + ); + assert_eq!(result.len(), 3); + + Ok(()) + } + + #[test] + fn test_variant_get_no_typed_value() -> VortexResult<()> { + // Create a ParquetVariantArray without typed_value (only value) + let metadata = buffer![0u8, 1, 2].into_array(); + let value = buffer![0u8, 1, 2].into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), None)?; + + // Wrap it in a VariantArray + let variant_array = VariantArray::new(pv_array.into_array(), Nullability::NonNullable); + + // Apply variant_get - the rule returns None since there's no typed_value, + // so the optimizer creates a lazy ScalarFnArray that will error on execute. + let target_dtype = DType::Primitive(PType::I32, Nullability::Nullable); + let result = variant_array.into_array().variant_get(None, target_dtype)?; + // The result is a lazy expression wrapping the variant array + assert_eq!( + result.dtype(), + &DType::Primitive(PType::I32, Nullability::Nullable) + ); + Ok(()) + } + + fn roundtrip(array: ArrayRef) -> ArrayRef { + let dtype = array.dtype().clone(); + let len = array.len(); + + let ctx = ArrayContext::empty(); + let serialized = array.serialize(&ctx, &SerializeOptions::default()).unwrap(); + + let mut concat = ByteBufferMut::empty(); + for buf in serialized { + concat.extend_from_slice(buf.as_ref()); + } + let concat = concat.freeze(); + + let session = VortexSession::empty().with::(); + session + .arrays() + .register(ParquetVariant::ID, ParquetVariant); + session.arrays().register(Variant::ID, Variant); + + let parts = ArrayParts::try_from(concat).unwrap(); + parts + .decode(&dtype, len, &ReadContext::new(ctx.to_ids()), &session) + .unwrap() + } + + fn assert_arrow_variant_storage_roundtrip(struct_array: StructArray) -> VortexResult<()> { + let arrow_variant = ArrowVariantArray::try_new(&struct_array).unwrap(); + let vortex_arr = ParquetVariantArray::from_arrow_variant(&arrow_variant)?; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let roundtripped = vortex_arr.execute_arrow(None, &mut ctx)?; + let roundtripped = roundtripped.as_struct(); + + assert_eq!(struct_array.len(), roundtripped.len()); + assert_eq!(struct_array.column_names(), roundtripped.column_names()); + assert_eq!(struct_array.nulls(), roundtripped.nulls()); + assert_eq!(struct_array.fields().len(), roundtripped.fields().len()); + + for (expected, actual) in struct_array + .fields() + .iter() + .zip(roundtripped.fields().iter()) + { + assert_eq!(expected.name(), actual.name()); + assert_eq!(expected.data_type(), actual.data_type()); + assert_eq!(expected.is_nullable(), actual.is_nullable()); + } + + for (expected, actual) in struct_array + .columns() + .iter() + .zip(roundtripped.columns().iter()) + { + assert_eq!(expected.to_data(), actual.to_data()); + } + + Ok(()) + } + + fn binary_view_array(values: [&[u8]; N]) -> ArrowArrayRef { + let mut builder = BinaryViewBuilder::new(); + for value in values { + builder.append_value(value); + } + Arc::new(builder.finish()) + } + + #[test] + fn test_serde_roundtrip_typed_value_variant() { + let outer_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + + let inner_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + let inner_value = VarBinViewArray::from_iter_bin([b"\x02", b"\x03", b"\x04"]).into_array(); + let inner_pv = + ParquetVariantArray::try_new(inner_metadata, Some(inner_value), None).unwrap(); + let typed_value = + VariantArray::new(inner_pv.into_array(), Nullability::NonNullable).into_array(); + + let outer_pv = + ParquetVariantArray::try_new(outer_metadata, None, Some(typed_value)).unwrap(); + let array = outer_pv.into_array(); + let decoded = roundtrip(array.clone()); + + assert!(array.array_eq(&decoded, Precision::Value)); + let decoded_pv = decoded.as_opt::().unwrap(); + let typed = decoded_pv.typed_value_array().unwrap(); + assert_eq!(typed.dtype(), &DType::Variant(Nullability::NonNullable)); + } + + #[test] + fn test_serde_roundtrip_typed_value_int32() { + let outer_metadata = + VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00", b"\x01\x00"]).into_array(); + let typed_value = buffer![10i32, 20, 30].into_array(); + + let outer_pv = + ParquetVariantArray::try_new(outer_metadata, None, Some(typed_value)).unwrap(); + let array = outer_pv.into_array(); + let decoded = roundtrip(array.clone()); + + assert!(array.array_eq(&decoded, Precision::Value)); + let decoded_pv = decoded.as_opt::().unwrap(); + let typed = decoded_pv.typed_value_array().unwrap(); + assert_eq!( + typed.dtype(), + &DType::Primitive(PType::I32, Nullability::NonNullable) + ); + } + + #[test] + fn test_arrow_variant_storage_basic() -> VortexResult<()> { + let metadata = VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00"]).into_array(); + let value = VarBinViewArray::from_iter_bin([b"\x10", b"\x11"]).into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), None)?; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let arrow = pv_array.into_array().execute_arrow(None, &mut ctx)?; + let struct_arr = arrow.as_struct(); + + assert_eq!(struct_arr.num_columns(), 2); + assert_eq!(struct_arr.column_names(), &["metadata", "value"]); + + Ok(()) + } + + #[test] + fn test_arrow_variant_storage_with_typed_value() -> VortexResult<()> { + let metadata = VarBinViewArray::from_iter_bin([b"\x01\x00", b"\x01\x00"]).into_array(); + let value = VarBinViewArray::from_iter_bin([b"\x10", b"\x11"]).into_array(); + let typed_value = buffer![1i32, 2].into_array(); + let pv_array = ParquetVariantArray::try_new(metadata, Some(value), Some(typed_value))?; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let arrow = pv_array.into_array().execute_arrow(None, &mut ctx)?; + let struct_arr = arrow.as_struct(); + + assert_eq!(struct_arr.num_columns(), 3); + assert_eq!( + struct_arr.column_names(), + &["metadata", "value", "typed_value"] + ); + + Ok(()) + } + + #[test] + fn test_arrow_variant_roundtrip_unshredded_storage() -> VortexResult<()> { + let mut builder = VariantArrayBuilder::new(3); + builder.append_variant(PqVariant::from(42i32)); + builder.append_variant(PqVariant::from("hello")); + builder.append_variant(PqVariant::from(true)); + + assert_arrow_variant_storage_roundtrip(builder.build().into_inner()) + } + + #[test] + fn test_arrow_variant_roundtrip_typed_value_only_storage() -> VortexResult<()> { + let metadata = binary_view_array([b"\x01\x00", b"\x01\x00", b"\x01\x00"]); + let typed_value: ArrowArrayRef = Arc::new(Int32Array::from(vec![10, 20, 30])); + + let struct_array = StructArray::try_new( + vec![ + Arc::new(Field::new("metadata", DataType::BinaryView, false)), + Arc::new(Field::new("typed_value", DataType::Int32, false)), + ] + .into(), + vec![metadata, typed_value], + None, + ) + .unwrap(); + + assert_arrow_variant_storage_roundtrip(struct_array) + } + + #[test] + fn test_arrow_variant_roundtrip_value_and_typed_value_storage() -> VortexResult<()> { + let metadata = binary_view_array([b"\x01\x00", b"\x01\x00"]); + let value = binary_view_array([b"\x10", b"\x11"]); + let typed_value: ArrowArrayRef = Arc::new(Int32Array::from(vec![1, 2])); + + let struct_array = StructArray::try_new( + vec![ + Arc::new(Field::new("metadata", DataType::BinaryView, false)), + Arc::new(Field::new("value", DataType::BinaryView, true)), + Arc::new(Field::new("typed_value", DataType::Int32, false)), + ] + .into(), + vec![metadata, value, typed_value], + None, + ) + .unwrap(); + + assert_arrow_variant_storage_roundtrip(struct_array) + } + + #[test] + fn test_arrow_variant_roundtrip_with_outer_nulls() -> VortexResult<()> { + let metadata = binary_view_array([b"\x01\x00", b"\x01\x00", b"\x01\x00"]); + let value = binary_view_array([b"\x10", b"\x00", b"\x11"]); + let struct_array = StructArray::try_new( + vec![ + Arc::new(Field::new("metadata", DataType::BinaryView, false)), + Arc::new(Field::new("value", DataType::BinaryView, true)), + ] + .into(), + vec![metadata, value], + Some(NullBuffer::from(vec![true, false, true])), + ) + .unwrap(); + + assert_arrow_variant_storage_roundtrip(struct_array) + } +} diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 15cc3e5936e..15c85eeb6ec 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -9656,6 +9656,8 @@ pub fn vortex_array::builtins::ArrayBuiltins::mask(self, mask: vortex_array::Arr pub fn vortex_array::builtins::ArrayBuiltins::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::builtins::ArrayBuiltins::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::builtins::ArrayBuiltins::zip(&self, if_true: vortex_array::ArrayRef, if_false: vortex_array::ArrayRef) -> vortex_error::VortexResult impl vortex_array::builtins::ArrayBuiltins for vortex_array::ArrayRef @@ -9678,6 +9680,8 @@ pub fn vortex_array::ArrayRef::mask(self, mask: vortex_array::ArrayRef) -> vorte pub fn vortex_array::ArrayRef::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::ArrayRef::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::ArrayRef::zip(&self, if_true: vortex_array::ArrayRef, if_false: vortex_array::ArrayRef) -> vortex_error::VortexResult pub trait vortex_array::builtins::ExprBuiltins: core::marker::Sized @@ -9698,6 +9702,8 @@ pub fn vortex_array::builtins::ExprBuiltins::mask(&self, mask: vortex_array::exp pub fn vortex_array::builtins::ExprBuiltins::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::builtins::ExprBuiltins::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::builtins::ExprBuiltins::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult impl vortex_array::builtins::ExprBuiltins for vortex_array::expr::Expression @@ -9718,6 +9724,8 @@ pub fn vortex_array::expr::Expression::mask(&self, mask: vortex_array::expr::Exp pub fn vortex_array::expr::Expression::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::expr::Expression::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::expr::Expression::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult pub mod vortex_array::compute @@ -13792,6 +13800,8 @@ pub fn vortex_array::expr::Expression::mask(&self, mask: vortex_array::expr::Exp pub fn vortex_array::expr::Expression::not(&self) -> vortex_error::VortexResult +pub fn vortex_array::expr::Expression::variant_get(&self, path: core::option::Option, dtype: vortex_array::dtype::DType) -> vortex_error::VortexResult + pub fn vortex_array::expr::Expression::zip(&self, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_error::VortexResult impl vortex_array::expr::VortexExprExt for vortex_array::expr::Expression @@ -13930,6 +13940,8 @@ pub fn vortex_array::expr::select_exclude(fields: impl core::convert::Into alloc::vec::Vec +pub fn vortex_array::expr::variant_get(path: core::option::Option, dtype: vortex_array::dtype::DType, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression + pub fn vortex_array::expr::zip_expr(mask: vortex_array::expr::Expression, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_array::expr::Expression pub type vortex_array::expr::Annotations<'a, A> = vortex_utils::aliases::hash_map::HashMap<&'a vortex_array::expr::Expression, vortex_utils::aliases::hash_set::HashSet> @@ -18556,6 +18568,116 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +pub mod vortex_array::scalar_fn::fns::variant_get + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: &Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::dtype(&self) -> &vortex_array::dtype::DType + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::new(path: core::option::Option, dtype: vortex_array::dtype::DType) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::path(&self) -> core::option::Option<&vortex_array::scalar_fn::fns::variant_get::VariantPath> + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::is_empty(&self) -> bool + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantPath) -> bool + +impl core::default::Default for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::default() -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + pub mod vortex_array::scalar_fn::fns::zip pub struct vortex_array::scalar_fn::fns::zip::Zip @@ -19628,6 +19750,44 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: &Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::zip::Zip pub type vortex_array::scalar_fn::fns::zip::Zip::Options = vortex_array::scalar_fn::EmptyOptions diff --git a/vortex-array/src/arrays/variant/vtable/rules.rs b/vortex-array/src/arrays/variant/vtable/rules.rs new file mode 100644 index 00000000000..af3c51edff1 --- /dev/null +++ b/vortex-array/src/arrays/variant/vtable/rules.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::arrays::Variant; +use crate::arrays::VariantArray; +use crate::arrays::scalar_fn::ExactScalarFn; +use crate::arrays::scalar_fn::ScalarFnArrayView; +use crate::builtins::ArrayBuiltins; +use crate::optimizer::rules::ArrayParentReduceRule; +use crate::optimizer::rules::ParentRuleSet; +use crate::scalar_fn::fns::variant_get::VariantGet; + +pub(crate) const PARENT_RULES: ParentRuleSet = + ParentRuleSet::new(&[ParentRuleSet::lift(&VariantGetPushDownRule)]); + +/// Rule to push VariantGet through VariantArray to its child encoding. +#[derive(Debug)] +struct VariantGetPushDownRule; + +impl ArrayParentReduceRule for VariantGetPushDownRule { + type Parent = ExactScalarFn; + + fn reduce_parent( + &self, + array: &VariantArray, + parent: ScalarFnArrayView<'_, VariantGet>, + _child_idx: usize, + ) -> VortexResult> { + let options = parent.options; + Ok(Some(array.child().variant_get( + options.path().cloned(), + options.dtype().clone(), + )?)) + } +} diff --git a/vortex-array/src/arrow/executor/mod.rs b/vortex-array/src/arrow/executor/mod.rs index edaa244d967..1582081dbcb 100644 --- a/vortex-array/src/arrow/executor/mod.rs +++ b/vortex-array/src/arrow/executor/mod.rs @@ -15,6 +15,7 @@ mod run_end; mod struct_; mod temporal; mod validity; +mod variant; use arrow_array::ArrayRef as ArrowArrayRef; use arrow_array::RecordBatch; @@ -46,6 +47,7 @@ use crate::arrow::executor::primitive::to_arrow_primitive; use crate::arrow::executor::run_end::to_arrow_run_end; use crate::arrow::executor::struct_::to_arrow_struct; use crate::arrow::executor::temporal::to_arrow_temporal; +use crate::arrow::executor::variant::to_arrow_variant; use crate::dtype::DType; use crate::dtype::PType; use crate::executor::ExecutionCtx; @@ -88,6 +90,22 @@ impl ArrowArrayExecutor for ArrayRef { ) -> VortexResult { let len = self.len(); + if self.dtype().is_variant() { + let target_fields = match data_type { + Some(DataType::Struct(fields)) => Some(fields), + Some(_) => { + vortex_bail!("Variant can only be converted to Arrow Struct storage type"); + } + None => None, + }; + let arrow = to_arrow_variant(self, target_fields, ctx)?; + vortex_ensure!( + arrow.len() == len, + "Arrow array length does not match Vortex array length after conversion to Variant" + ); + return Ok(arrow); + } + // Resolve the DataType if it is a leaf type // we should likely make this extensible. let resolved_type: DataType = match data_type { diff --git a/vortex-array/src/arrow/executor/variant.rs b/vortex-array/src/arrow/executor/variant.rs new file mode 100644 index 00000000000..a02f7cfe593 --- /dev/null +++ b/vortex-array/src/arrow/executor/variant.rs @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::StructArray as ArrowStructArray; +use arrow_schema::Field; +use arrow_schema::Fields; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::array::ArrayVisitor; +use crate::arrays::Variant; +use crate::arrow::ArrowArrayExecutor; +use crate::arrow::executor::validity::to_arrow_null_buffer; + +pub(super) fn to_arrow_variant( + array: ArrayRef, + target_fields: Option<&Fields>, + ctx: &mut ExecutionCtx, +) -> VortexResult { + let len = array.len(); + let nulls = to_arrow_null_buffer(array.validity()?, len, ctx)?; + let inner = match array.try_into::() { + Ok(variant) => variant.child().clone(), + Err(array) => array, + }; + + let named_children = inner.named_children(); + if named_children.is_empty() { + vortex_bail!("Variant array has no children"); + } + + let mut metadata: Option = None; + let mut value: Option = None; + let mut typed_value: Option = None; + + for (name, child) in named_children { + match name.as_str() { + "validity" => {} + "metadata" => metadata = Some(child), + "value" => value = Some(child), + "typed_value" => typed_value = Some(child), + _ => { + vortex_bail!("Unsupported variant child {name}"); + } + } + } + + let metadata = match metadata { + Some(metadata) => metadata, + None => vortex_bail!("Variant array missing metadata child"), + }; + + let mut ordered: Vec<(String, ArrayRef)> = vec![("metadata".to_string(), metadata.clone())]; + if let Some(value) = value.clone() { + ordered.push(("value".to_string(), value)); + } + if let Some(typed_value) = typed_value.clone() { + ordered.push(("typed_value".to_string(), typed_value)); + } + + let (fields, arrays) = if let Some(fields) = target_fields { + let mut arrays = Vec::with_capacity(fields.len()); + for field in fields.iter() { + let child = match field.name().as_str() { + "metadata" => Some(&metadata), + "value" => value.as_ref(), + "typed_value" => typed_value.as_ref(), + other => { + vortex_bail!("Unsupported variant field {other}"); + } + }; + + let Some(child) = child else { + vortex_bail!("Variant array missing child for field {}", field.name()); + }; + + arrays.push(child.clone().execute_arrow(Some(field.data_type()), ctx)?); + } + + // Ensure we didn't silently drop any children + vortex_ensure!( + fields.len() == ordered.len(), + "Variant array has {} children but target Arrow type has {} fields", + ordered.len(), + fields.len() + ); + + (fields.clone(), arrays) + } else { + let mut fields = Vec::with_capacity(ordered.len()); + let mut arrays = Vec::with_capacity(ordered.len()); + + for (name, child) in ordered { + let arrow = child.clone().execute_arrow(None, ctx)?; + fields.push(Field::new( + name, + arrow.data_type().clone(), + child.dtype().is_nullable(), + )); + arrays.push(arrow); + } + + (Fields::from(fields), arrays) + }; + + Ok(Arc::new(ArrowStructArray::try_new(fields, arrays, nulls)?)) +} diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index ba3b8542eb7..9a9315cc805 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -33,6 +33,9 @@ use crate::scalar_fn::fns::list_contains::ListContains; use crate::scalar_fn::fns::mask::Mask; use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::operators::Operator; +use crate::scalar_fn::fns::variant_get::VariantGet; +use crate::scalar_fn::fns::variant_get::VariantGetOptions; +use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; /// A collection of built-in scalar functions that can be applied to expressions or arrays. @@ -63,6 +66,9 @@ pub trait ExprBuiltins: Sized { /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`. fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult; + /// Extract data by path and dtype from a variant expression. + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult; + /// Apply a binary operator to this expression and another. fn binary(&self, rhs: Expression, op: Operator) -> VortexResult; } @@ -100,6 +106,10 @@ impl ExprBuiltins for Expression { Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()]) } + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult { + VariantGet.try_new_expr(VariantGetOptions::new(path, dtype), [self.clone()]) + } + fn binary(&self, rhs: Expression, op: Operator) -> VortexResult { Binary.try_new_expr(op, [self.clone(), rhs]) } @@ -132,6 +142,9 @@ pub trait ArrayBuiltins: Sized { /// Check if a list contains a value. fn list_contains(&self, value: ArrayRef) -> VortexResult; + /// Extract data by path and dtype from a variant array. + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult; + /// Apply a binary operator to this array and another. fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult; @@ -202,6 +215,16 @@ impl ArrayBuiltins for ArrayRef { .optimize() } + fn variant_get(&self, path: Option, dtype: DType) -> VortexResult { + VariantGet + .try_new_array( + self.len(), + VariantGetOptions::new(path, dtype), + [self.clone()], + )? + .optimize() + } + fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { Binary .try_new_array(self.len(), op, [self.clone(), rhs])? diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index bc30ba86ec4..7a5c91f2100 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -45,6 +45,9 @@ use crate::scalar_fn::fns::pack::PackOptions; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::FieldSelection; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; +use crate::scalar_fn::fns::variant_get::VariantGetOptions; +use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; // ---- Root ---- @@ -663,6 +666,13 @@ pub fn dynamic( ) } +// ---- VariantGet ---- + +/// Creates an expression that extracts data by path and dtype from a variant expression. +pub fn variant_get(path: Option, dtype: DType, child: Expression) -> Expression { + VariantGet.new_expr(VariantGetOptions::new(path, dtype), vec![child]) +} + // ---- ListContains ---- /// Creates an expression that checks if a value is contained in a list. diff --git a/vortex-array/src/scalar/mod.rs b/vortex-array/src/scalar/mod.rs index 8496037ea20..5fa494230b2 100644 --- a/vortex-array/src/scalar/mod.rs +++ b/vortex-array/src/scalar/mod.rs @@ -24,7 +24,6 @@ mod scalar_value; mod truncation; mod typed_view; mod validate; - pub use scalar_value::*; pub use truncation::*; pub use typed_view::*; diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index 2c45a450bfa..34812f0a3c9 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -475,7 +475,6 @@ mod tests { use crate::scalar::DecimalValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; - fn session() -> VortexSession { VortexSession::empty() } diff --git a/vortex-array/src/scalar_fn/fns/mod.rs b/vortex-array/src/scalar_fn/fns/mod.rs index 94fc8fb0384..b0c7e7547e5 100644 --- a/vortex-array/src/scalar_fn/fns/mod.rs +++ b/vortex-array/src/scalar_fn/fns/mod.rs @@ -19,4 +19,5 @@ pub mod operators; pub mod pack; pub mod root; pub mod select; +pub mod variant_get; pub mod zip; diff --git a/vortex-array/src/scalar_fn/fns/variant_get.rs b/vortex-array/src/scalar_fn/fns/variant_get.rs new file mode 100644 index 00000000000..e89ceeddb35 --- /dev/null +++ b/vortex-array/src/scalar_fn/fns/variant_get.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; +use std::fmt::Formatter; + +use prost::Message; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_proto::expr as pb; +use vortex_session::VortexSession; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::dtype::DType; +use crate::dtype::Nullability; +use crate::scalar_fn::Arity; +use crate::scalar_fn::ChildName; +use crate::scalar_fn::ExecutionArgs; +use crate::scalar_fn::ScalarFnId; +use crate::scalar_fn::ScalarFnVTable; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct VariantPath {} + +impl VariantPath { + pub fn is_empty(&self) -> bool { + true + } +} + +/// Options for the `VariantGet` scalar function. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct VariantGetOptions { + /// The variant field path to extract. + path: Option, + /// The expected return type. + dtype: DType, +} + +impl VariantGetOptions { + pub fn new(path: Option, dtype: DType) -> Self { + Self { path, dtype } + } + + pub fn path(&self) -> Option<&VariantPath> { + self.path.as_ref() + } + + pub fn dtype(&self) -> &DType { + &self.dtype + } +} + +impl fmt::Display for VariantGetOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "variant_get({})", self.dtype) + } +} + +/// Scalar function that extracts data by path and dtype from variant arrays. +#[derive(Clone)] +pub struct VariantGet; + +impl ScalarFnVTable for VariantGet { + type Options = VariantGetOptions; + + fn id(&self) -> ScalarFnId { + ScalarFnId::from("vortex.variant_get") + } + + fn serialize(&self, instance: &Self::Options) -> VortexResult>> { + Ok(Some( + pb::VariantGetOpts { + dtype: Some(instance.dtype().try_into()?), + } + .encode_to_vec(), + )) + } + + fn deserialize(&self, metadata: &[u8], session: &VortexSession) -> VortexResult { + let opts = pb::VariantGetOpts::decode(metadata)?; + let dtype = DType::from_proto( + opts.dtype + .as_ref() + .ok_or_else(|| vortex_err!("VariantGetOpts missing dtype"))?, + session, + )?; + Ok(VariantGetOptions::new(None, dtype)) + } + + fn arity(&self, _options: &VariantGetOptions) -> Arity { + Arity::Exact(1) + } + + fn child_name(&self, _options: &Self::Options, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("input"), + _ => unreachable!( + "Invalid child index {} for VariantGet expression", + child_idx + ), + } + } + + fn fmt_sql( + &self, + options: &VariantGetOptions, + expr: &crate::expr::Expression, + f: &mut Formatter<'_>, + ) -> fmt::Result { + expr.children()[0].fmt_sql(f)?; + let _ = options; + Ok(()) + } + + fn return_dtype( + &self, + options: &VariantGetOptions, + _arg_dtypes: &[DType], + ) -> VortexResult { + // Always return nullable since Variant data is always nullable + Ok(options.dtype().with_nullability(Nullability::Nullable)) + } + + fn execute( + &self, + _options: &VariantGetOptions, + _args: &dyn ExecutionArgs, + _ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_bail!( + "VariantGet should be pushed down via parent reduction rules, not executed directly" + ) + } + + fn is_null_sensitive(&self, _options: &VariantGetOptions) -> bool { + true + } + + fn is_fallible(&self, _options: &VariantGetOptions) -> bool { + false + } +} diff --git a/vortex-array/src/scalar_fn/session.rs b/vortex-array/src/scalar_fn/session.rs index eef759bf8e3..6553cbd29e3 100644 --- a/vortex-array/src/scalar_fn/session.rs +++ b/vortex-array/src/scalar_fn/session.rs @@ -23,6 +23,7 @@ use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::pack::Pack; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; /// Registry of scalar function vtables. /// Registry of scalar function vtables. @@ -67,6 +68,7 @@ impl Default for ScalarFnSession { this.register(Pack); this.register(Root); this.register(Select); + this.register(VariantGet); this } diff --git a/vortex-datafusion/src/convert/scalars.rs b/vortex-datafusion/src/convert/scalars.rs index a9b9649fd59..a94b5e1ba43 100644 --- a/vortex-datafusion/src/convert/scalars.rs +++ b/vortex-datafusion/src/convert/scalars.rs @@ -101,7 +101,7 @@ impl TryToDataFusion for Scalar { } } } - // SAFETY: By construction Utf8 scalar values are utf8 + // SAFETY: By construction Utf8 scalar values are utf8. DType::Utf8(_) => ScalarValue::Utf8(self.as_utf8().value().cloned().map(|s| unsafe { String::from_utf8_unchecked(Vec::::from(s.into_inner().into_inner())) })), diff --git a/vortex-proto/proto/expr.proto b/vortex-proto/proto/expr.proto index 73ba7209a15..ede36496598 100644 --- a/vortex-proto/proto/expr.proto +++ b/vortex-proto/proto/expr.proto @@ -95,3 +95,8 @@ message SelectOpts { message CaseWhenOpts { uint32 num_children = 1; } + +// Options for `vortex.variant_get` +message VariantGetOpts { + vortex.dtype.DType dtype = 1; +} diff --git a/vortex-proto/proto/scalar.proto b/vortex-proto/proto/scalar.proto index 251863dc3a3..2f253dae4de 100644 --- a/vortex-proto/proto/scalar.proto +++ b/vortex-proto/proto/scalar.proto @@ -37,3 +37,56 @@ message ScalarValue { message ListValue { repeated ScalarValue values = 1; } + +message VariantValue { + oneof kind { + google.protobuf.NullValue null_value = 1; + bool bool_value = 2; + VariantPrimitive primitive_value = 3; + VariantDecimal decimal_value = 4; + string string_value = 5; + bytes bytes_value = 6; + VariantListValue list_value = 7; + VariantObjectValue object_value = 8; + } +} + +message VariantPrimitive { + oneof kind { + sint64 int8_value = 1; + sint64 int16_value = 2; + sint64 int32_value = 3; + sint64 int64_value = 4; + uint64 uint8_value = 5; + uint64 uint16_value = 6; + uint64 uint32_value = 7; + uint64 uint64_value = 8; + uint32 f16_value = 9; + float f32_value = 10; + double f64_value = 11; + } +} + +message VariantDecimal { + oneof kind { + sint64 int8_value = 1; + sint64 int16_value = 2; + sint64 int32_value = 3; + sint64 int64_value = 4; + bytes int128_value = 5; + bytes int256_value = 6; + } +} + +message VariantListValue { + repeated VariantValue values = 1; +} + +message VariantObjectField { + string name = 1; + VariantValue value = 2; +} + +message VariantObjectValue { + repeated VariantObjectField fields = 1; +} diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 045b53d17eb..2e45784e72c 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -1192,6 +1192,34 @@ pub fn vortex_proto::expr::SelectOpts::clear(&mut self) pub fn vortex_proto::expr::SelectOpts::encoded_len(&self) -> usize +pub struct vortex_proto::expr::VariantGetOpts + +pub vortex_proto::expr::VariantGetOpts::dtype: core::option::Option + +impl core::clone::Clone for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clone(&self) -> vortex_proto::expr::VariantGetOpts + +impl core::cmp::PartialEq for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::eq(&self, other: &vortex_proto::expr::VariantGetOpts) -> bool + +impl core::default::Default for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::default() -> Self + +impl core::fmt::Debug for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::expr::VariantGetOpts + +impl prost::message::Message for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clear(&mut self) + +pub fn vortex_proto::expr::VariantGetOpts::encoded_len(&self) -> usize + pub mod vortex_proto::scalar pub mod vortex_proto::scalar::scalar_value @@ -1242,6 +1270,142 @@ pub fn vortex_proto::scalar::scalar_value::Kind::fmt(&self, f: &mut core::fmt::F impl core::marker::StructuralPartialEq for vortex_proto::scalar::scalar_value::Kind +pub mod vortex_proto::scalar::variant_decimal + +pub enum vortex_proto::scalar::variant_decimal::Kind + +pub vortex_proto::scalar::variant_decimal::Kind::Int128Value(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_decimal::Kind::Int16Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int256Value(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_decimal::Kind::Int32Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int64Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int8Value(i64) + +impl vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_decimal::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_decimal::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::clone(&self) -> vortex_proto::scalar::variant_decimal::Kind + +impl core::cmp::Eq for vortex_proto::scalar::variant_decimal::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::eq(&self, other: &vortex_proto::scalar::variant_decimal::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_decimal::Kind + +pub mod vortex_proto::scalar::variant_primitive + +pub enum vortex_proto::scalar::variant_primitive::Kind + +pub vortex_proto::scalar::variant_primitive::Kind::F16Value(u32) + +pub vortex_proto::scalar::variant_primitive::Kind::F32Value(f32) + +pub vortex_proto::scalar::variant_primitive::Kind::F64Value(f64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int16Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int32Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int64Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int8Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint16Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint32Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint64Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint8Value(u64) + +impl vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_primitive::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_primitive::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::clone(&self) -> vortex_proto::scalar::variant_primitive::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::eq(&self, other: &vortex_proto::scalar::variant_primitive::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::Copy for vortex_proto::scalar::variant_primitive::Kind + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_primitive::Kind + +pub mod vortex_proto::scalar::variant_value + +pub enum vortex_proto::scalar::variant_value::Kind + +pub vortex_proto::scalar::variant_value::Kind::BoolValue(bool) + +pub vortex_proto::scalar::variant_value::Kind::BytesValue(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_value::Kind::DecimalValue(vortex_proto::scalar::VariantDecimal) + +pub vortex_proto::scalar::variant_value::Kind::ListValue(vortex_proto::scalar::VariantListValue) + +pub vortex_proto::scalar::variant_value::Kind::NullValue(i32) + +pub vortex_proto::scalar::variant_value::Kind::ObjectValue(vortex_proto::scalar::VariantObjectValue) + +pub vortex_proto::scalar::variant_value::Kind::PrimitiveValue(vortex_proto::scalar::VariantPrimitive) + +pub vortex_proto::scalar::variant_value::Kind::StringValue(alloc::string::String) + +impl vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_value::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_value::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::clone(&self) -> vortex_proto::scalar::variant_value::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::eq(&self, other: &vortex_proto::scalar::variant_value::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_value::Kind + pub struct vortex_proto::scalar::ListValue pub vortex_proto::scalar::ListValue::values: alloc::vec::Vec @@ -1327,3 +1491,181 @@ impl prost::message::Message for vortex_proto::scalar::ScalarValue pub fn vortex_proto::scalar::ScalarValue::clear(&mut self) pub fn vortex_proto::scalar::ScalarValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantDecimal + +pub vortex_proto::scalar::VariantDecimal::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::clone(&self) -> vortex_proto::scalar::VariantDecimal + +impl core::cmp::Eq for vortex_proto::scalar::VariantDecimal + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::eq(&self, other: &vortex_proto::scalar::VariantDecimal) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantDecimal + +impl prost::message::Message for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::clear(&mut self) + +pub fn vortex_proto::scalar::VariantDecimal::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantListValue + +pub vortex_proto::scalar::VariantListValue::values: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::clone(&self) -> vortex_proto::scalar::VariantListValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::eq(&self, other: &vortex_proto::scalar::VariantListValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantListValue + +impl prost::message::Message for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantListValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantObjectField + +pub vortex_proto::scalar::VariantObjectField::name: alloc::string::String + +pub vortex_proto::scalar::VariantObjectField::value: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::clone(&self) -> vortex_proto::scalar::VariantObjectField + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::eq(&self, other: &vortex_proto::scalar::VariantObjectField) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantObjectField + +impl prost::message::Message for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::clear(&mut self) + +pub fn vortex_proto::scalar::VariantObjectField::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantObjectValue + +pub vortex_proto::scalar::VariantObjectValue::fields: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::clone(&self) -> vortex_proto::scalar::VariantObjectValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::eq(&self, other: &vortex_proto::scalar::VariantObjectValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantObjectValue + +impl prost::message::Message for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantObjectValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantPrimitive + +pub vortex_proto::scalar::VariantPrimitive::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::clone(&self) -> vortex_proto::scalar::VariantPrimitive + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::eq(&self, other: &vortex_proto::scalar::VariantPrimitive) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::Copy for vortex_proto::scalar::VariantPrimitive + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantPrimitive + +impl prost::message::Message for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::clear(&mut self) + +pub fn vortex_proto::scalar::VariantPrimitive::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantValue + +pub vortex_proto::scalar::VariantValue::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::clone(&self) -> vortex_proto::scalar::VariantValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::eq(&self, other: &vortex_proto::scalar::VariantValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantValue + +impl prost::message::Message for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantValue::encoded_len(&self) -> usize diff --git a/vortex-proto/src/generated/vortex.expr.rs b/vortex-proto/src/generated/vortex.expr.rs index 9c7ddb1d90c..07be9c21ecb 100644 --- a/vortex-proto/src/generated/vortex.expr.rs +++ b/vortex-proto/src/generated/vortex.expr.rs @@ -163,3 +163,9 @@ pub struct CaseWhenOpts { #[prost(uint32, tag = "1")] pub num_children: u32, } +/// Options for `vortex.variant_get` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantGetOpts { + #[prost(message, optional, tag = "1")] + pub dtype: ::core::option::Option, +} diff --git a/vortex-proto/src/generated/vortex.scalar.rs b/vortex-proto/src/generated/vortex.scalar.rs index df43794acf7..4c597722563 100644 --- a/vortex-proto/src/generated/vortex.scalar.rs +++ b/vortex-proto/src/generated/vortex.scalar.rs @@ -46,3 +46,106 @@ pub struct ListValue { #[prost(message, repeated, tag = "1")] pub values: ::prost::alloc::vec::Vec, } +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantValue { + #[prost(oneof = "variant_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantValue`. +pub mod variant_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Kind { + #[prost(enumeration = "::prost_types::NullValue", tag = "1")] + NullValue(i32), + #[prost(bool, tag = "2")] + BoolValue(bool), + #[prost(message, tag = "3")] + PrimitiveValue(super::VariantPrimitive), + #[prost(message, tag = "4")] + DecimalValue(super::VariantDecimal), + #[prost(string, tag = "5")] + StringValue(::prost::alloc::string::String), + #[prost(bytes, tag = "6")] + BytesValue(::prost::alloc::vec::Vec), + #[prost(message, tag = "7")] + ListValue(super::VariantListValue), + #[prost(message, tag = "8")] + ObjectValue(super::VariantObjectValue), + } +} +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct VariantPrimitive { + #[prost( + oneof = "variant_primitive::Kind", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11" + )] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantPrimitive`. +pub mod variant_primitive { + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] + pub enum Kind { + #[prost(sint64, tag = "1")] + Int8Value(i64), + #[prost(sint64, tag = "2")] + Int16Value(i64), + #[prost(sint64, tag = "3")] + Int32Value(i64), + #[prost(sint64, tag = "4")] + Int64Value(i64), + #[prost(uint64, tag = "5")] + Uint8Value(u64), + #[prost(uint64, tag = "6")] + Uint16Value(u64), + #[prost(uint64, tag = "7")] + Uint32Value(u64), + #[prost(uint64, tag = "8")] + Uint64Value(u64), + #[prost(uint32, tag = "9")] + F16Value(u32), + #[prost(float, tag = "10")] + F32Value(f32), + #[prost(double, tag = "11")] + F64Value(f64), + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VariantDecimal { + #[prost(oneof = "variant_decimal::Kind", tags = "1, 2, 3, 4, 5, 6")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantDecimal`. +pub mod variant_decimal { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum Kind { + #[prost(sint64, tag = "1")] + Int8Value(i64), + #[prost(sint64, tag = "2")] + Int16Value(i64), + #[prost(sint64, tag = "3")] + Int32Value(i64), + #[prost(sint64, tag = "4")] + Int64Value(i64), + #[prost(bytes, tag = "5")] + Int128Value(::prost::alloc::vec::Vec), + #[prost(bytes, tag = "6")] + Int256Value(::prost::alloc::vec::Vec), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantListValue { + #[prost(message, repeated, tag = "1")] + pub values: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantObjectField { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub value: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantObjectValue { + #[prost(message, repeated, tag = "1")] + pub fields: ::prost::alloc::vec::Vec, +}