Skip to content

Commit 0ea4a41

Browse files
committed
Scalar
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 39237a3 commit 0ea4a41

7 files changed

Lines changed: 568 additions & 2 deletions

File tree

encodings/parquet-variant/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ workspace = true
1818

1919
[dependencies]
2020
arrow-array = { workspace = true }
21+
parquet-variant = { workspace = true }
2122
parquet-variant-compute = { workspace = true }
2223
prost = { workspace = true }
2324
vortex-array = { workspace = true }

encodings/parquet-variant/src/lib.rs

Lines changed: 149 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use std::hash::Hasher;
2121

2222
use arrow_array::Array as ArrowArray;
23+
use parquet_variant::Variant as ParquetVariant;
2324
use prost::Message;
2425
use vortex_array::ArrayEq;
2526
use vortex_array::ArrayHash;
@@ -38,14 +39,17 @@ use vortex_array::dtype::DType;
3839
use vortex_array::dtype::Nullability;
3940
use vortex_array::optimizer::rules::ArrayParentReduceRule;
4041
use vortex_array::optimizer::rules::ParentRuleSet;
42+
use vortex_array::scalar::Scalar;
43+
use vortex_array::scalar::ScalarValue;
44+
use vortex_array::scalar::VariantValue;
4145
use vortex_array::scalar_fn::fns::variant_get::VariantGet;
4246
use vortex_array::serde::ArrayChildren;
4347
use vortex_array::stats::ArrayStats;
4448
use vortex_array::stats::StatsSetRef;
4549
use vortex_array::validity::Validity;
4650
use vortex_array::vtable;
4751
use vortex_array::vtable::ArrayId;
48-
use vortex_array::vtable::NotSupported;
52+
use vortex_array::vtable::OperationsVTable;
4953
use vortex_array::vtable::VTable;
5054
use vortex_array::vtable::ValidityVTable;
5155
use vortex_array::vtable::validity_nchildren;
@@ -237,7 +241,7 @@ impl ParquetVariantArray {
237241
impl VTable for ParquetVariantVTable {
238242
type Array = ParquetVariantArray;
239243
type Metadata = ParquetVariantMetadata;
240-
type OperationsVTable = NotSupported;
244+
type OperationsVTable = Self;
241245
type ValidityVTable = Self;
242246

243247
fn id(_array: &Self::Array) -> ArrayId {
@@ -488,6 +492,149 @@ impl VTable for ParquetVariantVTable {
488492
}
489493
}
490494

495+
fn scalar_to_variant_value(scalar: Scalar) -> VortexResult<VariantValue> {
496+
if scalar.is_null() {
497+
return Ok(VariantValue::Null);
498+
}
499+
500+
Ok(match scalar.dtype() {
501+
DType::Null => VariantValue::Null,
502+
DType::Bool(_) => VariantValue::Bool(scalar.as_bool().value().unwrap_or(false)),
503+
DType::Primitive(..) => VariantValue::Primitive(
504+
*scalar
505+
.value()
506+
.vortex_expect("non-null primitive scalar must have a value")
507+
.as_primitive(),
508+
),
509+
DType::Decimal(..) => VariantValue::Decimal(
510+
*scalar
511+
.value()
512+
.vortex_expect("non-null decimal scalar must have a value")
513+
.as_decimal(),
514+
),
515+
DType::Utf8(_) => VariantValue::Utf8(
516+
scalar
517+
.value()
518+
.vortex_expect("non-null utf8 scalar must have a value")
519+
.as_utf8()
520+
.clone(),
521+
),
522+
DType::Binary(_) => VariantValue::Binary(
523+
scalar
524+
.value()
525+
.vortex_expect("non-null binary scalar must have a value")
526+
.as_binary()
527+
.clone(),
528+
),
529+
DType::List(..) | DType::FixedSizeList(..) => VariantValue::List(
530+
scalar
531+
.as_list()
532+
.elements()
533+
.unwrap_or_default()
534+
.into_iter()
535+
.map(scalar_to_variant_value)
536+
.collect::<VortexResult<Vec<_>>>()?,
537+
),
538+
DType::Struct(fields, _) => VariantValue::Object(
539+
fields
540+
.names()
541+
.iter()
542+
.cloned()
543+
.zip(
544+
scalar
545+
.as_struct()
546+
.fields_iter()
547+
.vortex_expect("non-null struct scalar must have field values"),
548+
)
549+
.map(|(name, field)| Ok((name.as_ref().into(), scalar_to_variant_value(field)?)))
550+
.collect::<VortexResult<Vec<_>>>()?,
551+
),
552+
DType::Extension(_) => VariantValue::Utf8(scalar.to_string().into()),
553+
DType::Variant => scalar
554+
.value()
555+
.vortex_expect("non-null variant scalar must have a value")
556+
.as_variant()
557+
.clone(),
558+
})
559+
}
560+
561+
fn parquet_variant_to_variant_value(variant: ParquetVariant<'_, '_>) -> VortexResult<VariantValue> {
562+
Ok(match variant {
563+
ParquetVariant::Null => VariantValue::Null,
564+
ParquetVariant::Int8(v) => VariantValue::Primitive(v.into()),
565+
ParquetVariant::Int16(v) => VariantValue::Primitive(v.into()),
566+
ParquetVariant::Int32(v) => VariantValue::Primitive(v.into()),
567+
ParquetVariant::Int64(v) => VariantValue::Primitive(v.into()),
568+
ParquetVariant::Float(v) => VariantValue::Primitive(v.into()),
569+
ParquetVariant::Double(v) => VariantValue::Primitive(v.into()),
570+
ParquetVariant::BooleanTrue => VariantValue::Bool(true),
571+
ParquetVariant::BooleanFalse => VariantValue::Bool(false),
572+
ParquetVariant::Decimal4(v) => VariantValue::Decimal(v.integer().into()),
573+
ParquetVariant::Decimal8(v) => VariantValue::Decimal(v.integer().into()),
574+
ParquetVariant::Decimal16(v) => VariantValue::Decimal(v.integer().into()),
575+
ParquetVariant::Binary(v) => VariantValue::Binary(v.to_vec().into()),
576+
ParquetVariant::String(v) => VariantValue::Utf8(v.into()),
577+
ParquetVariant::ShortString(v) => VariantValue::Utf8(v.as_str().into()),
578+
ParquetVariant::Date(v) => VariantValue::Utf8(v.to_string().into()),
579+
ParquetVariant::TimestampMicros(v) => VariantValue::Utf8(v.to_rfc3339().into()),
580+
ParquetVariant::TimestampNtzMicros(v) => VariantValue::Utf8(v.to_string().into()),
581+
ParquetVariant::TimestampNanos(v) => VariantValue::Utf8(v.to_rfc3339().into()),
582+
ParquetVariant::TimestampNtzNanos(v) => VariantValue::Utf8(v.to_string().into()),
583+
ParquetVariant::Time(v) => VariantValue::Utf8(v.to_string().into()),
584+
ParquetVariant::Uuid(v) => VariantValue::Utf8(v.to_string().into()),
585+
ParquetVariant::List(values) => VariantValue::List(
586+
values
587+
.iter()
588+
.map(parquet_variant_to_variant_value)
589+
.collect::<VortexResult<Vec<_>>>()?,
590+
),
591+
ParquetVariant::Object(values) => VariantValue::Object(
592+
values
593+
.iter()
594+
.map(|(name, value)| Ok((name.into(), parquet_variant_to_variant_value(value)?)))
595+
.collect::<VortexResult<Vec<_>>>()?,
596+
),
597+
})
598+
}
599+
600+
impl OperationsVTable<ParquetVariantVTable> for ParquetVariantVTable {
601+
fn scalar_at(array: &ParquetVariantArray, index: usize) -> VortexResult<Scalar> {
602+
if array.validity.is_null(index)? {
603+
return Ok(Scalar::null(DType::Variant));
604+
}
605+
606+
let value = if let Some(typed_value) = array.typed_value_array()
607+
&& typed_value.is_valid(index)?
608+
{
609+
scalar_to_variant_value(typed_value.scalar_at(index)?)?
610+
} else if let Some(value) = array.value_array()
611+
&& value.is_valid(index)?
612+
{
613+
let metadata = array
614+
.metadata_array()
615+
.scalar_at(index)?
616+
.as_binary()
617+
.value()
618+
.cloned()
619+
.vortex_expect("non-null metadata row must have binary value");
620+
let value = value
621+
.scalar_at(index)?
622+
.as_binary()
623+
.value()
624+
.cloned()
625+
.vortex_expect("non-null value row must have binary value");
626+
parquet_variant_to_variant_value(ParquetVariant::try_new(
627+
metadata.as_ref(),
628+
value.as_ref(),
629+
)?)?
630+
} else {
631+
VariantValue::Null
632+
};
633+
634+
Scalar::try_new(DType::Variant, Some(ScalarValue::Variant(value)))
635+
}
636+
}
637+
491638
const PARENT_RULES: ParentRuleSet<ParquetVariantVTable> =
492639
ParentRuleSet::new(&[ParentRuleSet::lift(&ParquetVariantGetRule)]);
493640

vortex-array/src/scalar/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ mod scalar_value;
2424
mod truncation;
2525
mod typed_view;
2626
mod validate;
27+
mod variant_value;
2728

2829
pub use scalar_value::*;
2930
pub use truncation::*;
3031
pub use typed_view::*;
32+
pub use variant_value::*;
3133

3234
use crate::dtype::DType;
3335

0 commit comments

Comments
 (0)