Skip to content

Commit b8b056d

Browse files
authored
Variant DType (#6912)
This PR adds initial support for the Variant DType, as described in the [Variant RFC](https://github.com/vortex-data/rfcs/blob/develop/accepted/0015-variant-type.md). It provides most of the required boilerplate and the initial structure for this new dtype, and consists of the following changes: 1. A new dtype. 2. Serialization for the dtype. 3. A Variant scalar and a new scalar value. 4. Many code paths that do not support Variant yet and report an error accordingly. --------- Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 7cf49a1 commit b8b056d

52 files changed

Lines changed: 779 additions & 41 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

encodings/sparse/src/canonical.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use vortex_buffer::buffer_mut;
5050
use vortex_error::VortexError;
5151
use vortex_error::VortexExpect;
5252
use vortex_error::VortexResult;
53+
use vortex_error::vortex_bail;
5354
use vortex_error::vortex_panic;
5455

5556
use crate::ConstantArray;
@@ -117,6 +118,7 @@ pub(super) fn execute_sparse(
117118
execute_sparse_fixed_size_list(array, *nullability, ctx)?
118119
}
119120
DType::Extension(_ext_dtype) => todo!(),
121+
DType::Variant(_) => vortex_bail!("Sparse canonicalization does not support Variant"),
120122
})
121123
}
122124

fuzz/src/array/compare.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ pub fn compare_canonical_array(
141141
)
142142
.into_array()
143143
}
144-
d @ (DType::Null | DType::Extension(_)) => {
144+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
145145
unreachable!("DType {d} not supported for fuzzing")
146146
}
147147
}

fuzz/src/array/filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ pub fn filter_canonical_array(array: &ArrayRef, filter: &[bool]) -> VortexResult
115115
}
116116
take_canonical_array_non_nullable_indices(array, indices.as_slice())
117117
}
118-
d @ (DType::Null | DType::Extension(_)) => {
118+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
119119
unreachable!("DType {d} not supported for fuzzing")
120120
}
121121
}

fuzz/src/array/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,8 @@ fn actions_for_dtype(dtype: &DType) -> HashSet<ActionType> {
508508
// Extension types delegate to storage dtype, support most operations
509509
ActionType::iter().collect()
510510
}
511+
// Variant is not supported at all yet
512+
DType::Variant(_) => unreachable!("Variant dtype shouldn't be fuzzed"),
511513
}
512514
}
513515

fuzz/src/array/search_sorted.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ pub fn search_sorted_canonical_array(
131131
.collect::<VortexResult<Vec<_>>>()?;
132132
scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side)
133133
}
134-
d @ (DType::Null | DType::Extension(_)) => {
134+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
135135
unreachable!("DType {d} not supported for fuzzing")
136136
}
137137
}

fuzz/src/array/slice.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ pub fn slice_canonical_array(
114114
.into_array(),
115115
)
116116
}
117-
d @ (DType::Null | DType::Extension(_)) => {
117+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
118118
unreachable!("DType {d} not supported for fuzzing")
119119
}
120120
}

fuzz/src/array/sort.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ pub fn sort_canonical_array(array: &ArrayRef) -> VortexResult<ArrayRef> {
8181
});
8282
take_canonical_array_non_nullable_indices(array, &sort_indices)
8383
}
84-
d @ (DType::Null | DType::Extension(_)) => {
84+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
8585
unreachable!("DType {d} not supported for fuzzing")
8686
}
8787
}

fuzz/src/array/take.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ pub fn take_canonical_array(array: &ArrayRef, indices: &[Option<usize>]) -> Vort
138138
}
139139
Ok(builder.finish())
140140
}
141-
d @ (DType::Null | DType::Extension(_)) => {
141+
d @ (DType::Null | DType::Extension(_) | DType::Variant(_)) => {
142142
unreachable!("DType {d} not supported for fuzzing")
143143
}
144144
}

vortex-array/public-api.lock

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10374,6 +10374,8 @@ pub vortex_array::dtype::DType::Struct(vortex_array::dtype::StructFields, vortex
1037410374

1037510375
pub vortex_array::dtype::DType::Utf8(vortex_array::dtype::Nullability)
1037610376

10377+
pub vortex_array::dtype::DType::Variant(vortex_array::dtype::Nullability)
10378+
1037710379
impl vortex_array::dtype::DType
1037810380

1037910381
pub const vortex_array::dtype::DType::BYTES: Self
@@ -10450,6 +10452,8 @@ pub fn vortex_array::dtype::DType::is_unsigned_int(&self) -> bool
1045010452

1045110453
pub fn vortex_array::dtype::DType::is_utf8(&self) -> bool
1045210454

10455+
pub fn vortex_array::dtype::DType::is_variant(&self) -> bool
10456+
1045310457
pub fn vortex_array::dtype::DType::list(dtype: impl core::convert::Into<vortex_array::dtype::DType>, nullability: vortex_array::dtype::Nullability) -> Self
1045410458

1045510459
pub fn vortex_array::dtype::DType::nullability(&self) -> vortex_array::dtype::Nullability
@@ -15354,6 +15358,8 @@ pub vortex_array::scalar::ScalarValue::Primitive(vortex_array::scalar::PValue)
1535415358

1535515359
pub vortex_array::scalar::ScalarValue::Utf8(vortex_buffer::string::BufferString)
1535615360

15361+
pub vortex_array::scalar::ScalarValue::Variant(alloc::boxed::Box<vortex_array::scalar::Scalar>)
15362+
1535715363
impl vortex_array::scalar::ScalarValue
1535815364

1535915365
pub fn vortex_array::scalar::ScalarValue::as_binary(&self) -> &vortex_buffer::ByteBuffer
@@ -15368,6 +15374,8 @@ pub fn vortex_array::scalar::ScalarValue::as_primitive(&self) -> &vortex_array::
1536815374

1536915375
pub fn vortex_array::scalar::ScalarValue::as_utf8(&self) -> &vortex_buffer::string::BufferString
1537015376

15377+
pub fn vortex_array::scalar::ScalarValue::as_variant(&self) -> &vortex_array::scalar::Scalar
15378+
1537115379
pub fn vortex_array::scalar::ScalarValue::into_binary(self) -> vortex_buffer::ByteBuffer
1537215380

1537315381
pub fn vortex_array::scalar::ScalarValue::into_bool(self) -> bool
@@ -15380,6 +15388,8 @@ pub fn vortex_array::scalar::ScalarValue::into_primitive(self) -> vortex_array::
1538015388

1538115389
pub fn vortex_array::scalar::ScalarValue::into_utf8(self) -> vortex_buffer::string::BufferString
1538215390

15391+
pub fn vortex_array::scalar::ScalarValue::into_variant(self) -> vortex_array::scalar::Scalar
15392+
1538315393
impl vortex_array::scalar::ScalarValue
1538415394

1538515395
pub fn vortex_array::scalar::ScalarValue::from_proto(value: &vortex_proto::scalar::ScalarValue, dtype: &vortex_array::dtype::DType, session: &vortex_session::VortexSession) -> vortex_error::VortexResult<core::option::Option<Self>>
@@ -16054,6 +16064,10 @@ pub fn vortex_array::scalar::Scalar::as_utf8(&self) -> vortex_array::scalar::Utf
1605416064

1605516065
pub fn vortex_array::scalar::Scalar::as_utf8_opt(&self) -> core::option::Option<vortex_array::scalar::Utf8Scalar<'_>>
1605616066

16067+
pub fn vortex_array::scalar::Scalar::as_variant(&self) -> vortex_array::scalar::VariantScalar<'_>
16068+
16069+
pub fn vortex_array::scalar::Scalar::as_variant_opt(&self) -> core::option::Option<vortex_array::scalar::VariantScalar<'_>>
16070+
1605716071
impl vortex_array::scalar::Scalar
1605816072

1605916073
pub fn vortex_array::scalar::Scalar::binary(buffer: impl core::convert::Into<vortex_buffer::ByteBuffer>, nullability: vortex_array::dtype::Nullability) -> Self
@@ -16080,6 +16094,8 @@ pub fn vortex_array::scalar::Scalar::try_utf8<B>(str: B, nullability: vortex_arr
1608016094

1608116095
pub fn vortex_array::scalar::Scalar::utf8<B>(str: B, nullability: vortex_array::dtype::Nullability) -> Self where B: core::convert::Into<vortex_buffer::string::BufferString>
1608216096

16097+
pub fn vortex_array::scalar::Scalar::variant(value: vortex_array::scalar::Scalar) -> Self
16098+
1608316099
impl vortex_array::scalar::Scalar
1608416100

1608516101
pub fn vortex_array::scalar::Scalar::cast(&self, target_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
@@ -16744,6 +16760,34 @@ impl<'a> core::hash::Hash for vortex_array::scalar::Utf8Scalar<'a>
1674416760

1674516761
pub fn vortex_array::scalar::Utf8Scalar<'a>::hash<__H: core::hash::Hasher>(&self, state: &mut __H)
1674616762

16763+
pub struct vortex_array::scalar::VariantScalar<'a>
16764+
16765+
impl<'a> vortex_array::scalar::VariantScalar<'a>
16766+
16767+
pub fn vortex_array::scalar::VariantScalar<'a>::dtype(&self) -> &'a vortex_array::dtype::DType
16768+
16769+
pub fn vortex_array::scalar::VariantScalar<'a>::is_null(&self) -> bool
16770+
16771+
pub fn vortex_array::scalar::VariantScalar<'a>::is_variant_null(&self) -> core::option::Option<bool>
16772+
16773+
pub fn vortex_array::scalar::VariantScalar<'a>::is_zero(&self) -> core::option::Option<bool>
16774+
16775+
pub fn vortex_array::scalar::VariantScalar<'a>::value(&self) -> core::option::Option<&'a vortex_array::scalar::Scalar>
16776+
16777+
impl core::fmt::Display for vortex_array::scalar::VariantScalar<'_>
16778+
16779+
pub fn vortex_array::scalar::VariantScalar<'_>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
16780+
16781+
impl<'a> core::clone::Clone for vortex_array::scalar::VariantScalar<'a>
16782+
16783+
pub fn vortex_array::scalar::VariantScalar<'a>::clone(&self) -> vortex_array::scalar::VariantScalar<'a>
16784+
16785+
impl<'a> core::fmt::Debug for vortex_array::scalar::VariantScalar<'a>
16786+
16787+
pub fn vortex_array::scalar::VariantScalar<'a>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
16788+
16789+
impl<'a> core::marker::Copy for vortex_array::scalar::VariantScalar<'a>
16790+
1674716791
pub trait vortex_array::scalar::ScalarTruncation: core::marker::Send + core::marker::Sized
1674816792

1674916793
pub fn vortex_array::scalar::ScalarTruncation::from_scalar(value: vortex_array::scalar::Scalar) -> vortex_error::VortexResult<core::option::Option<Self>>

vortex-array/src/arrays/arbitrary.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,10 @@ fn random_array_chunk(
162162
random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
163163
}
164164
DType::Extension(..) => {
165-
todo!("Extension arrays are not implemented")
165+
unimplemented!("Extension arrays are not implemented")
166+
}
167+
DType::Variant(_) => {
168+
unimplemented!("Variant arrays are not implemented")
166169
}
167170
}
168171
}

0 commit comments

Comments
 (0)