Skip to content

Commit ea7804c

Browse files
authored
Extension Scalar VTable (#6680)
## Summary

Tracking Issue: #6618

Adds methods related to extension scalars to the `ExtVTable`. This in turn required updating the existing implementors of `ExtVTable` (only the datetime types and the mock extension types used in tests).

I think this is a nice step before the much more intrusive change to `ScalarValue` where we add the `Extension` variant.

## API Changes

This is a breaking change to `ExtVTable`.

## Testing

TODO

<!-- Please describe how this change was tested. Here are some common categories for testing in Vortex: 1. Verifying existing behavior is maintained. 2. Verifying new behavior and functionality works correctly. 3. Serialization compatibility (backwards and forwards) should be maintained or explicitly broken. -->

---------

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent d59d334 commit ea7804c

21 files changed

Lines changed: 1264 additions & 61 deletions

File tree

vortex-array/public-api.lock

Lines changed: 156 additions & 14 deletions
Large diffs are not rendered by default.

vortex-array/src/arrays/extension/compute/rules.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,23 +75,42 @@ mod tests {
7575
use crate::extension::EmptyMetadata;
7676
use crate::optimizer::ArrayOptimizer;
7777
use crate::scalar::Scalar;
78+
use crate::scalar::ScalarValue;
7879

7980
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
8081
struct TestExt;
8182
impl ExtVTable for TestExt {
8283
type Metadata = EmptyMetadata;
84+
type NativeValue<'a> = &'a str;
8385

8486
fn id(&self) -> ExtId {
8587
ExtId::new_ref("test_ext")
8688
}
8789

90+
fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
91+
Ok(vec![])
92+
}
93+
94+
fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult<Self::Metadata> {
95+
Ok(EmptyMetadata)
96+
}
97+
8898
fn validate_dtype(
8999
&self,
90100
_options: &Self::Metadata,
91101
_storage_dtype: &DType,
92102
) -> VortexResult<()> {
93103
Ok(())
94104
}
105+
106+
fn unpack_native<'a>(
107+
&self,
108+
_metadata: &'a Self::Metadata,
109+
_storage_dtype: &'a DType,
110+
_storage_value: &'a ScalarValue,
111+
) -> VortexResult<Self::NativeValue<'a>> {
112+
Ok("")
113+
}
95114
}
96115

97116
fn test_ext_dtype() -> ExtDTypeRef {
@@ -164,18 +183,36 @@ mod tests {
164183
struct TestExt2;
165184
impl ExtVTable for TestExt2 {
166185
type Metadata = EmptyMetadata;
186+
type NativeValue<'a> = &'a str;
167187

168188
fn id(&self) -> ExtId {
169189
ExtId::new_ref("test_ext_2")
170190
}
171191

192+
fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
193+
Ok(vec![])
194+
}
195+
196+
fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult<Self::Metadata> {
197+
Ok(EmptyMetadata)
198+
}
199+
172200
fn validate_dtype(
173201
&self,
174202
_options: &Self::Metadata,
175203
_storage_dtype: &DType,
176204
) -> VortexResult<()> {
177205
Ok(())
178206
}
207+
208+
fn unpack_native<'a>(
209+
&self,
210+
_metadata: &'a Self::Metadata,
211+
_storage_dtype: &'a DType,
212+
_storage_value: &'a ScalarValue,
213+
) -> VortexResult<Self::NativeValue<'a>> {
214+
Ok("")
215+
}
179216
}
180217

181218
let ext_dtype1 = ExtDType::<TestExt>::try_new(

vortex-array/src/dtype/extension/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
//! Extension DTypes, and interfaces for working with extension types (dtypes, scalars, and arrays).
4+
//! Extension DTypes, and interfaces for working with extension types.
55
//!
66
//! ## File layout convention
77
//!

vortex-array/src/dtype/extension/plugin.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl<V: ExtVTable> ExtDTypePlugin for V {
3232
}
3333

3434
fn deserialize(&self, data: &[u8], storage_dtype: DType) -> VortexResult<ExtDTypeRef> {
35-
let metadata = ExtVTable::deserialize(self, data)?;
35+
let metadata = ExtVTable::deserialize_metadata(self, data)?;
3636
Ok(ExtDType::try_with_vtable(self.clone(), metadata, storage_dtype)?.erased())
3737
}
3838
}

vortex-array/src/dtype/extension/typed.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ impl<V: ExtVTable> DynExtDType for ExtDTypeInner<V> {
168168
}
169169

170170
fn metadata_serialize(&self) -> VortexResult<Vec<u8>> {
171-
V::serialize(&self.vtable, &self.metadata)
171+
V::serialize_metadata(&self.vtable, &self.metadata)
172172
}
173173

174174
fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef {

vortex-array/src/dtype/extension/vtable.rs

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,70 @@ use std::fmt::Display;
66
use std::hash::Hash;
77

88
use vortex_error::VortexResult;
9-
use vortex_error::vortex_bail;
109

1110
use crate::dtype::DType;
1211
use crate::dtype::extension::ExtId;
12+
use crate::scalar::ScalarValue;
1313

1414
/// The public API for defining new extension types.
1515
///
1616
/// This is the non-object-safe trait that plugin authors implement to define a new extension
1717
/// type. It specifies the type's identity, metadata, serialization, and validation.
1818
pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash {
19-
/// Associated type containing the deserialized metadata for this extension type
19+
/// Associated type containing the deserialized metadata for this extension type.
2020
type Metadata: 'static + Send + Sync + Clone + Debug + Display + Eq + Hash;
2121

22+
/// A native Rust value that represents a scalar of the extension type.
23+
///
24+
/// The value only represents non-null values. We denote nullable values as `Option<NativeValue>`.
25+
type NativeValue<'a>: Display;
26+
2227
/// Returns the ID for this extension type.
2328
fn id(&self) -> ExtId;
2429

30+
// Methods related to the extension `DType`.
31+
2532
/// Serialize the metadata into a byte vector.
26-
fn serialize(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
27-
_ = metadata;
28-
vortex_bail!(
29-
"Serialization not implemented for extension type {}",
30-
self.id()
31-
);
32-
}
33+
fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>>;
3334

3435
/// Deserialize the metadata from a byte slice.
35-
fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
36-
_ = metadata;
37-
vortex_bail!(
38-
"Deserialization not implemented for extension type {}",
39-
self.id()
40-
);
41-
}
36+
fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata>;
4237

4338
/// Validate that the given storage type is compatible with this extension type.
4439
fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()>;
40+
41+
// Methods related to the extension scalar values.
42+
43+
/// Validate the given storage value is compatible with the extension type.
44+
///
45+
/// By default, this calls [`unpack_native()`](ExtVTable::unpack_native) and discards the result.
46+
///
47+
/// # Errors
48+
///
49+
/// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type.
50+
fn validate_scalar_value(
51+
&self,
52+
metadata: &Self::Metadata,
53+
storage_dtype: &DType,
54+
storage_value: &ScalarValue,
55+
) -> VortexResult<()> {
56+
self.unpack_native(metadata, storage_dtype, storage_value)
57+
.map(|_| ())
58+
}
59+
60+
/// Validate and unpack a native value from the storage [`ScalarValue`].
61+
///
62+
/// Note that [`ExtVTable::validate_dtype()`] is always called first to validate the storage
63+
/// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the
64+
/// storage value is compatible with the storage dtype on construction.
65+
///
66+
/// # Errors
67+
///
68+
/// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type.
69+
fn unpack_native<'a>(
70+
&self,
71+
metadata: &'a Self::Metadata,
72+
storage_dtype: &'a DType,
73+
storage_value: &'a ScalarValue,
74+
) -> VortexResult<Self::NativeValue<'a>>;
4575
}

vortex-array/src/extension/datetime/date.rs

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::fmt;
5+
6+
use jiff::Span;
47
use vortex_error::VortexExpect;
58
use vortex_error::VortexResult;
9+
use vortex_error::vortex_bail;
610
use vortex_error::vortex_ensure;
711
use vortex_error::vortex_err;
812

@@ -13,11 +17,25 @@ use crate::dtype::extension::ExtDType;
1317
use crate::dtype::extension::ExtId;
1418
use crate::dtype::extension::ExtVTable;
1519
use crate::extension::datetime::TimeUnit;
20+
use crate::scalar::ScalarValue;
21+
22+
/// The Unix epoch date (1970-01-01).
23+
const EPOCH: jiff::civil::Date = jiff::civil::Date::constant(1970, 1, 1);
1624

1725
/// Date DType.
1826
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
1927
pub struct Date;
2028

29+
fn date_ptype(time_unit: &TimeUnit) -> Option<PType> {
30+
match time_unit {
31+
TimeUnit::Nanoseconds => None,
32+
TimeUnit::Microseconds => None,
33+
TimeUnit::Milliseconds => Some(PType::I64),
34+
TimeUnit::Seconds => None,
35+
TimeUnit::Days => Some(PType::I32),
36+
}
37+
}
38+
2139
impl Date {
2240
/// Creates a new Date extension dtype with the given time unit and nullability.
2341
///
@@ -38,18 +56,37 @@ impl Date {
3856
}
3957
}
4058

59+
/// Unpacked value of a [`Date`] extension scalar.
60+
pub enum DateValue {
61+
/// Days since the Unix epoch.
62+
Days(i32),
63+
/// Milliseconds since the Unix epoch.
64+
Milliseconds(i64),
65+
}
66+
67+
impl fmt::Display for DateValue {
68+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69+
let date = match self {
70+
DateValue::Days(days) => EPOCH + Span::new().days(*days),
71+
DateValue::Milliseconds(ms) => EPOCH + Span::new().milliseconds(*ms),
72+
};
73+
write!(f, "{}", date)
74+
}
75+
}
76+
4177
impl ExtVTable for Date {
4278
type Metadata = TimeUnit;
79+
type NativeValue<'a> = DateValue;
4380

4481
fn id(&self) -> ExtId {
4582
ExtId::new_ref("vortex.date")
4683
}
4784

48-
fn serialize(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
85+
fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
4986
Ok(vec![u8::from(*metadata)])
5087
}
5188

52-
fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
89+
fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
5390
let tag = metadata[0];
5491
TimeUnit::try_from(tag)
5592
}
@@ -67,14 +104,19 @@ impl ExtVTable for Date {
67104

68105
Ok(())
69106
}
70-
}
71107

72-
fn date_ptype(time_unit: &TimeUnit) -> Option<PType> {
73-
match time_unit {
74-
TimeUnit::Nanoseconds => None,
75-
TimeUnit::Microseconds => None,
76-
TimeUnit::Milliseconds => Some(PType::I64),
77-
TimeUnit::Seconds => None,
78-
TimeUnit::Days => Some(PType::I32),
108+
fn unpack_native(
109+
&self,
110+
metadata: &Self::Metadata,
111+
_storage_dtype: &DType,
112+
storage_value: &ScalarValue,
113+
) -> VortexResult<Self::NativeValue<'_>> {
114+
match metadata {
115+
TimeUnit::Milliseconds => Ok(DateValue::Milliseconds(
116+
storage_value.as_primitive().cast::<i64>()?,
117+
)),
118+
TimeUnit::Days => Ok(DateValue::Days(storage_value.as_primitive().cast::<i32>()?)),
119+
_ => vortex_bail!("Date type does not support time unit {}", metadata),
120+
}
79121
}
80122
}

0 commit comments

Comments
 (0)