Skip to content

Commit 452a4a3

Browse files
authored
Remove ExtensionData (#7587)
## Summary

This was unfortunately not caught in the big refactor.

## Testing

Existing tests pass (and actually caught a bug in the extension array constructor that just happened to be hidden by `ExtensionData` panicking).

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent f152281 commit 452a4a3

6 files changed

Lines changed: 101 additions & 213 deletions

File tree

vortex-array/public-api.lock

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,7 +2302,7 @@ pub fn vortex_array::arrays::Extension::scalar_at(array: vortex_array::ArrayView
23022302

23032303
impl vortex_array::VTable for vortex_array::arrays::Extension
23042304

2305-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
2305+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
23062306

23072307
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
23082308

@@ -2338,7 +2338,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
23382338

23392339
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
23402340

2341-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2341+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
23422342

23432343
impl vortex_array::ValidityChild<vortex_array::arrays::Extension> for vortex_array::arrays::Extension
23442344

@@ -2368,44 +2368,16 @@ impl vortex_array::scalar_fn::fns::mask::MaskReduce for vortex_array::arrays::Ex
23682368

23692369
pub fn vortex_array::arrays::Extension::mask(array: vortex_array::ArrayView<'_, vortex_array::arrays::Extension>, mask: &vortex_array::ArrayRef) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>
23702370

2371-
pub struct vortex_array::arrays::extension::ExtensionData
2372-
2373-
impl vortex_array::arrays::extension::ExtensionData
2374-
2375-
pub fn vortex_array::arrays::extension::ExtensionData::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef
2376-
2377-
pub fn vortex_array::arrays::extension::ExtensionData::new(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> Self
2378-
2379-
pub unsafe fn vortex_array::arrays::extension::ExtensionData::new_unchecked(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> Self
2380-
2381-
pub fn vortex_array::arrays::extension::ExtensionData::try_new(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<Self>
2382-
2383-
impl core::clone::Clone for vortex_array::arrays::extension::ExtensionData
2384-
2385-
pub fn vortex_array::arrays::extension::ExtensionData::clone(&self) -> vortex_array::arrays::extension::ExtensionData
2386-
2387-
impl core::fmt::Debug for vortex_array::arrays::extension::ExtensionData
2388-
2389-
pub fn vortex_array::arrays::extension::ExtensionData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
2390-
2391-
impl core::fmt::Display for vortex_array::arrays::extension::ExtensionData
2392-
2393-
pub fn vortex_array::arrays::extension::ExtensionData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
2394-
2395-
impl vortex_array::ArrayEq for vortex_array::arrays::extension::ExtensionData
2396-
2397-
pub fn vortex_array::arrays::extension::ExtensionData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool
2398-
2399-
impl vortex_array::ArrayHash for vortex_array::arrays::extension::ExtensionData
2400-
2401-
pub fn vortex_array::arrays::extension::ExtensionData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)
2402-
24032371
pub trait vortex_array::arrays::extension::ExtensionArrayExt: vortex_array::TypedArrayRef<vortex_array::arrays::Extension>
24042372

2373+
pub fn vortex_array::arrays::extension::ExtensionArrayExt::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef
2374+
24052375
pub fn vortex_array::arrays::extension::ExtensionArrayExt::storage_array(&self) -> &vortex_array::ArrayRef
24062376

24072377
impl<T: vortex_array::TypedArrayRef<vortex_array::arrays::Extension>> vortex_array::arrays::extension::ExtensionArrayExt for T
24082378

2379+
pub fn T::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef
2380+
24092381
pub fn T::storage_array(&self) -> &vortex_array::ArrayRef
24102382

24112383
pub type vortex_array::arrays::extension::ExtensionArray = vortex_array::Array<vortex_array::arrays::Extension>
@@ -5558,7 +5530,7 @@ pub fn vortex_array::arrays::Extension::scalar_at(array: vortex_array::ArrayView
55585530

55595531
impl vortex_array::VTable for vortex_array::arrays::Extension
55605532

5561-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
5533+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
55625534

55635535
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
55645536

@@ -5594,7 +5566,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
55945566

55955567
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
55965568

5597-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
5569+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
55985570

55995571
impl vortex_array::ValidityChild<vortex_array::arrays::Extension> for vortex_array::arrays::Extension
56005572

@@ -19464,7 +19436,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array
1946419436

1946519437
impl vortex_array::VTable for vortex_array::arrays::Extension
1946619438

19467-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
19439+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
1946819440

1946919441
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
1947019442

@@ -19500,7 +19472,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
1950019472

1950119473
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
1950219474

19503-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
19475+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
1950419476

1950519477
impl vortex_array::VTable for vortex_array::arrays::Filter
1950619478

@@ -20436,7 +20408,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array
2043620408

2043720409
impl vortex_array::VTable for vortex_array::arrays::Extension
2043820410

20439-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
20411+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
2044020412

2044120413
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
2044220414

@@ -20472,7 +20444,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
2047220444

2047320445
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2047420446

20475-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
20447+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2047620448

2047720449
impl vortex_array::VTable for vortex_array::arrays::Filter
2047820450

@@ -22686,10 +22658,6 @@ impl vortex_array::ArrayEq for vortex_array::arrays::dict::DictData
2268622658

2268722659
pub fn vortex_array::arrays::dict::DictData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool
2268822660

22689-
impl vortex_array::ArrayEq for vortex_array::arrays::extension::ExtensionData
22690-
22691-
pub fn vortex_array::arrays::extension::ExtensionData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool
22692-
2269322661
impl vortex_array::ArrayEq for vortex_array::arrays::filter::FilterData
2269422662

2269522663
pub fn vortex_array::arrays::filter::FilterData::array_eq(&self, other: &Self, precision: vortex_array::Precision) -> bool
@@ -22794,10 +22762,6 @@ impl vortex_array::ArrayHash for vortex_array::arrays::dict::DictData
2279422762

2279522763
pub fn vortex_array::arrays::dict::DictData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)
2279622764

22797-
impl vortex_array::ArrayHash for vortex_array::arrays::extension::ExtensionData
22798-
22799-
pub fn vortex_array::arrays::extension::ExtensionData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)
22800-
2280122765
impl vortex_array::ArrayHash for vortex_array::arrays::filter::FilterData
2280222766

2280322767
pub fn vortex_array::arrays::filter::FilterData::array_hash<H: core::hash::Hasher>(&self, state: &mut H, precision: vortex_array::Precision)
@@ -23132,7 +23096,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array
2313223096

2313323097
impl vortex_array::VTable for vortex_array::arrays::Extension
2313423098

23135-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
23099+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
2313623100

2313723101
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
2313823102

@@ -23168,7 +23132,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
2316823132

2316923133
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2317023134

23171-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
23135+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2317223136

2317323137
impl vortex_array::VTable for vortex_array::arrays::Filter
2317423138

@@ -24352,7 +24316,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array
2435224316

2435324317
impl vortex_array::VTable for vortex_array::arrays::Extension
2435424318

24355-
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
24319+
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData
2435624320

2435724321
pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension
2435824322

@@ -24388,7 +24352,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie
2438824352

2438924353
pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String
2439024354

24391-
pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
24355+
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
2439224356

2439324357
impl vortex_array::VTable for vortex_array::arrays::Filter
2439424358

Lines changed: 22 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use std::fmt::Display;
5-
use std::fmt::Formatter;
6-
74
use vortex_error::VortexExpect;
85
use vortex_error::VortexResult;
6+
use vortex_error::vortex_ensure_eq;
97

108
use crate::ArrayRef;
9+
use crate::EmptyArrayData;
1110
use crate::array::Array;
1211
use crate::array::ArrayParts;
1312
use crate::array::TypedArrayRef;
@@ -22,113 +21,14 @@ pub(super) const STORAGE_SLOT: usize = 0;
2221
pub(super) const NUM_SLOTS: usize = 1;
2322
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["storage"];
2423

25-
/// An extension array that wraps another array with additional type information.
26-
///
27-
/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
28-
/// in future versions. The extension type system is still evolving.
29-
///
30-
/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
31-
/// mechanism for adding semantic meaning to existing array types without requiring
32-
/// changes to the core type system.
33-
///
34-
/// ## Design Philosophy
35-
///
36-
/// Extension arrays serve as a type-safe wrapper that:
37-
/// - Preserves the underlying storage format and operations
38-
/// - Adds semantic type information via `ExtDType`
39-
/// - Enables custom serialization and deserialization logic
40-
/// - Allows domain-specific interpretations of generic data
41-
///
42-
/// ## Storage and Type Relationship
43-
///
44-
/// The extension array maintains a strict contract:
45-
/// - **Storage array**: Contains the actual data in a standard Vortex encoding
46-
/// - **Extension type**: Defines how to interpret the storage data semantically
47-
/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
48-
///
49-
/// ## Use Cases
50-
///
51-
/// Extension arrays are ideal for:
52-
/// - **Custom numeric types**: Units of measurement, currencies
53-
/// - **Temporal types**: Custom date/time formats, time zones, calendars
54-
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
55-
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
56-
///
57-
/// ## Validity and Operations
58-
///
59-
/// Extension arrays delegate validity and most operations to their storage array:
60-
/// - Validity is inherited from the underlying storage
61-
/// - Slicing preserves the extension type
62-
/// - Scalar access wraps storage scalars with extension metadata
63-
#[derive(Clone, Debug)]
64-
pub struct ExtensionData {
65-
/// The storage dtype. This **must** be a [`Extension::DType`] variant.
66-
pub(super) ext_dtype: ExtDTypeRef,
67-
}
68-
69-
impl Display for ExtensionData {
70-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
71-
write!(f, "ext_dtype: {}", self.ext_dtype)
72-
}
73-
}
74-
75-
impl ExtensionData {
76-
/// Constructs a new `ExtensionArray`.
77-
///
78-
/// # Panics
79-
///
80-
/// Panics if the storage array in not compatible with the extension dtype.
81-
pub fn new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
82-
Self::try_new(ext_dtype, storage_dtype).vortex_expect("Failed to create `ExtensionArray`")
83-
}
84-
85-
/// Tries to construct a new `ExtensionArray`.
86-
///
87-
/// # Errors
88-
///
89-
/// Returns an error if the storage array in not compatible with the extension dtype.
90-
pub fn try_new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> VortexResult<Self> {
91-
// TODO(connor): Replace these statements once we add `validate_storage_array`.
92-
// ext_dtype.validate_storage_array(&storage_array)?;
93-
assert_eq!(
94-
ext_dtype.storage_dtype(),
95-
storage_dtype,
96-
"ExtensionArray: storage_dtype must match storage array DType",
97-
);
98-
99-
// SAFETY: we validate that the inputs are valid above.
100-
Ok(unsafe { Self::new_unchecked(ext_dtype, storage_dtype) })
101-
}
102-
103-
/// Creates a new `ExtensionArray`.
104-
///
105-
/// # Safety
106-
///
107-
/// The caller must ensure that the storage array is compatible with the extension dtype. In
108-
/// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been
109-
/// called successfully on this storage array.
110-
pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
111-
// TODO(connor): Replace these statements once we add `validate_storage_array`.
112-
// #[cfg(debug_assertions)]
113-
// ext_dtype
114-
// .validate_storage_array(&storage_array)
115-
// .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
116-
debug_assert_eq!(
117-
ext_dtype.storage_dtype(),
118-
storage_dtype,
119-
"ExtensionArray: storage_dtype must match storage array DType",
120-
);
121-
122-
Self { ext_dtype }
123-
}
124-
125-
/// The extension dtype of this array.
126-
pub fn ext_dtype(&self) -> &ExtDTypeRef {
127-
&self.ext_dtype
24+
pub trait ExtensionArrayExt: TypedArrayRef<Extension> {
25+
fn ext_dtype(&self) -> &ExtDTypeRef {
26+
self.as_ref()
27+
.dtype()
28+
.as_extension_opt()
29+
.vortex_expect("extension array somehow did not have an extension dtype")
12830
}
129-
}
13031

131-
pub trait ExtensionArrayExt: TypedArrayRef<Extension> {
13232
fn storage_array(&self) -> &ArrayRef {
13333
self.as_ref().slots()[STORAGE_SLOT]
13434
.as_ref()
@@ -144,26 +44,24 @@ impl Array<Extension> {
14444
///
14545
/// Panics if the storage array is not compatible with the extension dtype.
14646
pub fn new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
147-
let dtype = DType::Extension(ext_dtype.clone());
148-
let len = storage_array.len();
149-
let data = ExtensionData::new(ext_dtype, storage_array.dtype());
150-
unsafe {
151-
Array::from_parts_unchecked(
152-
ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
153-
)
154-
}
47+
Self::try_new(ext_dtype, storage_array).vortex_expect("Unable to create `ExtensionArray`")
15548
}
15649

15750
/// Tries to construct a new `ExtensionArray`.
15851
pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
159-
let dtype = DType::Extension(ext_dtype.clone());
52+
vortex_ensure_eq!(
53+
ext_dtype.storage_dtype(),
54+
storage_array.dtype(),
55+
"Tried to create an `ExtensionArray` with an incompatible storage array"
56+
);
57+
58+
let dtype = DType::Extension(ext_dtype);
16059
let len = storage_array.len();
161-
let data = ExtensionData::try_new(ext_dtype, storage_array.dtype())?;
162-
Ok(unsafe {
163-
Array::from_parts_unchecked(
164-
ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
165-
)
166-
})
60+
61+
let parts = ArrayParts::new(Extension, dtype, len, EmptyArrayData)
62+
.with_slots(vec![Some(storage_array)]);
63+
64+
Ok(unsafe { Array::from_parts_unchecked(parts) })
16765
}
16866

16967
/// Creates a new [`ExtensionArray`](crate::arrays::ExtensionArray) from a vtable, metadata, and
@@ -176,6 +74,7 @@ impl Array<Extension> {
17674
let ext_dtype =
17775
ExtDType::<V>::try_with_vtable(vtable, metadata, storage_array.dtype().clone())?
17876
.erased();
77+
17978
Self::try_new(ext_dtype, storage_array)
18079
}
18180
}

vortex-array/src/arrays/extension/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
mod array;
55
pub use array::ExtensionArrayExt;
6-
pub use array::ExtensionData;
76
pub use vtable::ExtensionArray;
87

98
pub(crate) mod compute;

0 commit comments

Comments
 (0)