Skip to content

Commit 325d779

Browse files
committed
feat[vortex-array]: support executing UUID to arrow
Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent 38ab5af commit 325d779

File tree

7 files changed

+230
-35
lines changed

7 files changed

+230
-35
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ arrow-cast = "58"
9797
arrow-data = "58"
9898
arrow-ipc = "58"
9999
arrow-ord = "58"
100-
arrow-schema = "58"
100+
arrow-schema = { version = "58", features = ["canonical_extension_types"] }
101101
arrow-select = "58"
102102
arrow-string = "58"
103103
async-fs = "2.2.0"

vortex-array/public-api.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14858,6 +14858,14 @@ pub mod vortex_array::extension::uuid
1485814858

1485914859
pub struct vortex_array::extension::uuid::Uuid
1486014860

14861+
impl vortex_array::extension::uuid::Uuid
14862+
14863+
pub fn vortex_array::extension::uuid::Uuid::default(nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::extension::ExtDType<Self>
14864+
14865+
pub fn vortex_array::extension::uuid::Uuid::new(metadata: vortex_array::extension::uuid::UuidMetadata, nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::extension::ExtDType<Self>
14866+
14867+
pub fn vortex_array::extension::uuid::Uuid::storage_dtype(nullability: vortex_array::dtype::Nullability) -> vortex_array::dtype::DType
14868+
1486114869
impl core::clone::Clone for vortex_array::extension::uuid::Uuid
1486214870

1486314871
pub fn vortex_array::extension::uuid::Uuid::clone(&self) -> vortex_array::extension::uuid::Uuid

vortex-array/src/arrow/executor/fixed_size_binary.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::ArrayRef as ArrowArrayRef;
7+
use arrow_array::FixedSizeBinaryArray;
8+
use vortex_error::VortexResult;
9+
use vortex_error::vortex_bail;
10+
11+
use crate::ArrayRef;
12+
use crate::ExecutionCtx;
13+
use crate::arrays::ExtensionArray;
14+
use crate::arrays::FixedSizeListArray;
15+
use crate::arrays::PrimitiveArray;
16+
use crate::arrow::executor::validity::to_arrow_null_buffer;
17+
use crate::dtype::DType;
18+
use crate::dtype::PType;
19+
use crate::vtable::ValidityHelper;
20+
21+
/// Convert a Vortex extension array (e.g. UUID) to an Arrow `FixedSizeBinaryArray`.
22+
///
23+
/// The array must be an extension type whose storage is `FixedSizeList(Primitive(U8), size)`.
24+
pub(super) fn to_arrow_fixed_size_binary(
25+
array: ArrayRef,
26+
size: i32,
27+
ctx: &mut ExecutionCtx,
28+
) -> VortexResult<ArrowArrayRef> {
29+
let Some(ext) = array.dtype().as_extension_opt() else {
30+
vortex_bail!(
31+
"FixedSizeBinary conversion requires an extension dtype, got {}",
32+
array.dtype()
33+
);
34+
};
35+
36+
match ext.storage_dtype() {
37+
DType::FixedSizeList(elem, list_size, _)
38+
if *list_size == size as u32
39+
&& matches!(elem.as_ref(), DType::Primitive(PType::U8, _)) => {}
40+
other => {
41+
vortex_bail!(
42+
"FixedSizeBinary({size}) conversion requires FixedSizeList(U8, {size}) storage, got {other}"
43+
);
44+
}
45+
}
46+
47+
let ext_array = array.execute::<ExtensionArray>(ctx)?;
48+
let fsl = ext_array
49+
.storage_array()
50+
.clone()
51+
.execute::<FixedSizeListArray>(ctx)?;
52+
let elements = fsl.elements().clone().execute::<PrimitiveArray>(ctx)?;
53+
let values = elements.into_buffer::<u8>().into_arrow_buffer();
54+
let null_buffer = to_arrow_null_buffer(fsl.validity().clone(), fsl.len(), ctx)?;
55+
56+
Ok(Arc::new(FixedSizeBinaryArray::new(
57+
size,
58+
values,
59+
null_buffer,
60+
)))
61+
}
62+
63+
#[cfg(test)]
mod tests {
    use arrow_array::FixedSizeBinaryArray;
    use arrow_schema::DataType;
    use vortex_buffer::BitBuffer;
    use vortex_buffer::Buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::ExtensionArray;
    use crate::arrays::FixedSizeListArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrow::ArrowArrayExecutor;
    use crate::dtype::Nullability;
    use crate::extension::uuid::Uuid;
    use crate::extension::uuid::vtable::UUID_BYTE_LEN;
    use crate::validity::Validity;

    /// A nullable UUID extension array with rows `[valid, null, valid]` must execute to the
    /// equivalent Arrow `FixedSizeBinary` array.
    #[expect(
        clippy::cast_possible_truncation,
        reason = "UUID_BYTE_LEN always fits u32/i32"
    )]
    #[test]
    fn test_uuid_to_fixed_size_binary() {
        let first = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
        let second = uuid::Uuid::parse_str("f47ac10b-58cc-4372-a567-0e02b2c3d479").unwrap();

        // Three 16-byte slots back to back; the middle (null) slot is zero-filled.
        let null_slot = [0u8; 16];
        let bytes: Vec<u8> =
            [first.as_bytes().as_slice(), &null_slot, second.as_bytes()].concat();

        let byte_elements =
            PrimitiveArray::new(Buffer::from(bytes), Validity::NonNullable).into_array();
        let row_validity = Validity::from(BitBuffer::from_iter([true, false, true]));
        let storage =
            FixedSizeListArray::try_new(byte_elements, UUID_BYTE_LEN as u32, row_validity, 3)
                .unwrap()
                .into_array();
        let uuid_array =
            ExtensionArray::new(Uuid::default(Nullability::Nullable).erased(), storage);

        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let target = DataType::FixedSizeBinary(UUID_BYTE_LEN as i32);
        let arrow = uuid_array
            .into_array()
            .execute_arrow(Some(&target), &mut ctx)
            .unwrap();

        let rows = [
            Some(first.as_bytes().as_slice()),
            None,
            Some(second.as_bytes()),
        ];
        let expected = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
            rows.into_iter(),
            UUID_BYTE_LEN as i32,
        )
        .unwrap();
        assert_eq!(arrow.as_ref(), &expected as &dyn arrow_array::Array);
    }
}

vortex-array/src/arrow/executor/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod byte;
66
pub mod byte_view;
77
mod decimal;
88
mod dictionary;
9+
mod fixed_size_binary;
910
mod fixed_size_list;
1011
mod list;
1112
mod list_view;
@@ -38,6 +39,7 @@ use crate::arrow::executor::byte::to_arrow_byte_array;
3839
use crate::arrow::executor::byte_view::to_arrow_byte_view;
3940
use crate::arrow::executor::decimal::to_arrow_decimal;
4041
use crate::arrow::executor::dictionary::to_arrow_dictionary;
42+
use crate::arrow::executor::fixed_size_binary::to_arrow_fixed_size_binary;
4143
use crate::arrow::executor::fixed_size_list::to_arrow_fixed_list;
4244
use crate::arrow::executor::list::to_arrow_list;
4345
use crate::arrow::executor::list_view::to_arrow_list_view;
@@ -156,8 +158,8 @@ impl ArrowArrayExecutor for ArrayRef {
156158
DataType::RunEndEncoded(ends_type, values_type) => {
157159
to_arrow_run_end(self, ends_type.data_type(), values_type, ctx)
158160
}
159-
DataType::FixedSizeBinary(_)
160-
| DataType::Map(..)
161+
DataType::FixedSizeBinary(size) => to_arrow_fixed_size_binary(self, *size, ctx),
162+
DataType::Map(..)
161163
| DataType::Duration(_)
162164
| DataType::Interval(_)
163165
| DataType::Union(..) => {

vortex-array/src/dtype/arrow.rs

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use arrow_schema::Schema;
2323
use arrow_schema::SchemaBuilder;
2424
use arrow_schema::SchemaRef;
2525
use arrow_schema::TimeUnit as ArrowTimeUnit;
26+
use arrow_schema::extension::ExtensionType as _;
2627
use vortex_error::VortexError;
2728
use vortex_error::VortexExpect;
2829
use vortex_error::VortexResult;
@@ -42,6 +43,8 @@ use crate::extension::datetime::TemporalMetadata;
4243
use crate::extension::datetime::Time;
4344
use crate::extension::datetime::TimeUnit;
4445
use crate::extension::datetime::Timestamp;
46+
use crate::extension::uuid::Uuid;
47+
use crate::extension::uuid::vtable::UUID_BYTE_LEN;
4548

4649
/// Trait for converting Arrow types to Vortex types.
4750
pub trait FromArrowType<T>: Sized {
@@ -210,15 +213,22 @@ impl FromArrowType<(&DataType, Nullability)> for DType {
210213

211214
impl FromArrowType<&Field> for DType {
212215
fn from_arrow(field: &Field) -> Self {
216+
let nullability = Nullability::from(field.is_nullable());
217+
213218
if field
214219
.metadata()
215220
.get("ARROW:extension:name")
216221
.map(|s| s.as_str())
217222
== Some("arrow.parquet.variant")
218223
{
219-
return DType::Variant(field.is_nullable().into());
224+
return DType::Variant(nullability);
225+
}
226+
227+
if field.extension_type_name() == Some(arrow_schema::extension::Uuid::NAME) {
228+
return DType::Extension(Uuid::default(nullability).erased());
220229
}
221-
Self::from_arrow((field.data_type(), field.is_nullable().into()))
230+
231+
Self::from_arrow((field.data_type(), nullability))
222232
}
223233
}
224234

@@ -245,11 +255,17 @@ impl DType {
245255
.into(),
246256
)
247257
} else {
248-
Field::new(
258+
let mut field = Field::new(
249259
field_name.as_ref(),
250260
field_dtype.to_arrow_dtype()?,
251261
field_dtype.is_nullable(),
252-
)
262+
);
263+
if let DType::Extension(ext) = field_dtype
264+
&& ext.is::<Uuid>()
265+
{
266+
field = field.with_extension_type(arrow_schema::extension::Uuid);
267+
}
268+
field
253269
};
254270
builder.push(field);
255271
}
@@ -349,6 +365,14 @@ impl DType {
349365
});
350366
};
351367

368+
if ext_dtype.is::<Uuid>() {
369+
#[expect(
370+
clippy::cast_possible_truncation,
371+
reason = "UUID_BYTE_LEN always fits i32"
372+
)]
373+
return Ok(DataType::FixedSizeBinary(UUID_BYTE_LEN as i32));
374+
}
375+
352376
vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id())
353377
}
354378
})
@@ -561,4 +585,25 @@ mod test {
561585

562586
assert_eq!(original_dtype, roundtripped_dtype);
563587
}
588+
589+
#[test]
fn test_uuid_schema_roundtrip() {
    // A non-nullable struct holding a single nullable UUID column named "id".
    let uuid_dtype = DType::Extension(Uuid::default(Nullability::Nullable).erased());
    let original = DType::struct_([("id", uuid_dtype)], Nullability::NonNullable);

    let schema = original.to_arrow_schema().unwrap();

    // The Arrow field must carry both the UUID storage type and the extension name.
    let id_field = schema.field(0);
    assert_eq!(id_field.data_type(), &DataType::FixedSizeBinary(16));
    assert_eq!(
        id_field.extension_type_name(),
        Some(arrow_schema::extension::Uuid::NAME)
    );

    // Converting the schema back must reproduce the original dtype exactly.
    let roundtripped = DType::from_arrow(&schema);
    assert_eq!(roundtripped, original);
}
564609
}

vortex-array/src/extension/uuid/mod.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,42 @@ pub use metadata::UuidMetadata;
1515

1616
pub(crate) mod vtable;
1717

18+
use std::sync::Arc;
19+
20+
use vortex_error::VortexExpect;
21+
22+
use crate::dtype::DType;
23+
use crate::dtype::Nullability;
24+
use crate::dtype::PType;
25+
use crate::dtype::extension::ExtDType;
26+
1827
/// The VTable for the UUID extension type.
1928
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
2029
pub struct Uuid;
30+
31+
#[expect(
32+
clippy::cast_possible_truncation,
33+
reason = "UUID_BYTE_LEN always fits u32"
34+
)]
35+
#[allow(clippy::same_name_method)]
36+
impl Uuid {
37+
/// Returns the canonical UUID storage dtype: `FixedSizeList(Primitive(U8, NonNullable), 16)`.
38+
pub fn storage_dtype(nullability: Nullability) -> DType {
39+
DType::FixedSizeList(
40+
Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
41+
vtable::UUID_BYTE_LEN as u32,
42+
nullability,
43+
)
44+
}
45+
46+
/// Creates a new UUID extension dtype with the given metadata and nullability.
47+
pub fn new(metadata: UuidMetadata, nullability: Nullability) -> ExtDType<Self> {
48+
ExtDType::try_new(metadata, Self::storage_dtype(nullability))
49+
.vortex_expect("valid UUID storage dtype")
50+
}
51+
52+
/// Creates a new UUID extension dtype with default metadata.
53+
pub fn default(nullability: Nullability) -> ExtDType<Self> {
54+
Self::new(UuidMetadata::default(), nullability)
55+
}
56+
}

0 commit comments

Comments
 (0)