Skip to content

Commit 3d23b9f

Browse files
committed
Use canonical extension types from arrow
1 parent 6d8774a commit 3d23b9f

4 files changed

Lines changed: 57 additions & 38 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ apache-avro = { version = "0.20", default-features = false }
9494
arrow = { version = "57.0.0", features = [
9595
"prettyprint",
9696
"chrono-tz",
97+
"canonical_extension_types"
9798
] }
9899
arrow-buffer = { version = "57.0.0", default-features = false }
99100
arrow-flight = { version = "57.0.0", features = [
@@ -103,7 +104,7 @@ arrow-ipc = { version = "57.0.0", default-features = false, features = [
103104
"lz4",
104105
] }
105106
arrow-ord = { version = "57.0.0", default-features = false }
106-
arrow-schema = { version = "57.0.0", default-features = false }
107+
arrow-schema = { version = "57.0.0", default-features = false, features = ["canonical_extension_types"] }
107108
async-trait = "0.1.89"
108109
bigdecimal = "0.4.8"
109110
bytes = "1.10"

datafusion/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ pyo3 = { version = "0.26", optional = true }
7676
recursive = { workspace = true, optional = true }
7777
sqlparser = { workspace = true, optional = true }
7878
tokio = { workspace = true }
79+
uuid = { version = "1.18.1", features = ["v4"] }
7980

8081
[target.'cfg(target_family = "wasm")'.dependencies]
8182
web-time = "1.1.0"

datafusion/common/src/types/canonical.rs

Lines changed: 53 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -15,37 +15,24 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use crate::error::_internal_err;
1819
use crate::types::{
1920
LogicalType, NativeType, TypeParameter, TypeSignature, ValuePrettyPrinter,
2021
};
21-
use crate::Result;
2222
use crate::ScalarValue;
23+
use crate::{Result, _internal_datafusion_err};
24+
use arrow_schema::extension::{ExtensionType, Opaque, Uuid};
2325
use std::sync::{Arc, LazyLock};
26+
use uuid::Bytes;
2427

25-
/// Represents the canonical [UUID extension type](https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid).
26-
pub struct UuidType {}
27-
28-
impl UuidType {
29-
/// Creates a new [UuidType].
30-
pub fn new() -> Self {
31-
Self {}
32-
}
33-
}
34-
35-
impl Default for UuidType {
36-
fn default() -> Self {
37-
Self::new()
38-
}
39-
}
40-
41-
impl LogicalType for UuidType {
28+
impl LogicalType for Uuid {
4229
fn native(&self) -> &NativeType {
4330
&NativeType::FixedSizeBinary(16)
4431
}
4532

4633
fn signature(&self) -> TypeSignature<'_> {
4734
TypeSignature::Extension {
48-
name: "arrow.uuid",
35+
name: Uuid::NAME,
4936
parameters: vec![],
5037
}
5138
}
@@ -62,36 +49,41 @@ struct UuidValuePrettyPrinter;
6249

6350
impl ValuePrettyPrinter for UuidValuePrettyPrinter {
6451
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String> {
65-
Ok(format!("arrow.uuid({})", value))
52+
match value {
53+
ScalarValue::FixedSizeBinary(16, value) => match value {
54+
Some(value) => {
55+
let bytes = Bytes::try_from(value.as_slice()).map_err(|_| {
56+
_internal_datafusion_err!(
57+
"Invalid UUID bytes even though type is correct."
58+
)
59+
})?;
60+
let uuid = uuid::Uuid::from_bytes(bytes);
61+
Ok(format!("arrow.uuid({})", uuid))
62+
}
63+
None => Ok("arrow.uuid(NULL)".to_owned()),
64+
},
65+
_ => _internal_err!("Wrong scalar given to "),
66+
}
6667
}
6768
}
6869

6970
/// Represents the canonical [Opaque extension type](https://arrow.apache.org/docs/format/CanonicalExtensions.html#opaque).
7071
///
7172
/// In the context of DataFusion, a common use case of the opaque type is when an extension type
7273
/// is unknown to DataFusion. Contrary to [UnresolvedExtensionType], the extension type has
73-
/// already been checked against the extension type registry and was not found.
74-
pub struct OpaqueType {
75-
/// The underlying native type.
76-
native_type: NativeType,
77-
}
78-
79-
impl OpaqueType {
80-
/// Creates a new [OpaqueType].
81-
pub fn new(native_type: NativeType) -> Self {
82-
Self { native_type }
83-
}
84-
}
85-
86-
impl LogicalType for OpaqueType {
74+
/// already been checked against the extension type registry and was not found.
75+
impl LogicalType for Opaque {
8776
fn native(&self) -> &NativeType {
8877
&NativeType::FixedSizeBinary(16)
8978
}
9079

9180
fn signature(&self) -> TypeSignature<'_> {
92-
let parameter = TypeParameter::Type(TypeSignature::Native(&self.native_type));
81+
let parameter = TypeParameter::Type(TypeSignature::Extension {
82+
name: self.metadata().type_name(),
83+
parameters: vec![],
84+
});
9385
TypeSignature::Extension {
94-
name: "arrow.opaque",
86+
name: Opaque::NAME,
9587
parameters: vec![parameter],
9688
}
9789
}
@@ -103,6 +95,8 @@ impl LogicalType for OpaqueType {
10395
}
10496
}
10597

98+
// TODO: Add support for the other canonical extension types.
99+
106100
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
107101
struct OpaqueValuePrettyPrinter;
108102

@@ -159,9 +153,13 @@ impl LogicalType for UnresolvedExtensionType {
159153
}
160154

161155
fn signature(&self) -> TypeSignature<'_> {
156+
let inner_type = TypeParameter::Type(TypeSignature::Extension {
157+
name: &self.name,
158+
parameters: vec![],
159+
});
162160
TypeSignature::Extension {
163161
name: &"datafusion.unresolved",
164-
parameters: vec![],
162+
parameters: vec![inner_type],
165163
}
166164
}
167165

@@ -180,3 +178,21 @@ impl ValuePrettyPrinter for UnresolvedValuePrettyPrinter {
180178
Ok(format!("datafusion.unresolved({})", value))
181179
}
182180
}
181+
182+
#[cfg(test)]
183+
mod tests {
184+
use super::*;
185+
186+
#[test]
187+
pub fn test_pretty_print_uuid() {
188+
let my_uuid = uuid::Uuid::nil();
189+
let uuid = ScalarValue::FixedSizeBinary(16, Some(my_uuid.as_bytes().to_vec()));
190+
191+
let printer = UuidValuePrettyPrinter::default();
192+
let pretty_printed = printer.pretty_print_scalar(&uuid).unwrap();
193+
assert_eq!(
194+
pretty_printed,
195+
"arrow.uuid(00000000-0000-0000-0000-000000000000)"
196+
);
197+
}
198+
}

0 commit comments

Comments
 (0)