Skip to content

Commit 6d8774a

Browse files
committed
Add first draft of extension type registry
1 parent efcc216 commit 6d8774a

File tree

11 files changed

+401
-4
lines changed

11 files changed

+401
-4
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ testcontainers = { version = "0.25.2", features = ["default"] }
182182
testcontainers-modules = { version = "0.13" }
183183
tokio = { version = "1.48", features = ["macros", "rt", "sync"] }
184184
url = "2.5.7"
185+
uuid = { version = "1.18", features = ["v4"] }
185186

186187
[workspace.lints.clippy]
187188
# Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)

datafusion/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ apache-avro = { version = "0.20", default-features = false, features = [
6161
], optional = true }
6262
arrow = { workspace = true }
6363
arrow-ipc = { workspace = true }
64+
arrow-schema = { workspace = true }
6465
chrono = { workspace = true }
6566
half = { workspace = true }
6667
hashbrown = { workspace = true }
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::types::{
19+
LogicalType, NativeType, TypeParameter, TypeSignature, ValuePrettyPrinter,
20+
};
21+
use crate::Result;
22+
use crate::ScalarValue;
23+
use std::sync::{Arc, LazyLock};
24+
25+
/// Represents the canonical [UUID extension type](https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid).
///
/// The type carries no state, so it derives the common value-type traits. `Default` is
/// intentionally not derived here because it is implemented by hand in terms of `new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct UuidType {}
27+
28+
impl UuidType {
29+
/// Creates a new [UuidType].
30+
pub fn new() -> Self {
31+
Self {}
32+
}
33+
}
34+
35+
impl Default for UuidType {
36+
fn default() -> Self {
37+
Self::new()
38+
}
39+
}
40+
41+
impl LogicalType for UuidType {
42+
fn native(&self) -> &NativeType {
43+
&NativeType::FixedSizeBinary(16)
44+
}
45+
46+
fn signature(&self) -> TypeSignature<'_> {
47+
TypeSignature::Extension {
48+
name: "arrow.uuid",
49+
parameters: vec![],
50+
}
51+
}
52+
53+
fn pretty_printer(&self) -> &Arc<dyn ValuePrettyPrinter> {
54+
static PRETTY_PRINTER: LazyLock<Arc<dyn ValuePrettyPrinter>> =
55+
LazyLock::new(|| Arc::new(UuidValuePrettyPrinter {}));
56+
&PRETTY_PRINTER
57+
}
58+
}
59+
60+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
61+
struct UuidValuePrettyPrinter;
62+
63+
impl ValuePrettyPrinter for UuidValuePrettyPrinter {
64+
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String> {
65+
Ok(format!("arrow.uuid({})", value))
66+
}
67+
}
68+
69+
/// Represents the canonical [Opaque extension type](https://arrow.apache.org/docs/format/CanonicalExtensions.html#opaque).
70+
///
71+
/// In the context of DataFusion, a common use case of the opaque type is when an extension type
72+
/// is unknown to DataFusion. Contrary to [UnresolvedExtensionType], the extension type has
73+
/// already been checked against the extension type registry and was not found.
74+
pub struct OpaqueType {
75+
/// The underlying native type.
76+
native_type: NativeType,
77+
}
78+
79+
impl OpaqueType {
80+
/// Creates a new [OpaqueType].
81+
pub fn new(native_type: NativeType) -> Self {
82+
Self { native_type }
83+
}
84+
}
85+
86+
impl LogicalType for OpaqueType {
87+
fn native(&self) -> &NativeType {
88+
&NativeType::FixedSizeBinary(16)
89+
}
90+
91+
fn signature(&self) -> TypeSignature<'_> {
92+
let parameter = TypeParameter::Type(TypeSignature::Native(&self.native_type));
93+
TypeSignature::Extension {
94+
name: "arrow.opaque",
95+
parameters: vec![parameter],
96+
}
97+
}
98+
99+
fn pretty_printer(&self) -> &Arc<dyn ValuePrettyPrinter> {
100+
static PRETTY_PRINTER: LazyLock<Arc<dyn ValuePrettyPrinter>> =
101+
LazyLock::new(|| Arc::new(OpaqueValuePrettyPrinter {}));
102+
&PRETTY_PRINTER
103+
}
104+
}
105+
106+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
107+
struct OpaqueValuePrettyPrinter;
108+
109+
impl ValuePrettyPrinter for OpaqueValuePrettyPrinter {
110+
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String> {
111+
Ok(format!("arrow.opaque({})", value))
112+
}
113+
}
114+
115+
/// Represents an unresolved extension type with a given native type and name.
116+
///
117+
/// This does not necessarily indicate that DataFusion does not understand the extension type. For
118+
/// this purpose, see [OpaqueType]. However, it does indicate that the extension type was not yet
119+
/// checked against the extension type registry.
120+
///
121+
/// This extension type exists because it is often challenging to gain access to an extension type
122+
/// registry. Especially because extension type support is relatively new, and therefore this
123+
/// consideration was not taken into account by users. This provides a workaround such that
124+
/// unresolved extension types can be resolved at a later point in time where access to the registry
125+
/// is available.
126+
pub struct UnresolvedExtensionType {
127+
/// The name of the underlying extension type.
128+
name: String,
129+
/// The metadata of the underlying extension type.
130+
metadata: Option<String>,
131+
/// The underlying native type.
132+
native_type: NativeType,
133+
}
134+
135+
impl UnresolvedExtensionType {
136+
/// Creates a new [UnresolvedExtensionType].
137+
pub fn new(name: String, metadata: Option<String>, native_type: NativeType) -> Self {
138+
Self {
139+
name,
140+
metadata,
141+
native_type,
142+
}
143+
}
144+
145+
/// The name of the unresolved extension type.
146+
pub fn name(&self) -> &str {
147+
&self.name
148+
}
149+
150+
/// The metadata of the unresolved extension type.
151+
pub fn metadata(&self) -> Option<&str> {
152+
self.metadata.as_deref()
153+
}
154+
}
155+
156+
impl LogicalType for UnresolvedExtensionType {
157+
fn native(&self) -> &NativeType {
158+
&self.native_type
159+
}
160+
161+
fn signature(&self) -> TypeSignature<'_> {
162+
TypeSignature::Extension {
163+
name: &"datafusion.unresolved",
164+
parameters: vec![],
165+
}
166+
}
167+
168+
fn pretty_printer(&self) -> &Arc<dyn ValuePrettyPrinter> {
169+
static PRETTY_PRINTER: LazyLock<Arc<dyn ValuePrettyPrinter>> =
170+
LazyLock::new(|| Arc::new(UnresolvedValuePrettyPrinter {}));
171+
&PRETTY_PRINTER
172+
}
173+
}
174+
175+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
176+
struct UnresolvedValuePrettyPrinter {}
177+
178+
impl ValuePrettyPrinter for UnresolvedValuePrettyPrinter {
179+
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String> {
180+
Ok(format!("datafusion.unresolved({})", value))
181+
}
182+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::Result;
19+
use crate::ScalarValue;
20+
use arrow::array::Array;
21+
use std::fmt::Debug;
22+
23+
/// Implements pretty printing for a set of types.
24+
///
25+
/// For example, the default pretty-printer for a byte array might not be adequate for a UUID type,
26+
/// which is physically stored as a fixed-length byte array. This extension allows the user to
27+
/// override the default pretty-printer for a given type.
28+
pub trait ValuePrettyPrinter: Debug + Sync + Send {
29+
/// Pretty print a scalar value.
30+
///
31+
/// # Error
32+
///
33+
/// Will return an error if the given `df_type` is not supported by this pretty printer.
34+
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String>;
35+
36+
/// Pretty print a specific value of a given array.
37+
///
38+
/// # Error
39+
///
40+
/// Will return an error if the given `df_type` is not supported by this pretty printer.
41+
fn pretty_print_array(&self, array: &dyn Array, index: usize) -> Result<String> {
42+
let value = ScalarValue::try_from_array(array, index)?;
43+
self.pretty_print_scalar(&value)
44+
}
45+
}
46+
47+
/// The default pretty printer.
48+
///
49+
/// Uses the arrow implementation of printing values.
50+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
51+
pub struct DefaultValuePrettyPrinter;
52+
53+
impl ValuePrettyPrinter for DefaultValuePrettyPrinter {
54+
fn pretty_print_scalar(&self, value: &ScalarValue) -> Result<String> {
55+
Ok(value.to_string())
56+
}
57+
}

datafusion/common/src/types/logical.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use super::NativeType;
18+
use super::{NativeType, ValuePrettyPrinter};
1919
use crate::error::Result;
2020
use arrow::datatypes::DataType;
2121
use core::fmt;
@@ -32,7 +32,7 @@ pub enum TypeSignature<'a> {
3232
/// The `name` should contain the same value as 'ARROW:extension:name'.
3333
Extension {
3434
name: &'a str,
35-
parameters: &'a [TypeParameter<'a>],
35+
parameters: Vec<TypeParameter<'a>>,
3636
},
3737
}
3838

@@ -87,6 +87,9 @@ pub trait LogicalType: Sync + Send {
8787
fn default_cast_for(&self, origin: &DataType) -> Result<DataType> {
8888
self.native().default_cast_for(origin)
8989
}
90+
91+
/// Returns a pretty-printer that can format values of this type.
92+
fn pretty_printer(&self) -> &Arc<dyn ValuePrettyPrinter>;
9093
}
9194

9295
impl fmt::Debug for dyn LogicalType {

datafusion/common/src/types/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@
1616
// under the License.
1717

1818
mod builtin;
19+
mod canonical;
20+
mod extensions;
1921
mod field;
2022
mod logical;
2123
mod native;
2224

2325
pub use builtin::*;
26+
pub use canonical::*;
27+
pub use extensions::*;
2428
pub use field::*;
2529
pub use logical::*;
2630
pub use native::*;

datafusion/common/src/types/native.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,16 @@
1717

1818
use super::{
1919
LogicalField, LogicalFieldRef, LogicalFields, LogicalType, LogicalUnionFields,
20-
TypeSignature,
20+
TypeSignature, ValuePrettyPrinter,
2121
};
2222
use crate::error::{Result, _internal_err};
23+
use crate::types::DefaultValuePrettyPrinter;
2324
use arrow::compute::can_cast_types;
2425
use arrow::datatypes::{
2526
DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields,
2627
DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
2728
};
29+
use std::sync::LazyLock;
2830
use std::{fmt::Display, sync::Arc};
2931

3032
/// Representation of a type that DataFusion can handle natively. It is a subset
@@ -368,6 +370,12 @@ impl LogicalType for NativeType {
368370
}
369371
})
370372
}
373+
374+
fn pretty_printer(&self) -> &Arc<dyn ValuePrettyPrinter> {
375+
static PRETTY_PRINTER: LazyLock<Arc<dyn ValuePrettyPrinter>> =
376+
LazyLock::new(|| Arc::new(DefaultValuePrettyPrinter {}));
377+
&PRETTY_PRINTER
378+
}
371379
}
372380

373381
// The following From<DataType>, From<Field>, ... implementations are temporary

datafusion/expr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ sql = ["sqlparser"]
4444

4545
[dependencies]
4646
arrow = { workspace = true }
47+
arrow-schema = { workspace = true }
4748
async-trait = { workspace = true }
4849
chrono = { workspace = true }
4950
datafusion-common = { workspace = true, default-features = false }

0 commit comments

Comments
 (0)