Skip to content

Commit a3604ff

Browse files
committed
Add draft for more extension types
1 parent e3c7f27 commit a3604ff

10 files changed

Lines changed: 657 additions & 21 deletions

File tree

datafusion/common/src/types/canonical_extensions/bool8.rs

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,55 @@ use crate::types::extension::DFExtensionType;
2020
use arrow::array::{Array, Int8Array};
2121
use arrow::datatypes::DataType;
2222
use arrow::util::display::{ArrayFormatter, DisplayIndex, FormatOptions, FormatResult};
23+
use arrow_schema::ArrowError;
24+
use arrow_schema::extension::{Bool8, ExtensionType};
2325
use std::fmt::Write;
2426

25-
/// Defines the extension type logic for the canonical `arrow.uuid` extension type.
27+
/// Defines the extension type logic for the canonical `arrow.bool8` extension type.
2628
///
2729
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
28-
impl DFExtensionType for arrow_schema::extension::Bool8 {
30+
#[derive(Debug, Clone)]
31+
pub struct DFBool8(Bool8);
32+
33+
impl ExtensionType for DFBool8 {
34+
const NAME: &'static str = Bool8::NAME;
35+
type Metadata = <Bool8 as ExtensionType>::Metadata;
36+
37+
fn metadata(&self) -> &Self::Metadata {
38+
self.0.metadata()
39+
}
40+
41+
fn serialize_metadata(&self) -> Option<String> {
42+
self.0.serialize_metadata()
43+
}
44+
45+
fn deserialize_metadata(
46+
metadata: Option<&str>,
47+
) -> Result<Self::Metadata, ArrowError> {
48+
Bool8::deserialize_metadata(metadata)
49+
}
50+
51+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
52+
self.0.supports_data_type(data_type)
53+
}
54+
55+
fn try_new(
56+
data_type: &DataType,
57+
metadata: Self::Metadata,
58+
) -> Result<Self, ArrowError> {
59+
Ok(Self(<Bool8 as ExtensionType>::try_new(
60+
data_type, metadata,
61+
)?))
62+
}
63+
}
64+
65+
impl DFExtensionType for DFBool8 {
2966
fn storage_type(&self) -> DataType {
3067
DataType::Int8
3168
}
3269

3370
fn serialize_metadata(&self) -> Option<String> {
34-
None
71+
self.0.serialize_metadata()
3572
}
3673

3774
fn create_array_formatter<'fmt>(
@@ -82,7 +119,7 @@ mod tests {
82119
pub fn test_pretty_bool8() {
83120
let values = Int8Array::from_iter([Some(0), Some(1), Some(-20), None]);
84121

85-
let extension_type = arrow_schema::extension::Bool8 {};
122+
let extension_type = DFBool8(Bool8 {});
86123
let formatter = extension_type
87124
.create_array_formatter(&values, &FormatOptions::default().with_null("NULL"))
88125
.unwrap()
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
13+
//
14+
// http://www.apache.org/licenses/LICENSE-2.0
15+
//
16+
// Unless required by applicable law or agreed to in writing,
17+
// software distributed under the License is distributed on an
18+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19+
// KIND, either express or implied. See the License for the
20+
// specific language governing permissions and limitations
21+
// under the License.
22+
23+
use crate::types::extension::DFExtensionType;
24+
use arrow::datatypes::DataType;
25+
use arrow_schema::ArrowError;
26+
use arrow_schema::extension::{ExtensionType, Json};
27+
28+
/// Defines the extension type logic for the canonical `arrow.json` extension type.
29+
///
30+
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
31+
#[derive(Debug, Clone)]
32+
pub struct DFJson {
33+
inner: Json,
34+
storage_type: DataType,
35+
}
36+
37+
impl ExtensionType for DFJson {
38+
const NAME: &'static str = Json::NAME;
39+
type Metadata = <Json as ExtensionType>::Metadata;
40+
41+
fn metadata(&self) -> &Self::Metadata {
42+
self.inner.metadata()
43+
}
44+
45+
fn serialize_metadata(&self) -> Option<String> {
46+
self.inner.serialize_metadata()
47+
}
48+
49+
fn deserialize_metadata(
50+
metadata: Option<&str>,
51+
) -> Result<Self::Metadata, ArrowError> {
52+
Json::deserialize_metadata(metadata)
53+
}
54+
55+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
56+
self.inner.supports_data_type(data_type)
57+
}
58+
59+
fn try_new(
60+
data_type: &DataType,
61+
metadata: Self::Metadata,
62+
) -> Result<Self, ArrowError> {
63+
Ok(Self {
64+
inner: <Json as ExtensionType>::try_new(data_type, metadata)?,
65+
storage_type: data_type.clone(),
66+
})
67+
}
68+
}
69+
70+
impl DFExtensionType for DFJson {
71+
fn storage_type(&self) -> DataType {
72+
self.storage_type.clone()
73+
}
74+
75+
fn serialize_metadata(&self) -> Option<String> {
76+
self.inner.serialize_metadata()
77+
}
78+
}

datafusion/common/src/types/canonical_extensions/mod.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
// regarding copyright ownership. The ASF licenses this file
55
// to you under the Apache License, Version 2.0 (the
66
// "License"); you may not use this file except in compliance
7-
// with the License. You may obtain a copy of the License at
7+
// with the License. You may obtain a copy of the License at
813
//
914
// http://www.apache.org/licenses/LICENSE-2.0
1015
//
@@ -16,4 +21,15 @@
1621
// under the License.
1722

1823
mod bool8;
24+
mod json;
25+
mod opaque;
26+
mod tensor;
27+
mod timestamp_with_offset;
1928
mod uuid;
29+
30+
pub use bool8::DFBool8;
31+
pub use json::DFJson;
32+
pub use opaque::DFOpaque;
33+
pub use tensor::{DFFixedShapeTensor, DFVariableShapeTensor};
34+
pub use timestamp_with_offset::DFTimestampWithOffset;
35+
pub use uuid::DFUuid;
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
13+
//
14+
// http://www.apache.org/licenses/LICENSE-2.0
15+
//
16+
// Unless required by applicable law or agreed to in writing,
17+
// software distributed under the License is distributed on an
18+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19+
// KIND, either express or implied. See the License for the
20+
// specific language governing permissions and limitations
21+
// under the License.
22+
23+
use crate::types::extension::DFExtensionType;
24+
use arrow::datatypes::DataType;
25+
use arrow_schema::ArrowError;
26+
use arrow_schema::extension::{ExtensionType, Opaque};
27+
28+
/// Defines the extension type logic for the canonical `arrow.opaque` extension type.
29+
///
30+
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
31+
#[derive(Debug, Clone)]
32+
pub struct DFOpaque {
33+
inner: Opaque,
34+
storage_type: DataType,
35+
}
36+
37+
impl ExtensionType for DFOpaque {
38+
const NAME: &'static str = Opaque::NAME;
39+
type Metadata = <Opaque as ExtensionType>::Metadata;
40+
41+
fn metadata(&self) -> &Self::Metadata {
42+
self.inner.metadata()
43+
}
44+
45+
fn serialize_metadata(&self) -> Option<String> {
46+
self.inner.serialize_metadata()
47+
}
48+
49+
fn deserialize_metadata(
50+
metadata: Option<&str>,
51+
) -> Result<Self::Metadata, ArrowError> {
52+
Opaque::deserialize_metadata(metadata)
53+
}
54+
55+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
56+
self.inner.supports_data_type(data_type)
57+
}
58+
59+
fn try_new(
60+
data_type: &DataType,
61+
metadata: Self::Metadata,
62+
) -> Result<Self, ArrowError> {
63+
Ok(Self {
64+
inner: <Opaque as ExtensionType>::try_new(data_type, metadata)?,
65+
storage_type: data_type.clone(),
66+
})
67+
}
68+
}
69+
70+
impl DFExtensionType for DFOpaque {
71+
fn storage_type(&self) -> DataType {
72+
self.storage_type.clone()
73+
}
74+
75+
fn serialize_metadata(&self) -> Option<String> {
76+
self.inner.serialize_metadata()
77+
}
78+
}
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::types::extension::DFExtensionType;
19+
use arrow::datatypes::DataType;
20+
use arrow_schema::ArrowError;
21+
use arrow_schema::extension::{ExtensionType, FixedShapeTensor, VariableShapeTensor};
22+
23+
/// Defines the extension type logic for the canonical `arrow.fixed_shape_tensor` extension type.
24+
///
25+
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
26+
#[derive(Debug, Clone)]
27+
pub struct DFFixedShapeTensor {
28+
inner: FixedShapeTensor,
29+
/// The storage type of the tensor.
30+
///
31+
/// While we could reconstruct the storage type from the inner [`FixedShapeTensor`], we may
32+
/// choose a different name for the field within the [`DataType::FixedSizeList`] which can
33+
/// cause problems down the line (e.g., checking for equality).
34+
storage_type: DataType,
35+
}
36+
37+
impl ExtensionType for DFFixedShapeTensor {
38+
const NAME: &'static str = FixedShapeTensor::NAME;
39+
type Metadata = <FixedShapeTensor as ExtensionType>::Metadata;
40+
41+
fn metadata(&self) -> &Self::Metadata {
42+
self.inner.metadata()
43+
}
44+
45+
fn serialize_metadata(&self) -> Option<String> {
46+
self.inner.serialize_metadata()
47+
}
48+
49+
fn deserialize_metadata(
50+
metadata: Option<&str>,
51+
) -> Result<Self::Metadata, ArrowError> {
52+
FixedShapeTensor::deserialize_metadata(metadata)
53+
}
54+
55+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
56+
self.inner.supports_data_type(data_type)
57+
}
58+
59+
fn try_new(
60+
data_type: &DataType,
61+
metadata: Self::Metadata,
62+
) -> Result<Self, ArrowError> {
63+
Ok(Self {
64+
inner: <FixedShapeTensor as ExtensionType>::try_new(data_type, metadata)?,
65+
storage_type: data_type.clone(),
66+
})
67+
}
68+
}
69+
70+
impl DFExtensionType for DFFixedShapeTensor {
71+
fn storage_type(&self) -> DataType {
72+
self.storage_type.clone()
73+
}
74+
75+
fn serialize_metadata(&self) -> Option<String> {
76+
self.inner.serialize_metadata()
77+
}
78+
}
79+
80+
/// Defines the extension type logic for the canonical `arrow.variable_shape_tensor` extension type.
81+
///
82+
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
83+
#[derive(Debug, Clone)]
84+
pub struct DFVariableShapeTensor {
85+
inner: VariableShapeTensor,
86+
/// While we could reconstruct the storage type from the inner [`VariableShapeTensor`], we may
87+
/// choose a different name for the field within the [`DataType::List`] which can cause problems
88+
/// down the line (e.g., checking for equality).
89+
storage_type: DataType,
90+
}
91+
92+
impl ExtensionType for DFVariableShapeTensor {
93+
const NAME: &'static str = VariableShapeTensor::NAME;
94+
type Metadata = <VariableShapeTensor as ExtensionType>::Metadata;
95+
96+
fn metadata(&self) -> &Self::Metadata {
97+
self.inner.metadata()
98+
}
99+
100+
fn serialize_metadata(&self) -> Option<String> {
101+
self.inner.serialize_metadata()
102+
}
103+
104+
fn deserialize_metadata(
105+
metadata: Option<&str>,
106+
) -> Result<Self::Metadata, ArrowError> {
107+
VariableShapeTensor::deserialize_metadata(metadata)
108+
}
109+
110+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
111+
self.inner.supports_data_type(data_type)
112+
}
113+
114+
fn try_new(
115+
data_type: &DataType,
116+
metadata: Self::Metadata,
117+
) -> Result<Self, ArrowError> {
118+
Ok(Self {
119+
inner: <VariableShapeTensor as ExtensionType>::try_new(data_type, metadata)?,
120+
storage_type: data_type.clone(),
121+
})
122+
}
123+
}
124+
125+
impl DFExtensionType for DFVariableShapeTensor {
126+
fn storage_type(&self) -> DataType {
127+
self.storage_type.clone()
128+
}
129+
130+
fn serialize_metadata(&self) -> Option<String> {
131+
self.inner.serialize_metadata()
132+
}
133+
}

0 commit comments

Comments
 (0)