Skip to content

Commit 41f9312

Browse files
adriangb and claude committed
Add arrow_field(expr) scalar UDF
Adds a new introspection function that returns a struct containing the complete Arrow Field information for any expression: name, data_type, nullable, and metadata. This unifies what `arrow_typeof`, `arrow_metadata`, and `is_nullable` provide individually. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c17c87c commit 41f9312

3 files changed

Lines changed: 273 additions & 0 deletions

File tree

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::{
19+
Array, BooleanArray, MapBuilder, StringArray, StringBuilder, StructArray,
20+
};
21+
use arrow::datatypes::{DataType, Field, Fields};
22+
use datafusion_common::{Result, ScalarValue, utils::take_function_args};
23+
use datafusion_expr::{
24+
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
25+
Volatility,
26+
};
27+
use datafusion_macros::user_doc;
28+
use std::sync::Arc;
29+
30+
#[user_doc(
31+
doc_section(label = "Other Functions"),
32+
description = "Returns a struct containing the Arrow field information of the expression, including name, data type, nullability, and metadata.",
33+
syntax_example = "arrow_field(expression)",
34+
sql_example = r#"```sql
35+
> select arrow_field(1);
36+
+----------------------------------------------+
37+
| arrow_field(Int64(1)) |
38+
+----------------------------------------------+
39+
| {name: Int64(1), data_type: Int64, ...} |
40+
+----------------------------------------------+
41+
42+
> select arrow_field(1)['data_type'];
43+
+-----------------------------------+
44+
| arrow_field(Int64(1))[data_type] |
45+
+-----------------------------------+
46+
| Int64 |
47+
+-----------------------------------+
48+
```"#,
49+
argument(
50+
name = "expression",
51+
description = "Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators."
52+
)
53+
)]
54+
#[derive(Debug, PartialEq, Eq, Hash)]
55+
pub struct ArrowFieldFunc {
56+
signature: Signature,
57+
}
58+
59+
impl Default for ArrowFieldFunc {
60+
fn default() -> Self {
61+
Self::new()
62+
}
63+
}
64+
65+
impl ArrowFieldFunc {
66+
pub fn new() -> Self {
67+
Self {
68+
signature: Signature::any(1, Volatility::Immutable),
69+
}
70+
}
71+
72+
fn return_struct_type() -> DataType {
73+
DataType::Struct(Fields::from(vec![
74+
Field::new("name", DataType::Utf8, false),
75+
Field::new("data_type", DataType::Utf8, false),
76+
Field::new("nullable", DataType::Boolean, false),
77+
Field::new(
78+
"metadata",
79+
DataType::Map(
80+
Arc::new(Field::new(
81+
"entries",
82+
DataType::Struct(Fields::from(vec![
83+
Field::new("keys", DataType::Utf8, false),
84+
Field::new("values", DataType::Utf8, true),
85+
])),
86+
false,
87+
)),
88+
false,
89+
),
90+
false,
91+
),
92+
]))
93+
}
94+
}
95+
96+
impl ScalarUDFImpl for ArrowFieldFunc {
97+
fn name(&self) -> &str {
98+
"arrow_field"
99+
}
100+
101+
fn signature(&self) -> &Signature {
102+
&self.signature
103+
}
104+
105+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
106+
Ok(Self::return_struct_type())
107+
}
108+
109+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
110+
let [_arg] = take_function_args(self.name(), args.args)?;
111+
let field = &args.arg_fields[0];
112+
113+
// Build the name array
114+
let name_array =
115+
Arc::new(StringArray::from(vec![field.name().as_str()])) as Arc<dyn Array>;
116+
117+
// Build the data_type array
118+
let data_type_str = format!("{}", field.data_type());
119+
let data_type_array =
120+
Arc::new(StringArray::from(vec![data_type_str.as_str()])) as Arc<dyn Array>;
121+
122+
// Build the nullable array
123+
let nullable_array =
124+
Arc::new(BooleanArray::from(vec![field.is_nullable()])) as Arc<dyn Array>;
125+
126+
// Build the metadata map array (same pattern as arrow_metadata.rs)
127+
let metadata = field.metadata();
128+
let mut map_builder =
129+
MapBuilder::new(None, StringBuilder::new(), StringBuilder::new());
130+
131+
let mut entries: Vec<_> = metadata.iter().collect();
132+
entries.sort_by_key(|(k, _)| *k);
133+
134+
for (k, v) in entries {
135+
map_builder.keys().append_value(k);
136+
map_builder.values().append_value(v);
137+
}
138+
map_builder.append(true)?;
139+
140+
let metadata_array = Arc::new(map_builder.finish()) as Arc<dyn Array>;
141+
142+
// Build the struct
143+
let DataType::Struct(fields) = Self::return_struct_type() else {
144+
unreachable!()
145+
};
146+
147+
let struct_array = StructArray::new(
148+
fields,
149+
vec![name_array, data_type_array, nullable_array, metadata_array],
150+
None,
151+
);
152+
153+
Ok(ColumnarValue::Scalar(ScalarValue::try_from_array(
154+
&struct_array,
155+
0,
156+
)?))
157+
}
158+
159+
fn documentation(&self) -> Option<&Documentation> {
160+
self.doc()
161+
}
162+
}

datafusion/functions/src/core/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use datafusion_expr::ScalarUDF;
2121
use std::sync::Arc;
2222

2323
pub mod arrow_cast;
24+
pub mod arrow_field;
2425
pub mod arrow_metadata;
2526
pub mod arrow_try_cast;
2627
pub mod arrowtypeof;
@@ -59,6 +60,7 @@ make_udf_function!(union_extract::UnionExtractFun, union_extract);
5960
make_udf_function!(union_tag::UnionTagFunc, union_tag);
6061
make_udf_function!(version::VersionFunc, version);
6162
make_udf_function!(arrow_metadata::ArrowMetadataFunc, arrow_metadata);
63+
make_udf_function!(arrow_field::ArrowFieldFunc, arrow_field);
6264

6365
pub mod expr_fn {
6466
use datafusion_expr::{Expr, Literal};
@@ -91,6 +93,10 @@ pub mod expr_fn {
9193
arrow_typeof,
9294
"Returns the Arrow type of the input expression.",
9395
arg1
96+
),(
97+
arrow_field,
98+
"Returns the Arrow field info (name, data_type, nullable, metadata) of the input expression.",
99+
arg1
94100
),(
95101
arrow_metadata,
96102
"Returns the metadata of the input expression",
@@ -147,6 +153,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
147153
nullif(),
148154
arrow_cast(),
149155
arrow_try_cast(),
156+
arrow_field(),
150157
arrow_metadata(),
151158
nvl(),
152159
nvl2(),
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# arrow_field on integer literal
19+
query ?
20+
SELECT arrow_field(1)
21+
----
22+
{name: lit, data_type: Int64, nullable: false, metadata: {}}
23+
24+
# arrow_field on null literal
25+
query ?
26+
SELECT arrow_field(null)
27+
----
28+
{name: lit, data_type: Null, nullable: true, metadata: {}}
29+
30+
# arrow_field on boolean literal
31+
query ?
32+
SELECT arrow_field(true)
33+
----
34+
{name: lit, data_type: Boolean, nullable: false, metadata: {}}
35+
36+
# arrow_field on string literal
37+
query ?
38+
SELECT arrow_field('foo')
39+
----
40+
{name: lit, data_type: Utf8, nullable: false, metadata: {}}
41+
42+
# arrow_field on float literal
43+
query ?
44+
SELECT arrow_field(1.0)
45+
----
46+
{name: lit, data_type: Float64, nullable: false, metadata: {}}
47+
48+
# arrow_field on list
49+
query ?
50+
SELECT arrow_field(ARRAY[1,2,3])
51+
----
52+
{name: lit, data_type: List(Int64), nullable: false, metadata: {}}
53+
54+
# arrow_field struct field access - data_type
55+
query T
56+
SELECT arrow_field(1)['data_type']
57+
----
58+
Int64
59+
60+
# arrow_field struct field access - nullable
61+
query B
62+
SELECT arrow_field(1)['nullable']
63+
----
64+
false
65+
66+
# arrow_field struct field access - name
67+
query T
68+
SELECT arrow_field(1)['name']
69+
----
70+
lit
71+
72+
# arrow_field with table columns
73+
statement ok
74+
CREATE TABLE arrow_field_test(x INT NOT NULL, y TEXT) AS VALUES (1, 'a');
75+
76+
query ?
77+
SELECT arrow_field(x) FROM arrow_field_test
78+
----
79+
{name: x, data_type: Int32, nullable: false, metadata: {}}
80+
81+
query ?
82+
SELECT arrow_field(y) FROM arrow_field_test
83+
----
84+
{name: y, data_type: Utf8View, nullable: true, metadata: {}}
85+
86+
# arrow_field column access - name reflects column name
87+
query T
88+
SELECT arrow_field(x)['name'] FROM arrow_field_test
89+
----
90+
x
91+
92+
# arrow_field column access - nullability
93+
query B
94+
SELECT arrow_field(x)['nullable'] FROM arrow_field_test
95+
----
96+
false
97+
98+
query B
99+
SELECT arrow_field(y)['nullable'] FROM arrow_field_test
100+
----
101+
true
102+
103+
statement ok
104+
DROP TABLE arrow_field_test;

0 commit comments

Comments
 (0)