Skip to content

Commit 10d8bcb

Browse files
authored
Add support for ListView in unnest (#20760)
## Which issue does this PR close?
Closes #20759.
## What changes are included in this PR?
Adds support for `ListView` and `LargeListView` arrays in `unnest`.
## Are these changes tested?
Yes, via sqllogictest (SLT) cases.
## Are there any user-facing changes?
No breaking changes; this only adds net-new support to existing APIs.
1 parent d2278a9 commit 10d8bcb

File tree

7 files changed

+163
-6
lines changed

7 files changed

+163
-6
lines changed

datafusion/common/src/scalar/mod.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3676,6 +3676,22 @@ impl ScalarValue {
36763676
.with_field(field)
36773677
.build_fixed_size_list_scalar(list_size)
36783678
}
3679+
DataType::ListView(field) => {
3680+
let list_array = array.as_list_view::<i32>();
3681+
let nested_array = list_array.value(index);
3682+
// Store as List scalar since ScalarValue has no ListView variant.
3683+
SingleRowListArrayBuilder::new(nested_array)
3684+
.with_field(field)
3685+
.build_list_scalar()
3686+
}
3687+
DataType::LargeListView(field) => {
3688+
let list_array = array.as_list_view::<i64>();
3689+
let nested_array = list_array.value(index);
3690+
// Store as LargeList scalar since ScalarValue has no LargeListView variant.
3691+
SingleRowListArrayBuilder::new(nested_array)
3692+
.with_field(field)
3693+
.build_large_list_scalar()
3694+
}
36793695
DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
36803696
DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
36813697
DataType::Time32(TimeUnit::Second) => {

datafusion/expr/src/expr_schema.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ impl ExprSchemable for Expr {
141141
match arg_data_type {
142142
DataType::List(field)
143143
| DataType::LargeList(field)
144-
| DataType::FixedSizeList(field, _) => Ok(field.data_type().clone()),
144+
| DataType::FixedSizeList(field, _)
145+
| DataType::ListView(field)
146+
| DataType::LargeListView(field) => Ok(field.data_type().clone()),
145147
DataType::Struct(_) => Ok(arg_data_type),
146148
DataType::Null => {
147149
not_impl_err!("unnest() does not support null yet")

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4194,7 +4194,9 @@ impl Unnest {
41944194
}
41954195
DataType::List(_)
41964196
| DataType::FixedSizeList(_, _)
4197-
| DataType::LargeList(_) => {
4197+
| DataType::LargeList(_)
4198+
| DataType::ListView(_)
4199+
| DataType::LargeListView(_) => {
41984200
list_columns.push((
41994201
index,
42004202
ColumnUnnestList {
@@ -4269,7 +4271,11 @@ fn get_unnested_columns(
42694271
let mut qualified_columns = Vec::with_capacity(1);
42704272

42714273
match data_type {
4272-
DataType::List(_) | DataType::FixedSizeList(_, _) | DataType::LargeList(_) => {
4274+
DataType::List(_)
4275+
| DataType::FixedSizeList(_, _)
4276+
| DataType::LargeList(_)
4277+
| DataType::ListView(_)
4278+
| DataType::LargeListView(_) => {
42734279
let data_type = get_unnested_list_datatype_recursive(data_type, depth)?;
42744280
let new_field = Arc::new(Field::new(
42754281
col_name, data_type,
@@ -4306,7 +4312,9 @@ fn get_unnested_list_datatype_recursive(
43064312
match data_type {
43074313
DataType::List(field)
43084314
| DataType::FixedSizeList(field, _)
4309-
| DataType::LargeList(field) => {
4315+
| DataType::LargeList(field)
4316+
| DataType::ListView(field)
4317+
| DataType::LargeListView(field) => {
43104318
if depth == 1 {
43114319
return Ok(field.data_type().clone());
43124320
}

datafusion/physical-plan/src/unnest.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ use crate::{
3333

3434
use arrow::array::{
3535
Array, ArrayRef, AsArray, BooleanBufferBuilder, FixedSizeListArray, Int64Array,
36-
LargeListArray, ListArray, PrimitiveArray, Scalar, StructArray, new_null_array,
36+
LargeListArray, LargeListViewArray, ListArray, ListViewArray, PrimitiveArray, Scalar,
37+
StructArray, new_null_array,
3738
};
3839
use arrow::compute::kernels::length::length;
3940
use arrow::compute::kernels::zip::zip;
@@ -845,6 +846,30 @@ impl ListArrayType for FixedSizeListArray {
845846
}
846847
}
847848

849+
/// `ListArrayType` implementation for `ListViewArray`.
///
/// The `DataType::ListView` arm of `unnest_list_arrays` casts its input to
/// `&dyn ListArrayType`; presumably that cast relies on this impl.
impl ListArrayType for ListViewArray {
850+
/// Returns the child values array backing every row of this list view.
// NOTE(review): `self.values()` presumably resolves to the array's own
// accessor rather than recursing into this trait method — confirm.
fn values(&self) -> &ArrayRef {
851+
self.values()
852+
}
853+
854+
/// Returns the `(start, end)` range of `row` within the values array.
///
/// `start` is the row's entry in the array's offsets and `end` is
/// `start` plus the row's entry in the array's sizes; both are cast to
/// `i64` to match the trait's return type.
// NOTE(review): `[row]` indexing presumably panics when `row` is out of
// range — confirm callers only pass valid row indices.
fn value_offsets(&self, row: usize) -> (i64, i64) {
855+
let offset = self.value_offsets()[row] as i64;
856+
let size = self.value_sizes()[row] as i64;
857+
// The end is derived from this row's own size, not from the next row's offset.
(offset, offset + size)
858+
}
859+
}
860+
861+
/// `ListArrayType` implementation for `LargeListViewArray`.
///
/// The `DataType::LargeListView` arm of `unnest_list_arrays` casts its input
/// to `&dyn ListArrayType`; presumably that cast relies on this impl.
impl ListArrayType for LargeListViewArray {
862+
/// Returns the child values array backing every row of this list view.
// NOTE(review): `self.values()` presumably resolves to the array's own
// accessor rather than recursing into this trait method — confirm.
fn values(&self) -> &ArrayRef {
863+
self.values()
864+
}
865+
866+
/// Returns the `(start, end)` range of `row` within the values array.
///
/// `start` is the row's entry in the array's offsets and `end` is
/// `start` plus the row's entry in the array's sizes. No cast is applied
/// here; the values are returned directly as the `(i64, i64)` pair.
// NOTE(review): `[row]` indexing presumably panics when `row` is out of
// range — confirm callers only pass valid row indices.
fn value_offsets(&self, row: usize) -> (i64, i64) {
867+
let offset = self.value_offsets()[row];
868+
let size = self.value_sizes()[row];
869+
// The end is derived from this row's own size, not from the next row's offset.
(offset, offset + size)
870+
}
871+
}
872+
848873
/// Unnest multiple list arrays according to the length array.
849874
fn unnest_list_arrays(
850875
list_arrays: &[ArrayRef],
@@ -861,6 +886,12 @@ fn unnest_list_arrays(
861886
DataType::FixedSizeList(_, _) => {
862887
Ok(list_array.as_fixed_size_list() as &dyn ListArrayType)
863888
}
889+
DataType::ListView(_) => {
890+
Ok(list_array.as_list_view::<i32>() as &dyn ListArrayType)
891+
}
892+
DataType::LargeListView(_) => {
893+
Ok(list_array.as_list_view::<i64>() as &dyn ListArrayType)
894+
}
864895
other => exec_err!("Invalid unnest datatype {other }"),
865896
})
866897
.collect::<Result<Vec<_>>>()?;

datafusion/sql/src/expr/function.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
905905
DataType::List(_)
906906
| DataType::LargeList(_)
907907
| DataType::FixedSizeList(_, _)
908+
| DataType::ListView(_)
909+
| DataType::LargeListView(_)
908910
| DataType::Struct(_) => Ok(()),
909911
DataType::Null => {
910912
not_impl_err!("unnest() does not support null yet")

datafusion/sql/src/utils.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,9 @@ impl RecursiveUnnestRewriter<'_> {
466466
}
467467
DataType::List(_)
468468
| DataType::FixedSizeList(_, _)
469-
| DataType::LargeList(_) => {
469+
| DataType::LargeList(_)
470+
| DataType::ListView(_)
471+
| DataType::LargeListView(_) => {
470472
push_projection_dedupl(
471473
self.inner_projection_exprs,
472474
expr_in_unnest.clone().alias(placeholder_name.clone()),

datafusion/sqllogictest/test_files/unnest.slt

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,3 +1233,99 @@ physical_plan
12331233
# cleanup
12341234
statement ok
12351235
drop table t;
1236+
1237+
########################################
1238+
# Unnest ListView / LargeListView Tests #
1239+
########################################
1240+
1241+
## Basic unnest ListView in select list
1242+
query I
1243+
select unnest(arrow_cast([1,2,3], 'ListView(Int64)'));
1244+
----
1245+
1
1246+
2
1247+
3
1248+
1249+
## Basic unnest ListView in from clause
1250+
query I
1251+
select * from unnest(arrow_cast([1,2,3], 'ListView(Int64)'));
1252+
----
1253+
1
1254+
2
1255+
3
1256+
1257+
## Basic unnest LargeListView in select list
1258+
query I
1259+
select unnest(arrow_cast([1,2,3], 'LargeListView(Int64)'));
1260+
----
1261+
1
1262+
2
1263+
3
1264+
1265+
## Basic unnest LargeListView in from clause
1266+
query I
1267+
select * from unnest(arrow_cast([1,2,3], 'LargeListView(Int64)'));
1268+
----
1269+
1
1270+
2
1271+
3
1272+
1273+
## Unnest ListView with range
1274+
query I
1275+
select unnest(arrow_cast(range(1, 3), 'ListView(Int64)'));
1276+
----
1277+
1
1278+
2
1279+
1280+
## Unnest LargeListView with range
1281+
query I
1282+
select * from unnest(arrow_cast(range(1, 3), 'LargeListView(Int64)'));
1283+
----
1284+
1
1285+
2
1286+
1287+
## Multiple unnest with ListView and LargeListView columns from a table
1288+
query III
1289+
select
1290+
unnest(column1),
1291+
unnest(arrow_cast(column2, 'ListView(Int64)')),
1292+
unnest(arrow_cast(column4, 'LargeListView(Int64)'))
1293+
from unnest_table where column4 is not null;
1294+
----
1295+
1 7 13
1296+
2 NULL 14
1297+
3 NULL NULL
1298+
4 8 15
1299+
5 9 16
1300+
NULL 10 NULL
1301+
NULL NULL 17
1302+
NULL NULL 18
1303+
1304+
## Unnest ListView with null elements
1305+
query I
1306+
select unnest(arrow_cast([1, null, 3], 'ListView(Int64)'));
1307+
----
1308+
1
1309+
NULL
1310+
3
1311+
1312+
## Unnest empty ListView
1313+
query I
1314+
select unnest(arrow_cast([], 'ListView(Int64)'));
1315+
----
1316+
1317+
## Unnest ListView of strings
1318+
query T
1319+
select unnest(arrow_cast(['a','b','c'], 'ListView(Utf8)'));
1320+
----
1321+
a
1322+
b
1323+
c
1324+
1325+
## Unnest LargeListView of strings
1326+
query T
1327+
select unnest(arrow_cast(['a','b','c'], 'LargeListView(Utf8)'));
1328+
----
1329+
a
1330+
b
1331+
c

0 commit comments

Comments
 (0)