Skip to content

Commit 33304ca

Browse files
authored
Merge branch 'main' into proto-fix
2 parents 7cc6f90 + 61fe692 commit 33304ca

25 files changed

Lines changed: 904 additions & 710 deletions

File tree

.github/workflows/audit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
steps:
4646
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
4747
- name: Install cargo-audit
48-
uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
48+
uses: taiki-e/install-action@481c34c1cf3a84c68b5e46f4eccfc82af798415a # v2.75.23
4949
with:
5050
tool: cargo-audit
5151
- name: Run audit check

.github/workflows/dev.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
source ci/scripts/utils/tool_versions.sh
6565
echo "LYCHEE_VERSION=${LYCHEE_VERSION}" >> "$GITHUB_ENV"
6666
- name: Install lychee
67-
uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
67+
uses: taiki-e/install-action@481c34c1cf3a84c68b5e46f4eccfc82af798415a # v2.75.23
6868
with:
6969
tool: lychee@${{ env.LYCHEE_VERSION }}
7070
- name: Run markdown link check

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ jobs:
430430
sudo apt-get update -qq
431431
sudo apt-get install -y -qq clang
432432
- name: Setup wasm-pack
433-
uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
433+
uses: taiki-e/install-action@481c34c1cf3a84c68b5e46f4eccfc82af798415a # v2.75.23
434434
with:
435435
tool: wasm-pack
436436
- name: Run tests with headless mode
@@ -770,7 +770,7 @@ jobs:
770770
- name: Setup Rust toolchain
771771
uses: ./.github/actions/setup-builder
772772
- name: Install cargo-msrv
773-
uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
773+
uses: taiki-e/install-action@481c34c1cf3a84c68b5e46f4eccfc82af798415a # v2.75.23
774774
with:
775775
tool: cargo-msrv
776776

Cargo.lock

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ workspace = true
2828
datafusion = { workspace = true }
2929
datafusion-ffi = { workspace = true }
3030
ffi_module_interface = { path = "../ffi_module_interface" }
31-
libloading = "0.8"
31+
libloading = "0.9"
3232
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }

datafusion/common/src/utils/mod.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,11 +599,17 @@ pub fn base_type(data_type: &DataType) -> DataType {
599599
match data_type {
600600
DataType::List(field)
601601
| DataType::LargeList(field)
602+
| DataType::ListView(field)
603+
| DataType::LargeListView(field)
602604
| DataType::FixedSizeList(field, _) => base_type(field.data_type()),
603605
_ => data_type.to_owned(),
604606
}
605607
}
606608

609+
// TODO: Modify this to also allow specifying how listviews should be treated.
610+
// For example, whether to cast to List (the default) or keep it as a ListView
610+
// (which requires the function itself to implement support for ListViews).
612+
// https://github.com/apache/datafusion/issues/21777
607613
/// Information about how to coerce lists.
608614
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
609615
pub enum ListCoercion {
@@ -657,6 +663,19 @@ pub fn coerced_type_with_base_type_only(
657663
*len,
658664
)
659665
}
666+
(DataType::ListView(field), _) => {
667+
let field_type = coerced_type_with_base_type_only(
668+
field.data_type(),
669+
base_type,
670+
array_coercion,
671+
);
672+
673+
DataType::ListView(Arc::new(Field::new(
674+
field.name(),
675+
field_type,
676+
field.is_nullable(),
677+
)))
678+
}
660679
(DataType::LargeList(field), _) => {
661680
let field_type = coerced_type_with_base_type_only(
662681
field.data_type(),
@@ -670,6 +689,19 @@ pub fn coerced_type_with_base_type_only(
670689
field.is_nullable(),
671690
)))
672691
}
692+
(DataType::LargeListView(field), _) => {
693+
let field_type = coerced_type_with_base_type_only(
694+
field.data_type(),
695+
base_type,
696+
array_coercion,
697+
);
698+
699+
DataType::LargeListView(Arc::new(Field::new(
700+
field.name(),
701+
field_type,
702+
field.is_nullable(),
703+
)))
704+
}
673705

674706
_ => base_type.clone(),
675707
}
@@ -687,6 +719,15 @@ pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
687719
field.is_nullable(),
688720
)))
689721
}
722+
DataType::ListView(field) => {
723+
let field_type = coerced_fixed_size_list_to_list(field.data_type());
724+
725+
DataType::ListView(Arc::new(Field::new(
726+
field.name(),
727+
field_type,
728+
field.is_nullable(),
729+
)))
730+
}
690731
DataType::LargeList(field) => {
691732
let field_type = coerced_fixed_size_list_to_list(field.data_type());
692733

@@ -696,6 +737,15 @@ pub fn coerced_fixed_size_list_to_list(data_type: &DataType) -> DataType {
696737
field.is_nullable(),
697738
)))
698739
}
740+
DataType::LargeListView(field) => {
741+
let field_type = coerced_fixed_size_list_to_list(field.data_type());
742+
743+
DataType::LargeListView(Arc::new(Field::new(
744+
field.name(),
745+
field_type,
746+
field.is_nullable(),
747+
)))
748+
}
699749

700750
_ => data_type.clone(),
701751
}
@@ -706,6 +756,8 @@ pub fn list_ndims(data_type: &DataType) -> u64 {
706756
match data_type {
707757
DataType::List(field)
708758
| DataType::LargeList(field)
759+
| DataType::ListView(field)
760+
| DataType::LargeListView(field)
709761
| DataType::FixedSizeList(field, _) => 1 + list_ndims(field.data_type()),
710762
_ => 0,
711763
}

datafusion/expr/src/higher_order_function.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::expr::schema_name_from_exprs_comma_separated_without_space;
2121
use crate::{ColumnarValue, Documentation, Expr};
2222
use arrow::array::{ArrayRef, RecordBatch};
2323
use arrow::datatypes::{DataType, FieldRef, Schema};
24+
use arrow_schema::SchemaRef;
2425
use datafusion_common::config::ConfigOptions;
2526
use datafusion_common::{Result, ScalarValue, not_impl_err};
2627
use datafusion_expr_common::dyn_eq::{DynEq, DynHash};
@@ -218,11 +219,21 @@ pub struct LambdaArgument {
218219
/// For example, for `array_transform([2], v -> -v)`,
219220
/// this will be the physical expression of `-v`
220221
body: Arc<dyn PhysicalExpr>,
222+
/// Cached schema built from `params`. Reused across every `evaluate` call
223+
/// (and across every nested-list iteration when the lambda is called once
224+
/// per outer sublist), avoiding the per-call `Schema::new` build that
225+
/// includes constructing the internal name -> index map.
226+
schema: SchemaRef,
221227
}
222228

223229
impl LambdaArgument {
224230
pub fn new(params: Vec<FieldRef>, body: Arc<dyn PhysicalExpr>) -> Self {
225-
Self { params, body }
231+
let schema = Arc::new(Schema::new(params.clone()));
232+
Self {
233+
params,
234+
body,
235+
schema,
236+
}
226237
}
227238

228239
/// Evaluate this lambda
@@ -238,9 +249,7 @@ impl LambdaArgument {
238249
.map(|arg| arg())
239250
.collect::<Result<_>>()?;
240251

241-
let schema = Arc::new(Schema::new(self.params.clone()));
242-
243-
let batch = RecordBatch::try_new(schema, columns)?;
252+
let batch = RecordBatch::try_new(Arc::clone(&self.schema), columns)?;
244253

245254
self.body.evaluate(&batch)
246255
}

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -537,12 +537,12 @@ fn get_valid_types(
537537
element_types.push(DataType::Null);
538538
nested_item_nullability.push(None);
539539
}
540-
DataType::List(field) => {
540+
DataType::List(field) | DataType::ListView(field) => {
541541
element_types.push(field.data_type().clone());
542542
nested_item_nullability.push(Some(field.is_nullable()));
543543
fixed_size = false;
544544
}
545-
DataType::LargeList(field) => {
545+
DataType::LargeList(field) | DataType::LargeListView(field) => {
546546
element_types.push(field.data_type().clone());
547547
nested_item_nullability.push(Some(field.is_nullable()));
548548
large_list = true;
@@ -580,6 +580,8 @@ fn get_valid_types(
580580
ArrayFunctionArgument::Index => DataType::Int64,
581581
ArrayFunctionArgument::String => DataType::Utf8,
582582
ArrayFunctionArgument::Element => element_type.clone(),
583+
// TODO: support maintaining ListView types here
584+
// https://github.com/apache/datafusion/issues/21777
583585
ArrayFunctionArgument::Array => {
584586
if current_type.is_null() {
585587
DataType::Null
@@ -611,6 +613,8 @@ fn get_valid_types(
611613
match array_type {
612614
DataType::List(_)
613615
| DataType::LargeList(_)
616+
| DataType::ListView(_)
617+
| DataType::LargeListView(_)
614618
| DataType::FixedSizeList(_, _) => {
615619
let array_type = coerced_fixed_size_list_to_list(array_type);
616620
Some(array_type)
@@ -1044,7 +1048,7 @@ fn coerced_from<'a>(
10441048

10451049
// Only accept list, largelist, listview and largelistview with the same number of dimensions unless the type is Null.
10461050
// List, LargeList, ListView or LargeListView with different dimensions should be handled in TypeSignature or other places before this
1047-
(List(_) | LargeList(_), _)
1051+
(List(_) | LargeList(_) | ListView(_) | LargeListView(_), _)
10481052
if base_type(type_from).is_null()
10491053
|| list_ndims(type_from) == list_ndims(type_into) =>
10501054
{
@@ -1495,6 +1499,54 @@ mod tests {
14951499
]]
14961500
);
14971501

1502+
let data_types = vec![
1503+
DataType::ListView(Field::new_list_field(DataType::Int32, true).into()),
1504+
DataType::new_list(DataType::Int32, true),
1505+
];
1506+
assert_eq!(
1507+
get_valid_types(function, &signature.type_signature, &data_types)?,
1508+
vec![vec![
1509+
DataType::new_list(DataType::Int32, true),
1510+
DataType::new_list(DataType::Int32, true),
1511+
]]
1512+
);
1513+
1514+
let data_types = vec![
1515+
DataType::LargeListView(Field::new_list_field(DataType::Int32, true).into()),
1516+
DataType::new_list(DataType::Int32, true),
1517+
];
1518+
assert_eq!(
1519+
get_valid_types(function, &signature.type_signature, &data_types)?,
1520+
vec![vec![
1521+
DataType::new_large_list(DataType::Int32, true),
1522+
DataType::new_large_list(DataType::Int32, true),
1523+
]]
1524+
);
1525+
1526+
let data_types = vec![
1527+
DataType::ListView(Field::new_list_field(DataType::Int32, true).into()),
1528+
DataType::ListView(Field::new_list_field(DataType::Int32, true).into()),
1529+
];
1530+
assert_eq!(
1531+
get_valid_types(function, &signature.type_signature, &data_types)?,
1532+
vec![vec![
1533+
DataType::new_list(DataType::Int32, true),
1534+
DataType::new_list(DataType::Int32, true),
1535+
]]
1536+
);
1537+
1538+
let data_types = vec![
1539+
DataType::LargeListView(Field::new_list_field(DataType::Int32, true).into()),
1540+
DataType::LargeListView(Field::new_list_field(DataType::Int32, true).into()),
1541+
];
1542+
assert_eq!(
1543+
get_valid_types(function, &signature.type_signature, &data_types)?,
1544+
vec![vec![
1545+
DataType::new_large_list(DataType::Int32, true),
1546+
DataType::new_large_list(DataType::Int32, true),
1547+
]]
1548+
);
1549+
14981550
Ok(())
14991551
}
15001552

0 commit comments

Comments
 (0)