Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/actions/setup-submodules/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: "Setup Submodules"
description: "Initialize and update git submodules for testing"
runs:
using: "composite"
steps:
- name: Initialize and update submodules
shell: bash
run: |
git config --global --add safe.directory $GITHUB_WORKSPACE
git submodule init
# Override update=none setting for CI
git config submodule.testing.update checkout
git config submodule.parquet-testing.update checkout
git config submodule.datafusion-testing.update checkout
git submodule update --depth 1
3 changes: 2 additions & 1 deletion .github/workflows/dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/docs_pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup uv
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
- name: Install doc dependencies
Expand Down
37 changes: 34 additions & 3 deletions .github/workflows/extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,34 @@ permissions:
checks: write

jobs:

# Check crate compiles and base cargo check passes
linux-build-lib:
name: linux build test
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
# note: do not use amd/rust container to preserve disk space
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Install Rust
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
source $HOME/.cargo/env
rustup toolchain install
- name: Install Protobuf Compiler
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler
- name: Prepare cargo build
run: |
cargo check --profile ci --all-targets
cargo clean

# Run extended tests (with feature 'extended_tests')
linux-test-extended:
name: cargo test 'extended_tests' (amd64)
Expand All @@ -67,8 +95,9 @@ jobs:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
- name: Install Rust
Expand Down Expand Up @@ -114,8 +143,9 @@ jobs:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand All @@ -136,8 +166,9 @@ jobs:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
# Don't use setup-builder to avoid configuring RUST_BACKTRACE which is expensive
- name: Install protobuf compiler
run: |
Expand Down
36 changes: 24 additions & 12 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -325,8 +326,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
run: rustup toolchain install stable
- name: Rust Dependency Cache
Expand Down Expand Up @@ -357,8 +359,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -388,8 +391,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -451,8 +455,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -499,8 +504,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand All @@ -524,8 +530,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -567,8 +574,9 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-macos-aarch64-builder
- name: Run datafusion-ffi tests
Expand Down Expand Up @@ -660,8 +668,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand All @@ -685,8 +694,9 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand All @@ -707,8 +717,9 @@ jobs:
- uses: runs-on/action@d141ef83eb66d096ce8afc767e09115a65c63b60 # v2.1.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
Expand Down Expand Up @@ -742,8 +753,9 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: true
fetch-depth: 1
- name: Setup Submodules
uses: ./.github/actions/setup-submodules

- name: Mark repository as safe for git
# Required for git commands inside container (avoids "dubious ownership" error)
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
[submodule "parquet-testing"]
path = parquet-testing
url = https://github.com/apache/parquet-testing.git
update = none
[submodule "testing"]
path = testing
url = https://github.com/apache/arrow-testing
update = none
[submodule "datafusion-testing"]
path = datafusion-testing
url = https://github.com/apache/datafusion-testing.git
branch = main
update = none
2 changes: 1 addition & 1 deletion datafusion-testing
22 changes: 20 additions & 2 deletions datafusion/datasource/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ use datafusion_physical_plan::filter_pushdown::{
/// └─────────────────────┘
/// ```
pub trait DataSource: Any + Send + Sync + Debug {
/// Optionally delegates downcast identity to an inner [`DataSource`].
///
/// Wrapper types (e.g. a partitioning adapter that wraps a [`FileScanConfig`])
/// can implement this to make `is` and `downcast_ref` transparently see
/// through to the inner source, exactly as [`ExecutionPlan::downcast_delegate`]
/// does for execution plans.
///
/// Implementations should return the inner source, not `self`.
fn downcast_delegate(&self) -> Option<&dyn DataSource> {
None
}

/// Open the specified output partition and return its stream of
/// [`RecordBatch`]es.
///
Expand Down Expand Up @@ -292,11 +304,17 @@ impl OpenArgs {

impl dyn DataSource {
pub fn is<T: DataSource>(&self) -> bool {
(self as &dyn Any).is::<T>()
match self.downcast_delegate() {
Some(delegate) => delegate.is::<T>(),
None => (self as &dyn Any).is::<T>(),
}
}

pub fn downcast_ref<T: DataSource>(&self) -> Option<&T> {
(self as &dyn Any).downcast_ref()
match self.downcast_delegate() {
Some(delegate) => delegate.downcast_ref::<T>(),
None => (self as &dyn Any).downcast_ref(),
}
}
}

Expand Down
51 changes: 51 additions & 0 deletions datafusion/optimizer/src/extract_leaf_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,13 @@ fn try_push_into_inputs(
return Ok(None);
}

// Unnest may output a column with the same name but different value/type
// than its input column. Name-based routing cannot distinguish those.
// On top of that Unnest can't go through the `node.with_new_exprs(node.expressions(), new_inputs)` rebuild
if matches!(node, LogicalPlan::Unnest(_)) {
return Ok(None);
}

// SubqueryAlias remaps qualifiers between input and output.
// Rewrite pairs/columns from alias-space to input-space before routing.
let remapped = if let LogicalPlan::SubqueryAlias(sa) = node {
Expand Down Expand Up @@ -3035,4 +3042,48 @@ mod tests {

Ok(())
}

/// Regression test for the `Assertion failed: expr.is_empty(): Unnest`
/// internal error.
///
/// `try_push_into_inputs` rebuilds the parent node via
/// `node.with_new_exprs(node.expressions(), new_inputs)`. For `Unnest`,
/// `apply_expressions` exposes the `exec_columns` as `Expr::Column`s
/// (so `expressions()` is **non-empty**), but `with_new_exprs` for
/// `Unnest` immediately calls `assert_no_expressions(expr)?` and errors
/// out. The optimizer should treat `Unnest` as a barrier and bail
/// instead of attempting to push through it.
#[test]
fn test_no_push_through_unnest() -> Result<()> {
use arrow::datatypes::{DataType, Field, Schema};

let schema = Schema::new(vec![
Field::new("list_col", DataType::new_list(DataType::Int32, true), true),
Field::new("other_col", DataType::Int32, true),
]);
let table_scan =
datafusion_expr::logical_plan::table_scan(Some("t"), &schema, None)?
.build()?;
let plan = LogicalPlanBuilder::from(table_scan)
.unnest_column("list_col")?
.filter(leaf_udf(col("list_col"), "x").eq(lit(1i32)))?
.build()?;

let ctx = OptimizerContext::new().with_max_passes(1);
let optimizer = Optimizer::with_rules(vec![
Arc::new(ExtractLeafExpressions::new()),
Arc::new(PushDownLeafProjections::new()),
]);
let optimized = optimizer.optimize(plan, &ctx, |_, _| {})?;

insta::assert_snapshot!(format!("{optimized}"), @r#"
Projection: list_col, t.other_col
Filter: __datafusion_extracted_1 = Int32(1)
Projection: leaf_udf(list_col, Utf8("x")) AS __datafusion_extracted_1, list_col, t.other_col
Unnest: lists[t.list_col|depth=1] structs[]
TableScan: t
"#);

Ok(())
}
}
Loading
Loading