Skip to content

Commit f340fe8

Browse files
authored
Merge branch 'main' into file-stats-cache
2 parents b3649dd + 3c53618 commit f340fe8

30 files changed

Lines changed: 1797 additions & 278 deletions

File tree

.github/workflows/audit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
steps:
4646
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
4747
- name: Install cargo-audit
48-
uses: taiki-e/install-action@7ea35f098a7369cd23488403f58be9c491a6c55f # v2.77.0
48+
uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
4949
with:
5050
tool: cargo-audit
5151
- name: Run audit check

.github/workflows/breaking_changes_detector.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ jobs:
8989
9090
- name: Install cargo-semver-checks
9191
if: steps.changed_crates.outputs.packages != ''
92-
uses: taiki-e/install-action@7ea35f098a7369cd23488403f58be9c491a6c55f # v2.77.0
92+
uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
9393
with:
9494
tool: cargo-semver-checks
9595

.github/workflows/dev.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
source ci/scripts/utils/tool_versions.sh
6565
echo "LYCHEE_VERSION=${LYCHEE_VERSION}" >> "$GITHUB_ENV"
6666
- name: Install lychee
67-
uses: taiki-e/install-action@7ea35f098a7369cd23488403f58be9c491a6c55f # v2.77.0
67+
uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
6868
with:
6969
tool: lychee@${{ env.LYCHEE_VERSION }}
7070
- name: Run markdown link check

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ jobs:
429429
sudo apt-get update -qq
430430
sudo apt-get install -y -qq clang
431431
- name: Setup wasm-pack
432-
uses: taiki-e/install-action@7ea35f098a7369cd23488403f58be9c491a6c55f # v2.77.0
432+
uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
433433
with:
434434
tool: wasm-pack
435435
- name: Run tests with headless mode
@@ -774,7 +774,7 @@ jobs:
774774
- name: Setup Rust toolchain
775775
uses: ./.github/actions/setup-builder
776776
- name: Install cargo-msrv
777-
uses: taiki-e/install-action@7ea35f098a7369cd23488403f58be9c491a6c55f # v2.77.0
777+
uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6
778778
with:
779779
tool: cargo-msrv
780780

datafusion/core/tests/parquet/row_group_pruning.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,15 @@ fn make_i32_batch(
17771777
RecordBatch::try_new(schema, vec![array]).map_err(DataFusionError::from)
17781778
}
17791779

1780+
fn make_nullable_i32_batch(
1781+
name: &str,
1782+
values: Vec<Option<i32>>,
1783+
) -> datafusion_common::error::Result<RecordBatch> {
1784+
let schema = Arc::new(Schema::new(vec![Field::new(name, DataType::Int32, true)]));
1785+
let array: ArrayRef = Arc::new(Int32Array::from(values));
1786+
RecordBatch::try_new(schema, vec![array]).map_err(DataFusionError::from)
1787+
}
1788+
17801789
// Helper function to create a batch with two Int32 columns
17811790
fn make_two_col_i32_batch(
17821791
name_a: &str,
@@ -1793,6 +1802,72 @@ fn make_two_col_i32_batch(
17931802
RecordBatch::try_new(schema, vec![array_a, array_b]).map_err(DataFusionError::from)
17941803
}
17951804

1805+
#[tokio::test]
1806+
async fn prune_is_not_distinct_from_i32() -> datafusion_common::error::Result<()> {
1807+
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
1808+
let batches = vec![
1809+
make_nullable_i32_batch("a", vec![None, None])?,
1810+
make_nullable_i32_batch("a", vec![Some(0), Some(0)])?,
1811+
make_nullable_i32_batch("a", vec![Some(0), Some(1)])?,
1812+
make_nullable_i32_batch("a", vec![Some(2), Some(3)])?,
1813+
make_nullable_i32_batch("a", vec![None, Some(5)])?,
1814+
];
1815+
1816+
RowGroupPruningTest::new()
1817+
.with_scenario(Scenario::Int)
1818+
.with_query("SELECT a FROM t WHERE a IS NOT DISTINCT FROM 0")
1819+
.with_expected_errors(Some(0))
1820+
.with_expected_rows(3)
1821+
.with_pruned_files(Some(0))
1822+
.with_matched_by_stats(Some(2))
1823+
.with_fully_matched_by_stats(Some(1))
1824+
.with_pruned_by_stats(Some(3))
1825+
.with_limit_pruned_row_groups(Some(0))
1826+
.test_row_group_prune_with_custom_data(schema.clone(), batches.clone(), 2)
1827+
.await;
1828+
1829+
RowGroupPruningTest::new()
1830+
.with_scenario(Scenario::Int)
1831+
.with_query("SELECT a FROM t WHERE a IS NOT DISTINCT FROM NULL")
1832+
.with_expected_errors(Some(0))
1833+
.with_expected_rows(3)
1834+
.with_pruned_files(Some(0))
1835+
.with_matched_by_stats(Some(2))
1836+
.with_fully_matched_by_stats(Some(0))
1837+
.with_pruned_by_stats(Some(3))
1838+
.with_limit_pruned_row_groups(Some(0))
1839+
.test_row_group_prune_with_custom_data(schema.clone(), batches.clone(), 2)
1840+
.await;
1841+
1842+
RowGroupPruningTest::new()
1843+
.with_scenario(Scenario::Int)
1844+
.with_query("SELECT a FROM t WHERE a IS DISTINCT FROM 0")
1845+
.with_expected_errors(Some(0))
1846+
.with_expected_rows(7)
1847+
.with_pruned_files(Some(0))
1848+
.with_matched_by_stats(Some(4))
1849+
.with_fully_matched_by_stats(Some(1))
1850+
.with_pruned_by_stats(Some(1))
1851+
.with_limit_pruned_row_groups(Some(0))
1852+
.test_row_group_prune_with_custom_data(schema.clone(), batches.clone(), 2)
1853+
.await;
1854+
1855+
RowGroupPruningTest::new()
1856+
.with_scenario(Scenario::Int)
1857+
.with_query("SELECT a FROM t WHERE a IS DISTINCT FROM NULL")
1858+
.with_expected_errors(Some(0))
1859+
.with_expected_rows(7)
1860+
.with_pruned_files(Some(0))
1861+
.with_matched_by_stats(Some(4))
1862+
.with_fully_matched_by_stats(Some(3))
1863+
.with_pruned_by_stats(Some(1))
1864+
.with_limit_pruned_row_groups(Some(0))
1865+
.test_row_group_prune_with_custom_data(schema, batches, 2)
1866+
.await;
1867+
1868+
Ok(())
1869+
}
1870+
17961871
#[tokio::test]
17971872
async fn test_limit_pruning_basic() -> datafusion_common::error::Result<()> {
17981873
// Scenario: Simple integer column, multiple row groups

datafusion/expr-common/src/operator.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,9 @@ impl Operator {
255255
Operator::GtEq => Some(Operator::LtEq),
256256
Operator::AtArrow => Some(Operator::ArrowAt),
257257
Operator::ArrowAt => Some(Operator::AtArrow),
258-
Operator::IsDistinctFrom
259-
| Operator::IsNotDistinctFrom
260-
| Operator::Plus
258+
Operator::IsDistinctFrom => Some(Operator::IsDistinctFrom),
259+
Operator::IsNotDistinctFrom => Some(Operator::IsNotDistinctFrom),
260+
Operator::Plus
261261
| Operator::Minus
262262
| Operator::Multiply
263263
| Operator::Divide

datafusion/expr/src/expr.rs

Lines changed: 7 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ use datafusion_common::{
4646
use datafusion_expr_common::placement::ExpressionPlacement;
4747
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
4848
#[cfg(feature = "sql")]
49-
use sqlparser::ast::{
49+
pub use sqlparser::ast::{
50+
ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem,
51+
ReplaceSelectElement,
52+
};
53+
// Use shims for sqlparser types when the sql feature is disabled.
54+
#[cfg(not(feature = "sql"))]
55+
pub use crate::sql::{
5056
ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem,
5157
ReplaceSelectElement,
5258
};
@@ -1416,63 +1422,6 @@ impl Lambda {
14161422
}
14171423
}
14181424

1419-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1420-
#[cfg(not(feature = "sql"))]
1421-
pub struct IlikeSelectItem {
1422-
pub pattern: String,
1423-
}
1424-
#[cfg(not(feature = "sql"))]
1425-
impl Display for IlikeSelectItem {
1426-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1427-
write!(f, "ILIKE '{}'", &self.pattern)?;
1428-
Ok(())
1429-
}
1430-
}
1431-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1432-
#[cfg(not(feature = "sql"))]
1433-
pub enum ExcludeSelectItem {
1434-
Single(Ident),
1435-
Multiple(Vec<Ident>),
1436-
}
1437-
#[cfg(not(feature = "sql"))]
1438-
impl Display for ExcludeSelectItem {
1439-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1440-
write!(f, "EXCLUDE")?;
1441-
match self {
1442-
Self::Single(column) => {
1443-
write!(f, " {column}")?;
1444-
}
1445-
Self::Multiple(columns) => {
1446-
write!(f, " ({})", display_comma_separated(columns))?;
1447-
}
1448-
}
1449-
Ok(())
1450-
}
1451-
}
1452-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1453-
#[cfg(not(feature = "sql"))]
1454-
pub struct ExceptSelectItem {
1455-
pub first_element: Ident,
1456-
pub additional_elements: Vec<Ident>,
1457-
}
1458-
#[cfg(not(feature = "sql"))]
1459-
impl Display for ExceptSelectItem {
1460-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1461-
write!(f, "EXCEPT ")?;
1462-
if self.additional_elements.is_empty() {
1463-
write!(f, "({})", self.first_element)?;
1464-
} else {
1465-
write!(
1466-
f,
1467-
"({}, {})",
1468-
self.first_element,
1469-
display_comma_separated(&self.additional_elements)
1470-
)?;
1471-
}
1472-
Ok(())
1473-
}
1474-
}
1475-
14761425
pub fn display_comma_separated<T>(slice: &[T]) -> String
14771426
where
14781427
T: Display,
@@ -1481,64 +1430,6 @@ where
14811430
slice.iter().map(|v| format!("{v}")).join(", ")
14821431
}
14831432

1484-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1485-
#[cfg(not(feature = "sql"))]
1486-
pub enum RenameSelectItem {
1487-
Single(String),
1488-
Multiple(Vec<String>),
1489-
}
1490-
#[cfg(not(feature = "sql"))]
1491-
impl Display for RenameSelectItem {
1492-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1493-
write!(f, "RENAME")?;
1494-
match self {
1495-
Self::Single(column) => {
1496-
write!(f, " {column}")?;
1497-
}
1498-
Self::Multiple(columns) => {
1499-
write!(f, " ({})", display_comma_separated(columns))?;
1500-
}
1501-
}
1502-
Ok(())
1503-
}
1504-
}
1505-
1506-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1507-
#[cfg(not(feature = "sql"))]
1508-
pub struct Ident {
1509-
/// The value of the identifier without quotes.
1510-
pub value: String,
1511-
/// The starting quote if any. Valid quote characters are the single quote,
1512-
/// double quote, backtick, and opening square bracket.
1513-
pub quote_style: Option<char>,
1514-
/// The span of the identifier in the original SQL string.
1515-
pub span: String,
1516-
}
1517-
#[cfg(not(feature = "sql"))]
1518-
impl Display for Ident {
1519-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1520-
write!(f, "[{}]", self.value)
1521-
}
1522-
}
1523-
1524-
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
1525-
#[cfg(not(feature = "sql"))]
1526-
pub struct ReplaceSelectElement {
1527-
pub expr: String,
1528-
pub column_name: Ident,
1529-
pub as_keyword: bool,
1530-
}
1531-
#[cfg(not(feature = "sql"))]
1532-
impl Display for ReplaceSelectElement {
1533-
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1534-
if self.as_keyword {
1535-
write!(f, "{} AS {}", self.expr, self.column_name)
1536-
} else {
1537-
write!(f, "{} {}", self.expr, self.column_name)
1538-
}
1539-
}
1540-
}
1541-
15421433
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
15431434
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
15441435
pub struct WildcardOptions {

datafusion/expr/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ pub mod statistics {
8080
mod predicate_bounds;
8181
pub mod preimage;
8282
pub mod ptr_eq;
83+
#[cfg(not(feature = "sql"))]
84+
pub mod sql;
8385
pub mod test;
8486
pub mod tree_node;
8587
pub mod type_coercion;

datafusion/expr/src/logical_plan/ddl.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ use std::{
2424
hash::{Hash, Hasher},
2525
};
2626

27-
#[cfg(not(feature = "sql"))]
28-
use crate::expr::Ident;
2927
use crate::expr::Sort;
28+
#[cfg(not(feature = "sql"))]
29+
use crate::sql::Ident;
3030
use arrow::datatypes::DataType;
3131
use datafusion_common::tree_node::{Transformed, TreeNodeContainer, TreeNodeRecursion};
3232
use datafusion_common::{

0 commit comments

Comments
 (0)