Skip to content

Commit 32f472d

Browse files
authored
Merge branch 'main' into feature/ffi-executionplan-metrics
2 parents c2589d6 + 0c4ace8 commit 32f472d

50 files changed

Lines changed: 624 additions & 57 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/codeql.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ jobs:
4545
persist-credentials: false
4646

4747
- name: Initialize CodeQL
48-
uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4
48+
uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4
4949
with:
5050
languages: actions
5151

5252
- name: Perform CodeQL Analysis
53-
uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4
53+
uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4
5454
with:
5555
category: "/language:actions"

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ regex = "1.12"
192192
rstest = "0.26.1"
193193
serde_json = "1"
194194
sha2 = "^0.11.0"
195-
sqlparser = { version = "0.61.0", default-features = false, features = ["std", "visitor"] }
195+
sqlparser = { version = "0.62.0", default-features = false, features = ["std", "visitor"] }
196196
strum = "0.28.0"
197197
strum_macros = "0.28.0"
198198
tempfile = "3"

datafusion/execution/src/memory_pool/mod.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//! help with allocation accounting.
2020
2121
use datafusion_common::{Result, internal_datafusion_err};
22+
use std::any::Any;
2223
use std::fmt::Display;
2324
use std::hash::{Hash, Hasher};
2425
use std::{cmp::Ordering, sync::Arc, sync::atomic};
@@ -182,7 +183,7 @@ pub use pool::*;
182183
///
183184
/// * [`TrackConsumersPool`]: Wraps another [`MemoryPool`] and tracks consumers,
184185
/// providing better error messages on the largest memory users.
185-
pub trait MemoryPool: Send + Sync + std::fmt::Debug + Display {
186+
pub trait MemoryPool: Any + Send + Sync + std::fmt::Debug + Display {
186187
/// Return pool name
187188
fn name(&self) -> &str;
188189

@@ -224,6 +225,18 @@ pub trait MemoryPool: Send + Sync + std::fmt::Debug + Display {
224225
}
225226
}
226227

228+
impl dyn MemoryPool {
229+
/// Returns `true` if this pool is of type `T`.
230+
pub fn is<T: MemoryPool>(&self) -> bool {
231+
(self as &dyn Any).is::<T>()
232+
}
233+
234+
/// Attempts to downcast this pool to a concrete type `T`.
235+
pub fn downcast_ref<T: MemoryPool>(&self) -> Option<&T> {
236+
(self as &dyn Any).downcast_ref()
237+
}
238+
}
239+
227240
/// Memory limit of `MemoryPool`
228241
pub enum MemoryLimit {
229242
Infinite,
@@ -603,6 +616,18 @@ mod tests {
603616
assert_eq!(pool.reserved(), 28);
604617
}
605618

619+
#[test]
620+
fn test_downcast() {
621+
let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(50));
622+
623+
assert!(pool.is::<GreedyMemoryPool>());
624+
assert!(!pool.is::<UnboundedMemoryPool>());
625+
626+
let greedy: &GreedyMemoryPool = pool.downcast_ref().unwrap();
627+
assert_eq!(greedy.reserved(), 0);
628+
assert!(pool.downcast_ref::<UnboundedMemoryPool>().is_none());
629+
}
630+
606631
#[test]
607632
fn test_try_shrink() {
608633
let pool = Arc::new(GreedyMemoryPool::new(100)) as _;

datafusion/expr-common/src/statistics.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Probabilistic distributions for expression-level statistics (unused).
19+
//!
20+
//! Note: All public items in this module are **deprecated** as of `54.0.0`.
21+
//!
22+
//! See <https://github.com/apache/datafusion/pull/22071> for details.
23+
24+
// The whole module is deprecated; suppress warnings from intra-module uses
25+
// of the deprecated types so the module continues to compile.
26+
#![allow(deprecated)]
27+
1828
use std::f64::consts::LN_2;
1929

2030
use crate::interval_arithmetic::{Interval, apply_operator};
@@ -37,6 +47,10 @@ use datafusion_common::{
3747
/// is the main unit of calculus when evaluating expressions in a statistical
3848
/// context. Notions like column and table statistics are built on top of this
3949
/// object and the operations it supports.
50+
#[deprecated(
51+
since = "54.0.0",
52+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
53+
)]
4054
#[derive(Clone, Debug, PartialEq)]
4155
pub enum Distribution {
4256
Uniform(UniformDistribution),
@@ -214,6 +228,10 @@ impl Distribution {
214228
///
215229
/// <https://en.wikipedia.org/wiki/Continuous_uniform_distribution>
216230
/// <https://en.wikipedia.org/wiki/Prior_probability#Improper_priors>
231+
#[deprecated(
232+
since = "54.0.0",
233+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
234+
)]
217235
#[derive(Clone, Debug, PartialEq)]
218236
pub struct UniformDistribution {
219237
interval: Interval,
@@ -236,6 +254,10 @@ pub struct UniformDistribution {
236254
/// For more information, see:
237255
///
238256
/// <https://en.wikipedia.org/wiki/Exponential_distribution>
257+
#[deprecated(
258+
since = "54.0.0",
259+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
260+
)]
239261
#[derive(Clone, Debug, PartialEq)]
240262
pub struct ExponentialDistribution {
241263
rate: ScalarValue,
@@ -249,6 +271,10 @@ pub struct ExponentialDistribution {
249271
/// For a more in-depth discussion, see:
250272
///
251273
/// <https://en.wikipedia.org/wiki/Normal_distribution>
274+
#[deprecated(
275+
since = "54.0.0",
276+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
277+
)]
252278
#[derive(Clone, Debug, PartialEq)]
253279
pub struct GaussianDistribution {
254280
mean: ScalarValue,
@@ -259,6 +285,10 @@ pub struct GaussianDistribution {
259285
/// the success probability is unknown. For a more in-depth discussion, see:
260286
///
261287
/// <https://en.wikipedia.org/wiki/Bernoulli_distribution>
288+
#[deprecated(
289+
since = "54.0.0",
290+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
291+
)]
262292
#[derive(Clone, Debug, PartialEq)]
263293
pub struct BernoulliDistribution {
264294
p: ScalarValue,
@@ -268,6 +298,10 @@ pub struct BernoulliDistribution {
268298
/// approximated via some summary statistics. For a more in-depth discussion, see:
269299
///
270300
/// <https://en.wikipedia.org/wiki/Summary_statistics>
301+
#[deprecated(
302+
since = "54.0.0",
303+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
304+
)]
271305
#[derive(Clone, Debug, PartialEq)]
272306
pub struct GenericDistribution {
273307
mean: ScalarValue,
@@ -594,6 +628,10 @@ impl GenericDistribution {
594628
/// This function takes a logical operator and two Bernoulli distributions,
595629
/// and it returns a new Bernoulli distribution that represents the result of
596630
/// the operation. Currently, only `AND` and `OR` operations are supported.
631+
#[deprecated(
632+
since = "54.0.0",
633+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
634+
)]
597635
pub fn combine_bernoullis(
598636
op: &Operator,
599637
left: &BernoulliDistribution,
@@ -649,6 +687,10 @@ pub fn combine_bernoullis(
649687
/// see:
650688
///
651689
/// <https://en.wikipedia.org/wiki/Sum_of_normally_distributed_random_variables>
690+
#[deprecated(
691+
since = "54.0.0",
692+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
693+
)]
652694
pub fn combine_gaussians(
653695
op: &Operator,
654696
left: &GaussianDistribution,
@@ -673,6 +715,10 @@ pub fn combine_gaussians(
673715
/// Expects `op` to be a comparison operator, with `left` and `right` having
674716
/// numeric distributions. The resulting distribution has the `Float64` data
675717
/// type.
718+
#[deprecated(
719+
since = "54.0.0",
720+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
721+
)]
676722
pub fn create_bernoulli_from_comparison(
677723
op: &Operator,
678724
left: &Distribution,
@@ -751,6 +797,10 @@ pub fn create_bernoulli_from_comparison(
751797
/// given binary operation on two unknown quantities represented by their
752798
/// [`Distribution`] objects. The function computes the mean, median and
753799
/// variance if possible.
800+
#[deprecated(
801+
since = "54.0.0",
802+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
803+
)]
754804
pub fn new_generic_from_binary_op(
755805
op: &Operator,
756806
left: &Distribution,
@@ -766,6 +816,10 @@ pub fn new_generic_from_binary_op(
766816

767817
/// Computes the mean value for the result of the given binary operation on
768818
/// two unknown quantities represented by their [`Distribution`] objects.
819+
#[deprecated(
820+
since = "54.0.0",
821+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
822+
)]
769823
pub fn compute_mean(
770824
op: &Operator,
771825
left: &Distribution,
@@ -798,6 +852,10 @@ pub fn compute_mean(
798852
/// the median is calculable only for addition and subtraction operations on:
799853
/// - [`Uniform`] and [`Uniform`] distributions, and
800854
/// - [`Gaussian`] and [`Gaussian`] distributions.
855+
#[deprecated(
856+
since = "54.0.0",
857+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
858+
)]
801859
pub fn compute_median(
802860
op: &Operator,
803861
left: &Distribution,
@@ -835,6 +893,10 @@ pub fn compute_median(
835893

836894
/// Computes the variance value for the result of the given binary operation on
837895
/// two unknown quantities represented by their [`Distribution`] objects.
896+
#[deprecated(
897+
since = "54.0.0",
898+
note = "Part of the unused Statistics V2 framework; see https://github.com/apache/datafusion/pull/22071"
899+
)]
838900
pub fn compute_variance(
839901
op: &Operator,
840902
left: &Distribution,

datafusion/expr/src/expr.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4014,8 +4014,8 @@ mod test {
40144014
wildcard_with_options(wildcard_options(
40154015
None,
40164016
Some(ExcludeSelectItem::Multiple(vec![
4017-
Ident::from("c1"),
4018-
Ident::from("c2")
4017+
Ident::from("c1").into(),
4018+
Ident::from("c2").into()
40194019
])),
40204020
None,
40214021
None,

datafusion/expr/src/sql.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ impl Display for IlikeSelectItem {
4545

4646
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
4747
pub enum ExcludeSelectItem {
48-
Single(Ident),
49-
Multiple(Vec<Ident>),
48+
Single(ObjectName),
49+
Multiple(Vec<ObjectName>),
5050
}
5151

5252
impl Display for ExcludeSelectItem {
@@ -64,6 +64,37 @@ impl Display for ExcludeSelectItem {
6464
}
6565
}
6666

67+
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
68+
pub struct ObjectName(pub Vec<ObjectNamePart>);
69+
70+
impl Display for ObjectName {
71+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
72+
let parts: Vec<String> = self.0.iter().map(|p| format!("{p}")).collect();
73+
write!(f, "{}", parts.join("."))
74+
}
75+
}
76+
77+
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
78+
pub enum ObjectNamePart {
79+
Identifier(Ident),
80+
}
81+
82+
impl ObjectNamePart {
83+
pub fn as_ident(&self) -> Option<&Ident> {
84+
match self {
85+
ObjectNamePart::Identifier(ident) => Some(ident),
86+
}
87+
}
88+
}
89+
90+
impl Display for ObjectNamePart {
91+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
92+
match self {
93+
ObjectNamePart::Identifier(ident) => write!(f, "{ident}"),
94+
}
95+
}
96+
}
97+
6798
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
6899
pub struct ExceptSelectItem {
69100
pub first_element: Ident,

datafusion/expr/src/utils.rs

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ use datafusion_common::{
3939
};
4040

4141
#[cfg(not(feature = "sql"))]
42-
use crate::sql::{ExceptSelectItem, ExcludeSelectItem};
42+
use crate::sql::{ExceptSelectItem, ExcludeSelectItem, Ident, ObjectName};
4343
use indexmap::IndexSet;
4444
#[cfg(feature = "sql")]
45-
use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem};
45+
use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, Ident, ObjectName};
4646

4747
pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity;
4848

@@ -339,11 +339,32 @@ fn get_excluded_columns(
339339
idents.push(&excepts.first_element);
340340
idents.extend(&excepts.additional_elements);
341341
}
342+
// Declared outside the `if let` so `idents.extend(exclude_owned.iter())`
343+
// below can borrow references that outlive the inner scope.
344+
let exclude_owned: Vec<Ident>;
342345
if let Some(exclude) = opt_exclude {
343-
match exclude {
344-
ExcludeSelectItem::Single(ident) => idents.push(ident),
345-
ExcludeSelectItem::Multiple(idents_inner) => idents.extend(idents_inner),
346-
}
346+
let object_name_to_ident = |name: &ObjectName| -> Result<Ident> {
347+
if name.0.len() != 1 {
348+
return plan_err!(
349+
"EXCLUDE with multi-part identifiers is not supported: {name}"
350+
);
351+
}
352+
let part = &name.0[0];
353+
let Some(ident) = part.as_ident() else {
354+
return plan_err!(
355+
"EXCLUDE with non-identifier name part is not supported: {part}"
356+
);
357+
};
358+
Ok(ident.clone())
359+
};
360+
exclude_owned = match exclude {
361+
ExcludeSelectItem::Single(name) => vec![object_name_to_ident(name)?],
362+
ExcludeSelectItem::Multiple(names) => names
363+
.iter()
364+
.map(object_name_to_ident)
365+
.collect::<Result<Vec<_>>>()?,
366+
};
367+
idents.extend(exclude_owned.iter());
347368
}
348369
// Excluded columns should be unique
349370
let n_elem = idents.len();

datafusion/ffi/src/expr/distribution.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! FFI types for the deprecated Statistics V2 [`Distribution`] framework.
19+
//!
20+
//! These FFI types mirror the deprecated probabilistic distribution types.
21+
//! See <https://github.com/apache/datafusion/pull/22071> for details.
22+
23+
#![allow(deprecated)]
24+
1825
use datafusion_common::DataFusionError;
1926
use datafusion_expr::statistics::{
2027
BernoulliDistribution, Distribution, ExponentialDistribution, GaussianDistribution,

0 commit comments

Comments
 (0)