Skip to content

Commit 2f637a2

Browse files
chore: update datafusion to 50 (#7076)
* chore: update datafusion to 50 Signed-off-by: luofucong <luofc@foxmail.com> * fix ci Signed-off-by: luofucong <luofc@foxmail.com> * fix: update datafusion_pg_catalog import * chore: fix toml format * chore: fix toml format again * fix nextest Signed-off-by: luofucong <luofc@foxmail.com> * fix sqlness Signed-off-by: luofucong <luofc@foxmail.com> * chore: switch datafusion-orc to upstream tag * fix sqlness Signed-off-by: luofucong <luofc@foxmail.com> * resolve PR comments Signed-off-by: luofucong <luofc@foxmail.com> --------- Signed-off-by: luofucong <luofc@foxmail.com> Co-authored-by: Ning Sun <sunning@greptime.com>
1 parent f388dbd commit 2f637a2

63 files changed

Lines changed: 830 additions & 778 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 379 additions & 426 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,12 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
9999
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
100100
ahash = { version = "0.8", features = ["compile-time-rng"] }
101101
aquamarine = "0.6"
102-
arrow = { version = "56.0", features = ["prettyprint"] }
103-
arrow-array = { version = "56.0", default-features = false, features = ["chrono-tz"] }
104-
arrow-buffer = "56.0"
105-
arrow-flight = "56.0"
106-
arrow-ipc = { version = "56.0", default-features = false, features = ["lz4", "zstd"] }
107-
arrow-schema = { version = "56.0", features = ["serde"] }
102+
arrow = { version = "56.2", features = ["prettyprint"] }
103+
arrow-array = { version = "56.2", default-features = false, features = ["chrono-tz"] }
104+
arrow-buffer = "56.2"
105+
arrow-flight = "56.2"
106+
arrow-ipc = { version = "56.2", default-features = false, features = ["lz4", "zstd"] }
107+
arrow-schema = { version = "56.2", features = ["serde"] }
108108
async-stream = "0.3"
109109
async-trait = "0.1"
110110
# Remember to update axum-extra, axum-macros when updating axum
@@ -123,18 +123,18 @@ clap = { version = "4.4", features = ["derive"] }
123123
config = "0.13.0"
124124
crossbeam-utils = "0.8"
125125
dashmap = "6.1"
126-
datafusion = "49"
127-
datafusion-common = "49"
128-
datafusion-expr = "49"
129-
datafusion-functions = "49"
130-
datafusion-functions-aggregate-common = "49"
131-
datafusion-optimizer = "49"
132-
datafusion-orc = { git = "https://github.com/GreptimeTeam/datafusion-orc", rev = "a0a5f902158f153119316eaeec868cff3fc8a99d" }
133-
datafusion-pg-catalog = { git = "https://github.com/datafusion-contrib/datafusion-postgres", rev = "3d1b7c7d5b82dd49bafc2803259365e633f654fa" }
134-
datafusion-physical-expr = "49"
135-
datafusion-physical-plan = "49"
136-
datafusion-sql = "49"
137-
datafusion-substrait = "49"
126+
datafusion = "50"
127+
datafusion-common = "50"
128+
datafusion-expr = "50"
129+
datafusion-functions = "50"
130+
datafusion-functions-aggregate-common = "50"
131+
datafusion-optimizer = "50"
132+
datafusion-orc = "0.5"
133+
datafusion-pg-catalog = "0.11"
134+
datafusion-physical-expr = "50"
135+
datafusion-physical-plan = "50"
136+
datafusion-sql = "50"
137+
datafusion-substrait = "50"
138138
deadpool = "0.12"
139139
deadpool-postgres = "0.14"
140140
derive_builder = "0.20"
@@ -180,7 +180,7 @@ otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "2
180180
"server",
181181
] }
182182
parking_lot = "0.12"
183-
parquet = { version = "56.0", default-features = false, features = ["arrow", "async", "object_store"] }
183+
parquet = { version = "56.2", default-features = false, features = ["arrow", "async", "object_store"] }
184184
paste = "1.0"
185185
pin-project = "1.0"
186186
pretty_assertions = "1.4.0"
@@ -217,10 +217,7 @@ simd-json = "0.15"
217217
similar-asserts = "1.6.0"
218218
smallvec = { version = "1", features = ["serde"] }
219219
snafu = "0.8"
220-
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea", features = [
221-
"visitor",
222-
"serde",
223-
] } # branch = "v0.55.x"
220+
sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor", "serde"] }
224221
sqlx = { version = "0.8", features = [
225222
"runtime-tokio-rustls",
226223
"mysql",
@@ -322,16 +319,19 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git"
322319
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"
323320

324321
[patch.crates-io]
325-
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
326-
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
327-
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
328-
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
329-
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
330-
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
331-
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
332-
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
333-
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
334-
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
322+
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
323+
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
324+
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
325+
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
326+
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
327+
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
328+
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
329+
datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
330+
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
331+
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
332+
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
333+
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
334+
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x"
335335

336336
[profile.release]
337337
debug = 1

src/catalog/src/system_schema/pg_catalog.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use datafusion::error::DataFusionError;
2727
use datafusion::execution::TaskContext;
2828
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
2929
use datafusion_pg_catalog::pg_catalog::catalog_info::CatalogInfo;
30+
use datafusion_pg_catalog::pg_catalog::context::EmptyContextProvider;
3031
use datafusion_pg_catalog::pg_catalog::{
3132
PG_CATALOG_TABLES, PgCatalogSchemaProvider, PgCatalogStaticTables, PgCatalogTable,
3233
};
@@ -44,7 +45,7 @@ use crate::system_schema::{
4445
/// [`PGCatalogProvider`] is the provider for a schema named `pg_catalog`, it is not a catalog.
4546
pub struct PGCatalogProvider {
4647
catalog_name: String,
47-
inner: PgCatalogSchemaProvider<CatalogManagerWrapper>,
48+
inner: PgCatalogSchemaProvider<CatalogManagerWrapper, EmptyContextProvider>,
4849
tables: HashMap<String, TableRef>,
4950
table_ids: HashMap<&'static str, u32>,
5051
}
@@ -69,6 +70,7 @@ impl PGCatalogProvider {
6970
catalog_manager,
7071
},
7172
Arc::new(static_tables),
73+
EmptyContextProvider,
7274
)
7375
.expect("Failed to initialize PgCatalogSchemaProvider");
7476

src/common/datasource/src/file_format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use bytes::{Buf, Bytes};
3333
use datafusion::datasource::physical_plan::FileOpenFuture;
3434
use datafusion::error::{DataFusionError, Result as DataFusionResult};
3535
use datafusion::physical_plan::SendableRecordBatchStream;
36-
use futures::StreamExt;
36+
use futures::{StreamExt, TryStreamExt};
3737
use object_store::ObjectStore;
3838
use snafu::ResultExt;
3939
use tokio_util::compat::FuturesAsyncWriteCompatExt;
@@ -179,7 +179,7 @@ pub fn open_with_decoder<T: ArrowDecoder, F: Fn() -> DataFusionResult<T>>(
179179
Poll::Ready(decoder.flush().transpose())
180180
});
181181

182-
Ok(stream.boxed())
182+
Ok(stream.map_err(Into::into).boxed())
183183
}))
184184
}
185185

src/common/function/src/aggrs/aggr_wrapper.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
//! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s input.
2323
//!
2424
25+
use std::hash::{Hash, Hasher};
2526
use std::sync::Arc;
2627

2728
use arrow::array::StructArray;
@@ -272,7 +273,7 @@ impl StateMergeHelper {
272273
}
273274

274275
/// Wrapper to make an aggregate function out of a state function.
275-
#[derive(Debug, Clone, PartialEq, Eq)]
276+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
276277
pub struct StateWrapper {
277278
inner: AggregateUDF,
278279
name: String,
@@ -616,6 +617,20 @@ impl AggregateUDFImpl for MergeWrapper {
616617
}
617618
}
618619

620+
impl PartialEq for MergeWrapper {
621+
fn eq(&self, other: &Self) -> bool {
622+
self.inner == other.inner
623+
}
624+
}
625+
626+
impl Eq for MergeWrapper {}
627+
628+
impl Hash for MergeWrapper {
629+
fn hash<H: Hasher>(&self, state: &mut H) {
630+
self.inner.hash(state);
631+
}
632+
}
633+
619634
/// The merge accumulator, which modify `update_batch`'s behavior to accept one struct array which
620635
/// include the state fields of original aggregate function, and merge said states into original accumulator
621636
/// the output is the same as original aggregate function

src/common/function/src/aggrs/aggr_wrapper/tests.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ use datafusion::prelude::SessionContext;
3939
use datafusion_common::arrow::array::AsArray;
4040
use datafusion_common::arrow::datatypes::{Float64Type, UInt64Type};
4141
use datafusion_common::{Column, TableReference};
42-
use datafusion_expr::expr::AggregateFunction;
43-
use datafusion_expr::sqlparser::ast::NullTreatment;
42+
use datafusion_expr::expr::{AggregateFunction, NullTreatment};
4443
use datafusion_expr::{
4544
Aggregate, ColumnarValue, Expr, LogicalPlan, ScalarFunctionArgs, SortExpr, TableScan, lit,
4645
};

src/common/function/src/aggrs/count_hash.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ impl CountHash {
6868
}
6969
}
7070

71-
#[derive(Debug, Clone)]
71+
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
7272
pub struct CountHash {
7373
signature: Signature,
7474
}

src/common/function/src/scalars/geo/geohash.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ impl Function for GeohashFunction {
7676
}
7777

7878
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
79-
Ok(DataType::Utf8)
79+
Ok(DataType::Utf8View)
8080
}
8181

8282
fn signature(&self) -> &Signature {
@@ -176,7 +176,7 @@ impl Function for GeohashNeighboursFunction {
176176
Ok(DataType::List(Arc::new(Field::new(
177177
"item",
178178
DataType::Utf8View,
179-
false,
179+
true,
180180
))))
181181
}
182182

src/common/function/src/scalars/geo/h3.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,9 @@ impl Function for H3CellCenterLatLng {
355355

356356
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
357357
Ok(DataType::List(Arc::new(Field::new(
358-
"x",
358+
"item",
359359
DataType::Float64,
360-
false,
360+
true,
361361
))))
362362
}
363363

src/common/function/src/scalars/udf.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
use std::any::Any;
1616
use std::fmt::{Debug, Formatter};
17+
use std::hash::{Hash, Hasher};
1718

1819
use datafusion::arrow::datatypes::DataType;
1920
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
@@ -33,6 +34,20 @@ impl Debug for ScalarUdf {
3334
}
3435
}
3536

37+
impl PartialEq for ScalarUdf {
38+
fn eq(&self, other: &Self) -> bool {
39+
self.function.signature() == other.function.signature()
40+
}
41+
}
42+
43+
impl Eq for ScalarUdf {}
44+
45+
impl Hash for ScalarUdf {
46+
fn hash<H: Hasher>(&self, state: &mut H) {
47+
self.function.signature().hash(state)
48+
}
49+
}
50+
3651
impl ScalarUDFImpl for ScalarUdf {
3752
fn as_any(&self) -> &dyn Any {
3853
self

0 commit comments

Comments
 (0)