Skip to content

Commit 70a688b

Browse files
committed
Minimal build now that pyo3 has been updated. Still need to resolve many deprecation warnings.
1 parent d8d4e04 commit 70a688b

File tree

11 files changed

+288
-340
lines changed

11 files changed

+288
-340
lines changed

Cargo.lock

Lines changed: 245 additions & 291 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,27 @@ protoc = [ "datafusion-substrait/protoc" ]
3434
substrait = ["dep:datafusion-substrait"]
3535

3636
[dependencies]
37-
tokio = { version = "1.47", features = ["macros", "rt", "rt-multi-thread", "sync"] }
38-
pyo3 = { version = "0.25", features = ["extension-module", "abi3", "abi3-py39"] }
39-
pyo3-async-runtimes = { version = "0.25", features = ["tokio-runtime"]}
40-
pyo3-log = "0.12.4"
41-
arrow = { version = "56", features = ["pyarrow"] }
42-
datafusion = { version = "50.3.0", features = ["avro", "unicode_expressions"] }
43-
datafusion-substrait = { version = "50.3.0", optional = true }
44-
datafusion-proto = { version = "50.3.0" }
45-
datafusion-ffi = { version = "50.3.0" }
46-
prost = "0.13.1" # keep in line with `datafusion-substrait`
47-
uuid = { version = "1.18", features = ["v4"] }
48-
mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] }
37+
arrow = { version = "57", features = ["pyarrow"] }
4938
async-trait = "0.1.89"
39+
datafusion = { version = "51", features = ["avro", "unicode_expressions"] }
40+
datafusion-substrait = { version = "51", optional = true }
41+
datafusion-proto = { version = "51" }
42+
datafusion-ffi = { version = "51" }
5043
futures = "0.3"
51-
object_store = { version = "0.12.4", features = ["aws", "gcp", "azure", "http"] }
52-
url = "2"
5344
log = "0.4.27"
45+
mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] }
46+
object_store = { version = "0.12.4", features = ["aws", "gcp", "azure", "http"] }
5447
parking_lot = "0.12"
48+
prost = "0.14" # keep in line with `datafusion-substrait`
49+
pyo3 = { version = "0.26", features = ["extension-module", "abi3", "abi3-py39"] }
50+
pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"]}
51+
pyo3-log = "0.13"
52+
tokio = { version = "1.48", features = ["macros", "rt", "rt-multi-thread", "sync"] }
53+
uuid = { version = "1.18", features = ["v4"] }
54+
url = "2"
5555

5656
[build-dependencies]
57-
prost-types = "0.13.1" # keep in line with `datafusion-substrait`
58-
pyo3-build-config = "0.25"
57+
pyo3-build-config = "0.26"
5958

6059
[lib]
6160
name = "datafusion_python"
@@ -66,7 +65,7 @@ lto = true
6665
codegen-units = 1
6766

6867
[patch.crates-io]
69-
datafusion = { git = "https://github.com/apache/datafusion", branch = "main" }
70-
datafusion-substrait = { git = "https://github.com/apache/datafusion", branch = "main" }
71-
datafusion-proto = { git = "https://github.com/apache/datafusion", branch = "main" }
72-
datafusion-ffi = { git = "https://github.com/apache/datafusion", branch = "main" }
68+
datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-51" }
69+
datafusion-substrait = { git = "https://github.com/apache/datafusion", branch = "branch-51" }
70+
datafusion-proto = { git = "https://github.com/apache/datafusion", branch = "branch-51" }
71+
datafusion-ffi = { git = "https://github.com/apache/datafusion", branch = "branch-51" }

src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ impl PyConfig {
6565
}
6666

6767
/// Set a configuration option
68-
pub fn set(&self, key: &str, value: PyObject, py: Python) -> PyDataFusionResult<()> {
68+
pub fn set(&self, key: &str, value: Bound<PyAny>, py: Python) -> PyDataFusionResult<()> {
6969
let scalar_value = py_obj_to_scalar_value(py, value)?;
7070
let mut options = self.config.write();
7171
options.set(key, scalar_value.to_string().as_str())?;

src/dataframe.rs

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,11 @@ impl PyDataFrame {
309309

310310
let table_uuid = uuid::Uuid::new_v4().to_string();
311311

312-
// Convert record batches to PyObject list
312+
// Convert record batches to PyAny list
313313
let py_batches = batches
314314
.iter()
315315
.map(|rb| rb.to_pyarrow(py))
316-
.collect::<PyResult<Vec<PyObject>>>()?;
316+
.collect::<PyResult<Vec<_>>>()?;
317317

318318
let py_schema = self.schema().into_pyobject(py)?;
319319

@@ -523,7 +523,7 @@ impl PyDataFrame {
523523
/// Executes the plan, returning a list of `RecordBatch`es.
524524
/// Unless some order is specified in the plan, there is no
525525
/// guarantee of the order of the result.
526-
fn collect(&self, py: Python) -> PyResult<Vec<PyObject>> {
526+
fn collect<'py>(&self, py: Python<'py>) -> PyResult<Vec<Bound<'py, PyAny>>> {
527527
let batches = wait_for_future(py, self.df.as_ref().clone().collect())?
528528
.map_err(PyDataFusionError::from)?;
529529
// cannot use PyResult<Vec<RecordBatch>> return type due to
@@ -539,7 +539,7 @@ impl PyDataFrame {
539539

540540
/// Executes this DataFrame and collects all results into a vector of vector of RecordBatch
541541
/// maintaining the input partitioning.
542-
fn collect_partitioned(&self, py: Python) -> PyResult<Vec<Vec<PyObject>>> {
542+
fn collect_partitioned<'py>(&self, py: Python<'py>) -> PyResult<Vec<Vec<Bound<'py, PyAny>>>> {
543543
let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned())?
544544
.map_err(PyDataFusionError::from)?;
545545

@@ -907,15 +907,14 @@ impl PyDataFrame {
907907

908908
/// Convert to Arrow Table
909909
/// Collect the batches and pass to Arrow Table
910-
fn to_arrow_table(&self, py: Python<'_>) -> PyResult<PyObject> {
910+
fn to_arrow_table<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
911911
let batches = self.collect(py)?.into_pyobject(py)?;
912912
let schema = self.schema().into_pyobject(py)?;
913913

914914
// Instantiate pyarrow Table object and use its from_batches method
915915
let table_class = py.import("pyarrow")?.getattr("Table")?;
916916
let args = PyTuple::new(py, &[batches, schema])?;
917-
let table: PyObject = table_class.call_method1("from_batches", args)?.into();
918-
Ok(table)
917+
table_class.call_method1("from_batches", args)
919918
}
920919

921920
#[pyo3(signature = (requested_schema=None))]
@@ -976,42 +975,38 @@ impl PyDataFrame {
976975

977976
/// Convert to pandas dataframe with pyarrow
978977
/// Collect the batches, pass to Arrow Table & then convert to Pandas DataFrame
979-
fn to_pandas(&self, py: Python<'_>) -> PyResult<PyObject> {
978+
fn to_pandas<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
980979
let table = self.to_arrow_table(py)?;
981980

982981
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
983-
let result = table.call_method0(py, "to_pandas")?;
984-
Ok(result)
982+
table.call_method0("to_pandas")
985983
}
986984

987985
/// Convert to Python list using pyarrow
988986
/// Each list item represents one row encoded as dictionary
989-
fn to_pylist(&self, py: Python<'_>) -> PyResult<PyObject> {
987+
fn to_pylist<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
990988
let table = self.to_arrow_table(py)?;
991989

992990
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pylist
993-
let result = table.call_method0(py, "to_pylist")?;
994-
Ok(result)
991+
table.call_method0("to_pylist")
995992
}
996993

997994
/// Convert to Python dictionary using pyarrow
998995
/// Each dictionary key is a column and the dictionary value represents the column values
999-
fn to_pydict(&self, py: Python) -> PyResult<PyObject> {
996+
fn to_pydict<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
1000997
let table = self.to_arrow_table(py)?;
1001998

1002999
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pydict
1003-
let result = table.call_method0(py, "to_pydict")?;
1004-
Ok(result)
1000+
table.call_method0("to_pydict")
10051001
}
10061002

10071003
/// Convert to polars dataframe with pyarrow
10081004
/// Collect the batches, pass to Arrow Table & then convert to polars DataFrame
1009-
fn to_polars(&self, py: Python<'_>) -> PyResult<PyObject> {
1005+
fn to_polars<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
10101006
let table = self.to_arrow_table(py)?;
10111007
let dataframe = py.import("polars")?.getattr("DataFrame")?;
10121008
let args = PyTuple::new(py, &[table])?;
1013-
let result: PyObject = dataframe.call1(args)?.into();
1014-
Ok(result)
1009+
dataframe.call1(args)
10151010
}
10161011

10171012
// Executes this DataFrame to get the total number of rows.
@@ -1023,7 +1018,7 @@ impl PyDataFrame {
10231018
#[pyo3(signature = (value, columns=None))]
10241019
fn fill_null(
10251020
&self,
1026-
value: PyObject,
1021+
value: Bound<PyAny>,
10271022
columns: Option<Vec<PyBackedStr>>,
10281023
py: Python,
10291024
) -> PyDataFusionResult<Self> {

src/expr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ impl PyExpr {
381381
}
382382

383383
/// Extracts the Expr value into a PyObject that can be shared with Python
384-
pub fn python_value(&self, py: Python) -> PyResult<PyObject> {
384+
pub fn python_value<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
385385
match &self.expr {
386386
Expr::Literal(scalar_value, _) => scalar_to_pyarrow(scalar_value, py),
387387
_ => Err(py_type_err(format!(

src/pyarrow_filter_expression.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
106106
let op_module = Python::import(py, "operator")?;
107107
let pc_expr: PyDataFusionResult<Bound<'_, PyAny>> = match expr {
108108
Expr::Column(Column { name, .. }) => Ok(pc.getattr("field")?.call1((name,))?),
109-
Expr::Literal(scalar, _) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)),
109+
Expr::Literal(scalar, _) => Ok(scalar_to_pyarrow(scalar, py)?),
110110
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
111111
let operator = operator_to_py(op, &op_module)?;
112112
let left = PyArrowFilterExpression::try_from(left.as_ref())?.0;

src/pyarrow_util.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ impl<'source> FromPyObject<'source> for PyScalarValue {
5151
}
5252
}
5353

54-
pub fn scalar_to_pyarrow(scalar: &ScalarValue, py: Python) -> PyResult<PyObject> {
54+
pub fn scalar_to_pyarrow<'py>(scalar: &ScalarValue, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
5555
let array = scalar.to_array().map_err(PyDataFusionError::from)?;
5656
// convert to pyarrow array using C data interface
5757
let pyarray = array.to_data().to_pyarrow(py)?;
58-
let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?;
58+
let pyscalar = pyarray.call_method1("__getitem__", (0,))?;
5959

6060
Ok(pyscalar)
6161
}

src/record_batch.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ pub struct PyRecordBatch {
3535

3636
#[pymethods]
3737
impl PyRecordBatch {
38-
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
38+
fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
3939
self.batch.to_pyarrow(py)
4040
}
4141
}

src/table.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ impl PyTable {
8686

8787
/// Get a reference to the schema for this table
8888
#[getter]
89-
fn schema(&self, py: Python) -> PyResult<PyObject> {
89+
fn schema<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
9090
self.table.schema().to_pyarrow(py)
9191
}
9292

src/udaf.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ impl Accumulator for RustAccumulator {
9090
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
9191
Python::with_gil(|py| {
9292
// // 1. cast states to Pyarrow arrays
93-
let py_states: Result<Vec<PyObject>> = states
93+
let py_states: Result<Vec<Bound<PyAny>>> = states
9494
.iter()
9595
.map(|state| {
9696
state

0 commit comments

Comments (0)