Skip to content

Commit 52a8912

Browse files
committed
feat: add Arrow C Data Interface export to RecordBatch and update DataFrame iteration
1 parent 555ee5f commit 52a8912

3 files changed

Lines changed: 116 additions & 15 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
4444
from datafusion.expr import Expr, SortExpr, sort_or_default
4545
from datafusion.plan import ExecutionPlan, LogicalPlan
46-
from datafusion.record_batch import RecordBatchStream
46+
from datafusion.record_batch import RecordBatch, RecordBatchStream
4747

4848
if TYPE_CHECKING:
4949
import pathlib
@@ -291,7 +291,7 @@ class DataFrame:
291291
"""Two dimensional table representation of data.
292292
293293
DataFrame objects are iterable; iterating over a DataFrame yields
294-
:class:`pyarrow.RecordBatch` instances lazily.
294+
:class:`datafusion.record_batch.RecordBatch` instances lazily.
295295
296296
See :ref:`user_guide_concepts` in the online documentation for more information.
297297
"""
@@ -1121,22 +1121,14 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
11211121
# preserving the original partition order.
11221122
return self.df.__arrow_c_stream__(requested_schema)
11231123

1124-
def __iter__(self) -> Iterator[pa.RecordBatch]:
1124+
def __iter__(self) -> Iterator[RecordBatch]:
11251125
"""Yield record batches from the DataFrame without materializing results.
11261126
1127-
This implementation streams record batches via the Arrow C Stream
1128-
interface, allowing callers such as :func:`pyarrow.Table.from_batches` to
1129-
consume results lazily. The DataFrame is executed using DataFusion's
1130-
partitioned streaming APIs so ``collect`` is never invoked and batch
1131-
order across partitions is preserved.
1127+
This executes the DataFrame using DataFusion's partitioned streaming
1128+
APIs and yields :class:`datafusion.record_batch.RecordBatch` objects.
11321129
"""
1133-
from contextlib import closing
1134-
1135-
import pyarrow as pa
1136-
1137-
reader = pa.RecordBatchReader._import_from_c_capsule(self.__arrow_c_stream__())
1138-
with closing(reader):
1139-
yield from reader
1130+
for stream in self.execute_stream_partitioned():
1131+
yield from stream
11401132

11411133
def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
11421134
"""Apply a function to the current DataFrame which returns another DataFrame.

python/datafusion/record_batch.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ def to_pyarrow(self) -> pa.RecordBatch:
4646
"""Convert to :py:class:`pa.RecordBatch`."""
4747
return self.record_batch.to_pyarrow()
4848

49+
def __arrow_c_array__(
50+
self, requested_schema: object | None = None
51+
) -> tuple[object, object]:
52+
"""Arrow C Data Interface export."""
53+
return self.record_batch.__arrow_c_array__(requested_schema)
54+
4955

5056
class RecordBatchStream:
5157
"""This class represents a stream of record batches.

src/record_batch.rs

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,58 @@
1616
// under the License.
1717

1818
use std::sync::Arc;
19+
use std::{ffi::c_void, ffi::CStr};
1920

2021
use crate::errors::PyDataFusionError;
2122
use crate::utils::wait_for_future;
23+
use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
24+
use datafusion::arrow::array::{Array, StructArray};
2225
use datafusion::arrow::pyarrow::ToPyArrow;
2326
use datafusion::arrow::record_batch::RecordBatch;
2427
use datafusion::physical_plan::SendableRecordBatchStream;
2528
use futures::StreamExt;
2629
use pyo3::exceptions::{PyStopAsyncIteration, PyStopIteration};
30+
use pyo3::ffi;
2731
use pyo3::prelude::*;
32+
use pyo3::types::PyCapsule;
2833
use pyo3::{pyclass, pymethods, PyObject, PyResult, Python};
2934
use tokio::sync::Mutex;
3035

36+
// Capsule names mandated by the Arrow PyCapsule interface specification.
// SAFETY: both byte strings are NUL-terminated and contain no interior NUL
// bytes, which is the only precondition of `from_bytes_with_nul_unchecked`.
// (c"..." literals are avoided here to keep the crate's MSRV below 1.77.)
#[allow(clippy::manual_c_str_literals)]
static ARROW_ARRAY_NAME: &CStr = unsafe { CStr::from_bytes_with_nul_unchecked(b"arrow_array\0") };
#[allow(clippy::manual_c_str_literals)]
static ARROW_SCHEMA_NAME: &CStr = unsafe { CStr::from_bytes_with_nul_unchecked(b"arrow_schema\0") };
40+
41+
/// PyCapsule destructor for `"arrow_array"` capsules.
///
/// # Safety
/// Must only be installed via `PyCapsule_New` on a capsule whose pointer was
/// obtained from `Box::into_raw(Box<FFI_ArrowArray>)`; it reclaims and drops
/// that box exactly once.
unsafe extern "C" fn drop_array(capsule: *mut ffi::PyObject) {
    if capsule.is_null() {
        return;
    }

    // Only reclaim the pointer if the capsule still carries our name; a
    // consumer that took ownership may have renamed/invalidated it.
    if ffi::PyCapsule_IsValid(capsule, ARROW_ARRAY_NAME.as_ptr()) == 1 {
        let array_ptr =
            ffi::PyCapsule_GetPointer(capsule, ARROW_ARRAY_NAME.as_ptr()) as *mut FFI_ArrowArray;
        if !array_ptr.is_null() {
            // Re-box the pointer so the FFI struct's Drop impl runs and the
            // Arrow release callback is invoked.
            drop(Box::from_raw(array_ptr));
        }
    }
    // The capsule API may have set a Python error above; a capsule destructor
    // must never leave an error pending.
    ffi::PyErr_Clear();
}
55+
56+
/// PyCapsule destructor for `"arrow_schema"` capsules.
///
/// # Safety
/// Must only be installed via `PyCapsule_New` on a capsule whose pointer was
/// obtained from `Box::into_raw(Box<FFI_ArrowSchema>)`; it reclaims and drops
/// that box exactly once.
unsafe extern "C" fn drop_schema(capsule: *mut ffi::PyObject) {
    if capsule.is_null() {
        return;
    }

    // Only reclaim the pointer if the capsule still carries our name; a
    // consumer that took ownership may have renamed/invalidated it.
    if ffi::PyCapsule_IsValid(capsule, ARROW_SCHEMA_NAME.as_ptr()) == 1 {
        let schema_ptr =
            ffi::PyCapsule_GetPointer(capsule, ARROW_SCHEMA_NAME.as_ptr()) as *mut FFI_ArrowSchema;
        if !schema_ptr.is_null() {
            // Re-box the pointer so the FFI schema's Drop impl runs and its
            // release callback is invoked.
            drop(Box::from_raw(schema_ptr));
        }
    }
    // The capsule API may have set a Python error above; a capsule destructor
    // must never leave an error pending.
    ffi::PyErr_Clear();
}
70+
3171
#[pyclass(name = "RecordBatch", module = "datafusion", subclass)]
3272
pub struct PyRecordBatch {
3373
batch: RecordBatch,
@@ -38,6 +78,69 @@ impl PyRecordBatch {
3878
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
3979
self.batch.to_pyarrow(py)
4080
}
81+
82+
#[pyo3(signature = (requested_schema=None))]
83+
fn __arrow_c_array__<'py>(
84+
&self,
85+
py: Python<'py>,
86+
requested_schema: Option<Bound<'py, PyCapsule>>,
87+
) -> PyResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> {
88+
// For now ignore requested_schema; future work could apply projection
89+
if let Some(schema_capsule) = requested_schema {
90+
crate::utils::validate_pycapsule(&schema_capsule, "arrow_schema")?;
91+
}
92+
93+
let struct_array = StructArray::from(self.batch.clone());
94+
let data = struct_array.to_data();
95+
let array = FFI_ArrowArray::new(&data);
96+
let schema =
97+
FFI_ArrowSchema::try_from(data.data_type()).map_err(PyDataFusionError::from)?;
98+
99+
let array_ptr = Box::into_raw(Box::new(array));
100+
let schema_ptr = Box::into_raw(Box::new(schema));
101+
102+
unsafe {
103+
let schema_capsule = ffi::PyCapsule_New(
104+
schema_ptr as *mut c_void,
105+
ARROW_SCHEMA_NAME.as_ptr(),
106+
Some(drop_schema),
107+
);
108+
if schema_capsule.is_null() {
109+
drop(Box::from_raw(schema_ptr));
110+
drop(Box::from_raw(array_ptr));
111+
return Err(PyErr::fetch(py));
112+
}
113+
114+
let array_capsule = ffi::PyCapsule_New(
115+
array_ptr as *mut c_void,
116+
ARROW_ARRAY_NAME.as_ptr(),
117+
Some(drop_array),
118+
);
119+
if array_capsule.is_null() {
120+
drop(Box::from_raw(array_ptr));
121+
if ffi::PyCapsule_IsValid(schema_capsule, ARROW_SCHEMA_NAME.as_ptr()) == 1 {
122+
let schema_ptr =
123+
ffi::PyCapsule_GetPointer(schema_capsule, ARROW_SCHEMA_NAME.as_ptr())
124+
as *mut FFI_ArrowSchema;
125+
if !schema_ptr.is_null() {
126+
drop(Box::from_raw(schema_ptr));
127+
}
128+
}
129+
ffi::PyErr_Clear();
130+
ffi::Py_DECREF(schema_capsule);
131+
return Err(PyErr::fetch(py));
132+
}
133+
134+
let schema_capsule = Bound::from_owned_ptr(py, schema_capsule)
135+
.downcast_into::<PyCapsule>()
136+
.unwrap();
137+
let array_capsule = Bound::from_owned_ptr(py, array_capsule)
138+
.downcast_into::<PyCapsule>()
139+
.unwrap();
140+
141+
Ok((schema_capsule, array_capsule))
142+
}
143+
}
41144
}
42145

43146
impl From<RecordBatch> for PyRecordBatch {

0 commit comments

Comments
 (0)