Commit 77f632a

feat(dataframe): enhance __arrow_c_stream__ docstring for clarity and detail

1 parent f02114a

1 file changed: python/datafusion/dataframe.py (9 additions, 6 deletions)
@@ -1098,19 +1098,22 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFrame
         return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls))

     def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
-        """Export an Arrow PyCapsule Stream.
+        """Export the DataFrame as an Arrow C Stream.

-        This will execute and collect the DataFrame. We will attempt to respect the
-        requested schema, but only trivial transformations will be applied such as only
-        returning the fields listed in the requested schema if their data types match
-        those in the DataFrame.
+        The DataFrame is executed using DataFusion's streaming APIs and exposed via
+        Arrow's C Stream interface. Record batches are produced incrementally, so the
+        full result set is never materialized in memory. When ``requested_schema`` is
+        provided, only straightforward projections such as column selection or
+        reordering are applied.

         Args:
             requested_schema: Attempt to provide the DataFrame using this schema.

         Returns:
-            Arrow PyCapsule object.
+            Arrow PyCapsule object representing an ``ArrowArrayStream``.
         """
+        # ``DataFrame.__arrow_c_stream__`` in the Rust extension leverages
+        # ``execute_stream`` under the hood to stream batches one at a time.
         return self.df.__arrow_c_stream__(requested_schema)

     def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
