@@ -1098,19 +1098,22 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram
10981098 return DataFrame (self .df .unnest_columns (columns , preserve_nulls = preserve_nulls ))
10991099
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
    """Expose this DataFrame through the Arrow C Stream (PyCapsule) protocol.

    The call is forwarded unchanged to the wrapped ``self.df`` object, which
    performs the actual export. As documented for the native layer, the query
    is run with DataFusion's streaming APIs and record batches are handed to
    the consumer incrementally instead of being collected up front. When
    ``requested_schema`` is supplied, only straightforward projections such
    as column selection or reordering are applied.

    Args:
        requested_schema: Optional schema the consumer would like the stream
            to follow. It is passed through to the wrapped object as-is.

    Returns:
        Arrow PyCapsule object representing an ``ArrowArrayStream``.
    """
    # The Rust-side ``__arrow_c_stream__`` is described as building on
    # ``execute_stream``; this wrapper adds no logic of its own and simply
    # hands back whatever capsule the native object produces.
    inner_frame = self.df
    stream_capsule = inner_frame.__arrow_c_stream__(requested_schema)
    return stream_capsule
11151118
11161119 def transform (self , func : Callable [..., DataFrame ], * args : Any ) -> DataFrame :
0 commit comments