File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1414# KIND, either express or implied. See the License for the
1515# specific language governing permissions and limitations
1616# under the License.
17+ import resource
1718from pathlib import Path
1819
1920import pyarrow as pa
@@ -92,3 +93,28 @@ def test_read_avro():
9293 path = Path .cwd () / "testing/data/avro/alltypes_plain.avro"
9394 avro_df = read_avro (path = path )
9495 assert avro_df is not None
96+
97+
def test_arrow_c_stream_large_dataset(ctx):
    """DataFrame.__arrow_c_stream__ yields batches incrementally.

    This test constructs a DataFrame that would be far larger than available
    memory if materialized. The ``__arrow_c_stream__`` method should expose a
    stream of record batches without collecting the full dataset, so reading a
    handful of batches should not exhaust process memory.

    NOTE(review): the ``resource`` module is POSIX-only; this test cannot run
    on Windows — confirm the suite is skipped or gated there.
    """
    import sys

    # Create a very large DataFrame using range; this would be terabytes if
    # collected eagerly (2**40 rows).
    df = ctx.range(0, 1 << 40)

    reader = pa.RecordBatchReader._import_from_c(df.__arrow_c_stream__())

    # ``ru_maxrss`` is reported in kilobytes on Linux but in *bytes* on
    # macOS (see getrusage(2)); normalize to kilobytes so the 50 MB
    # threshold below means the same thing on both platforms.
    rss_divisor = 1024 if sys.platform == "darwin" else 1

    # Track maximum RSS before consuming batches.
    start_max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // rss_divisor

    for _ in range(5):
        batch = reader.read_next_batch()
        assert batch is not None
        assert len(batch) > 0

    current_max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // rss_divisor
    # Ensure memory usage hasn't grown substantially (> 50 MB in KB units).
    assert current_max_rss - start_max_rss < 50 * 1024
You can’t perform that action at this time.
0 commit comments