|
29 | 29 | DataFrame, |
30 | 30 | ParquetColumnOptions, |
31 | 31 | ParquetWriterOptions, |
| 32 | + RecordBatch, |
32 | 33 | SessionContext, |
33 | 34 | WindowFrame, |
34 | 35 | column, |
@@ -390,10 +391,23 @@ def test_iter_batches(df): |
390 | 391 | assert len(batches) == 1 |
391 | 392 |
|
392 | 393 | batch = batches[0] |
393 | | - assert isinstance(batch, pa.RecordBatch) |
394 | | - assert batch.column(0).to_pylist() == [1, 2, 3] |
395 | | - assert batch.column(1).to_pylist() == [4, 5, 6] |
396 | | - assert batch.column(2).to_pylist() == [8, 5, 8] |
| 394 | + assert isinstance(batch, RecordBatch) |
| 395 | + pa_batch = batch.to_pyarrow() |
| 396 | + assert pa_batch.column(0).to_pylist() == [1, 2, 3] |
| 397 | + assert pa_batch.column(1).to_pylist() == [4, 5, 6] |
| 398 | + assert pa_batch.column(2).to_pylist() == [8, 5, 8] |
| 399 | + |
| 400 | + |
| 401 | +def test_to_record_batch_stream(df): |
| 402 | + stream = df.to_record_batch_stream() |
| 403 | + batches = list(stream) |
| 404 | + |
| 405 | + assert len(batches) == 1 |
| 406 | + assert isinstance(batches[0], RecordBatch) |
| 407 | + pa_batch = batches[0].to_pyarrow() |
| 408 | + assert pa_batch.column(0).to_pylist() == [1, 2, 3] |
| 409 | + assert pa_batch.column(1).to_pylist() == [4, 5, 6] |
| 410 | + assert pa_batch.column(2).to_pylist() == [8, 5, 8] |
397 | 411 |
|
398 | 412 |
|
399 | 413 | def test_with_column_renamed(df): |
@@ -1331,7 +1345,7 @@ def test_execution_plan(aggregate_df): |
1331 | 1345 | @pytest.mark.asyncio |
1332 | 1346 | async def test_async_iteration_of_df(aggregate_df): |
1333 | 1347 | rows_returned = 0 |
1334 | | - async for batch in aggregate_df.execute_stream(): |
| 1348 | + async for batch in aggregate_df: |
1335 | 1349 | assert batch is not None |
1336 | 1350 | rows_returned += len(batch.to_pyarrow()[0]) |
1337 | 1351 |
|
|
0 commit comments