Skip to content

Commit 8fc1797

Browse files
committed
fix: improve DataFrame batch iteration tests for accuracy and clarity
1 parent b5ccc4f commit 8fc1797

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

python/tests/test_dataframe.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1574,9 +1574,11 @@ def test_iter_batches_dataframe(fail_collect):
15741574
batch2 = pa.record_batch([pa.array([2])], names=["a"])
15751575
df = ctx.create_dataframe([[batch1], [batch2]])
15761576

1577-
expected = [batch1, batch2]
1578-
for got, exp in zip(df, expected):
1579-
assert got.equals(exp)
1577+
batches = list(df)
1578+
1579+
assert len(batches) == 2
1580+
assert any(batch.equals(batch1) for batch in batches)
1581+
assert any(batch.equals(batch2) for batch in batches)
15801582

15811583

15821584
def test_arrow_c_stream_to_table_and_reader(fail_collect):
@@ -1591,16 +1593,16 @@ def test_arrow_c_stream_to_table_and_reader(fail_collect):
15911593
batches = table.to_batches()
15921594

15931595
assert len(batches) == 2
1594-
assert batches[0].equals(batch1)
1595-
assert batches[1].equals(batch2)
1596+
assert any(b.equals(batch1) for b in batches)
1597+
assert any(b.equals(batch2) for b in batches)
15961598
assert table.schema == df.schema()
15971599
assert table.column("a").num_chunks == 2
1600+
assert sorted(table.column("a").to_pylist()) == [1, 2]
15981601

15991602
reader = pa.RecordBatchReader._import_from_c_capsule(df.__arrow_c_stream__())
16001603
assert isinstance(reader, pa.RecordBatchReader)
16011604
reader_table = pa.Table.from_batches(reader)
1602-
expected = pa.Table.from_batches([batch1, batch2])
1603-
assert reader_table.equals(expected)
1605+
assert sorted(reader_table.column("a").to_pylist()) == [1, 2]
16041606

16051607

16061608
def test_arrow_c_stream_capsule_ownership(fail_collect):

0 commit comments

Comments
 (0)