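Fix tests: rename to_record_batches_streaming to to_record_batch_stream; join wrapped signatures and calls onto single lines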

ShreyeshArangath · ShreyeshArangath · commit babeba227d2d · 2026-02-11T17:02:05.000-08:00
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
@@ -1804,9 +1804,7 @@ def batches_for_task(task: FileScanTask) -> list[pa.RecordBatch]:
                 # This break will also cancel all running tasks in the executor
                 break
 
-    def to_record_batch_stream(
-        self, tasks: Iterable[FileScanTask], batch_size: int | None = None
-    ) -> Iterator[pa.RecordBatch]:
+    def to_record_batch_stream(self, tasks: Iterable[FileScanTask], batch_size: int | None = None) -> Iterator[pa.RecordBatch]:
         """Scan the Iceberg table and return an Iterator[pa.RecordBatch] in a streaming fashion.
 
         Files are read sequentially and batches are yielded one at a time
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
@@ -2182,7 +2182,7 @@ def to_arrow_batch_reader(self) -> pa.RecordBatchReader:
             batches,
         ).cast(target_schema)
 
-    def to_record_batches(self, batch_size: int | None = None) -> Iterator["pa.RecordBatch"]:
+    def to_record_batches(self, batch_size: int | None = None) -> Iterator[pa.RecordBatch]:
         """Read record batches in a streaming fashion from this DataScan.
 
         Files are read sequentially and batches are yielded one at a time
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
@@ -1290,9 +1290,7 @@ def test_datascan_to_record_batches(catalog: Catalog) -> None:
 
     scan = table.scan()
     streaming_batches = list(scan.to_record_batches())
-    streaming_result = pa.concat_tables(
-        [pa.Table.from_batches([b]) for b in streaming_batches], promote_options="permissive"
-    )
+    streaming_result = pa.concat_tables([pa.Table.from_batches([b]) for b in streaming_batches], promote_options="permissive")
 
     eager_result = scan.to_arrow()
 
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
@@ -4917,7 +4917,7 @@ def test_task_to_record_batches_with_batch_size(tmpdir: str) -> None:
         assert len(batch) <= 100
 
 
-def test_to_record_batches_streaming_basic(tmpdir: str) -> None:
+def test_to_record_batch_stream_basic(tmpdir: str) -> None:
     schema = Schema(NestedField(1, "id", IntegerType(), required=False))
     pyarrow_schema = schema_to_pyarrow(schema, metadata={ICEBERG_SCHEMA: bytes(schema.model_dump_json(), UTF8)})
 
@@ -4941,7 +4941,7 @@ def test_to_record_batches_streaming_basic(tmpdir: str) -> None:
         case_sensitive=True,
     )
 
-    result = scan.to_record_batches_streaming([task])
+    result = scan.to_record_batch_stream([task])
     # Should be a generator/iterator, not a list
     import types
 
@@ -4952,7 +4952,7 @@ def test_to_record_batches_streaming_basic(tmpdir: str) -> None:
     assert total_rows == 100
 
 
-def test_to_record_batches_streaming_with_batch_size(tmpdir: str) -> None:
+def test_to_record_batch_stream_with_batch_size(tmpdir: str) -> None:
     schema = Schema(NestedField(1, "id", IntegerType(), required=False))
     pyarrow_schema = schema_to_pyarrow(schema, metadata={ICEBERG_SCHEMA: bytes(schema.model_dump_json(), UTF8)})
 
@@ -4976,15 +4976,15 @@ def test_to_record_batches_streaming_with_batch_size(tmpdir: str) -> None:
         case_sensitive=True,
     )
 
-    batches = list(scan.to_record_batches_streaming([task], batch_size=50))
+    batches = list(scan.to_record_batch_stream([task], batch_size=50))
 
     total_rows = sum(len(b) for b in batches)
     assert total_rows == 500
     for batch in batches:
         assert len(batch) <= 50
 
 
-def test_to_record_batches_streaming_with_limit(tmpdir: str) -> None:
+def test_to_record_batch_stream_with_limit(tmpdir: str) -> None:
     schema = Schema(NestedField(1, "id", IntegerType(), required=False))
     pyarrow_schema = schema_to_pyarrow(schema, metadata={ICEBERG_SCHEMA: bytes(schema.model_dump_json(), UTF8)})
 
@@ -5009,13 +5009,13 @@ def test_to_record_batches_streaming_with_limit(tmpdir: str) -> None:
         limit=100,
     )
 
-    batches = list(scan.to_record_batches_streaming([task]))
+    batches = list(scan.to_record_batch_stream([task]))
 
     total_rows = sum(len(b) for b in batches)
     assert total_rows == 100
 
 
-def test_to_record_batches_streaming_with_deletes(
+def test_to_record_batch_stream_with_deletes(
     deletes_file: str, request: pytest.FixtureRequest, table_schema_simple: Schema
 ) -> None:
     file_format = FileFormat.PARQUET if deletes_file.endswith(".parquet") else FileFormat.ORC
@@ -5052,17 +5052,15 @@ def test_to_record_batches_streaming_with_deletes(
     )
 
     # Compare streaming path to table path
-    streaming_batches = list(scan.to_record_batches_streaming([example_task_with_delete]))
-    streaming_table = pa.concat_tables(
-        [pa.Table.from_batches([b]) for b in streaming_batches], promote_options="permissive"
-    )
+    streaming_batches = list(scan.to_record_batch_stream([example_task_with_delete]))
+    streaming_table = pa.concat_tables([pa.Table.from_batches([b]) for b in streaming_batches], promote_options="permissive")
     eager_table = scan.to_table(tasks=[example_task_with_delete])
 
     assert streaming_table.num_rows == eager_table.num_rows
     assert streaming_table.column_names == eager_table.column_names
 
 
-def test_to_record_batches_streaming_multiple_files(tmpdir: str) -> None:
+def test_to_record_batch_stream_multiple_files(tmpdir: str) -> None:
     schema = Schema(NestedField(1, "id", IntegerType(), required=False))
     pyarrow_schema = schema_to_pyarrow(schema, metadata={ICEBERG_SCHEMA: bytes(schema.model_dump_json(), UTF8)})
 
@@ -5090,6 +5088,6 @@ def test_to_record_batches_streaming_multiple_files(tmpdir: str) -> None:
         case_sensitive=True,
     )
 
-    batches = list(scan.to_record_batches_streaming(tasks))
+    batches = list(scan.to_record_batch_stream(tasks))
     total_rows = sum(len(b) for b in batches)
     assert total_rows == total_expected  # 600 rows total