apache · kevinjqliu · Jun 22, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 2, 2026
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
@@ -1681,17 +1681,14 @@ def _task_to_record_batches(
             if current_batch.num_rows == 0:
                 continue
 
-            # Apply the user filter
-            if pyarrow_filter is not None:
-                # Temporary fix until PyArrow 21 is released ( https://github.com/apache/arrow/pull/46057 )
-                table = pa.Table.from_batches([current_batch])
-                table = table.filter(pyarrow_filter)
+            # Apply the user filter here only when positional deletes are present.
+            # Without positional deletes, the filter is already pushed down via Scanner.from_fragment.
+            if pyarrow_filter is not None and positional_deletes:
+                current_batch = current_batch.filter(pyarrow_filter)
                 # skip empty batches
-                if table.num_rows == 0:
+                if current_batch.num_rows == 0:
                     continue
 
-                current_batch = table.combine_chunks().to_batches()[0]
-
             yield _to_requested_schema(
                 projected_schema,
                 file_project_schema,

diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
@@ -3267,6 +3267,59 @@ def _expected_batch(unit: str) -> pa.RecordBatch:
     assert _expected_batch("ns" if format_version > 2 else "us").equals(actual_result)
 
 
+def test_task_to_record_batches_filter_without_positional_deletes_avoids_table_refilter(tmpdir: str) -> None:
+    """Without positional deletes, a row filter yields one batch holding only the matching rows."""
+    from pyiceberg.expressions.visitors import bind
+
+    file_schema = pa.schema((pa.field("id", pa.int32(), nullable=True, metadata={PYARROW_PARQUET_FIELD_ID_KEY: "1"}),))
+    rows = pa.table([pa.array([1, 2, 3], type=pa.int32())], schema=file_schema)
+    parquet_path = f"{tmpdir}/test_task_to_record_batches_filter_no_positional.parquet"
+    data_file = _write_table_to_data_file(parquet_path, file_schema, rows)
+    iceberg_schema = Schema(NestedField(1, "id", IntegerType(), required=False))
+    id_above_one = bind(iceberg_schema, GreaterThan("id", 1), case_sensitive=True)
+    batches = list(
+        _task_to_record_batches(
+            PyArrowFileIO(),
+            FileScanTask(data_file),
+            bound_row_filter=id_above_one,
+            projected_schema=iceberg_schema,
+            table_schema=iceberg_schema,
+            projected_field_ids={1},
+            positional_deletes=None,
+            case_sensitive=True,
+        )
+    )
+    assert len(batches) == 1
+    assert batches[0].column(0).to_pylist() == [2, 3]
+
+
+def test_task_to_record_batches_filter_with_positional_deletes_handles_empty_batch(tmpdir: str) -> None:
+    """With positional deletes supplied, a row filter that matches no rows yields no batches."""
+    from pyiceberg.expressions.visitors import bind
+
+    file_schema = pa.schema((pa.field("id", pa.int32(), nullable=True, metadata={PYARROW_PARQUET_FIELD_ID_KEY: "1"}),))
+    rows = pa.table([pa.array([1, 2, 3], type=pa.int32())], schema=file_schema)
+    parquet_path = f"{tmpdir}/test_task_to_record_batches_filter_with_positional.parquet"
+    data_file = _write_table_to_data_file(parquet_path, file_schema, rows)
+    iceberg_schema = Schema(NestedField(1, "id", IntegerType(), required=False))
+    id_above_hundred = bind(iceberg_schema, GreaterThan("id", 100), case_sensitive=True)
+    empty_deletes = [pa.chunked_array([pa.array([], type=pa.int64())])]
+    batches = list(
+        _task_to_record_batches(
+            PyArrowFileIO(),
+            FileScanTask(data_file),
+            bound_row_filter=id_above_hundred,
+            projected_schema=iceberg_schema,
+            table_schema=iceberg_schema,
+            projected_field_ids={1},
+            positional_deletes=empty_deletes,
+            case_sensitive=True,
+        )
+    )
+
+    assert batches == []
+
+
 def test_parse_location_defaults() -> None:
     """Test that parse_location uses defaults."""