5858from pyiceberg .io import InputStream , OutputStream , load_file_io
5959from pyiceberg .io .pyarrow import (
6060 ICEBERG_SCHEMA ,
61+ ArrowScan ,
6162 PyArrowFile ,
6263 PyArrowFileIO ,
6364 StatsAggregator ,
6970 _to_requested_schema ,
7071 bin_pack_arrow_table ,
7172 expression_to_pyarrow ,
72- project_table ,
7373 schema_to_pyarrow ,
7474)
7575from pyiceberg .manifest import DataFile , DataFileContent , FileFormat
@@ -952,7 +952,19 @@ def file_map(schema_map: Schema, tmpdir: str) -> str:
952952def project (
953953 schema : Schema , files : List [str ], expr : Optional [BooleanExpression ] = None , table_schema : Optional [Schema ] = None
954954) -> pa .Table :
955- return project_table (
955+ return ArrowScan (
956+ table_metadata = TableMetadataV2 (
957+ location = "file://a/b/" ,
958+ last_column_id = 1 ,
959+ format_version = 2 ,
960+ schemas = [table_schema or schema ],
961+ partition_specs = [PartitionSpec ()],
962+ ),
963+ io = PyArrowFileIO (),
964+ projected_schema = schema ,
965+ row_filter = expr or AlwaysTrue (),
966+ case_sensitive = True ,
967+ ).to_table (
956968 tasks = [
957969 FileScanTask (
958970 DataFile (
@@ -965,18 +977,7 @@ def project(
965977 )
966978 )
967979 for file in files
968- ],
969- table_metadata = TableMetadataV2 (
970- location = "file://a/b/" ,
971- last_column_id = 1 ,
972- format_version = 2 ,
973- schemas = [table_schema or schema ],
974- partition_specs = [PartitionSpec ()],
975- ),
976- io = PyArrowFileIO (),
977- row_filter = expr or AlwaysTrue (),
978- projected_schema = schema ,
979- case_sensitive = True ,
980+ ]
980981 )
981982
982983
@@ -1411,9 +1412,7 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
14111412 data_file = example_task .file ,
14121413 delete_files = {DataFile (content = DataFileContent .POSITION_DELETES , file_path = deletes_file , file_format = FileFormat .PARQUET )},
14131414 )
1414-
1415- with_deletes = project_table (
1416- tasks = [example_task_with_delete ],
1415+ with_deletes = ArrowScan (
14171416 table_metadata = TableMetadataV2 (
14181417 location = metadata_location ,
14191418 last_column_id = 1 ,
@@ -1423,9 +1422,9 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
14231422 partition_specs = [PartitionSpec ()],
14241423 ),
14251424 io = load_file_io (),
1426- row_filter = AlwaysTrue (),
14271425 projected_schema = table_schema_simple ,
1428- )
1426+ row_filter = AlwaysTrue (),
1427+ ).to_table (tasks = [example_task_with_delete ])
14291428
14301429 assert (
14311430 str (with_deletes )
@@ -1450,8 +1449,7 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
14501449 },
14511450 )
14521451
1453- with_deletes = project_table (
1454- tasks = [example_task_with_delete ],
1452+ with_deletes = ArrowScan (
14551453 table_metadata = TableMetadataV2 (
14561454 location = metadata_location ,
14571455 last_column_id = 1 ,
@@ -1461,9 +1459,9 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
14611459 partition_specs = [PartitionSpec ()],
14621460 ),
14631461 io = load_file_io (),
1464- row_filter = AlwaysTrue (),
14651462 projected_schema = table_schema_simple ,
1466- )
1463+ row_filter = AlwaysTrue (),
1464+ ).to_table (tasks = [example_task_with_delete ])
14671465
14681466 assert (
14691467 str (with_deletes )
@@ -1480,8 +1478,8 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
14801478
14811479def test_pyarrow_wrap_fsspec (example_task : FileScanTask , table_schema_simple : Schema ) -> None :
14821480 metadata_location = "file://a/b/c.json"
1483- projection = project_table (
1484- tasks = [ example_task ],
1481+
1482+ projection = ArrowScan (
14851483 table_metadata = TableMetadataV2 (
14861484 location = metadata_location ,
14871485 last_column_id = 1 ,
@@ -1494,7 +1492,7 @@ def test_pyarrow_wrap_fsspec(example_task: FileScanTask, table_schema_simple: Sc
14941492 case_sensitive = True ,
14951493 projected_schema = table_schema_simple ,
14961494 row_filter = AlwaysTrue (),
1497- )
1495+ ). to_table ( tasks = [ example_task ])
14981496
14991497 assert (
15001498 str (projection )
0 commit comments