@@ -1400,11 +1400,11 @@ def _field_id(self, field: pa.Field) -> int:
14001400
14011401
14021402def _get_column_projection_values (
1403- file : DataFile , projected_schema : Schema , partition_spec : PartitionSpec , file_project_field_ids : Set [int ]
1403+ file : DataFile , projected_schema : Schema , partition_spec : Optional [ PartitionSpec ] , file_project_field_ids : Set [int ]
14041404) -> Dict [int , Any ]:
14051405 """Apply Column Projection rules to File Schema."""
14061406 project_schema_diff = projected_schema .field_ids .difference (file_project_field_ids )
1407- if len (project_schema_diff ) == 0 :
1407+ if len (project_schema_diff ) == 0 or partition_spec is None :
14081408 return EMPTY_DICT
14091409
14101410 partition_schema = partition_spec .partition_type (projected_schema )
@@ -1428,8 +1428,8 @@ def _task_to_record_batches(
14281428 projected_field_ids : Set [int ],
14291429 positional_deletes : Optional [List [ChunkedArray ]],
14301430 case_sensitive : bool ,
1431- partition_spec : PartitionSpec ,
14321431 name_mapping : Optional [NameMapping ] = None ,
1432+ partition_spec : Optional [PartitionSpec ] = None ,
14331433) -> Iterator [pa .RecordBatch ]:
14341434 arrow_format = ds .ParquetFileFormat (pre_buffer = True , buffer_size = (ONE_MEGABYTE * 8 ))
14351435 with io .new_input (task .file .file_path ).open () as fin :
@@ -1668,8 +1668,8 @@ def _record_batches_from_scan_tasks_and_deletes(
16681668 self ._projected_field_ids ,
16691669 deletes_per_file .get (task .file .file_path ),
16701670 self ._case_sensitive ,
1671- self ._table_metadata .spec (),
16721671 self ._table_metadata .name_mapping (),
1672+ self ._table_metadata .specs ().get (task .file .spec_id ),
16731673 )
16741674 for batch in batches :
16751675 if self ._limit is not None :
@@ -1801,7 +1801,7 @@ def struct(
18011801 # When an optional field is added, or when a required field with a non-null initial default is added
18021802 arrow_type = schema_to_pyarrow (field .field_type , include_field_ids = self ._include_field_ids )
18031803 if projected_value := self ._projected_missing_fields .get (field .field_id ):
1804- field_arrays .append (pa .repeat (projected_value , len (struct_array )). cast ( arrow_type ))
1804+ field_arrays .append (pa .repeat (pa . scalar ( projected_value , type = arrow_type ), len (struct_array )))
18051805 elif field .initial_default is None :
18061806 field_arrays .append (pa .nulls (len (struct_array ), type = arrow_type ))
18071807 else :
0 commit comments