103103)
104104from pyiceberg .table .name_mapping import (
105105 NameMapping ,
106- parse_mapping_from_json ,
107106 update_mapping ,
108107)
109108from pyiceberg .table .refs import MAIN_BRANCH , SnapshotRef
@@ -1215,7 +1214,8 @@ def scan(
12151214 limit : Optional [int ] = None ,
12161215 ) -> DataScan :
12171216 return DataScan (
1218- table = self ,
1217+ table_metadata = self .metadata ,
1218+ io = self .io ,
12191219 row_filter = row_filter ,
12201220 selected_fields = selected_fields ,
12211221 case_sensitive = case_sensitive ,
@@ -1312,10 +1312,7 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
13121312
13131313 def name_mapping (self ) -> Optional [NameMapping ]:
13141314 """Return the table's field-id NameMapping."""
1315- if name_mapping_json := self .properties .get (TableProperties .DEFAULT_NAME_MAPPING ):
1316- return parse_mapping_from_json (name_mapping_json )
1317- else :
1318- return None
1315+ return self .metadata .name_mapping ()
13191316
13201317 def append (self , df : pa .Table , snapshot_properties : Dict [str , str ] = EMPTY_DICT ) -> None :
13211318 """
@@ -1468,7 +1465,8 @@ def _parse_row_filter(expr: Union[str, BooleanExpression]) -> BooleanExpression:
14681465
14691466
14701467class TableScan (ABC ):
1471- table : Table
1468+ table_metadata : TableMetadata
1469+ io : FileIO
14721470 row_filter : BooleanExpression
14731471 selected_fields : Tuple [str , ...]
14741472 case_sensitive : bool
@@ -1478,15 +1476,17 @@ class TableScan(ABC):
14781476
14791477 def __init__ (
14801478 self ,
1481- table : Table ,
1479+ table_metadata : TableMetadata ,
1480+ io : FileIO ,
14821481 row_filter : Union [str , BooleanExpression ] = ALWAYS_TRUE ,
14831482 selected_fields : Tuple [str , ...] = ("*" ,),
14841483 case_sensitive : bool = True ,
14851484 snapshot_id : Optional [int ] = None ,
14861485 options : Properties = EMPTY_DICT ,
14871486 limit : Optional [int ] = None ,
14881487 ):
1489- self .table = table
1488+ self .table_metadata = table_metadata
1489+ self .io = io
14901490 self .row_filter = _parse_row_filter (row_filter )
14911491 self .selected_fields = selected_fields
14921492 self .case_sensitive = case_sensitive
@@ -1496,19 +1496,20 @@ def __init__(
14961496
14971497 def snapshot (self ) -> Optional [Snapshot ]:
14981498 if self .snapshot_id :
1499- return self .table .snapshot_by_id (self .snapshot_id )
1500- return self .table .current_snapshot ()
1499+ return self .table_metadata .snapshot_by_id (self .snapshot_id )
1500+ return self .table_metadata .current_snapshot ()
15011501
15021502 def projection (self ) -> Schema :
1503- current_schema = self .table .schema ()
1503+ current_schema = self .table_metadata .schema ()
15041504 if self .snapshot_id is not None :
1505- snapshot = self .table .snapshot_by_id (self .snapshot_id )
1505+ snapshot = self .table_metadata .snapshot_by_id (self .snapshot_id )
15061506 if snapshot is not None :
15071507 if snapshot .schema_id is not None :
1508- snapshot_schema = self .table .schemas ().get (snapshot .schema_id )
1509- if snapshot_schema is not None :
1510- current_schema = snapshot_schema
1511- else :
1508+ try :
1509+ current_schema = next (
1510+ schema for schema in self .table_metadata .schemas if schema .schema_id == snapshot .schema_id
1511+ )
1512+ except StopIteration :
15121513 warnings .warn (f"Metadata does not contain schema with id: { snapshot .schema_id } " )
15131514 else :
15141515 raise ValueError (f"Snapshot not found: { self .snapshot_id } " )
@@ -1534,7 +1535,7 @@ def update(self: S, **overrides: Any) -> S:
15341535 def use_ref (self : S , name : str ) -> S :
15351536 if self .snapshot_id :
15361537 raise ValueError (f"Cannot override ref, already set snapshot id={ self .snapshot_id } " )
1537- if snapshot := self .table .snapshot_by_name (name ):
1538+ if snapshot := self .table_metadata .snapshot_by_name (name ):
15381539 return self .update (snapshot_id = snapshot .snapshot_id )
15391540
15401541 raise ValueError (f"Cannot scan unknown ref={ name } " )
@@ -1626,33 +1627,21 @@ def _match_deletes_to_data_file(data_entry: ManifestEntry, positional_delete_ent
16261627
16271628
16281629class DataScan (TableScan ):
1629- def __init__ (
1630- self ,
1631- table : Table ,
1632- row_filter : Union [str , BooleanExpression ] = ALWAYS_TRUE ,
1633- selected_fields : Tuple [str , ...] = ("*" ,),
1634- case_sensitive : bool = True ,
1635- snapshot_id : Optional [int ] = None ,
1636- options : Properties = EMPTY_DICT ,
1637- limit : Optional [int ] = None ,
1638- ):
1639- super ().__init__ (table , row_filter , selected_fields , case_sensitive , snapshot_id , options , limit )
1640-
16411630 def _build_partition_projection (self , spec_id : int ) -> BooleanExpression :
1642- project = inclusive_projection (self .table .schema (), self .table .specs ()[spec_id ])
1631+ project = inclusive_projection (self .table_metadata .schema (), self .table_metadata .specs ()[spec_id ])
16431632 return project (self .row_filter )
16441633
16451634 @cached_property
16461635 def partition_filters (self ) -> KeyDefaultDict [int , BooleanExpression ]:
16471636 return KeyDefaultDict (self ._build_partition_projection )
16481637
16491638 def _build_manifest_evaluator (self , spec_id : int ) -> Callable [[ManifestFile ], bool ]:
1650- spec = self .table .specs ()[spec_id ]
1651- return manifest_evaluator (spec , self .table .schema (), self .partition_filters [spec_id ], self .case_sensitive )
1639+ spec = self .table_metadata .specs ()[spec_id ]
1640+ return manifest_evaluator (spec , self .table_metadata .schema (), self .partition_filters [spec_id ], self .case_sensitive )
16521641
16531642 def _build_partition_evaluator (self , spec_id : int ) -> Callable [[DataFile ], bool ]:
1654- spec = self .table .specs ()[spec_id ]
1655- partition_type = spec .partition_type (self .table .schema ())
1643+ spec = self .table_metadata .specs ()[spec_id ]
1644+ partition_type = spec .partition_type (self .table_metadata .schema ())
16561645 partition_schema = Schema (* partition_type .fields )
16571646 partition_expr = self .partition_filters [spec_id ]
16581647
@@ -1687,16 +1676,14 @@ def plan_files(self) -> Iterable[FileScanTask]:
16871676 if not snapshot :
16881677 return iter ([])
16891678
1690- io = self .table .io
1691-
16921679 # step 1: filter manifests using partition summaries
16931680 # the filter depends on the partition spec used to write the manifest file, so create a cache of filters for each spec id
16941681
16951682 manifest_evaluators : Dict [int , Callable [[ManifestFile ], bool ]] = KeyDefaultDict (self ._build_manifest_evaluator )
16961683
16971684 manifests = [
16981685 manifest_file
1699- for manifest_file in snapshot .manifests (io )
1686+ for manifest_file in snapshot .manifests (self . io )
17001687 if manifest_evaluators [manifest_file .partition_spec_id ](manifest_file )
17011688 ]
17021689
@@ -1705,7 +1692,7 @@ def plan_files(self) -> Iterable[FileScanTask]:
17051692
17061693 partition_evaluators : Dict [int , Callable [[DataFile ], bool ]] = KeyDefaultDict (self ._build_partition_evaluator )
17071694 metrics_evaluator = _InclusiveMetricsEvaluator (
1708- self .table .schema (), self .row_filter , self .case_sensitive , self .options .get ("include_empty_files" ) == "true"
1695+ self .table_metadata .schema (), self .row_filter , self .case_sensitive , self .options .get ("include_empty_files" ) == "true"
17091696 ).eval
17101697
17111698 min_data_sequence_number = _min_data_file_sequence_number (manifests )
@@ -1719,7 +1706,7 @@ def plan_files(self) -> Iterable[FileScanTask]:
17191706 lambda args : _open_manifest (* args ),
17201707 [
17211708 (
1722- io ,
1709+ self . io ,
17231710 manifest ,
17241711 partition_evaluators [manifest .partition_spec_id ],
17251712 metrics_evaluator ,
@@ -1755,7 +1742,8 @@ def to_arrow(self) -> pa.Table:
17551742
17561743 return project_table (
17571744 self .plan_files (),
1758- self .table ,
1745+ self .table_metadata ,
1746+ self .io ,
17591747 self .row_filter ,
17601748 self .projection (),
17611749 case_sensitive = self .case_sensitive ,
0 commit comments