Skip to content

Commit 6f88749

Browse files
committed
move metadata file location function to Table
1 parent f4c075c commit 6f88749

File tree

6 files changed

+36
-23
lines changed

6 files changed

+36
-23
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,7 @@ def _create_staged_table(
857857
database_name, table_name = self.identifier_to_database_and_table(identifier)
858858

859859
location = self._resolve_table_location(location, database_name, table_name)
860-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
860+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
861861
metadata = new_table_metadata(
862862
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
863863
)
@@ -888,7 +888,7 @@ def _update_and_stage_table(
888888
)
889889

890890
new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 if current_table else 0
891-
new_metadata_location = self._get_metadata_location(
891+
new_metadata_location = Table.new_table_metadata_file_location(
892892
updated_metadata.location, new_metadata_version, updated_metadata.properties
893893
)
894894

@@ -947,14 +947,6 @@ def _get_default_warehouse_location(self, database_name: str, table_name: str) -
947947
def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> None:
948948
ToOutputFile.table_metadata(metadata, io.new_output(metadata_path))
949949

950-
@staticmethod
951-
def _get_metadata_location(table_location: str, new_version: int = 0, properties: Properties = EMPTY_DICT) -> str:
952-
if new_version < 0:
953-
raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer")
954-
955-
file_name = f"{new_version:05d}-{uuid.uuid4()}.metadata.json"
956-
return Table.metadata_file_location(table_location, file_name, properties)
957-
958950
@staticmethod
959951
def _parse_metadata_version(metadata_location: str) -> int:
960952
"""Parse the version from the metadata location.

pyiceberg/catalog/dynamodb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def create_table(
173173
database_name, table_name = self.identifier_to_database_and_table(identifier)
174174

175175
location = self._resolve_table_location(location, database_name, table_name)
176-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
176+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
177177
metadata = new_table_metadata(
178178
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
179179
)

pyiceberg/catalog/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def create_table(
207207

208208
namespace = Catalog.namespace_to_string(namespace_identifier)
209209
location = self._resolve_table_location(location, namespace, table_name)
210-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
210+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
211211
metadata = new_table_metadata(
212212
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
213213
)

pyiceberg/table/__init__.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,19 +1238,40 @@ def to_polars(self) -> pl.LazyFrame:
12381238
return pl.scan_iceberg(self)
12391239

12401240
@staticmethod
1241-
def metadata_file_location(table_location: str, file_name: str, properties: Properties = EMPTY_DICT) -> str:
1242-
"""Get the full path for a metadata file.
1241+
def new_table_metadata_file_location(table_location: str, new_version: int = 0, properties: Properties = EMPTY_DICT) -> str:
1242+
"""Return a fully-qualified metadata file location for a new table version.
1243+
1244+
Args:
1245+
table_location (str): the base table location.
1246+
new_version (int): Version number of the metadata file.
1247+
properties (Properties): Table properties that may contain a custom metadata path.
1248+
1249+
Returns:
1250+
str: fully-qualified URI for the new table metadata file.
1251+
1252+
Raises:
1253+
ValueError: If the version is negative.
1254+
"""
1255+
if new_version < 0:
1256+
raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer")
1257+
1258+
file_name = f"{new_version:05d}-{uuid.uuid4()}.metadata.json"
1259+
return Table.new_metadata_location(table_location, file_name, properties)
1260+
1261+
@staticmethod
1262+
def new_metadata_location(table_location: str, file_name: str, properties: Properties = EMPTY_DICT) -> str:
1263+
"""Return a fully-qualified metadata file location for the given filename.
12431264
12441265
Args:
12451266
table_location (str): The base table location
12461267
file_name (str): Name of the metadata file
1247-
properties (Properties): Table properties that may contain custom metadata path
1268+
properties (Properties): Table properties that may contain a custom metadata path
12481269
12491270
Returns:
1250-
str: Full path where the metadata file should be stored
1271+
str: A fully-qualified location URI for the metadata file.
12511272
"""
12521273
if metadata_path := properties.get(TableProperties.WRITE_METADATA_PATH):
1253-
return f"{metadata_path.rstrip("/")}/{file_name}"
1274+
return f"{metadata_path.rstrip('/')}/{file_name}"
12541275

12551276
return f"{table_location}/metadata/{file_name}"
12561277

pyiceberg/table/update/snapshot.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _new_manifest_file_name(num: int, commit_uuid: uuid.UUID) -> str:
8888
return f"{commit_uuid}-m{num}.avro"
8989

9090

91-
def _generate_manifest_list_file_name(snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str:
91+
def _new_manifest_list_file_name(snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str:
9292
# Mimics the behavior in Java:
9393
# https://github.com/apache/iceberg/blob/c862b9177af8e2d83122220764a056f3b96fd00c/core/src/main/java/org/apache/iceberg/SnapshotProducer.java#L491
9494
return f"snap-{snapshot_id}-{attempt}-{commit_uuid}.avro"
@@ -245,12 +245,12 @@ def _commit(self) -> UpdatesAndRequirements:
245245
summary = self._summary(self.snapshot_properties)
246246
table_location = self._transaction.table_metadata.location
247247
properties = self._transaction.table_metadata.properties
248-
file_name = _generate_manifest_list_file_name(
248+
file_name = _new_manifest_list_file_name(
249249
snapshot_id=self._snapshot_id,
250250
attempt=0,
251251
commit_uuid=self.commit_uuid,
252252
)
253-
manifest_list_file_path = self._transaction._table.metadata_file_location(table_location, file_name, properties)
253+
manifest_list_file_path = self._transaction._table.new_metadata_location(table_location, file_name, properties)
254254
with write_manifest_list(
255255
format_version=self._transaction.table_metadata.format_version,
256256
output_file=self._io.new_output(manifest_list_file_path),
@@ -299,7 +299,7 @@ def new_manifest_output(self) -> OutputFile:
299299
table_location = self._transaction.table_metadata.location
300300
properties = self._transaction.table_metadata.properties
301301
file_name = _new_manifest_file_name(num=next(self._manifest_num_counter), commit_uuid=self.commit_uuid)
302-
file_path = self._transaction._table.metadata_file_location(table_location, file_name, properties)
302+
file_path = self._transaction._table.new_metadata_location(table_location, file_name, properties)
303303
return self._io.new_output(file_path)
304304

305305
def fetch_manifest_entry(self, manifest: ManifestFile, discard_deleted: bool = True) -> List[ManifestEntry]:

tests/catalog/test_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ def test_table_writes_metadata_to_custom_location(catalog: InMemoryCatalog) -> N
580580
table.append(df)
581581
manifests = table.current_snapshot().manifests(table.io) # type: ignore
582582

583-
assert table.metadata_file_location(table.location(), "", table.properties).startswith(metadata_path)
583+
assert table.new_metadata_location(table.location(), "", table.properties).startswith(metadata_path)
584584
assert manifests[0].manifest_path.startswith(metadata_path)
585585
assert table.location() != metadata_path
586586
assert table.metadata_location.startswith(metadata_path)
@@ -599,6 +599,6 @@ def test_table_writes_metadata_to_default_path(catalog: InMemoryCatalog) -> None
599599
table.append(df)
600600
manifests = table.current_snapshot().manifests(table.io) # type: ignore
601601

602-
assert table.metadata_file_location(table.location(), "", table.properties).startswith(metadata_path)
602+
assert table.new_metadata_location(table.location(), "", table.properties).startswith(metadata_path)
603603
assert manifests[0].manifest_path.startswith(metadata_path)
604604
assert table.metadata_location.startswith(metadata_path)

0 commit comments

Comments
 (0)