Skip to content

Commit 5e138a7

Browse files
committed
move metadata file location function to Table
1 parent 4e47edc commit 5e138a7

File tree

6 files changed

+36
-23
lines changed

6 files changed

+36
-23
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ def _create_staged_table(
840840
database_name, table_name = self.identifier_to_database_and_table(identifier)
841841

842842
location = self._resolve_table_location(location, database_name, table_name)
843-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
843+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
844844
metadata = new_table_metadata(
845845
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
846846
)
@@ -871,7 +871,7 @@ def _update_and_stage_table(
871871
)
872872

873873
new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 if current_table else 0
874-
new_metadata_location = self._get_metadata_location(
874+
new_metadata_location = Table.new_table_metadata_file_location(
875875
updated_metadata.location, new_metadata_version, updated_metadata.properties
876876
)
877877

@@ -930,14 +930,6 @@ def _get_default_warehouse_location(self, database_name: str, table_name: str) -
930930
def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> None:
931931
ToOutputFile.table_metadata(metadata, io.new_output(metadata_path))
932932

933-
@staticmethod
934-
def _get_metadata_location(table_location: str, new_version: int = 0, properties: Properties = EMPTY_DICT) -> str:
935-
if new_version < 0:
936-
raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer")
937-
938-
file_name = f"{new_version:05d}-{uuid.uuid4()}.metadata.json"
939-
return Table.metadata_file_location(table_location, file_name, properties)
940-
941933
@staticmethod
942934
def _parse_metadata_version(metadata_location: str) -> int:
943935
"""Parse the version from the metadata location.

pyiceberg/catalog/dynamodb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def create_table(
173173
database_name, table_name = self.identifier_to_database_and_table(identifier)
174174

175175
location = self._resolve_table_location(location, database_name, table_name)
176-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
176+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
177177
metadata = new_table_metadata(
178178
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
179179
)

pyiceberg/catalog/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def create_table(
207207

208208
namespace = Catalog.namespace_to_string(namespace_identifier)
209209
location = self._resolve_table_location(location, namespace, table_name)
210-
metadata_location = self._get_metadata_location(table_location=location, properties=properties)
210+
metadata_location = Table.new_table_metadata_file_location(table_location=location, properties=properties)
211211
metadata = new_table_metadata(
212212
location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
213213
)

pyiceberg/table/__init__.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,19 +1214,40 @@ def to_daft(self) -> daft.DataFrame:
12141214
return daft.read_iceberg(self)
12151215

12161216
@staticmethod
1217-
def metadata_file_location(table_location: str, file_name: str, properties: Properties = EMPTY_DICT) -> str:
1218-
"""Get the full path for a metadata file.
1217+
def new_table_metadata_file_location(table_location: str, new_version: int = 0, properties: Properties = EMPTY_DICT) -> str:
1218+
"""Return a fully-qualified metadata file location for a new table version.
1219+
1220+
Args:
1221+
table_location (str): the base table location.
1222+
new_version (int): Version number of the metadata file.
1223+
properties (Properties): Table properties that may contain a custom metadata path.
1224+
1225+
Returns:
1226+
str: fully-qualified URI for the new table metadata file.
1227+
1228+
Raises:
1229+
ValueError: If the version is negative.
1230+
"""
1231+
if new_version < 0:
1232+
raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer")
1233+
1234+
file_name = f"{new_version:05d}-{uuid.uuid4()}.metadata.json"
1235+
return Table.new_metadata_location(table_location, file_name, properties)
1236+
1237+
@staticmethod
1238+
def new_metadata_location(table_location: str, file_name: str, properties: Properties = EMPTY_DICT) -> str:
1239+
"""Return a fully-qualified metadata file location for the given filename.
12191240
12201241
Args:
12211242
table_location (str): The base table location
12221243
file_name (str): Name of the metadata file
1223-
properties (Properties): Table properties that may contain custom metadata path
1244+
properties (Properties): Table properties that may contain a custom metadata path
12241245
12251246
Returns:
1226-
str: Full path where the metadata file should be stored
1247+
str: A fully-qualified location URI for the metadata file.
12271248
"""
12281249
if metadata_path := properties.get(TableProperties.WRITE_METADATA_PATH):
1229-
return f"{metadata_path.rstrip("/")}/{file_name}"
1250+
return f"{metadata_path.rstrip('/')}/{file_name}"
12301251

12311252
return f"{table_location}/metadata/{file_name}"
12321253

pyiceberg/table/update/snapshot.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _new_manifest_file_name(num: int, commit_uuid: uuid.UUID) -> str:
8888
return f"{commit_uuid}-m{num}.avro"
8989

9090

91-
def _generate_manifest_list_file_name(snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str:
91+
def _new_manifest_list_file_name(snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str:
9292
# Mimics the behavior in Java:
9393
# https://github.com/apache/iceberg/blob/c862b9177af8e2d83122220764a056f3b96fd00c/core/src/main/java/org/apache/iceberg/SnapshotProducer.java#L491
9494
return f"snap-{snapshot_id}-{attempt}-{commit_uuid}.avro"
@@ -245,12 +245,12 @@ def _commit(self) -> UpdatesAndRequirements:
245245
summary = self._summary(self.snapshot_properties)
246246
table_location = self._transaction.table_metadata.location
247247
properties = self._transaction.table_metadata.properties
248-
file_name = _generate_manifest_list_file_name(
248+
file_name = _new_manifest_list_file_name(
249249
snapshot_id=self._snapshot_id,
250250
attempt=0,
251251
commit_uuid=self.commit_uuid,
252252
)
253-
manifest_list_file_path = self._transaction._table.metadata_file_location(table_location, file_name, properties)
253+
manifest_list_file_path = self._transaction._table.new_metadata_location(table_location, file_name, properties)
254254
with write_manifest_list(
255255
format_version=self._transaction.table_metadata.format_version,
256256
output_file=self._io.new_output(manifest_list_file_path),
@@ -299,7 +299,7 @@ def new_manifest_output(self) -> OutputFile:
299299
table_location = self._transaction.table_metadata.location
300300
properties = self._transaction.table_metadata.properties
301301
file_name = _new_manifest_file_name(num=next(self._manifest_num_counter), commit_uuid=self.commit_uuid)
302-
file_path = self._transaction._table.metadata_file_location(table_location, file_name, properties)
302+
file_path = self._transaction._table.new_metadata_location(table_location, file_name, properties)
303303
return self._io.new_output(file_path)
304304

305305
def fetch_manifest_entry(self, manifest: ManifestFile, discard_deleted: bool = True) -> List[ManifestEntry]:

tests/catalog/test_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ def test_table_writes_metadata_to_custom_location(catalog: InMemoryCatalog) -> N
580580
table.append(df)
581581
manifests = table.current_snapshot().manifests(table.io) # type: ignore
582582

583-
assert table.metadata_file_location(table.location(), "", table.properties).startswith(metadata_path)
583+
assert table.new_metadata_location(table.location(), "", table.properties).startswith(metadata_path)
584584
assert manifests[0].manifest_path.startswith(metadata_path)
585585
assert table.location() != metadata_path
586586
assert table.metadata_location.startswith(metadata_path)
@@ -599,6 +599,6 @@ def test_table_writes_metadata_to_default_path(catalog: InMemoryCatalog) -> None
599599
table.append(df)
600600
manifests = table.current_snapshot().manifests(table.io) # type: ignore
601601

602-
assert table.metadata_file_location(table.location(), "", table.properties).startswith(metadata_path)
602+
assert table.new_metadata_location(table.location(), "", table.properties).startswith(metadata_path)
603603
assert manifests[0].manifest_path.startswith(metadata_path)
604604
assert table.metadata_location.startswith(metadata_path)

0 commit comments

Comments
 (0)