Skip to content

Commit 3085c40

Browse files
authored
Table Metadata Update: Support SetPropertiesUpdate and RemovePropertiesUpdate (#266)
* Support table properties update * Add test for glue catalog
1 parent d047f1b commit 3085c40

4 files changed

Lines changed: 117 additions & 0 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,31 @@ def _(update: UpgradeFormatVersionUpdate, base_metadata: TableMetadata, context:
415415
return TableMetadataUtil.parse_obj(updated_metadata_data)
416416

417417

418+
@_apply_table_update.register(SetPropertiesUpdate)
419+
def _(update: SetPropertiesUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
420+
if len(update.updates) == 0:
421+
return base_metadata
422+
423+
properties = dict(base_metadata.properties)
424+
properties.update(update.updates)
425+
426+
context.add_update(update)
427+
return base_metadata.model_copy(update={"properties": properties})
428+
429+
430+
@_apply_table_update.register(RemovePropertiesUpdate)
431+
def _(update: RemovePropertiesUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
432+
if len(update.removals) == 0:
433+
return base_metadata
434+
435+
properties = dict(base_metadata.properties)
436+
for key in update.removals:
437+
properties.pop(key)
438+
439+
context.add_update(update)
440+
return base_metadata.model_copy(update={"properties": properties})
441+
442+
418443
@_apply_table_update.register(AddSchemaUpdate)
419444
def _(update: AddSchemaUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
420445
if update.last_column_id < base_metadata.last_column_id:

tests/catalog/integration_test_glue.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,3 +294,20 @@ def test_commit_table_update_schema(
294294
assert new_schema
295295
assert new_schema == update._apply()
296296
assert new_schema.find_field("b").field_type == IntegerType()
297+
298+
299+
def test_commit_table_properties(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
300+
identifier = (database_name, table_name)
301+
test_catalog.create_namespace(namespace=database_name)
302+
table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, properties={"test_a": "test_a"})
303+
304+
assert test_catalog._parse_metadata_version(table.metadata_location) == 0
305+
306+
transaction = table.transaction()
307+
transaction.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c")
308+
transaction.remove_properties("test_b")
309+
transaction.commit_transaction()
310+
311+
updated_table_metadata = table.metadata
312+
assert test_catalog._parse_metadata_version(table.metadata_location) == 1
313+
assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"}

tests/catalog/test_glue.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,3 +553,25 @@ def test_commit_table_update_schema(
553553
assert new_schema
554554
assert new_schema == update._apply()
555555
assert new_schema.find_field("b").field_type == IntegerType()
556+
557+
558+
@mock_glue
559+
def test_commit_table_properties(
560+
_bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str
561+
) -> None:
562+
catalog_name = "glue"
563+
identifier = (database_name, table_name)
564+
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
565+
test_catalog.create_namespace(namespace=database_name)
566+
table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, properties={"test_a": "test_a"})
567+
568+
assert test_catalog._parse_metadata_version(table.metadata_location) == 0
569+
570+
transaction = table.transaction()
571+
transaction.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c")
572+
transaction.remove_properties("test_b")
573+
transaction.commit_transaction()
574+
575+
updated_table_metadata = table.metadata
576+
assert test_catalog._parse_metadata_version(table.metadata_location) == 1
577+
assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"}

tests/table/test_init.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
AssertLastAssignedPartitionId,
5151
AssertRefSnapshotId,
5252
AssertTableUUID,
53+
RemovePropertiesUpdate,
5354
SetPropertiesUpdate,
5455
SetSnapshotRefUpdate,
5556
SnapshotRef,
@@ -529,6 +530,51 @@ def test_add_nested_list_type_column(table_v2: Table) -> None:
529530
assert new_schema.highest_field_id == 7
530531

531532

533+
def test_apply_set_properties_update(table_v2: Table) -> None:
534+
base_metadata = table_v2.metadata
535+
536+
new_metadata_no_update = update_table_metadata(base_metadata, (SetPropertiesUpdate(updates={}),))
537+
assert new_metadata_no_update == base_metadata
538+
539+
new_metadata = update_table_metadata(
540+
base_metadata, (SetPropertiesUpdate(updates={"read.split.target.size": "123", "test_a": "test_a", "test_b": "test_b"}),)
541+
)
542+
543+
assert base_metadata.properties == {"read.split.target.size": "134217728"}
544+
assert new_metadata.properties == {"read.split.target.size": "123", "test_a": "test_a", "test_b": "test_b"}
545+
546+
new_metadata_add_only = update_table_metadata(new_metadata, (SetPropertiesUpdate(updates={"test_c": "test_c"}),))
547+
548+
assert new_metadata_add_only.properties == {
549+
"read.split.target.size": "123",
550+
"test_a": "test_a",
551+
"test_b": "test_b",
552+
"test_c": "test_c",
553+
}
554+
555+
556+
def test_apply_remove_properties_update(table_v2: Table) -> None:
557+
base_metadata = update_table_metadata(
558+
table_v2.metadata,
559+
(SetPropertiesUpdate(updates={"test_a": "test_a", "test_b": "test_b", "test_c": "test_c", "test_d": "test_d"}),),
560+
)
561+
562+
new_metadata_no_removal = update_table_metadata(base_metadata, (RemovePropertiesUpdate(removals=[]),))
563+
564+
assert base_metadata == new_metadata_no_removal
565+
566+
new_metadata = update_table_metadata(base_metadata, (RemovePropertiesUpdate(removals=["test_a", "test_c"]),))
567+
568+
assert base_metadata.properties == {
569+
"read.split.target.size": "134217728",
570+
"test_a": "test_a",
571+
"test_b": "test_b",
572+
"test_c": "test_c",
573+
"test_d": "test_d",
574+
}
575+
assert new_metadata.properties == {"read.split.target.size": "134217728", "test_b": "test_b", "test_d": "test_d"}
576+
577+
532578
def test_apply_add_schema_update(table_v2: Table) -> None:
533579
transaction = table_v2.transaction()
534580
update = transaction.update_schema()
@@ -625,6 +671,8 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
625671
schema_update_1.add_column(path="b", field_type=IntegerType())
626672
schema_update_1.commit()
627673

674+
transaction.set_properties(owner="test", test_a="test_a", test_b="test_b", test_c="test_c")
675+
628676
test_updates = transaction._updates # pylint: disable=W0212
629677

630678
new_snapshot = Snapshot(
@@ -639,6 +687,7 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
639687

640688
test_updates += (
641689
AddSnapshotUpdate(snapshot=new_snapshot),
690+
SetPropertiesUpdate(updates={"test_a": "test_a1"}),
642691
SetSnapshotRefUpdate(
643692
ref_name="main",
644693
type="branch",
@@ -647,6 +696,7 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
647696
max_snapshot_age_ms=12312312312,
648697
min_snapshots_to_keep=1,
649698
),
699+
RemovePropertiesUpdate(removals=["test_c", "test_b"]),
650700
)
651701

652702
new_metadata = update_table_metadata(base_metadata, test_updates)
@@ -681,6 +731,9 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
681731
max_ref_age_ms=123123123,
682732
)
683733

734+
# Set/RemovePropertiesUpdate
735+
assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"}
736+
684737

685738
def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
686739
base_metadata = table_v1.metadata

0 commit comments

Comments
 (0)