Skip to content

Commit 4278b6a

Browse files
Add niche but no-regret tests from audit follow-up
- test_replace_table_drops_identifier_field: pairs with the existing preserve test; verifies that a new schema without identifier_field_ids clears the previous set rather than silently carrying it forward.
- test_replace_table_v2_does_not_carry_forward_void_field: v2 specs aren't append-only, so a dropped partition field is gone (unlike v1).
- test_replace_after_format_version_upgrade: v1 -> v2 via replace, then a second replace on the now-v2 table must not retrigger the upgrade or fail.
1 parent 0847d20 commit 4278b6a

1 file changed

Lines changed: 47 additions & 0 deletions

File tree

tests/catalog/test_catalog_behaviors.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,23 @@ def test_replace_table_preserves_identifier_field_ids(catalog: Catalog, test_tab
492492
assert list(replaced.schema().identifier_field_ids) == [1]
493493

494494

495+
def test_replace_table_drops_identifier_field(catalog: Catalog, test_table_identifier: Identifier) -> None:
    """Identifier fields are cleared when the replacement schema declares none.

    The table is created with `id` as an identifier field and then replaced with a
    schema that omits `identifier_field_ids`; the earlier identifier set must not be
    carried forward onto the replaced table.
    """
    # Original table: `id` is required and registered as the sole identifier field.
    original_fields = [
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
        NestedField(field_id=2, name="data", field_type=StringType(), required=False),
    ]
    _create_simple_table(
        catalog,
        test_table_identifier,
        schema=Schema(*original_fields, identifier_field_ids=[1]),
    )

    # Replacement: same columns, but `id` is optional and no identifier fields are declared.
    replacement_fields = [
        NestedField(field_id=1, name="id", field_type=LongType(), required=False),
        NestedField(field_id=2, name="data", field_type=StringType(), required=False),
    ]
    replaced = catalog.replace_table(test_table_identifier, schema=Schema(*replacement_fields))

    remaining_identifier_ids = list(replaced.schema().identifier_field_ids)
    assert remaining_identifier_ids == []
510+
511+
495512
def test_replace_table_reuses_partition_spec_id(catalog: Catalog, test_table_identifier: Identifier) -> None:
496513
"""An identical partition spec reuses its spec_id rather than appending a new one."""
497514
spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, name="id_part", transform=IdentityTransform()))
@@ -582,6 +599,36 @@ def test_replace_table_v1_carries_forward_partition_fields_as_void(catalog: Cata
582599
assert void_field.name == "id_part"
583600

584601

602+
def test_replace_table_v2_does_not_carry_forward_void_field(catalog: Catalog, test_table_identifier: Identifier) -> None:
    """Dropping a partition field via replace on a v2 table leaves no void placeholder.

    v2 specs are not append-only, so (unlike v1) the dropped field is not carried
    forward: the new default spec holds only the fields requested by the replace.
    """
    id_partition = PartitionField(source_id=1, field_id=1000, name="id_part", transform=IdentityTransform())
    _, schema = _create_simple_table(
        catalog,
        test_table_identifier,
        partition_spec=PartitionSpec(id_partition),
        format_version=2,
    )

    # No partition spec is passed here, so the replace is expected to fall back to the
    # default unpartitioned spec and the old field must be absent from it.
    replaced = catalog.replace_table(test_table_identifier, schema=schema)
    new_spec = replaced.spec()
    assert new_spec.is_unpartitioned()

    void_fields = [field for field in new_spec.fields if isinstance(field.transform, VoidTransform)]
    assert not void_fields
613+
614+
615+
def test_replace_after_format_version_upgrade(catalog: Catalog, test_table_identifier: Identifier) -> None:
    """Replace works again on a table that an earlier replace upgraded from v1 to v2."""
    _, schema = _create_simple_table(catalog, test_table_identifier, format_version=1)

    # First replace: request the v1 -> v2 upgrade through the table properties.
    upgrade_properties = {"format-version": "2"}
    upgraded = catalog.replace_table(test_table_identifier, schema=schema, properties=upgrade_properties)
    assert upgraded.format_version == 2

    # Second replace: the table is already v2, so this must neither re-trigger the
    # upgrade nor fail. A third column is added to make the schema change observable.
    widened_fields = (
        NestedField(field_id=1, name="id", field_type=LongType(), required=False),
        NestedField(field_id=2, name="data", field_type=StringType(), required=False),
        NestedField(field_id=3, name="extra", field_type=BooleanType(), required=False),
    )
    replaced = catalog.replace_table(test_table_identifier, schema=Schema(*widened_fields))
    assert replaced.format_version == 2

    column_names = {field.name for field in replaced.schema().fields}
    assert column_names == {"id", "data", "extra"}
630+
631+
585632
def test_replace_table_raises_when_table_does_not_exist(catalog: Catalog, test_table_identifier: Identifier) -> None:
586633
schema = Schema(NestedField(field_id=1, name="id", field_type=LongType(), required=False))
587634
with pytest.raises(NoSuchTableError):

0 commit comments

Comments
 (0)