Skip to content

Commit a88b976

Browse files
Readability pass on replace tests
Drop docstrings that restated the function name or assertions, drop narrative inline comments where variable names already carry the meaning, and tighten multi-paragraph docstrings to one line. Also shorten the integration test function names to match how the rest of the file is structured.
1 parent feff7dd commit a88b976

3 files changed

Lines changed: 8 additions & 70 deletions

File tree

tests/catalog/test_catalog_behaviors.py

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,6 @@ def _simple_data(num_rows: int = 2) -> pa.Table:
417417

418418

419419
def test_replace_table_preserves_uuid_and_clears_current_snapshot(catalog: Catalog, test_table_identifier: Identifier) -> None:
420-
"""Replace preserves table_uuid and snapshot history, but clears the main ref."""
421420
_create_simple_table(catalog, test_table_identifier)
422421
original = catalog.load_table(test_table_identifier)
423422
original.append(_simple_data())
@@ -441,9 +440,7 @@ def test_replace_table_preserves_uuid_and_clears_current_snapshot(catalog: Catal
441440
@pytest.mark.parametrize(
442441
"extra_fields, expected_schema_ids, expected_current, expected_fields, expected_last_col_id",
443442
[
444-
# Identical to the existing schema → reuse the same schema_id, no new schema appended.
445443
([], [0], 0, {"id": 1, "data": 2}, 2),
446-
# Extra column → append a new schema; existing IDs preserved by name, new one above last_column_id.
447444
(
448445
[NestedField(field_id=99, name="extra", field_type=BooleanType(), required=False)],
449446
[0, 1],
@@ -463,7 +460,6 @@ def test_replace_table_schema_id_reuse(
463460
expected_fields: dict[str, int],
464461
expected_last_col_id: int,
465462
) -> None:
466-
"""A structurally identical schema reuses its schema_id; an extended schema adds a new one."""
467463
_, base_schema = _create_simple_table(catalog, test_table_identifier)
468464
new_schema = Schema(*base_schema.fields, *extra_fields)
469465
replaced = catalog.replace_table(test_table_identifier, schema=new_schema)
@@ -475,7 +471,6 @@ def test_replace_table_schema_id_reuse(
475471

476472

477473
def test_replace_table_preserves_identifier_field_ids(catalog: Catalog, test_table_identifier: Identifier) -> None:
478-
"""Identifier-field IDs on the new schema are honored after reuse-by-name."""
479474
schema = Schema(
480475
NestedField(field_id=1, name="id", field_type=LongType(), required=True),
481476
NestedField(field_id=2, name="data", field_type=StringType(), required=False),
@@ -493,8 +488,6 @@ def test_replace_table_preserves_identifier_field_ids(catalog: Catalog, test_tab
493488

494489

495490
def test_replace_table_drops_identifier_field(catalog: Catalog, test_table_identifier: Identifier) -> None:
496-
"""Replacing with a schema that has no `identifier_field_ids` clears them on the table —
497-
the previous identifier set is not silently carried forward."""
498491
schema_with_id = Schema(
499492
NestedField(field_id=1, name="id", field_type=LongType(), required=True),
500493
NestedField(field_id=2, name="data", field_type=StringType(), required=False),
@@ -510,7 +503,6 @@ def test_replace_table_drops_identifier_field(catalog: Catalog, test_table_ident
510503

511504

512505
def test_replace_table_reuses_partition_spec_id(catalog: Catalog, test_table_identifier: Identifier) -> None:
513-
"""An identical partition spec reuses its spec_id rather than appending a new one."""
514506
spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, name="id_part", transform=IdentityTransform()))
515507
_, schema = _create_simple_table(catalog, test_table_identifier, partition_spec=spec)
516508
replaced = catalog.replace_table(test_table_identifier, schema=schema, partition_spec=spec)
@@ -519,30 +511,23 @@ def test_replace_table_reuses_partition_spec_id(catalog: Catalog, test_table_ide
519511

520512

521513
def test_replace_table_with_sort_order_changes(catalog: Catalog, test_table_identifier: Identifier) -> None:
522-
"""Replace can change the sort order. The new sort order is appended to the history and
523-
becomes the default; a follow-up replace back to a sort already in history reuses its
524-
order_id rather than appending a duplicate."""
525514
_, schema = _create_simple_table(catalog, test_table_identifier)
526515
sort = SortOrder(SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC))
527516

528-
# unsorted → sorted: a new order is added and becomes the default.
529517
sorted_table = catalog.replace_table(test_table_identifier, schema=schema, sort_order=sort)
530518
assert sorted_table.sort_order().fields == sort.fields
531519
sorted_order_id = sorted_table.metadata.default_sort_order_id
532520
assert sorted_order_id != 0
533521

534-
# sorted → unsorted: reuses the unsorted order_id 0 from history.
535522
unsorted_table = catalog.replace_table(test_table_identifier, schema=schema)
536523
assert unsorted_table.sort_order().is_unsorted
537524
assert unsorted_table.metadata.default_sort_order_id == 0
538525

539-
# unsorted → original sorted form: reuses the existing sorted order_id from history.
540526
replayed = catalog.replace_table(test_table_identifier, schema=schema, sort_order=sort)
541527
assert replayed.metadata.default_sort_order_id == sorted_order_id
542528

543529

544530
def test_replace_table_inherits_existing_location(catalog: Catalog, test_table_identifier: Identifier) -> None:
545-
"""`location=None` keeps the existing table's location."""
546531
_, schema = _create_simple_table(catalog, test_table_identifier)
547532
existing = catalog.load_table(test_table_identifier).metadata.location
548533
replaced = catalog.replace_table(test_table_identifier, schema=schema)
@@ -553,7 +538,6 @@ def test_replace_table_inherits_existing_location(catalog: Catalog, test_table_i
553538
def test_replace_table_uses_explicit_location(
554539
catalog: Catalog, test_table_identifier: Identifier, tmp_path: Path, trailing_slash: bool
555540
) -> None:
556-
"""An explicit `location` is used verbatim; trailing slash is stripped."""
557541
_, schema = _create_simple_table(catalog, test_table_identifier)
558542
bare = f"file://{tmp_path}/relocated"
559543
arg = bare + "/" if trailing_slash else bare
@@ -564,7 +548,6 @@ def test_replace_table_uses_explicit_location(
564548
def test_replace_table_merges_properties_with_overrides_and_additions(
565549
catalog: Catalog, test_table_identifier: Identifier
566550
) -> None:
567-
"""Properties are merged onto existing: new values override, untouched keys are preserved."""
568551
schema = Schema(NestedField(field_id=1, name="id", field_type=LongType(), required=False))
569552
_create_simple_table(catalog, test_table_identifier, schema=schema, properties={"keep": "yes", "override": "old"})
570553
replaced = catalog.replace_table(test_table_identifier, schema=schema, properties={"override": "new", "new_key": "v"})
@@ -574,7 +557,6 @@ def test_replace_table_merges_properties_with_overrides_and_additions(
574557

575558

576559
def test_replace_table_upgrades_format_version(catalog: Catalog, test_table_identifier: Identifier) -> None:
577-
"""`properties={'format-version': '2'}` on a v1 table emits an UpgradeFormatVersion update."""
578560
_, schema = _create_simple_table(catalog, test_table_identifier, format_version=1)
579561
assert catalog.load_table(test_table_identifier).format_version == 1
580562
replaced = catalog.replace_table(test_table_identifier, schema=schema, properties={"format-version": "2"})
@@ -584,48 +566,40 @@ def test_replace_table_upgrades_format_version(catalog: Catalog, test_table_iden
584566

585567

586568
def test_replace_table_rejects_format_version_downgrade(catalog: Catalog, test_table_identifier: Identifier) -> None:
587-
"""A `format-version` lower than the existing one must be rejected to avoid silently
588-
running the schema-conversion path with the wrong semantics."""
589569
_, schema = _create_simple_table(catalog, test_table_identifier, format_version=2)
590570
with pytest.raises(ValueError, match="Cannot downgrade format-version"):
591571
catalog.replace_table(test_table_identifier, schema=schema, properties={"format-version": "1"})
592572

593573

594574
def test_replace_table_v1_carries_forward_partition_fields_as_void(catalog: Catalog, test_table_identifier: Identifier) -> None:
595-
"""v1 specs are append-only; dropped partition fields must be carried forward as VoidTransform."""
575+
"""v1 specs are append-only; dropped partition fields are carried forward as VoidTransform."""
596576
spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, name="id_part", transform=IdentityTransform()))
597577
_, schema = _create_simple_table(catalog, test_table_identifier, partition_spec=spec, format_version=1)
598578

599-
# Replace with default unpartitioned spec — the old field must be preserved as void.
600579
replaced = catalog.replace_table(test_table_identifier, schema=schema)
601580
new_spec = replaced.spec()
602581
void_field = next(f for f in new_spec.fields if f.field_id == 1000)
603582
assert isinstance(void_field.transform, VoidTransform)
604583
assert void_field.source_id == 1
605-
# Name is preserved when there's no collision in the new spec (the new spec is empty here).
606584
assert void_field.name == "id_part"
607585

608586

609587
def test_replace_table_v2_does_not_carry_forward_void_field(catalog: Catalog, test_table_identifier: Identifier) -> None:
610-
"""v2 specs are not append-only — a replace that drops a partition field does not
611-
carry it forward (unlike v1). The new default spec contains only the new field(s)."""
588+
"""v2 specs are not append-only — a dropped partition field is not carried forward (unlike v1)."""
612589
spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, name="id_part", transform=IdentityTransform()))
613590
_, schema = _create_simple_table(catalog, test_table_identifier, partition_spec=spec, format_version=2)
614591

615-
# Replace with default unpartitioned spec — the old field is gone from the default spec.
616592
replaced = catalog.replace_table(test_table_identifier, schema=schema)
617593
new_spec = replaced.spec()
618594
assert new_spec.is_unpartitioned()
619595
assert all(not isinstance(f.transform, VoidTransform) for f in new_spec.fields)
620596

621597

622598
def test_replace_after_format_version_upgrade(catalog: Catalog, test_table_identifier: Identifier) -> None:
623-
"""A v1 table can be upgraded to v2 via replace and then re-replaced without issue."""
624599
_, schema = _create_simple_table(catalog, test_table_identifier, format_version=1)
625600
upgraded = catalog.replace_table(test_table_identifier, schema=schema, properties={"format-version": "2"})
626601
assert upgraded.format_version == 2
627602

628-
# Second replace on the now-v2 table should not re-trigger an upgrade or fail.
629603
new_schema = Schema(
630604
NestedField(field_id=1, name="id", field_type=LongType(), required=False),
631605
NestedField(field_id=2, name="data", field_type=StringType(), required=False),
@@ -643,7 +617,6 @@ def test_replace_table_raises_when_table_does_not_exist(catalog: Catalog, test_t
643617

644618

645619
def test_replace_table_transaction_can_stage_additional_changes(catalog: Catalog, test_table_identifier: Identifier) -> None:
646-
"""The transaction context lets callers stage extra updates (e.g. properties) before commit."""
647620
_, schema = _create_simple_table(catalog, test_table_identifier)
648621
with catalog.replace_table_transaction(test_table_identifier, schema=schema) as txn:
649622
txn.set_properties({"staged": "yes"})
@@ -652,12 +625,6 @@ def test_replace_table_transaction_can_stage_additional_changes(catalog: Catalog
652625

653626

654627
def test_replace_table_transaction_with_write_atomic_rtas(catalog: Catalog, test_table_identifier: Identifier) -> None:
655-
"""RTAS (Replace Table As Select): replace + write new data in one transaction.
656-
657-
The new schema and new data must land atomically. After commit:
658-
- the new snapshot is current (main ref restored by the fast_append),
659-
- the new snapshot's parent is None (no lineage to the pre-replace data), and
660-
- the pre-replace snapshot is preserved in history for time-travel."""
661628
_create_simple_table(catalog, test_table_identifier)
662629
catalog.load_table(test_table_identifier).append(_simple_data(num_rows=1))
663630
old_snapshot_id = catalog.load_table(test_table_identifier).current_snapshot().snapshot_id # type: ignore[union-attr]
@@ -680,9 +647,6 @@ def test_replace_table_transaction_with_write_atomic_rtas(catalog: Catalog, test
680647

681648

682649
def test_replace_table_transaction_rolls_back_on_failure(catalog: Catalog, test_table_identifier: Identifier) -> None:
683-
"""If the body of the transaction raises, no metadata change is committed.
684-
685-
The table's UUID, schemas, current snapshot, and current schema id must all be unchanged."""
686650
_create_simple_table(catalog, test_table_identifier)
687651
catalog.load_table(test_table_identifier).append(_simple_data())
688652
before = catalog.load_table(test_table_identifier).metadata
@@ -708,17 +672,13 @@ def run_failing_replace() -> None:
708672

709673

710674
def test_concurrent_replace_table(catalog: Catalog, test_table_identifier: Identifier) -> None:
711-
"""Two concurrent replace_table calls staged from the same base both adding a column
712-
must fail on the second commit with an `assert-last-assigned-field-id` violation —
713-
proving the `AssertLastAssignedFieldId` requirement actually guards against duplicate
714-
field-id assignment under concurrent writers."""
675+
"""`AssertLastAssignedFieldId` rejects the second of two replaces staged from the same base."""
715676
_create_simple_table(catalog, test_table_identifier)
716677
new_schema = Schema(
717678
NestedField(field_id=1, name="id", field_type=LongType(), required=False),
718679
NestedField(field_id=2, name="data", field_type=StringType(), required=False),
719680
NestedField(field_id=3, name="extra", field_type=BooleanType(), required=False),
720681
)
721-
# Both transactions build from the same base metadata with the same new schema.
722682
txn_a = catalog.replace_table_transaction(test_table_identifier, schema=new_schema)
723683
txn_b = catalog.replace_table_transaction(test_table_identifier, schema=new_schema)
724684

@@ -728,8 +688,6 @@ def test_concurrent_replace_table(catalog: Catalog, test_table_identifier: Ident
728688

729689

730690
def test_replace_table_allows_subsequent_append(catalog: Catalog, test_table_identifier: Identifier) -> None:
731-
"""After `replace_table` clears the current snapshot, a separate `append` produces a new
732-
snapshot containing only the post-replace data — the pre-replace rows are not visible."""
733691
_, schema = _create_simple_table(catalog, test_table_identifier)
734692
catalog.load_table(test_table_identifier).append(_simple_data(num_rows=3))
735693

tests/catalog/test_rest.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,8 +2927,6 @@ def test_replace_table_transaction_wire_payload(
29272927
example_table_metadata_with_snapshot_v1_rest_json: dict[str, Any],
29282928
example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any],
29292929
) -> None:
2930-
"""The replace request must carry exactly one `assert-table-uuid` requirement and the
2931-
Set* + Add* updates needed to transform the existing table — order is not asserted."""
29322930
table_uuid = example_table_metadata_with_snapshot_v1_rest_json["metadata"]["table-uuid"]
29332931
example_table_metadata_no_snapshot_v1_rest_json["metadata"]["table-uuid"] = table_uuid
29342932
_mock_replace_endpoints(
@@ -2948,31 +2946,22 @@ def test_replace_table_transaction_wire_payload(
29482946
catalog.replace_table_transaction(identifier=("fokko", "fokko2"), schema=new_schema).commit_transaction()
29492947
request = rest_mock.last_request.json()
29502948

2951-
# Pin the requirement values to the fixture's last-column-id / last-partition-id so the
2952-
# assertion fails loudly if the fixture changes underneath us.
29532949
fixture_metadata = example_table_metadata_with_snapshot_v1_rest_json["metadata"]
29542950
assert request["requirements"] == [
29552951
{"type": "assert-table-uuid", "uuid": table_uuid},
29562952
{"type": "assert-last-assigned-field-id", "last-assigned-field-id": fixture_metadata["last-column-id"]},
29572953
{"type": "assert-last-assigned-partition-id", "last-assigned-partition-id": fixture_metadata["last-partition-id"]},
29582954
]
29592955

2960-
# No duplicate actions in the payload (the dict-by-action collapses them — guard against it).
29612956
actions = [u["action"] for u in request["updates"]]
29622957
assert len(actions) == len(set(actions)), f"duplicate actions in request: {actions}"
29632958
updates_by_action = {u["action"]: u for u in request["updates"]}
29642959

2965-
# main snapshot must be cleared on replace.
29662960
assert updates_by_action["remove-snapshot-ref"] == {"action": "remove-snapshot-ref", "ref-name": "main"}
2967-
# A new schema is added because the column set differs; field IDs for existing columns
2968-
# are reused by name. The highest id (3) flows from the schema's fields, which the
2969-
# server uses to bump the table's last-column-id on the server side.
29702961
added_schema = updates_by_action["add-schema"]["schema"]
29712962
assert {f["name"]: f["id"] for f in added_schema["fields"]} == {"id": 1, "data": 2, "new_col": 3}
2972-
# Set* updates are emitted unconditionally even when their resulting id is reused.
29732963
# schema-id=-1 is the wire sentinel meaning "the schema we just added in this commit".
29742964
assert updates_by_action["set-current-schema"]["schema-id"] == -1
2975-
# Spec/sort-order are reused from the fixture (default ids) since the replace doesn't change them.
29762965
assert updates_by_action["set-default-spec"]["spec-id"] == fixture_metadata["default-spec-id"]
29772966
assert updates_by_action["set-default-sort-order"]["sort-order-id"] == fixture_metadata["default-sort-order-id"]
29782967

@@ -2999,9 +2988,7 @@ def test_replace_table_issues_commit_post_immediately(
29992988
example_table_metadata_with_snapshot_v1_rest_json: dict[str, Any],
30002989
example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any],
30012990
) -> None:
3002-
"""`replace_table` issues a commit POST during the call — distinguishing it from
3003-
`replace_table_transaction(...)` alone, which only issues the load GET (no POST until
3004-
the caller commits the transaction)."""
2991+
"""`replace_table` commits during the call; `replace_table_transaction` defers the POST until the caller commits."""
30052992
table_uuid = example_table_metadata_with_snapshot_v1_rest_json["metadata"]["table-uuid"]
30062993
example_table_metadata_no_snapshot_v1_rest_json["metadata"]["table-uuid"] = table_uuid
30072994
_mock_replace_endpoints(
@@ -3017,8 +3004,6 @@ def test_replace_table_issues_commit_post_immediately(
30173004
NestedField(field_id=2, name="data", field_type=StringType(), required=False),
30183005
)
30193006

3020-
# Baseline: opening the transaction alone makes only the load GET call; no POST is
3021-
# issued until the caller commits.
30223007
catalog.replace_table_transaction(identifier=("fokko", "fokko2"), schema=new_schema)
30233008
methods_after_open = [r.method for r in rest_mock.request_history]
30243009
assert "POST" not in methods_after_open

tests/integration/test_rest_catalog.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,8 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None:
7474

7575
@pytest.mark.integration
7676
@pytest.mark.parametrize("catalog", [lf("session_catalog")])
77-
def test_replace_table_end_to_end_against_rest_server(catalog: Catalog) -> None:
78-
"""End-to-end smoke test: replace_table against a real REST server.
79-
80-
Detailed replace_table semantics are covered against InMemoryCatalog and SqlCatalog in
81-
`tests/catalog/test_catalog_behaviors.py`. This test verifies the REST wire path: server
82-
accepts the commit, preserves the UUID, and clears the current snapshot."""
77+
def test_replace_table(catalog: Catalog) -> None:
78+
"""End-to-end smoke test: replace_table against a real REST server."""
8379
identifier = f"default.test_replace_table_e2e_{catalog.name}"
8480
if not catalog.namespace_exists("default"):
8581
catalog.create_namespace("default")
@@ -115,9 +111,8 @@ def test_replace_table_end_to_end_against_rest_server(catalog: Catalog) -> None:
115111

116112
@pytest.mark.integration
117113
@pytest.mark.parametrize("catalog", [lf("session_catalog")])
118-
def test_replace_table_transaction_rtas_against_rest_server(catalog: Catalog) -> None:
119-
"""RTAS (Replace Table As Select) against a real REST server: the schema swap and the
120-
new-data write must land atomically — the new snapshot is current on commit."""
114+
def test_replace_table_transaction(catalog: Catalog) -> None:
115+
"""RTAS against a real REST server: the schema swap and the new-data write land atomically."""
121116
identifier = f"default.test_replace_rtas_{catalog.name}"
122117
if not catalog.namespace_exists("default"):
123118
catalog.create_namespace("default")

0 commit comments

Comments
 (0)