apache
diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
Lines changed: 4 additions & 2 deletions
diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py
Lines changed: 19 additions & 7 deletions
diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py
Lines changed: 0 additions & 20 deletions
diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py
Lines changed: 28 additions & 8 deletions
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
Lines changed: 4 additions & 0 deletions
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
Lines changed: 32 additions & 18 deletions
@@ -187,7 +187,7 @@ with catalog.create_table_transaction(identifier="docs_example.bids", schema=sch
 
 ## Replace a table
 
-Atomically replace an existing table's schema, partition spec, sort order, location, and properties. The table UUID and history (snapshots, schemas, specs, sort orders, metadata log) are preserved; the current snapshot is cleared (the `main` branch ref is removed). This is the analog of Spark/Trino's `CREATE OR REPLACE TABLE` for the table-metadata side, and supports RTAS-style workflows when combined with subsequent writes.
+Atomically replace an existing table's schema, partition spec, sort order, location, and properties. The table UUID and history (snapshots, schemas, specs, sort orders, metadata log) are preserved; the current snapshot is cleared (the `main` branch ref is removed). Use `replace_table` when you only need to redefine the table's metadata; use `replace_table_transaction` instead when new data must be written atomically together with the metadata change (RTAS-style, i.e. replace-table-as-select).
 
 ```python
 from pyiceberg.schema import Schema
@@ -206,7 +206,9 @@ catalog.replace_table(
 
 Field IDs from columns whose names appear in the previous schema are reused, so existing data files remain readable when the new schema is a compatible superset. New columns get fresh IDs above `last-column-id`.
 
-Use `replace_table_transaction` to stage additional changes (writes, property updates, schema evolution) before committing — the equivalent of `CREATE OR REPLACE TABLE AS SELECT`:
+Properties passed to `replace_table` are **merged** with the existing table properties (your values override; existing keys you don't pass are preserved). To remove a property as part of the replace, use `replace_table_transaction` and remove it explicitly within the transaction.
+
+Use `replace_table_transaction` to stage additional changes (writes, property updates, schema evolution) before committing — for example, swap the schema and write new data atomically:
 
 ```python
 with catalog.replace_table_transaction(identifier="docs_example.bids", schema=new_schema) as txn:
 
@@ -331,8 +331,8 @@ def delete_data_files(io: FileIO, manifests_to_delete: list[ManifestFile]) -> No
 def _raise_if_view_exists(catalog: Catalog, identifier: str | Identifier) -> None:
     """Raise TableAlreadyExistsError if a view exists at the same identifier.
 
-    Mirrors Java's `RESTSessionCatalog.replaceTransaction()` precondition. Catalogs that
-    don't support views raise `NotImplementedError` from `view_exists` — treat as "no view".
+    Catalogs that don't support views raise `NotImplementedError` from `view_exists` —
+    treat as "no view" in that case.
     """
     try:
         view_collision = catalog.view_exists(identifier)
@@ -483,7 +483,10 @@ def replace_table(
             location (str | None): New table location. Defaults to the existing location.
             partition_spec (PartitionSpec): New partition spec.
             sort_order (SortOrder): New sort order.
-            properties (Properties): New table properties (merged with existing).
+            properties (Properties): Properties to apply. Merged on top of the existing
+                table properties: keys present here override existing values; existing keys
+                not present here are preserved. To remove a property, follow up with a
+                transaction that removes it explicitly.
 
         Returns:
             Table: the replaced table instance.
@@ -516,7 +519,10 @@ def replace_table_transaction(
             location (str | None): New table location. Defaults to the existing location.
             partition_spec (PartitionSpec): New partition spec.
             sort_order (SortOrder): New sort order.
-            properties (Properties): New table properties (merged with existing).
+            properties (Properties): Properties to apply. Merged on top of the existing
+                table properties: keys present here override existing values; existing keys
+                not present here are preserved. To remove a property, remove it explicitly
+                on the returned transaction before committing.
 
         Returns:
             ReplaceTableTransaction: A transaction for the replace operation.
@@ -538,8 +544,7 @@ def _replace_staged_table(
     ) -> tuple[StagedTable, Schema, PartitionSpec, SortOrder, str]:
         """Load the existing table and build fresh schema/spec/sort-order for replacement.
 
-        Mirrors the bookkeeping in `TableMetadata.buildReplacement` (iceberg-java):
-        - reuses existing field IDs by name (current schema)
+        - reuses existing field IDs by name (from the current schema)
         - reuses partition field IDs by `(source, transform)` across all specs (v2+),
           or carries forward the current spec with `VoidTransform`s (v1)
         - reassigns sort field IDs against the fresh schema
@@ -551,7 +556,14 @@ def _replace_staged_table(
         existing_table = self.load_table(identifier)
         existing_metadata = existing_table.metadata
 
-        resolved_format_version = int(properties.get(TableProperties.FORMAT_VERSION, existing_metadata.format_version))  # type: ignore
+        requested_format_version = properties.get(TableProperties.FORMAT_VERSION)
+        if requested_format_version is not None and int(requested_format_version) < existing_metadata.format_version:
+            raise ValueError(
+                f"Cannot downgrade format-version from {existing_metadata.format_version} to {requested_format_version}"
+            )
+        resolved_format_version = (
+            int(requested_format_version) if requested_format_version is not None else existing_metadata.format_version
+        )
         iceberg_schema = self._convert_schema_if_needed(schema, resolved_format_version)
 
         fresh_schema, _ = assign_fresh_schema_ids_for_replace(
 
@@ -966,26 +966,6 @@ def create_table_transaction(
         staged_table = self._response_to_staged_table(self.identifier_to_tuple(identifier), table_response)
         return CreateTableTransaction(staged_table)
 
-    @override
-    def replace_table(
-        self,
-        identifier: str | Identifier,
-        schema: Schema | pa.Schema,
-        location: str | None = None,
-        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
-        sort_order: SortOrder = UNSORTED_SORT_ORDER,
-        properties: Properties = EMPTY_DICT,
-    ) -> Table:
-        txn = self.replace_table_transaction(
-            identifier=identifier,
-            schema=schema,
-            location=location,
-            partition_spec=partition_spec,
-            sort_order=sort_order,
-            properties=properties,
-        )
-        return txn.commit_transaction()
-
     @override
     @retry(**_RETRY_ARGS)
     def replace_table_transaction(
 
@@ -346,7 +346,6 @@ def assign_fresh_partition_spec_ids_for_replace(
 ) -> tuple[PartitionSpec, int]:
     """Assign partition field IDs for a replace operation, reusing IDs from existing specs.
 
-    Mirrors `TableMetadata.reassignPartitionIds` in iceberg-java:
     - For v2+, reuse partition field IDs by `(source_id, transform)` across all existing specs.
       New fields get IDs starting from `last_partition_id + 1`.
     - For v1, the current spec's fields must be preserved (v1 specs are append-only). Fields
@@ -374,8 +373,8 @@ def assign_fresh_partition_spec_ids_for_replace(
             spec, old_schema, fresh_schema, current_spec, effective_last_partition_id
         )
 
-    # v2+: reuse field IDs by (source_id, transform) across all specs.
-    # Use max() for dedup when the same (source_id, transform) appears in multiple specs.
+    # v2+: reuse field IDs by (source_id, transform) across all specs. When the same
+    # (source_id, transform) appears in multiple specs, prefer the highest field_id.
     transform_to_field_id: dict[tuple[int, str], int] = {}
     for existing_spec in existing_specs:
         for field in existing_spec.fields:
@@ -412,8 +411,9 @@ def assign_fresh_partition_spec_ids_for_replace(
             )
         )
 
-    new_last_partition_id = max(next_id, effective_last_partition_id)
-    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), new_last_partition_id
+    # `next_id` starts at `effective_last_partition_id` and only increments, so it is the
+    # new last partition id.
+    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), next_id
 
 
 def _assign_fresh_partition_spec_ids_for_replace_v1(
@@ -442,6 +442,7 @@ def _assign_fresh_partition_spec_ids_for_replace_v1(
 
     # Walk current spec, carrying forward each field. Matching new fields consume their key;
     # missing fields become void transforms.
+    used_names: set[str] = set(new_field_names)
     partition_fields = []
     for cur_field in current_spec.fields:
         key = (cur_field.source_id, str(cur_field.transform))
@@ -456,8 +457,10 @@ def _assign_fresh_partition_spec_ids_for_replace_v1(
                     transform=new_field.transform,
                 )
             )
+            used_names.add(new_field.name)
         else:
-            void_name = f"{cur_field.name}_{cur_field.field_id}" if cur_field.name in new_field_names else cur_field.name
+            void_name = _unique_void_name(cur_field.name, cur_field.field_id, used_names)
+            used_names.add(void_name)
             partition_fields.append(
                 PartitionField(
                     name=void_name,
@@ -480,8 +483,25 @@ def _assign_fresh_partition_spec_ids_for_replace_v1(
             )
         )
 
-    new_last_partition_id = max(next_id, effective_last_partition_id)
-    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), new_last_partition_id
+    # `next_id` starts at `effective_last_partition_id` and only increments, so it is the
+    # new last partition id.
+    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), next_id
+
+
+def _unique_void_name(base_name: str, field_id: int, used_names: set[str]) -> str:
+    """Pick a void-transform name that does not collide with already-used names.
+
+    Tries `{base_name}`, then `{base_name}_{field_id}`, then `{base_name}_{field_id}_2`,
+    `..._3`, and so on until unused. `used_names` is not modified; callers record the result.
+    """
+    if base_name not in used_names:
+        return base_name
+    candidate = f"{base_name}_{field_id}"
+    suffix = 2
+    while candidate in used_names:
+        candidate = f"{base_name}_{field_id}_{suffix}"
+        suffix += 1
+    return candidate
 
 
 T = TypeVar("T")
 
@@ -1386,6 +1386,10 @@ class _SetFreshIDsForReplace(_SetFreshIDs):
     For each field in the new schema, if a field with the same full name exists in the
     base schema, its ID is reused; otherwise a fresh ID is allocated starting from
     last_column_id + 1.
+
+    Note: ID reuse is purely name-based — a field whose name matches but whose type differs
+    (e.g. `int` → `string`) will reuse the base ID. This is intentional: replace allows
+    arbitrary schema changes; type compatibility is the caller's responsibility.
     """
 
     def __init__(self, old_id_to_base_id: dict[int, int], starting_id: int) -> None:
 
@@ -58,6 +58,8 @@
     AddSchemaUpdate,
     AddSortOrderUpdate,
     AssertCreate,
+    AssertLastAssignedFieldId,
+    AssertLastAssignedPartitionId,
     AssertRefSnapshotId,
     AssertTableUUID,
     AssignUUIDUpdate,
@@ -1018,6 +1020,18 @@ class ReplaceTableTransaction(Transaction):
     schema/spec/sort-order/location/properties are applied.
     """
 
+    def __init__(
+        self,
+        table: StagedTable,
+        new_schema: Schema,
+        new_spec: PartitionSpec,
+        new_sort_order: SortOrder,
+        new_location: str,
+        new_properties: Properties,
+    ) -> None:
+        super().__init__(table, autocommit=False)
+        self._initial_changes(table.metadata, new_schema, new_spec, new_sort_order, new_location, new_properties)
+
     def _initial_changes(
         self,
         table_metadata: TableMetadata,
@@ -1029,11 +1043,11 @@ def _initial_changes(
     ) -> None:
         """Set the initial changes that transform the existing table into the replacement.
 
-        Mirrors Java's `TableMetadata.buildReplacement` + `RESTSessionCatalog.replaceTransaction`:
-        ensures `SetCurrentSchema` / `SetDefaultPartitionSpec` / `SetDefaultSortOrder` are
-        always emitted (even when reused), and bumps `format-version` when requested.
+        Always emits `SetCurrentSchema` / `SetDefaultPartitionSpec` / `SetDefaultSortOrder`
+        (even when the resulting id is reused) so the request body unambiguously signals a
+        replace. Bumps `format-version` when the new properties request it.
         """
-        # Upgrade format-version if requested via properties (matches Java's buildReplacement).
+        # Upgrade format-version if requested via properties.
         requested_format_version_str = new_properties.get(TableProperties.FORMAT_VERSION)
         if requested_format_version_str is not None:
             requested_format_version = int(requested_format_version_str)
@@ -1115,30 +1129,30 @@ def _find_matching_sort_order_id(table_metadata: TableMetadata, sort_order: Sort
                 return existing.order_id
         return None
 
-    def __init__(
-        self,
-        table: StagedTable,
-        new_schema: Schema,
-        new_spec: PartitionSpec,
-        new_sort_order: SortOrder,
-        new_location: str,
-        new_properties: Properties,
-    ) -> None:
-        super().__init__(table, autocommit=False)
-        self._initial_changes(table.metadata, new_schema, new_spec, new_sort_order, new_location, new_properties)
-
     def commit_transaction(self) -> Table:
         """Commit the replace changes to the catalog.
 
-        Uses AssertTableUUID as the only requirement.
+        Requirements:
+        - `AssertTableUUID` — the table identity hasn't changed since load.
+        - `AssertLastAssignedFieldId` — guards against a concurrent commit bumping
+          `last-column-id` between load and commit (which would cause our newly-assigned
+          field IDs to collide).
+        - `AssertLastAssignedPartitionId` — same guard for partition field IDs.
 
         Returns:
             The table with the updates applied.
         """
         if len(self._updates) > 0:
+            base = self._table.metadata
+            requirements: tuple[TableRequirement, ...] = (
+                AssertTableUUID(uuid=base.table_uuid),
+                AssertLastAssignedFieldId(last_assigned_field_id=base.last_column_id),
+            )
+            if base.last_partition_id is not None:
+                requirements += (AssertLastAssignedPartitionId(last_assigned_partition_id=base.last_partition_id),)
             self._table._do_commit(  # pylint: disable=W0212
                 updates=self._updates,
-                requirements=(AssertTableUUID(uuid=self._table.metadata.table_uuid),),
+                requirements=requirements,
             )
 
         self._updates = ()