Skip to content

Commit b11ae97

Browse files
author
Paul Mathew
committed
test(upsert): drop redundant TestUpsertScanProjection test
Removes ``test_when_matched_true_keeps_star_projection`` per review feedback — the spy-on-init assertion that ``selected_fields=("*",)`` is preserved on the ``when_matched_update_all=True`` path is redundant with ``test_update_mode_actually_updates_non_key_columns``, which proves the wide projection works end-to-end by detecting non-key value drift through ``get_rows_to_update``. A regression that narrowed the ``=True`` projection unconditionally would fail the end-to-end test because ``rows_updated`` would drop to 0 and the read-back would see the stale value.

Leaves two focused tests in ``TestUpsertScanProjection``:

- ``test_when_matched_false_projects_join_cols_only`` pins the positive case (narrowing fires on the insert-on-no-match branch).
- ``test_update_mode_actually_updates_non_key_columns`` pins the negative case (the update branch keeps the wide projection it needs).

Co-authored-by: Cursor <[email protected]>
1 parent b81990d commit b11ae97

1 file changed

Lines changed: 8 additions & 34 deletions

File tree

tests/table/test_upsert.py

Lines changed: 8 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1012,41 +1012,15 @@ def test_when_matched_false_projects_join_cols_only(self, catalog: Catalog, capt
10121012
f"expected every DataScan during upsert to use selected_fields=('order_id',); got {selected}"
10131013
)
10141014

1015-
def test_when_matched_true_keeps_star_projection(self, catalog: Catalog, captured_scans: list[dict[str, Any]]) -> None:
1016-
"""The update branch's ``get_rows_to_update`` compares non-key
1017-
columns to detect actual value changes — projecting only
1018-
``join_cols`` would feed it data with no non-key columns to
1019-
compare and silently turn every match into a write-back. Must
1020-
keep ``("*",)``."""
1021-
table = self._build_partitioned_table(catalog, "default.test_upsert_projection_update_mode")
1022-
self._seed(table)
1023-
upsert_df = pa.Table.from_pylist(
1024-
[
1025-
{"order_id": 1, "order_date": datetime.date(2026, 1, 1), "order_type": "B"},
1026-
{"order_id": 3, "order_date": datetime.date(2026, 1, 3), "order_type": "B"},
1027-
],
1028-
schema=self._arrow_schema(),
1029-
)
1030-
1031-
before = len(captured_scans)
1032-
res = table.upsert(df=upsert_df, join_cols=["order_id"], when_matched_update_all=True)
1033-
upsert_scans = captured_scans[before:]
1034-
assert res.rows_updated == 1
1035-
assert res.rows_inserted == 1
1036-
1037-
assert upsert_scans, "upsert path constructed no DataScan — projection contract regression"
1038-
selected = [s.get("selected_fields") for s in upsert_scans]
1039-
assert all(sf == ("*",) for sf in selected), (
1040-
f"expected every DataScan during upsert to keep selected_fields=('*',) for the update branch; got {selected}"
1041-
)
1042-
10431015
def test_update_mode_actually_updates_non_key_columns(self, catalog: Catalog) -> None:
1044-
"""End-to-end correctness pin: with ``when_matched_update_all=True``
1045-
the destination scan must read non-key columns so
1046-
``get_rows_to_update`` can detect ``order_type`` changes. A
1047-
regression that narrows projection unconditionally would skip
1048-
the comparison and silently miss updates whose non-key columns
1049-
differ.
1016+
"""End-to-end correctness pin for the ``when_matched_update_all=True``
1017+
branch: the destination scan must read non-key columns so
1018+
``get_rows_to_update`` can detect ``order_type`` changes and
1019+
run the update. A regression that narrows projection
1020+
unconditionally would skip the comparison and silently miss
1021+
updates whose non-key columns differ — this test would fail
1022+
because ``rows_updated`` would drop to 0 and the read-back
1023+
would still see the old ``'A'``.
10501024
"""
10511025
identifier = "default.test_upsert_update_mode_correctness"
10521026
table = self._build_partitioned_table(catalog, identifier)

0 commit comments

Comments
 (0)