Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@

### Snowpark Python API Updates

#### Bug Fixes

#### New Features

- Added `get_wif_token` to `snowflake.snowpark.secrets` for workload identity federation tokens on the Snowflake server (not available in SPCS file-based secret environments).

#### Bug Fixes

- Fixed a bug where calling `DataFrame.alias()` twice on the same DataFrame (e.g. for a self-join) caused both aliases to share the same internal column-mapping dictionary. This made `col("R", "col")` resolve to the same column as `col("L", "col")`, producing incorrect join conditions and filter expressions.

## 1.51.1 (2026-05-28)

#### Documentation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,7 @@ def __copy__(self):
new._snowflake_plan = None
new.flatten_disabled = False # by default a SelectStatement can be flattened.
new._api_calls = self._api_calls.copy() if self._api_calls is not None else None
new.df_aliased_col_name_to_real_col_name = (
new.df_aliased_col_name_to_real_col_name = deepcopy(
self.df_aliased_col_name_to_real_col_name
)
new._merge_projection_complexity_with_subquery = (
Expand Down
30 changes: 30 additions & 0 deletions tests/integ/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6838,6 +6838,36 @@ def test_dataframe_alias(session):
.select(df1["*"], df3["*"], df2["col1"]),
)

# Regression: aliasing the same DataFrame twice must produce independent
# df_aliased_col_name_to_real_col_name dicts so that col("R","col") resolves
# to the right-side column, not the left-side column.
# Before the fix, SelectStatement.__copy__ assigned the dict by reference,
# causing alias("L") and alias("R") on the same df to share the same dict.
df_self = session.create_dataframe(
[[1, 10], [2, 20], [3, 30]], schema=["id", "val"]
)

# Self-join ON condition using col() alias references: each row should match
# only itself (equi-join on unique key). With the shared-dict bug the ON
# condition degenerates to "id" = "id" (always true), producing a cross-join.
Utils.check_answer(
df_self.alias("L")
.join(df_self.alias("R"), col("L", "id") == col("R", "id"))
.select(col("L", "id"), col("L", "val"), col("R", "val")),
[(1, 10, 10), (2, 20, 20), (3, 30, 30)],
)

# Post-join filter using col() alias references: col("R","val") must resolve
# to the right-side column. With the shared-dict bug it resolved to the
# left-side column, making the filter semantically wrong.
Utils.check_answer(
df_self.alias("L")
.join(df_self.alias("R"), col("L", "id") == col("R", "id"))
.filter(col("R", "val") == 20)
.select(col("L", "id")),
[(2,)],
)


@pytest.mark.skipif(
"config.getoption('local_testing_mode', default=False)",
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/test_deepcopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,36 @@ def _create_select_statement(mock_session, mock_analyzer):
return SelectStatement(from_=from_, analyzer=mock_analyzer)


def test_select_statement_copy_aliases_isolated(mock_session, mock_analyzer):
    """copy.copy(SelectStatement) must produce an independent df_aliased_col_name_to_real_col_name.

    Before the fix, __copy__ assigned the dict by reference. Calling alias("L") then
    alias("R") on the same DataFrame both wrote to the *same* dict, causing col("R","col")
    to resolve to the left-side column after a self-join.

    The mapping used here is a dict of dicts, so isolation is verified at both
    levels: the outer dict and the inner per-alias dicts. A shallow copy would
    satisfy the outer-level checks while still sharing the inner dicts.
    """
    from_ = SelectableEntity(
        SnowflakeTable("TEST_TABLE", session=mock_session), analyzer=mock_analyzer
    )
    original = SelectStatement(from_=from_, analyzer=mock_analyzer)
    original.df_aliased_col_name_to_real_col_name["A"] = {"col": "col"}

    copied = copy.copy(original)

    # The copy must be a distinct object.
    assert (
        copied.df_aliased_col_name_to_real_col_name
        is not original.df_aliased_col_name_to_real_col_name
    )

    # Entries present at copy time must be carried over to the copy.
    assert copied.df_aliased_col_name_to_real_col_name["A"] == {"col": "col"}

    # Mutations to the copy must not affect the original.
    copied.df_aliased_col_name_to_real_col_name["B"] = {"col": "col"}
    assert "B" not in original.df_aliased_col_name_to_real_col_name

    # Mutations to the original must not affect the copy.
    original.df_aliased_col_name_to_real_col_name["C"] = {"col": "col"}
    assert "C" not in copied.df_aliased_col_name_to_real_col_name

    # The inner per-alias dicts must be independent too: editing an existing
    # alias entry through the copy must leave the original's entry untouched.
    copied.df_aliased_col_name_to_real_col_name["A"]["col"] = "other"
    assert original.df_aliased_col_name_to_real_col_name["A"] == {"col": "col"}


@pytest.mark.parametrize(
"selectable_factory,copy_func,reduce_describe_enabled,cte_enabled",
[
Expand Down
Loading