From 104249631102aa4c2056f2f7703234c2db44c765 Mon Sep 17 00:00:00 2001
From: Adam Ling <adam.ling@snowflake.com>
Date: Fri, 29 May 2026 22:38:39 +0000
Subject: [PATCH] Fix DataFrame.alias() sharing column mapping between SelectStatement copies

---
 CHANGELOG.md                                  |  4 +++
 .../_internal/analyzer/select_statement.py    |  2 +-
 tests/integ/test_dataframe.py                 | 30 +++++++++++++++++++
 tests/unit/test_deepcopy.py                   | 30 +++++++++++++++++++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6dd3c2fe06..5a9469eede 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@
 
 - Added `get_wif_token` to `snowflake.snowpark.secrets` for workload identity federation tokens on the Snowflake server (not available in SPCS file-based secret environments).
 
+#### Bug Fixes
+
+- Fixed a bug where calling `DataFrame.alias()` twice on the same DataFrame (e.g. for a self-join) caused both aliases to share the same internal column-mapping dictionary. This made `col("R", "col")` resolve to the same column as `col("L", "col")`, producing incorrect join conditions and filter expressions.
+
 #### Documentation
 
 - Clarified that the JDBC driver JAR referenced via `udtf_configs.imports` in `DataFrameReader.jdbc()` must be downloaded from the database vendor and uploaded to a Snowflake stage.
diff --git a/src/snowflake/snowpark/_internal/analyzer/select_statement.py b/src/snowflake/snowpark/_internal/analyzer/select_statement.py
index f4ea001917..6e43c88ac5 100644
--- a/src/snowflake/snowpark/_internal/analyzer/select_statement.py
+++ b/src/snowflake/snowpark/_internal/analyzer/select_statement.py
@@ -963,7 +963,7 @@ def __copy__(self):
         new._snowflake_plan = None
         new.flatten_disabled = False  # by default a SelectStatement can be flattened.
         new._api_calls = self._api_calls.copy() if self._api_calls is not None else None
-        new.df_aliased_col_name_to_real_col_name = (
+        new.df_aliased_col_name_to_real_col_name = deepcopy(
             self.df_aliased_col_name_to_real_col_name
         )
         new._merge_projection_complexity_with_subquery = (
diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py
index cf3eed77ae..de27f81c4a 100644
--- a/tests/integ/test_dataframe.py
+++ b/tests/integ/test_dataframe.py
@@ -6838,6 +6838,36 @@ def test_dataframe_alias(session):
         .select(df1["*"], df3["*"], df2["col1"]),
     )
 
+    # Regression: aliasing the same DataFrame twice must produce independent
+    # df_aliased_col_name_to_real_col_name dicts so that col("R","col") resolves
+    # to the right-side column, not the left-side column.
+    # Before the fix, SelectStatement.__copy__ assigned the dict by reference,
+    # causing alias("L") and alias("R") on the same df to share the same dict.
+    df_self = session.create_dataframe(
+        [[1, 10], [2, 20], [3, 30]], schema=["id", "val"]
+    )
+
+    # Self-join ON condition using col() alias references: each row should match
+    # only itself (equi-join on unique key).  With the shared-dict bug the ON
+    # condition degenerates to "id" = "id" (always true), producing a cross-join.
+    Utils.check_answer(
+        df_self.alias("L")
+        .join(df_self.alias("R"), col("L", "id") == col("R", "id"))
+        .select(col("L", "id"), col("L", "val"), col("R", "val")),
+        [(1, 10, 10), (2, 20, 20), (3, 30, 30)],
+    )
+
+    # Post-join filter using col() alias references: col("R","val") must resolve
+    # to the right-side column.  With the shared-dict bug it resolved to the
+    # left-side column, making the filter semantically wrong.
+    Utils.check_answer(
+        df_self.alias("L")
+        .join(df_self.alias("R"), col("L", "id") == col("R", "id"))
+        .filter(col("R", "val") == 20)
+        .select(col("L", "id")),
+        [(2,)],
+    )
+
 
 @pytest.mark.skipif(
     "config.getoption('local_testing_mode', default=False)",
diff --git a/tests/unit/test_deepcopy.py b/tests/unit/test_deepcopy.py
index 713ed1819c..acc74ac327 100644
--- a/tests/unit/test_deepcopy.py
+++ b/tests/unit/test_deepcopy.py
@@ -301,6 +301,36 @@ def _create_select_statement(mock_session, mock_analyzer):
     return SelectStatement(from_=from_, analyzer=mock_analyzer)
 
 
+def test_select_statement_copy_aliases_isolated(mock_session, mock_analyzer):
+    """copy.copy(SelectStatement) must deep-copy df_aliased_col_name_to_real_col_name.
+
+    Before the fix, __copy__ assigned the dict by reference, so alias("L") and
+    alias("R") on the same DataFrame wrote to the *same* dict.  The per-alias inner
+    dicts are checked too, so a shallow ``dict.copy()`` would not satisfy this test.
+    """
+    from_ = SelectableEntity(
+        SnowflakeTable("TEST_TABLE", session=mock_session), analyzer=mock_analyzer
+    )
+    original = SelectStatement(from_=from_, analyzer=mock_analyzer)
+    original.df_aliased_col_name_to_real_col_name["A"] = {"col": "col"}
+
+    copied = copy.copy(original)
+    original_map = original.df_aliased_col_name_to_real_col_name
+    copied_map = copied.df_aliased_col_name_to_real_col_name
+
+    # The copy carries the existing alias but shares no dict with the original.
+    assert copied_map["A"] == {"col": "col"}
+    assert copied_map is not original_map
+    assert copied_map["A"] is not original_map["A"]
+
+    # New aliases or columns added on one side must not leak to the other.
+    copied_map["B"] = {"col": "col"}
+    copied_map["A"]["extra"] = "extra"
+    original_map["C"] = {"col": "col"}
+    assert "B" not in original_map and original_map["A"] == {"col": "col"}
+    assert "C" not in copied_map
+
+
 @pytest.mark.parametrize(
     "selectable_factory,copy_func,reduce_describe_enabled,cte_enabled",
     [