From bf8dbd04973318431c009efafe54c0ec3371c5fa Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Fri, 29 May 2026 17:39:38 +0000 Subject: [PATCH 1/6] poc fix --- .../snowpark/_internal/compiler/cte_utils.py | 9 +++++- .../compiler/repeated_subquery_elimination.py | 9 ++++++ tests/integ/test_cte.py | 18 +++++++---- tests/unit/test_cte.py | 32 ++++++++++++++++++- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/snowflake/snowpark/_internal/compiler/cte_utils.py b/src/snowflake/snowpark/_internal/compiler/cte_utils.py index b67200c6d5..2c22607e54 100644 --- a/src/snowflake/snowpark/_internal/compiler/cte_utils.py +++ b/src/snowflake/snowpark/_internal/compiler/cte_utils.py @@ -307,7 +307,14 @@ def stringify(d): if query_params: string = f"{string}#{query_params}" if hasattr(node, "expr_to_alias") and node.expr_to_alias: - string = f"{string}#{stringify(node.expr_to_alias)}" + # Sort by alias values (not UUID keys) so that two nodes representing + # the same computation hash identically even when the expression UUID + # keys differ (e.g. two deep-copied SelectStatement nodes for inv1/inv2 + # in a self-join accumulate different UUID keys via add_aliases but carry + # the same set of alias values). Different alias values (e.g. when + # _disambiguate adds a join suffix such as "_WITH_AD_GROUP") still + # produce different hashes, preserving the SNOW-2261400 fix. + string = f"{string}#{sorted(set(node.expr_to_alias.values()))}" if ( hasattr(node, "df_aliased_col_name_to_real_col_name") and node.df_aliased_col_name_to_real_col_name diff --git a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py index e8287140a7..fe4edd7220 100644 --- a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py +++ b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py @@ -168,6 +168,15 @@ def _update_parents( resolved_with_block = resolved_with_block_map[ node.encoded_node_id_with_query ] + # The CTE was built from an earlier duplicate node that may have + # different expression UUID keys (though the same alias values). + # Merge this node's UUID→alias entries into the shared CTE plan so + # that parent nodes referencing this node's UUIDs can still resolve + # column aliases correctly after substitution. + if hasattr(node, "expr_to_alias") and node.expr_to_alias: + for k, v in node.expr_to_alias.items(): + if k not in resolved_with_block.expr_to_alias: + resolved_with_block.expr_to_alias[k] = v else: if ( self._query_generator.session.reduce_describe_query_enabled diff --git a/tests/integ/test_cte.py b/tests/integ/test_cte.py index bfa9e4cddc..c7996bbe9c 100644 --- a/tests/integ/test_cte.py +++ b/tests/integ/test_cte.py @@ -699,9 +699,10 @@ def test_cte_preserves_join_suffix_aliases(session, use_different_df): # the second one is incorrect join condition as we have rsuffix for join alias assert 'ON ("AD_GROUP_ID_WITH_AD_GROUP" = "AD_GROUP_ID")' in union_sql assert 'ON ("AD_GROUP_ID" = "AD_GROUP_ID")' not in union_sql - # when using different df_ad_group with disambiguation, because rsuffix in join, - # they have different alias map (expr_to_alias), so they are considered different and we can't convert them to a CTE - # However there is still a CTE for create_dataframe call + # Both cases produce 1 CTE: the disambiguated rhs_remapped wrapper nodes hash + # identically (same SQL + same alias values, different UUID keys), so they're + # merged into a single CTE via the expr_to_alias merge fix. The raw VALUES + # table is absorbed inline into that CTE body rather than becoming its own CTE. assert count_number_of_ctes(Utils.normalize_sql(union_sql)) == 1 @@ -874,12 +875,15 @@ def test_sql_simplifier(session): join_count=2, ) with SqlCounter(query_count=0, describe_count=0): - # When adding a lsuffix, expr alias map will be updated, so df2 and df3 are considered - # different and have different ids. So only df1 and df will be converted to a CTE + # With value-sort hashing, df1/df2/df3 now hash identically (same SQL + + # same alias values, different UUID keys). df2 and df3 are replaced with a + # shared CTE, but df1's left-join position remains inline. That gives 2 + # CTEs (base VALUES + filtered df1) and the filter appears twice (once in + # the CTE body, once inline for the left-join position). assert ( - count_number_of_ctes(Utils.normalize_sql(df6.queries["queries"][-1])) == 1 + count_number_of_ctes(Utils.normalize_sql(df6.queries["queries"][-1])) == 2 ) - assert Utils.normalize_sql(df6.queries["queries"][-1]).count(filter_clause) == 3 + assert Utils.normalize_sql(df6.queries["queries"][-1]).count(filter_clause) == 2 df7 = df1.with_column("c", lit(1)) df8 = df1.with_column("c", lit(1)).with_column("d", lit(1)) diff --git a/tests/unit/test_cte.py b/tests/unit/test_cte.py index 7f2f13531b..410ee1ec0e 100644 --- a/tests/unit/test_cte.py +++ b/tests/unit/test_cte.py @@ -118,6 +118,9 @@ def test_encode_node_id_with_query_select_sql(mock_session, mock_analyzer): def test_encode_node_id_with_query_includes_aliases(): + # expr_to_alias is hashed by sorted(set(values())) so two nodes with the + # same alias values but different UUID keys (e.g. deep-copied self-join + # branches) produce the same hash. node = SimpleNamespace( sql_query="select col1 from t", query_params=(("p1", 1), ("p2", "x")), @@ -134,13 +137,40 @@ def stringify_dict(d: dict) -> str: if node.query_params: expected_string = f"{expected_string}#{node.query_params}" if node.expr_to_alias: - expected_string = f"{expected_string}#{stringify_dict(node.expr_to_alias)}" + # Values-only sort (no UUID keys) normalizes away UUID differences + expected_string = ( + f"{expected_string}#{sorted(set(node.expr_to_alias.values()))}" + ) if node.df_aliased_col_name_to_real_col_name: expected_string = f"{expected_string}#{stringify_dict(node.df_aliased_col_name_to_real_col_name)}" expected_hash = hashlib.sha256(expected_string.encode()).hexdigest()[:10] assert encode_node_id_with_query(node) == f"{expected_hash}_SimpleNamespace" + # Two nodes with the same SQL and same alias values but different UUID keys + # must hash identically — this is the Q39 self-join case. + node_same_values_diff_keys = SimpleNamespace( + sql_query="select col1 from t", + query_params=(("p1", 1), ("p2", "x")), + expr_to_alias={"uuid_different": "ALIAS1"}, + df_aliased_col_name_to_real_col_name={"ALIAS1": "col1"}, + ) + assert encode_node_id_with_query(node) == encode_node_id_with_query( + node_same_values_diff_keys + ) + + # Two nodes with the same SQL but different alias values must hash + # differently — this preserves the SNOW-2261400 join-suffix fix. + node_different_values = SimpleNamespace( + sql_query="select col1 from t", + query_params=(("p1", 1), ("p2", "x")), + expr_to_alias={"uuid1": "ALIAS1_WITH_SUFFIX"}, + df_aliased_col_name_to_real_col_name={"ALIAS1": "col1"}, + ) + assert encode_node_id_with_query(node) != encode_node_id_with_query( + node_different_values + ) + def test_select_statement_contains_data_generation(mock_session, mock_analyzer): """SelectStatement.contains_data_generation should detect zero-arg From dd89363072c0235292e2f09d401672d9a11a027f Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Fri, 29 May 2026 21:49:46 +0000 Subject: [PATCH 2/6] fix --- .../snowpark/_internal/compiler/cte_utils.py | 12 +- .../compiler/repeated_subquery_elimination.py | 15 +-- tests/integ/test_cte.py | 113 ++++++++++++++++++ 3 files changed, 124 insertions(+), 16 deletions(-) diff --git a/src/snowflake/snowpark/_internal/compiler/cte_utils.py b/src/snowflake/snowpark/_internal/compiler/cte_utils.py index 2c22607e54..fff7f66b55 100644 --- a/src/snowflake/snowpark/_internal/compiler/cte_utils.py +++ b/src/snowflake/snowpark/_internal/compiler/cte_utils.py @@ -307,13 +307,11 @@ def stringify(d): if query_params: string = f"{string}#{query_params}" if hasattr(node, "expr_to_alias") and node.expr_to_alias: - # Sort by alias values (not UUID keys) so that two nodes representing - # the same computation hash identically even when the expression UUID - # keys differ (e.g. two deep-copied SelectStatement nodes for inv1/inv2 - # in a self-join accumulate different UUID keys via add_aliases but carry - # the same set of alias values). Different alias values (e.g. when - # _disambiguate adds a join suffix such as "_WITH_AD_GROUP") still - # produce different hashes, preserving the SNOW-2261400 fix. + # Hash by alias values only, not the UUID keys, since UUID keys are regenerated on every deep-copy/re-resolve (e.g. the two + # branches of a self-join). This lets nodes representing the same computation hash identically, enabling CTE dedup for self-joins. + # NOTE: since nodes with different UUID keys can now share a CTE, _replace_duplicate_node_with_cte must merge each duplicate's + # UUID→alias entries into the shared CTE so parent re-resolution can resolve any UUID variant (see companion comment there). + # Different alias values (e.g. a "_WITH_AD_GROUP" join suffix from _disambiguate) still hash differently, preserving SNOW-2261400. string = f"{string}#{sorted(set(node.expr_to_alias.values()))}" if ( hasattr(node, "df_aliased_col_name_to_real_col_name") diff --git a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py index fe4edd7220..00be973952 100644 --- a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py +++ b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py @@ -168,15 +168,12 @@ def _update_parents( resolved_with_block = resolved_with_block_map[ node.encoded_node_id_with_query ] - # The CTE was built from an earlier duplicate node that may have - # different expression UUID keys (though the same alias values). - # Merge this node's UUID→alias entries into the shared CTE plan so - # that parent nodes referencing this node's UUIDs can still resolve - # column aliases correctly after substitution. - if hasattr(node, "expr_to_alias") and node.expr_to_alias: - for k, v in node.expr_to_alias.items(): - if k not in resolved_with_block.expr_to_alias: - resolved_with_block.expr_to_alias[k] = v + # encode_query_id hashes expr_to_alias by alias values only, so nodes sharing a hash may carry different UUID→alias + # entries. The parent re-resolves column aliases by this node's UUID keys, which differ from those of the node the CTE + # was built from. Merge this node's entries so every UUID variant resolves; otherwise resolution falls back to the raw + # column name and produces a wrong JOIN condition. + if getattr(node, "expr_to_alias", None): + resolved_with_block.expr_to_alias.update(node.expr_to_alias) else: if ( self._query_generator.session.reduce_describe_query_enabled diff --git a/tests/integ/test_cte.py b/tests/integ/test_cte.py index c7996bbe9c..fcb9a6694a 100644 --- a/tests/integ/test_cte.py +++ b/tests/integ/test_cte.py @@ -2,6 +2,7 @@ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved. # +import copy import re import tracemalloc from contextlib import contextmanager @@ -30,6 +31,8 @@ uuid_string, when_matched, to_timestamp, + stddev_samp, + when, ) from snowflake.snowpark.types import ( StructType, @@ -1953,3 +1956,113 @@ def test_uniform_cte_optimization_depends_on_gen(session, use_bare_random, expec vals = [row["VAL"] for row in result_df.collect()] assert (vals[:5] == vals[5:]) == expect_cte + + +def test_cte_tpcds_q39_style_self_join_deduplication(session): + """TPCDS_Q39-style self-join: filtered aggregation df aliased twice and self-joined. + + Verifies that the shared `inv` computation (group-by + agg + cov filter) is + pushed into a single CTE rather than being inlined once per alias branch. + The CTE body should contain stddev_samp/avg exactly once; the outer query + references it twice (once for inv1, once for inv2). + """ + if not session._sql_simplifier_enabled: + pytest.skip("SQL simplifier is not enabled") + + # Synthetic data shaped like the Q39 inventory result after the inner join: + # (item_sk, warehouse_sk, month, quantity). High-variance values so that + # BOTH months pass cov > 1 for each item/warehouse pair, making an incorrect + # cross-join (4 rows) detectable vs. the correct equi-join (2 rows). + raw = session.create_dataframe( + [ + (10, 1, 1, 10), + ( + 10, + 1, + 1, + 390, + ), # item 10, wh 1, month 1: mean=200, stdev≈268.7, cov≈1.34 > 1 + (10, 1, 2, 20), + ( + 10, + 1, + 2, + 380, + ), # item 10, wh 1, month 2: mean=200, stdev≈254.6, cov≈1.27 > 1 + (20, 2, 1, 5), + ( + 20, + 2, + 1, + 395, + ), # item 20, wh 2, month 1: mean=200, stdev≈275.8, cov≈1.38 > 1 + (20, 2, 2, 30), + ( + 20, + 2, + 2, + 370, + ), # item 20, wh 2, month 2: mean=200, stdev≈240.4, cov≈1.20 > 1 + ], + schema=["i_item_sk", "w_warehouse_sk", "d_moy", "qty"], + ) + + # Mirrors Q39's inner "foo" aggregation subquery. + agg = raw.group_by("i_item_sk", "w_warehouse_sk", "d_moy").agg( + stddev_samp("qty").alias("stdev"), + avg("qty").cast("double").alias("mean"), + ) + + # Mirrors Q39's outer "inv" CTE: compute cov and filter on cov > 1. + # All four (item, warehouse, month) combinations pass cov > 1. + inv = agg.with_column( + "cov", + when(col("mean") == 0, lit(None)).otherwise(col("stdev") / col("mean")), + ).filter(when(col("mean") == 0, lit(0)).otherwise(col("stdev") / col("mean")) > 1) + + inv_r = copy.copy(inv) + result = ( + inv.join(inv_r, on=["i_item_sk", "w_warehouse_sk"], rsuffix="_r") + .filter(col("d_moy") == 1) + .filter(col("d_moy_r") == 2) + ) + + sql = result.queries["queries"][-1] + normalized = Utils.normalize_sql(sql) + + with SqlCounter(query_count=0, describe_count=0): + # The shared `inv` computation should be deduplicated into exactly one CTE. + assert count_number_of_ctes(normalized) == 1 + + # The CTE should appear at least 3 times: once in the WITH definition + # and at least twice in the body (one per alias branch). + cte_name_match = re.search(r"WITH\s+(\w+)\s+AS", normalized) + assert cte_name_match is not None, "expected a WITH CTE in the generated SQL" + cte_name = cte_name_match.group(1) + assert ( + normalized.count(cte_name) >= 3 + ), f"CTE '{cte_name}' should appear in the definition and both join branches" + + # The aggregation (stddev_samp / GROUP BY) must appear exactly once — + # inside the CTE body. Two occurrences would mean `inv` is inlined + # separately for each alias branch instead of being shared. + assert ( + normalized.lower().count("stddev_samp") == 1 + ), "stddev_samp should appear once (in the CTE), not once per alias branch" + assert ( + normalized.upper().count("GROUP BY") == 1 + ), "GROUP BY should appear once (in the CTE), not once per alias branch" + + # Correctness: the CTE-optimized result must match the non-optimized result. + # Correct equi-join on (i_item_sk, w_warehouse_sk) produces 2 rows (item 10 + # and item 20, each pairing their month-1 and month-2 stats). A wrong + # cross-join would produce 4 rows, so this check_result is meaningful. + check_result( + session, + result, + expect_cte_optimized=True, + query_count=1, + describe_count=0, + union_count=0, + join_count=1, + ) From 1580af4af29bf3b79a862f182c091b6a687d3066 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Fri, 29 May 2026 22:19:00 +0000 Subject: [PATCH 3/6] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dd3c2fe06..d9b1f23b32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ - Added `get_wif_token` to `snowflake.snowpark.secrets` for workload identity federation tokens on the Snowflake server (not available in SPCS file-based secret environments). +#### Improvements + +- Improved CTE optimization to deduplicate identical subtrees in self-joins, which were previously emitted as repeated subqueries. + #### Documentation - Clarified that the JDBC driver JAR referenced via `udtf_configs.imports` in `DataFrameReader.jdbc()` must be downloaded from the database vendor and uploaded to a Snowflake stage. From a726afe3e57716f2708ddab86219cb5bac098a53 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Mon, 1 Jun 2026 23:53:35 +0000 Subject: [PATCH 4/6] PR comment --- .../compiler/repeated_subquery_elimination.py | 54 +++++++++++++------ 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py index 00be973952..21613b92d8 100644 --- a/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py +++ b/src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py @@ -108,6 +108,24 @@ def apply(self) -> RepeatedSubqueryEliminationResult: total_num_ctes=self._total_number_ctes, ) + @staticmethod + def _has_alias_conflict( + node: TreeNode, existing_cte: Optional[SnowflakePlan] + ) -> bool: + """Whether sharing ``existing_cte`` for ``node`` would map the same expr_id to a + different alias. encode_query_id hashes expr_to_alias by alias values only, so + nodes mapping the same expr_id to different aliases can collide. Merging such a + node into the shared CTE would silently drop an entry and corrupt parent column + resolution, so in that case we skip the CTE and render the node inline.""" + if existing_cte is None: + return False + node_expr_to_alias = getattr(node, "expr_to_alias", None) or {} + return any( + key in existing_cte.expr_to_alias + and existing_cte.expr_to_alias[key] != alias + for key, alias in node_expr_to_alias.items() + ) + def _replace_duplicate_node_with_cte( self, root: TreeNode, @@ -159,22 +177,20 @@ def _update_parents( if node in visited_nodes: continue - # if the node is a duplicated node and deduplication is not done for the node, - # start the deduplication transformation use CTE - if node.encoded_node_id_with_query in duplicated_node_ids: - if node.encoded_node_id_with_query in resolved_with_block_map: - # if the corresponding CTE block has been created, use the existing - # one. - resolved_with_block = resolved_with_block_map[ - node.encoded_node_id_with_query - ] - # encode_query_id hashes expr_to_alias by alias values only, so nodes sharing a hash may carry different UUID→alias - # entries. The parent re-resolves column aliases by this node's UUID keys, which differ from those of the node the CTE - # was built from. Merge this node's entries so every UUID variant resolves; otherwise resolution falls back to the raw - # column name and produces a wrong JOIN condition. - if getattr(node, "expr_to_alias", None): - resolved_with_block.expr_to_alias.update(node.expr_to_alias) - else: + # Decide whether this node should be represented by a (new or shared) CTE: + # it must be a detected duplicate, and sharing the CTE must not introduce an + # alias conflict (see _has_alias_conflict). When it cannot be a CTE, the node + # is left inline and only the parent-propagation path applies, exactly like a + # non-duplicated node. + resolved_with_block = resolved_with_block_map.get( + node.encoded_node_id_with_query + ) + is_cte_node = node.encoded_node_id_with_query in duplicated_node_ids and ( + not self._has_alias_conflict(node, resolved_with_block) + ) + if is_cte_node: + if resolved_with_block is None: + # no CTE block has been created for this node yet, create one. if ( self._query_generator.session.reduce_describe_query_enabled and context._is_snowpark_connect_compatible_mode @@ -193,6 +209,12 @@ def _update_parents( node.encoded_node_id_with_query ] = resolved_with_block self._total_number_ctes += 1 + elif getattr(node, "expr_to_alias", None): + # reuse the existing CTE block. expr_ids are regenerated on copy, so + # this node's keys differ from the node the CTE was built from; merge + # this node's entries so every expr_id variant stays resolvable during + # parent re-resolution. + resolved_with_block.expr_to_alias.update(node.expr_to_alias) _update_parents( node, should_replace_child=True, new_child=resolved_with_block ) From 4b15904047c65aebbba74cf1790c15e4ee704dc0 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 2 Jun 2026 00:44:58 +0000 Subject: [PATCH 5/6] update --- tests/unit/test_cte.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/unit/test_cte.py b/tests/unit/test_cte.py index 410ee1ec0e..b2ff14a0f8 100644 --- a/tests/unit/test_cte.py +++ b/tests/unit/test_cte.py @@ -30,6 +30,9 @@ encode_node_id_with_query, find_duplicate_subtrees, ) +from snowflake.snowpark._internal.compiler.repeated_subquery_elimination import ( + RepeatedSubqueryElimination, +) def create_test_case1(): @@ -172,6 +175,43 @@ def stringify_dict(d: dict) -> str: ) +def test_has_alias_conflict(): + # encode_query_id hashes expr_to_alias by alias values only, so two nodes can + # share a CTE while carrying different expr_id keys. _has_alias_conflict guards + # the only unsafe case: the same expr_id mapping to a *different* alias, where + # merging would silently drop an entry and corrupt parent column resolution. + has_conflict = RepeatedSubqueryElimination._has_alias_conflict + + node = SimpleNamespace(expr_to_alias={"uuid1": "ALIAS1"}) + + # No existing CTE yet (first occurrence) -> nothing to conflict with. + assert has_conflict(node, None) is False + + # Same expr_id mapped to the same alias -> safe to merge. + existing_same = SimpleNamespace(expr_to_alias={"uuid1": "ALIAS1"}) + assert has_conflict(node, existing_same) is False + + # Disjoint expr_id keys (the normal self-join case: same alias values, fresh + # UUIDs) -> no conflict, the entries simply coexist after merge. + existing_disjoint = SimpleNamespace(expr_to_alias={"uuid2": "ALIAS1"}) + assert has_conflict(node, existing_disjoint) is False + + # Same expr_id mapped to a *different* alias -> conflict, must not share CTE. + existing_conflict = SimpleNamespace(expr_to_alias={"uuid1": "ALIAS2"}) + assert has_conflict(node, existing_conflict) is True + + # A conflict on any one key is enough, even when other keys agree. + node_multi = SimpleNamespace(expr_to_alias={"uuid1": "ALIAS1", "uuid2": "ALIAS2"}) + existing_partial_conflict = SimpleNamespace( + expr_to_alias={"uuid1": "ALIAS1", "uuid2": "DIFFERENT"} + ) + assert has_conflict(node_multi, existing_partial_conflict) is True + + # Node without any expr_to_alias entries can never conflict. + node_empty = SimpleNamespace(expr_to_alias={}) + assert has_conflict(node_empty, existing_conflict) is False + + def test_select_statement_contains_data_generation(mock_session, mock_analyzer): """SelectStatement.contains_data_generation should detect zero-arg nondeterministic functions in projection, where, and order_by.""" From c29f9100f6134d4c83f337a6c1e31dc09e3befe1 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 3 Jun 2026 12:11:57 -0700 Subject: [PATCH 6/6] minor changelog update --- CHANGELOG.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 677d12ab06..5b4925bfea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,6 @@ ### Snowpark Python API Updates -#### Bug Fixes - -- Fixed a bug where `cloudpickle` could not be resolved when registering a Python stored procedure or UDF with `runtime_version='3.13'`. - #### New Features - Added `get_wif_token` to `snowflake.snowpark.secrets` for workload identity federation tokens on the Snowflake server (not available in SPCS file-based secret environments). @@ -15,6 +11,7 @@ #### Bug Fixes - Fixed a bug where calling `DataFrame.alias()` twice on the same DataFrame (e.g. for a self-join) caused both aliases to share the same internal column-mapping dictionary. This made `col("R", "col")` resolve to the same column as `col("L", "col")`, producing incorrect join conditions and filter expressions. +- Fixed a bug where `cloudpickle` could not be resolved when registering a Python stored procedure or UDF with `runtime_version='3.13'`. #### Improvements