Skip to content

Commit a726afe

Browse files
committed
PR comment
1 parent 79d095b commit a726afe

1 file changed

Lines changed: 38 additions & 16 deletions

File tree

src/snowflake/snowpark/_internal/compiler/repeated_subquery_elimination.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,24 @@ def apply(self) -> RepeatedSubqueryEliminationResult:
108108
total_num_ctes=self._total_number_ctes,
109109
)
110110

111+
@staticmethod
112+
def _has_alias_conflict(
113+
node: TreeNode, existing_cte: Optional[SnowflakePlan]
114+
) -> bool:
115+
"""Whether sharing ``existing_cte`` for ``node`` would map the same expr_id to a
116+
different alias. encode_query_id hashes expr_to_alias by alias values only, so
117+
nodes mapping the same expr_id to different aliases can collide. Merging such a
118+
node into the shared CTE would silently drop an entry and corrupt parent column
119+
resolution, so in that case we skip the CTE and render the node inline."""
120+
if existing_cte is None:
121+
return False
122+
node_expr_to_alias = getattr(node, "expr_to_alias", None) or {}
123+
return any(
124+
key in existing_cte.expr_to_alias
125+
and existing_cte.expr_to_alias[key] != alias
126+
for key, alias in node_expr_to_alias.items()
127+
)
128+
111129
def _replace_duplicate_node_with_cte(
112130
self,
113131
root: TreeNode,
@@ -159,22 +177,20 @@ def _update_parents(
159177
if node in visited_nodes:
160178
continue
161179

162-
# if the node is a duplicated node and deduplication is not done for the node,
163-
# start the deduplication transformation use CTE
164-
if node.encoded_node_id_with_query in duplicated_node_ids:
165-
if node.encoded_node_id_with_query in resolved_with_block_map:
166-
# if the corresponding CTE block has been created, use the existing
167-
# one.
168-
resolved_with_block = resolved_with_block_map[
169-
node.encoded_node_id_with_query
170-
]
171-
# encode_query_id hashes expr_to_alias by alias values only, so nodes sharing a hash may carry different UUID→alias
172-
# entries. The parent re-resolves column aliases by this node's UUID keys, which differ from those of the node the CTE
173-
# was built from. Merge this node's entries so every UUID variant resolves; otherwise resolution falls back to the raw
174-
# column name and produces a wrong JOIN condition.
175-
if getattr(node, "expr_to_alias", None):
176-
resolved_with_block.expr_to_alias.update(node.expr_to_alias)
177-
else:
180+
# Decide whether this node should be represented by a (new or shared) CTE:
181+
# it must be a detected duplicate, and sharing the CTE must not introduce an
182+
# alias conflict (see _has_alias_conflict). When it cannot be a CTE, the node
183+
# is left inline and only the parent-propagation path applies, exactly like a
184+
# non-duplicated node.
185+
resolved_with_block = resolved_with_block_map.get(
186+
node.encoded_node_id_with_query
187+
)
188+
is_cte_node = node.encoded_node_id_with_query in duplicated_node_ids and (
189+
not self._has_alias_conflict(node, resolved_with_block)
190+
)
191+
if is_cte_node:
192+
if resolved_with_block is None:
193+
# no CTE block has been created for this node yet, create one.
178194
if (
179195
self._query_generator.session.reduce_describe_query_enabled
180196
and context._is_snowpark_connect_compatible_mode
@@ -193,6 +209,12 @@ def _update_parents(
193209
node.encoded_node_id_with_query
194210
] = resolved_with_block
195211
self._total_number_ctes += 1
212+
elif getattr(node, "expr_to_alias", None):
213+
# reuse the existing CTE block. expr_ids are regenerated on copy, so
214+
# this node's keys differ from the node the CTE was built from; merge
215+
# this node's entries so every expr_id variant stays resolvable during
216+
# parent re-resolution.
217+
resolved_with_block.expr_to_alias.update(node.expr_to_alias)
196218
_update_parents(
197219
node, should_replace_child=True, new_child=resolved_with_block
198220
)

0 commit comments

Comments
 (0)