Make describe-reduction attribute inference in SelectStatement.select more defensive

sfc-gh-yuwang · sfc-gh-yuwang · commit 2d1cbf1e7df8 · 2026-04-24T13:41:56.000-07:00
diff --git a/src/snowflake/snowpark/_internal/analyzer/select_statement.py b/src/snowflake/snowpark/_internal/analyzer/select_statement.py
@@ -51,8 +51,10 @@
 
 from snowflake.snowpark._internal.analyzer import analyzer_utils
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
     result_scan_statement,
     schema_value_statement,
+    unquote_if_quoted,
 )
 from snowflake.snowpark._internal.analyzer.binary_expression import And
 from snowflake.snowpark._internal.analyzer.expression import (
@@ -85,8 +87,10 @@
     has_invalid_projection_merge_functions,
 )
 from snowflake.snowpark._internal.utils import (
-    is_sql_select_statement,
+    ALREADY_QUOTED,
     ExprAliasUpdateDict,
+    is_sql_select_statement,
+    quote_name,
 )
 import snowflake.snowpark.context as context
 
@@ -1591,51 +1595,71 @@ def select(self, cols: List[Expression]) -> "SelectStatement":
 
         # When describe reduction is on and the inner select already has resolved
         # attributes, infer new.attributes for this outer select by reusing datatype and
-        # nullable from the subquery: (1) index attributes by name, (2) walk
-        # new.projection, (3) only handle plain columns or Alias(column) — anything
-        # else aborts without setting partial attributes, (4) map each case to an
-        # Attribute named for the projected column, (5) assign only if every output
-        # column was inferred (length matches projection).
+        # nullable from the subquery: (0) skip if parent column names collide, (1) index
+        # attributes by normalized name, (2) walk new.projection, (3) only handle plain
+        # columns or Alias(column), (4) resolve source via quoted-identifier-aware lookup,
+        # (5) assign only if every output column was inferred (length matches projection).
         if self._session.reduce_describe_query_enabled and self.attributes is not None:
-            # subquery lookup by name
-            attributes_by_name = {attr.name: attr for attr in self.attributes}
-            inferred_attributes: List[Attribute] = []
-            assert new.projection is not None
-            # infer from each projected expression
-            for expr in new.projection:
-                source_column_name = None
-                projected_column_name = None
-                if isinstance(expr, (Attribute, UnresolvedAttribute)):
-                    # identity projection: output name equals input column
-                    source_column_name = expr.name
-                    projected_column_name = expr.name
-                elif isinstance(expr, Alias) and isinstance(
-                    expr.child, (Attribute, UnresolvedAttribute)
-                ):
-                    # rename: source column from child, output name from alias
-                    source_column_name = expr.child.name
-                    projected_column_name = expr.name
-                else:
-                    # non-simple expression: cannot infer types safely
-                    inferred_attributes = []
-                    break
-
-                source_attr = attributes_by_name.get(source_column_name)
-                if source_attr is None or projected_column_name is None:
-                    # missing subquery column for this projection — abort
-                    inferred_attributes = []
-                    break
-
-                # projected name with subquery type and nullability
-                inferred_attributes.append(
-                    Attribute(
-                        projected_column_name,
-                        source_attr.datatype,
-                        source_attr.nullable,
-                    )
-                )
-            if len(inferred_attributes) == len(new.projection):
-                # only commit when every column was inferred
+            parent_attributes = self.attributes
+            projection = new.projection
+            inferred_attributes: Optional[List[Attribute]] = None
+            # Skip: no projection to walk (do not assert; leave new.attributes unchanged).
+            if projection is not None:
+                # Skip: duplicate output names on the parent — dict/lookup would be ambiguous.
+                if len(parent_attributes) == len({a.name for a in parent_attributes}):
+                    attributes_by_normalized: Dict[str, Attribute] = {}
+                    collision = False
+                    for attr in parent_attributes:
+                        key = _normalized_snowflake_identifier_key(attr.name)
+                        existing = attributes_by_normalized.get(key)
+                        # Skip: two parent columns normalize to the same key.
+                        if existing is not None and existing is not attr:
+                            collision = True
+                            break
+                        attributes_by_normalized[key] = attr
+                    if not collision:
+                        inferred_attributes = []
+                        for expr in projection:
+                            source_column_name: Optional[str] = None
+                            projected_column_name: Optional[str] = None
+                            if isinstance(expr, (Attribute, UnresolvedAttribute)):
+                                source_column_name = expr.name
+                                projected_column_name = expr.name
+                            elif isinstance(expr, Alias) and isinstance(
+                                expr.child, (Attribute, UnresolvedAttribute)
+                            ):
+                                source_column_name = expr.child.name
+                                projected_column_name = expr.name
+                            else:
+                                # Skip: not a plain column or Alias(Attribute|UnresolvedAttribute).
+                                inferred_attributes = []
+                                break
+
+                            if (
+                                source_column_name is None
+                                or projected_column_name is None
+                            ):
+                                # Skip: missing source or projected column name.
+                                inferred_attributes = []
+                                break
+                            source_attr = attributes_by_normalized.get(
+                                _normalized_snowflake_identifier_key(source_column_name)
+                            )
+                            # Skip: no parent column for this source name.
+                            if source_attr is None:
+                                inferred_attributes = []
+                                break
+                            inferred_attributes.append(
+                                Attribute(
+                                    projected_column_name,
+                                    source_attr.datatype,
+                                    source_attr.nullable,
+                                )
+                            )
+                        if len(inferred_attributes) != len(projection):
+                            # Skip: incomplete inference (includes defensive mismatch).
+                            inferred_attributes = None
+            if inferred_attributes is not None:
                 new.attributes = inferred_attributes
 
         new.flatten_disabled = disable_next_level_flatten
@@ -2136,6 +2160,13 @@ class DeriveColumnDependencyError(Exception):
     """When deriving column dependencies from the subquery."""
 
 
+def _normalized_snowflake_identifier_key(name: str) -> str:
+    """Return the canonical quoted key for ``name``: names matching ``ALREADY_QUOTED`` are unquoted and re-quoted without upper-casing; all other names go through ``quote_name``."""
+    if ALREADY_QUOTED.match(name):  # name is already written as a delimited (quoted) identifier
+        return quote_name_without_upper_casing(unquote_if_quoted(name))  # presumably normalizes the quoting while keeping case as written -- confirm in analyzer_utils
+    return quote_name(name)  # presumably applies Snowflake upper-casing to plain unquoted names -- confirm in utils.quote_name
+
+
 def parse_column_name(
     column: Expression,
     analyzer: "Analyzer",
diff --git a/tests/integ/test_reduce_describe_query.py b/tests/integ/test_reduce_describe_query.py
@@ -563,6 +563,26 @@ def test_chained_simple_renames_infer_from_previous_metadata(session):
             _ = df2._plan.attributes
 
 
+def test_quoted_case_sensitive_sql_column_metadata_inference(session):
+    """Selecting a quoted mixed-case column from a session.sql DataFrame yields attributes named '"MixedCase"', with zero DESCRIBE calls when reduce_describe_query_enabled is on."""
+    df = session.sql('SELECT 1 AS "MixedCase"')
+    with SqlCounter(query_count=0, describe_count=1, strict=False):  # non-strict: the describe count is not enforced exactly here
+        _ = df.schema  # resolve the parent's schema up front so the chained select has attributes to reuse
+
+    df2 = df.select(col('"MixedCase"'))
+    if session.reduce_describe_query_enabled:  # inferred metadata is only expected when the feature is on
+        assert df2._plan._metadata.attributes is not None
+        assert len(df2._plan._metadata.attributes) == 1
+        assert df2._plan._metadata.attributes[0].name == '"MixedCase"'  # quoted name keeps its case
+
+    expected_describe = 0 if session.reduce_describe_query_enabled else 1  # feature off: one DESCRIBE is expected
+    with SqlCounter(query_count=0, describe_count=expected_describe):
+        attrs = df2._plan.attributes
+    assert attrs is not None
+    assert len(attrs) == 1
+    assert attrs[0].name == '"MixedCase"'
+
+
 def test_non_simple_projection_skips_metadata_inference(session):
     """Expressions other than plain column or simple alias(column) do not infer attributes."""
     df = session.create_dataframe([[1, 2]], schema=["a", "b"])