Skip to content

Commit c3cc950

Browse files
SNOW-3384967: reduce describe query generated by alias (#4183)
1 parent f6addbf commit c3cc950

4 files changed

Lines changed: 354 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818

1919
- Added `artifact_repository` support to `udtf_configs` in `session.read.dbapi()`, enabling users to specify a custom artifact repository (e.g. PyPI) for packages used by the internal UDTF during distributed ingestion.
2020

21+
#### Improvements
22+
23+
- When `Session.reduce_describe_query_enabled` is enabled, fewer DESCRIBE queries are issued if the outer query only projects or renames columns from an inner subquery whose column types are already known.
24+
2125
#### Bug Fixes
2226

2327
- Fixed a bug where the `TRY_CAST` reader option was ignored when calling `DataFrameReader.schema().csv()`.

src/snowflake/snowpark/_internal/analyzer/select_statement.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@
8585
has_invalid_projection_merge_functions,
8686
)
8787
from snowflake.snowpark._internal.utils import (
88-
is_sql_select_statement,
8988
ExprAliasUpdateDict,
89+
is_sql_select_statement,
90+
quote_name,
9091
)
9192
import snowflake.snowpark.context as context
9293

@@ -1592,6 +1593,72 @@ def select(self, cols: List[Expression]) -> "SelectStatement":
15921593
)
15931594
)
15941595

1596+
# When describe reduction is on and the inner select already has resolved
1597+
# attributes, infer new.attributes for this outer select by reusing datatype and
1598+
# nullable from the subquery: (0) skip if parent column names collide, (1) index
1599+
# attributes by quote_name (Snowflake identifier rules; invalid delimited forms
1600+
# raise), (2) walk new.projection, (3) only handle plain columns or Alias(column),
1601+
# (4) resolve source via the same quote_name key lookup, (5) assign only if every
1602+
# output column was inferred (length matches projection).
1603+
if self._session.reduce_describe_query_enabled and self.attributes is not None:
1604+
parent_attributes = self.attributes
1605+
projection = new.projection
1606+
inferred_attributes: Optional[List[Attribute]] = None
1607+
# Skip: no projection to walk (do not assert; leave new.attributes unchanged).
1608+
if projection is not None:
1609+
# Skip: duplicate output names on the parent — dict/lookup would be ambiguous.
1610+
attributes_by_normalized: Dict[str, Attribute] = {}
1611+
collision = False
1612+
for attr in parent_attributes:
1613+
key = quote_name(attr.name)
1614+
existing = attributes_by_normalized.get(key)
1615+
# Skip: two parent columns map to the same quote_name key.
1616+
if existing is not None and existing is not attr:
1617+
collision = True
1618+
break
1619+
attributes_by_normalized[key] = attr
1620+
if not collision:
1621+
inferred_attributes = []
1622+
for expr in projection:
1623+
source_column_name: Optional[str] = None
1624+
projected_column_name: Optional[str] = None
1625+
if isinstance(expr, (Attribute, UnresolvedAttribute)):
1626+
source_column_name = expr.name
1627+
projected_column_name = expr.name
1628+
elif isinstance(expr, Alias) and isinstance(
1629+
expr.child, (Attribute, UnresolvedAttribute)
1630+
):
1631+
source_column_name = expr.child.name
1632+
projected_column_name = expr.name
1633+
else:
1634+
# Skip: not a plain column or Alias(Attribute|UnresolvedAttribute).
1635+
inferred_attributes = []
1636+
break
1637+
1638+
if source_column_name is None or projected_column_name is None:
1639+
# Skip: missing projected output name.
1640+
inferred_attributes = []
1641+
break
1642+
source_attr = attributes_by_normalized.get(
1643+
quote_name(source_column_name)
1644+
)
1645+
# Skip: no parent column for this source name.
1646+
if source_attr is None:
1647+
inferred_attributes = []
1648+
break
1649+
inferred_attributes.append(
1650+
Attribute(
1651+
projected_column_name,
1652+
source_attr.datatype,
1653+
source_attr.nullable,
1654+
)
1655+
)
1656+
if len(inferred_attributes) != len(projection):
1657+
# Skip: incomplete inference (includes defensive mismatch).
1658+
inferred_attributes = None
1659+
if inferred_attributes is not None:
1660+
new.attributes = inferred_attributes
1661+
15951662
new.flatten_disabled = disable_next_level_flatten
15961663
assert new.projection is not None
15971664
new._column_states = derive_column_states_from_subquery(

tests/integ/test_cte.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def test_binary(session, type, action):
259259

260260
def test_join_with_alias_dataframe(session):
261261
expected_describe_count = (
262-
3
262+
2
263263
if (session.reduce_describe_query_enabled and session.sql_simplifier_enabled)
264264
else 4
265265
)

0 commit comments

Comments
 (0)