Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions sqlglot/optimizer/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,8 @@ def get_source_columns(self, name: str, only_visible: bool = False) -> Sequence[
):
columns = source_expr.named_selects

# in bigquery, unnest structs are automatically scoped as tables, so you can
# directly select a struct field in a query.
# this handles the case where the unnest is statically defined.
# In BigQuery, unnest structs are automatically scoped as tables, so you can directly select
# a struct field in a query. This handles the case where the unnest is statically defined.
if self.dialect.UNNEST_COLUMN_ONLY and isinstance(source_expr, exp.Unnest):
if not source_expr.type or source_expr.type.is_type(exp.DType.UNKNOWN):
unnest_expr = seq_get(source_expr.expressions, 0)
Expand All @@ -178,7 +177,10 @@ def get_source_columns(self, name: str, only_visible: bool = False) -> Sequence[
):
explode_col = source_expr.this.this

if isinstance(explode_col, exp.Column) and source.parent:
# If the column is unqualified at this point, it couldn't be resolved when
# this scope's children were qualified; disambiguating it here would require
# enumerating this very source's columns, i.e. recursing without bound.
if isinstance(explode_col, exp.Column) and explode_col.table and source.parent:
Comment thread
georgesittas marked this conversation as resolved.
col_type = self._get_unnest_column_type(explode_col, source.parent)
Comment on lines +183 to 184

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the stack trace showing the execution flow that leads to the infinite recursion:

  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 182, in get_source_columns
    col_type = self._get_unnest_column_type(explode_col, source.parent)
  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 380, in _get_unnest_column_type
    table_identifier = parent_resolver.get_table(column)
  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 45, in get_table
    table_name = self._get_table_name_from_sources(column_name)
  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 234, in _get_table_name_from_sources
    self._get_all_source_columns()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 220, in _get_all_source_columns
    source_name: self.get_source_columns(source_name)
                 ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
  File "/Users/georgesittas/Dev/sqlglot/sqlglot/optimizer/resolver.py", line 182, in get_source_columns
    col_type = self._get_unnest_column_type(explode_col, source.parent)
  ...

Both the happy and the unhappy paths now work correctly, as demonstrated by the added tests and the existing LATERAL EXPLODE tests passing.

columns.extend(self._struct_field_names(col_type))
elif isinstance(source, Scope) and isinstance(source.expression, exp.SetOperation):
Expand Down
26 changes: 26 additions & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,32 @@ def test_validate_columns(self):
)
self.assertEqual(expression.selects[0].type, exp.DataType.build("DOUBLE", dialect="spark"))

# An unqualified struct field is disambiguated through the lateral's extended columns
schema = {"my_table": {"items": "ARRAY<STRUCT<name STRING, age INT>>"}}
self.assertEqual(
optimizer.qualify.qualify(
parse_one(
"SELECT name FROM my_table LATERAL VIEW EXPLODE(items) ci AS ci",
read="spark",
),
schema=schema,
dialect="spark",
).sql(dialect="spark"),
"SELECT `ci`.`name` AS `name` FROM `my_table` AS `my_table` LATERAL VIEW EXPLODE(`my_table`.`items`) ci AS `ci`",
)

# Resolving an unqualified lateral column whose table is missing from the schema must
# raise instead of recursing infinitely
with self.assertRaisesRegex(OptimizeError, "Column 'ITEMS' could not be resolved"):
optimizer.qualify.qualify(
parse_one(
"SELECT f.value AS v FROM my_db.raw.events, LATERAL FLATTEN(items) AS f",
read="snowflake",
),
schema={"my_db": {"other": {"some_view": {"v": "VARIANT"}}}},
dialect="snowflake",
)

def test_qualify_columns__with_invisible(self):
schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}})
self.check_file("qualify_columns__with_invisible", qualify_columns, schema=schema)
Expand Down
Loading