diff --git a/CHANGELOG.md b/CHANGELOG.md index af874d9b1..44ac0fa54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ ### Fixes +- Fix `QueryProcessor` crashing with `IndexError: string index out of range` when `view_definition` is empty or missing, such as when reading from streaming-table sources or freshly-created materialized views ([#1459](https://github.com/databricks/dbt-databricks/issues/1459)) - Fix `metric_view` failing with `METRIC_VIEW_INVALID_VIEW_DEFINITION` when models use bare `{{ ref(...) }}` for the `source:` field ([#1361](https://github.com/databricks/dbt-databricks/issues/1361)) - Fix `RefreshConfig.__eq__` self/other typo where two configs with the same `cron` but different `time_zone_value` compared equal - Fix streaming-table DROP-SCHEDULE path that was silently filtered out of the changeset diff --git a/dbt/adapters/databricks/relation_configs/query.py b/dbt/adapters/databricks/relation_configs/query.py index 12d1e3ce1..074b3ccce 100644 --- a/dbt/adapters/databricks/relation_configs/query.py +++ b/dbt/adapters/databricks/relation_configs/query.py @@ -27,7 +27,14 @@ class QueryProcessor(DatabricksComponentProcessor[QueryConfig]): @classmethod def from_relation_results(cls, result: RelationResults) -> QueryConfig: - view_definition = result["information_schema.views"]["view_definition"].strip() + view_definition_raw = result["information_schema.views"].get("view_definition") or "" + view_definition = view_definition_raw.strip() + if not view_definition: + # information_schema.views row was missing or empty — common for + # streaming-sourced derived views and freshly-created MVs whose + # metadata has not yet propagated. Return an empty QueryConfig so + # the materialization falls through to the create path. 
+ return QueryConfig(query="") if view_definition[0] == "(" and view_definition[-1] == ")": view_definition = view_definition[1:-1] return QueryConfig(query=SqlUtils.clean_sql(view_definition)) @@ -48,5 +55,7 @@ class DescribeQueryProcessor(QueryProcessor): @classmethod def from_relation_results(cls, result: RelationResults) -> QueryConfig: table = result["describe_extended"] - row = next(x for x in table if x[0] == "View Text") + row = next((x for x in table if x[0] == "View Text"), None) + if row is None: + return QueryConfig(query="") return QueryConfig(query=SqlUtils.clean_sql(row[1])) diff --git a/tests/unit/relation_configs/test_query.py b/tests/unit/relation_configs/test_query.py index dd58e2c5d..7d66f9eb9 100644 --- a/tests/unit/relation_configs/test_query.py +++ b/tests/unit/relation_configs/test_query.py @@ -4,7 +4,11 @@ from agate import Row from dbt.exceptions import DbtRuntimeError -from dbt.adapters.databricks.relation_configs.query import QueryConfig, QueryProcessor +from dbt.adapters.databricks.relation_configs.query import ( + DescribeQueryProcessor, + QueryConfig, + QueryProcessor, +) sql = "select * from foo" @@ -50,3 +54,53 @@ def test_get_diff__different_query(self): } other = QueryProcessor.from_relation_results(results) assert model.get_diff(other) is model + + def test_from_results__empty_view_definition(self): + """view_definition is empty string — should return QueryConfig(query='') not IndexError.""" + results = {"information_schema.views": Row(["", ""], ["view_definition", "comment"])} + spec = QueryProcessor.from_relation_results(results) + assert spec == QueryConfig(query="") + + def test_from_results__none_view_definition(self): + """view_definition is None — should return QueryConfig(query='').""" + results = {"information_schema.views": Row([None, ""], ["view_definition", "comment"])} + spec = QueryProcessor.from_relation_results(results) + assert spec == QueryConfig(query="") + + def test_from_results__missing_row(self): + """No 
row returned (e.g. streaming table source) — should return QueryConfig(query='').""" + empty_row = Row(values=set()) + results = {"information_schema.views": empty_row} + spec = QueryProcessor.from_relation_results(results) + assert spec == QueryConfig(query="") + + def test_get_diff__empty_existing_query(self): + """Empty persisted query (information_schema row missing) — any new query is a diff.""" + existing = QueryConfig(query="") + new = QueryConfig(query="select 1") + assert new.get_diff(existing) is new + + +class TestDescribeQueryProcessor: + def test_from_results__no_view_text_row(self): + """describe_extended has no 'View Text' row — should return QueryConfig(query='').""" + results = { + "describe_extended": [ + ("col_name", "data_type", "comment"), + ("id", "int", ""), + ] + } + spec = DescribeQueryProcessor.from_relation_results(results) + assert spec == QueryConfig(query="") + + def test_from_results__with_view_text_row(self): + """describe_extended has a valid 'View Text' row — should extract query.""" + sql = "select 1 as id" + results = { + "describe_extended": [ + ("col_name", "data_type", "comment"), + ("View Text", sql, ""), + ] + } + spec = DescribeQueryProcessor.from_relation_results(results) + assert spec == QueryConfig(query=sql)