diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f7f70d53..f7a5dd8c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Fix missing f-string prefix in `JobRunsApi.submit` debug log ([#1471](https://github.com/databricks/dbt-databricks/pull/1471)) - Fix capability-branching macros falling through to their legacy path at parse/compile time on SQL warehouses. The parse-time stub of `has_dbr_capability` now returns `True` on warehouse profiles for capabilities flagged `sql_warehouse_supported`, so macros select the modern branch during compilation instead of the legacy fallback. ([#1449](https://github.com/databricks/dbt-databricks/pull/1449) closes [#1331](https://github.com/databricks/dbt-databricks/issues/1331)) - Fix snapshots not applying `databricks_tags` on columns ([#1442](https://github.com/databricks/dbt-databricks/pull/1442) closes [#1441](https://github.com/databricks/dbt-databricks/issues/1441)) +- Skip `DESCRIBE TABLE EXTENDED ... AS JSON` for foreign/federated tables in `get_columns_in_relation`, avoiding repeated failures and extra latency on those sources ([#1472](https://github.com/databricks/dbt-databricks/pull/1472)) ### Under the Hood diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py index cc9be7a5b..be2560408 100644 --- a/dbt/adapters/databricks/impl.py +++ b/dbt/adapters/databricks/impl.py @@ -644,6 +644,7 @@ def get_columns_in_relation( # type: ignore[override] # TODO: Replace with streaming table capability check when 17.1 is current version # for SQL warehouses or relation.type == DatabricksRelationType.StreamingTable + or relation.is_foreign_table ) return self.get_column_behavior.get_columns_in_relation(self, relation, use_legacy_logic) diff --git a/tests/functional/adapter/columns/fixtures.py b/tests/functional/adapter/columns/fixtures.py index 3fe65dcac..f45329f5d 100644 --- a/tests/functional/adapter/columns/fixtures.py +++ b/tests/functional/adapter/columns/fixtures.py @@ -52,3 +52,17 @@ - name: string_col data_type: string """ + +foreign_table_source_model = """ +{{ config(materialized='table') }} +select cast(1 as bigint) as id, 'federated' as name +""" + +foreign_table_source_schema = """ +version: 2 +models: + - name: foreign_table_source + columns: + - name: id + - name: name +""" diff --git a/tests/functional/adapter/columns/test_foreign_table_get_columns.py b/tests/functional/adapter/columns/test_foreign_table_get_columns.py new file mode 100644 index 000000000..4d6aedc07 --- /dev/null +++ b/tests/functional/adapter/columns/test_foreign_table_get_columns.py @@ -0,0 +1,60 @@ +import pytest +from dbt.tests import util + +from dbt.adapters.databricks.column import DatabricksColumn +from dbt.adapters.databricks.relation import DatabricksRelation, DatabricksRelationType +from tests.functional.adapter.columns import fixtures +from tests.functional.adapter.fixtures import ( + RequiresDescribeAsJsonCapabilityMixin, + fail_if_get_columns_as_json_called_macros, +) + + +# The HMS cluster profile uses hive_metastore, where get_columns_in_relation already +# takes the legacy path via is_hive_metastore() — before the foreign-table check +# matters. This test only validates the fix on UC profiles where JSON column +# metadata would otherwise be preferred. +@pytest.mark.skip_profile("databricks_cluster") +class TestForeignTableGetColumns(RequiresDescribeAsJsonCapabilityMixin): + """Foreign tables must fetch columns without attempting DESCRIBE AS JSON.""" + + @pytest.fixture(scope="class") + def models(self): + # Lakehouse Federation foreign tables aren't available in CI, so we + # materialize a normal UC table and assert column fetch works when the + # relation is typed as Foreign (see test method below). + return { + "foreign_table_source.sql": fixtures.foreign_table_source_model, + "schema.yml": fixtures.foreign_table_source_schema, + } + + @pytest.fixture(scope="class") + def macros(self): + return {"fail_if_get_columns_as_json_called.sql": fail_if_get_columns_as_json_called_macros} + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + util.run_dbt(["debug", "--connection"]) + util.run_dbt(["run"]) + + @pytest.fixture(scope="class") + def expected_columns(self): + return [ + DatabricksColumn(column="id", dtype="bigint"), + DatabricksColumn(column="name", dtype="string"), + ] + + def test_foreign_table_get_columns_returns_expected_columns(self, project, expected_columns): + # No real federated table in CI — reuse the managed table above but mark + # the relation Foreign so get_columns_in_relation exercises that code path. + foreign_relation = DatabricksRelation.create( + database=project.database, + schema=project.test_schema, + identifier="foreign_table_source", + type=DatabricksRelationType.Foreign, + ) + + with project.adapter.connection_named("_test"): + actual_columns = project.adapter.get_columns_in_relation(foreign_relation) + + assert actual_columns == expected_columns diff --git a/tests/functional/adapter/fixtures.py b/tests/functional/adapter/fixtures.py index 9bcec8112..ec38a0cff 100644 --- a/tests/functional/adapter/fixtures.py +++ b/tests/functional/adapter/fixtures.py @@ -19,6 +19,12 @@ {% endmacro %} """ +fail_if_get_columns_as_json_called_macros = """ +{% macro get_columns_comments_as_json(relation) %} + {{ exceptions.raise_compiler_error("get_columns_comments_as_json should not be called") }} +{% endmacro %} +""" + class MaterializationV1Mixin: @pytest.fixture(scope="class") diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index 0c2b5ca80..cdc94fd75 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -1268,6 +1268,30 @@ def test_get_columns_materialized_view(self, mock_get_columns, adapter, unity_re assert result[0].dtype == "string" assert result[0].comment == "mv col" + @patch( + "dbt.adapters.databricks.behaviors.columns.GetColumnsByDescribe._get_columns_with_comments" + ) + def test_get_columns_foreign_table_uses_legacy_logic( + self, mock_get_columns, adapter, unity_relation + ): + foreign_relation = DatabricksRelation.create( + database=unity_relation.database, + schema=unity_relation.schema, + identifier=unity_relation.identifier, + type=DatabricksRelationType.Foreign, + ) + # Foreign/federated tables don't support AS JSON — always use legacy logic + with patch.object(adapter, "has_capability", return_value=True): + mock_get_columns.return_value = [ + {"col_name": "federated_col", "data_type": "string", "comment": ""}, + ] + result = adapter.get_columns_in_relation(foreign_relation) + assert mock_get_columns.call_count == 1 + mock_get_columns.assert_called_with(adapter, foreign_relation, "get_columns_comments") + assert len(result) == 1 + assert result[0].column == "federated_col" + assert result[0].dtype == "string" + @patch( "dbt.adapters.databricks.behaviors.columns.GetColumnsByDescribe._get_columns_with_comments" )