diff --git a/CHANGELOG.md b/CHANGELOG.md index f92edf59c..df104bda0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Add support for metric views as a materialization ([#1285](https://github.com/databricks/dbt-databricks/pull/1285)) - Add support for row filters ([#1294](https://github.com/databricks/dbt-databricks/pull/1294)) - Add support for Python UDFs ([#1336](https://github.com/databricks/dbt-databricks/pull/1336)) +- Add support for key-only `databricks_tags` for table and column tagging. This can now be configured by setting tag values to empty strings `""` or `None`. ([#1339](https://github.com/databricks/dbt-databricks/pull/1339)) ## dbt-databricks 1.11.7 (Apr 17, 2026) diff --git a/dbt/adapters/databricks/relation_configs/column_tags.py b/dbt/adapters/databricks/relation_configs/column_tags.py index 93d5ea890..26c8f6aa5 100644 --- a/dbt/adapters/databricks/relation_configs/column_tags.py +++ b/dbt/adapters/databricks/relation_configs/column_tags.py @@ -55,7 +55,7 @@ def from_relation_results(cls, results: RelationResults) -> ColumnTagsConfig: # row contains [column_name, tag_name, tag_value] column_name = str(row[0]) tag_name = str(row[1]) - tag_value = str(row[2]) + tag_value = "" if row[2] is None else str(row[2]) if column_name not in set_column_tags: set_column_tags[column_name] = {} @@ -79,7 +79,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> ColumnTagsConf if databricks_tags: if isinstance(databricks_tags, dict): set_column_tags[col["name"]] = { - str(k): str(v) for k, v in databricks_tags.items() + str(k): "" if v is None else str(v) for k, v in databricks_tags.items() } else: raise DbtRuntimeError("databricks_tags must be a dictionary") diff --git a/dbt/adapters/databricks/relation_configs/tags.py b/dbt/adapters/databricks/relation_configs/tags.py index 9286bc9b3..757c3fdf0 100644 --- a/dbt/adapters/databricks/relation_configs/tags.py +++ b/dbt/adapters/databricks/relation_configs/tags.py @@ -33,7 +33,7 @@ def from_relation_results(cls, results: RelationResults) -> TagsConfig: if table: for row in table.rows: - tags[str(row[0])] = str(row[1]) + tags[str(row[0])] = "" if row[1] is None else str(row[1]) return TagsConfig(set_tags=tags) @@ -43,7 +43,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> TagsConfig: if not tags: return TagsConfig(set_tags=dict()) if isinstance(tags, dict): - tags = {str(k): str(v) for k, v in tags.items()} + tags = {str(k): "" if v is None else str(v) for k, v in tags.items()} return TagsConfig(set_tags=tags) else: raise DbtRuntimeError("databricks_tags must be a dictionary") diff --git a/tests/functional/adapter/column_tags/fixtures.py b/tests/functional/adapter/column_tags/fixtures.py index 5c26f835b..0e282d222 100644 --- a/tests/functional/adapter/column_tags/fixtures.py +++ b/tests/functional/adapter/column_tags/fixtures.py @@ -14,6 +14,8 @@ databricks_tags: pii: "true" sensitive: "true" + key_only: "" + null_value: """ updated_column_tag_model = """ @@ -30,6 +32,8 @@ databricks_tags: pii: "true" sensitive: "true" + key_only: "" + null_value: """ column_tags_seed = """ diff --git a/tests/functional/adapter/column_tags/test_column_tags.py b/tests/functional/adapter/column_tags/test_column_tags.py index e2407caed..1154b8a7e 100644 --- a/tests/functional/adapter/column_tags/test_column_tags.py +++ b/tests/functional/adapter/column_tags/test_column_tags.py @@ -32,6 +32,8 @@ def test_column_tags(self, project): expected_tags = { ("account_number", "pii", "true"), ("account_number", "sensitive", "true"), + ("account_number", "key_only", ""), + ("account_number", "null_value", ""), } actual_tags = {(row[0], row[1], row[2]) for row in tags} assert actual_tags == expected_tags @@ -52,6 +54,8 @@ def test_column_tags(self, project): ("id", "pii", "false"), ("account_number", "pii", "true"), ("account_number", "sensitive", "true"), + ("account_number", "key_only", ""), + ("account_number", "null_value", ""), } actual_tags = {(row[0], row[1], row[2]) for row in tags} assert actual_tags == expected_tags diff --git a/tests/functional/adapter/tags/fixtures.py b/tests/functional/adapter/tags/fixtures.py index 67422fd93..3dad09a82 100644 --- a/tests/functional/adapter/tags/fixtures.py +++ b/tests/functional/adapter/tags/fixtures.py @@ -1,7 +1,7 @@ tags_sql = """ {{ config( materialized = 'table', - databricks_tags = {'a': 'b', 'c': 'd'}, + databricks_tags = {'a': 'b', 'c': 'd', 'k': ''}, ) }} select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color @@ -19,7 +19,7 @@ streaming_table_tags_sql = """ {{ config( materialized='streaming_table', - databricks_tags = {'a': 'b', 'c': 'd'}, + databricks_tags = {'a': 'b', 'c': 'd', 'k': ''}, ) }} select * from stream {{ ref('my_seed') }} @@ -54,4 +54,5 @@ def model(dbt, spark): databricks_tags: a: b c: d + k: "" """ diff --git a/tests/functional/adapter/tags/test_databricks_tags.py b/tests/functional/adapter/tags/test_databricks_tags.py index a4eeb7741..e28d11510 100644 --- a/tests/functional/adapter/tags/test_databricks_tags.py +++ b/tests/functional/adapter/tags/test_databricks_tags.py @@ -23,8 +23,8 @@ def test_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 2 - expected_tags = {("a", "b"), ("c", "d")} + assert len(results) == 3 + expected_tags = {("a", "b"), ("c", "d"), ("k", "")} actual_tags = set((row[0], row[1]) for row in results) assert actual_tags == expected_tags @@ -56,8 +56,8 @@ def test_updated_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 3 - expected_tags = {("a", "b"), ("c", "d"), ("e", "f")} + assert len(results) == 4 + expected_tags = {("a", "b"), ("c", "d"), ("k", ""), ("e", "f")} actual_tags = set((row[0], row[1]) for row in results) assert actual_tags == expected_tags @@ -151,7 +151,7 @@ def test_updated_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 3 + assert len(results) == 4 @pytest.mark.python diff --git a/tests/unit/relation_configs/test_column_tags_config.py b/tests/unit/relation_configs/test_column_tags_config.py index 099f9da30..2205138e8 100644 --- a/tests/unit/relation_configs/test_column_tags_config.py +++ b/tests/unit/relation_configs/test_column_tags_config.py @@ -26,7 +26,7 @@ def test_from_relation_results__some(self): "information_schema.column_tags": Table( rows=[ ["col1", "tag_a", "value_a"], - ["col1", "tag_b", "value_b"], + ["col1", "tag_b", ""], # key-only tag ["col2", "tag_c", "value_c"], ], column_names=["column_name", "tag_name", "tag_value"], @@ -35,7 +35,7 @@ def test_from_relation_results__some(self): spec = ColumnTagsProcessor.from_relation_results(results) assert spec == ColumnTagsConfig( set_column_tags={ - "col1": {"tag_a": "value_a", "tag_b": "value_b"}, + "col1": {"tag_a": "value_a", "tag_b": ""}, "col2": {"tag_c": "value_c"}, } ) @@ -54,14 +54,18 @@ def test_from_relation_config__without_column_tags(self): def test_from_relation_config__with_dict(self): model = Mock() model.columns = { - "email": {"_extra": {"databricks_tags": {"pii": "true", "env": "prod"}}}, + "email": { + "_extra": { + "databricks_tags": {"pii": "", "env": "prod", "priority": 0, "enabled": False} + } + }, "id": {"_extra": {}}, "created_at": {}, } spec = ColumnTagsProcessor.from_relation_config(model) assert spec == ColumnTagsConfig( set_column_tags={ - "email": {"pii": "true", "env": "prod"}, + "email": {"pii": "", "env": "prod", "priority": "0", "enabled": "False"}, } ) @@ -71,14 +75,16 @@ def test_from_relation_config__with_column_info(self): "id": ColumnInfo(name="id", _extra={}), "email": ColumnInfo( name="email", - _extra={"databricks_tags": {"pii": "true", "env": "prod"}}, + _extra={ + "databricks_tags": {"pii": "", "env": "prod", "priority": 0, "enabled": False} + }, ), "created_at": ColumnInfo(name="created_at"), } spec = ColumnTagsProcessor.from_relation_config(model) assert spec == ColumnTagsConfig( set_column_tags={ - "email": {"pii": "true", "env": "prod"}, + "email": {"pii": "", "env": "prod", "priority": "0", "enabled": "False"}, } ) diff --git a/tests/unit/relation_configs/test_tags.py b/tests/unit/relation_configs/test_tags.py index e465739b8..a9e4bdc0b 100644 --- a/tests/unit/relation_configs/test_tags.py +++ b/tests/unit/relation_configs/test_tags.py @@ -24,6 +24,15 @@ def test_from_relation_results__some(self): spec = TagsProcessor.from_relation_results(results) assert spec == TagsConfig(set_tags={"a": "valA", "b": "valB"}) + def test_from_relation_results__key_only(self): + results = { + "information_schema.tags": Table( + rows=[["a", ""]], column_names=["tag_name", "tag_value"] + ) + } + spec = TagsProcessor.from_relation_results(results) + assert spec == TagsConfig(set_tags={"a": ""}) + def test_from_relation_config__without_tags(self): model = Mock() model.config.extra = {} @@ -36,6 +45,18 @@ def test_from_relation_config__with_tags(self): spec = TagsProcessor.from_relation_config(model) assert spec == TagsConfig(set_tags={"a": "valA", "b": "1"}) + def test_from_relation_config__with_key_only_tags(self): + model = Mock() + model.config.extra = {"databricks_tags": {"a": "", "b": None}} + spec = TagsProcessor.from_relation_config(model) + assert spec == TagsConfig(set_tags={"a": "", "b": ""}) + + def test_from_relation_config__with_falsy_tags(self): + model = Mock() + model.config.extra = {"databricks_tags": {"priority": 0, "enabled": False}} + spec = TagsProcessor.from_relation_config(model) + assert spec == TagsConfig(set_tags={"priority": "0", "enabled": "False"}) + def test_from_relation_config__with_incorrect_tags(self): model = Mock() model.config.extra = {"databricks_tags": ["a", "b"]} @@ -52,25 +73,25 @@ def test_get_diff__empty_and_some_exist(self): # Tags are "set only" - when config has no tags and relation has tags, # we don't unset the existing tags config = TagsConfig(set_tags={}) - other = TagsConfig(set_tags={"tag": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"tag": "value"}) + diff = config.get_diff(config_old) assert diff is None # No changes needed since we don't unset tags def test_get_diff__some_new_and_empty_existing(self): config = TagsConfig(set_tags={"tag": "value"}) - other = TagsConfig(set_tags={}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={}) + diff = config.get_diff(config_old) assert diff == TagsConfig(set_tags={"tag": "value"}) def test_get_diff__mixed_case(self): # Tags are "set only" - only the new/updated tags are included config = TagsConfig(set_tags={"a": "value", "b": "value"}) - other = TagsConfig(set_tags={"b": "other_value", "c": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"b": "other_value", "c": "value"}) + diff = config.get_diff(config_old) assert diff == TagsConfig(set_tags={"a": "value", "b": "value"}) def test_get_diff__no_changes(self): config = TagsConfig(set_tags={"tag": "value"}) - other = TagsConfig(set_tags={"tag": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"tag": "value"}) + diff = config.get_diff(config_old) assert diff is None