Skip to content

Commit 578375a

Browse files
tejassp-db and sd-db authored
perf: skip unnecessary metadata fetch calls for tags when not configured (#1387)
**Summary**

- Skips `fetch_tags` and `fetch_column_tags` queries to `information_schema` during incremental and view materializations when the model has no tags configured
- Falls back to fetching metadata when model config is unavailable (safe default)
- No behavior change for models with tags configured — diffs are still computed correctly

**What changed**

- Added `requires_server_metadata_for_diff()` to `TagsConfig` and `ColumnTagsConfig` to signal when server metadata is needed
- `IncrementalTableAPI._describe_relation` and `ViewAPI._describe_relation` now check model config before making tag fetch calls
- `get_relation_config` accepts the model config to pass through to `_describe_relation`
- Fixed duplicate `TagsProcessor` in `StreamingTableConfig.config_components`

**Test plan**

- Unit tests: empty tags (skip), non-empty tags (fetch), null config fallback (fetch), both tags + column tags present, hive_metastore (skip)
- Functional tests: override `fetch_tags`/`fetch_column_tags` macros to raise errors, confirming calls are actually skipped when expected

PECOBLR-2497

---------

Co-authored-by: Shubham Dhal <shubham.dhal@databricks.com>
1 parent 75233c3 commit 578375a

15 files changed

Lines changed: 652 additions & 25 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
### Under the Hood
1515

1616
- **BREAKING:** `databricks_tags` defined at different hierarchy levels (e.g. project-level and model-level) now merge additively instead of the child config completely replacing the parent.
17+
- Skip `information_schema.tags` and `information_schema.column_tags` metadata fetches when table tags and column tags are not configured on a model. ([#1387](https://github.com/databricks/dbt-databricks/pull/1387))
1718

1819
## dbt-databricks 1.11.8 (TBD)
1920

dbt/adapters/databricks/impl.py

Lines changed: 96 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@
8181
StreamingTableConfig,
8282
)
8383
from dbt.adapters.databricks.relation_configs.table_format import TableFormat
84+
from dbt.adapters.databricks.relation_configs.tags import (
85+
TagsProcessor,
86+
)
8487
from dbt.adapters.databricks.relation_configs.tblproperties import TblPropertiesConfig
8588
from dbt.adapters.databricks.relation_configs.view import ViewConfig
8689
from dbt.adapters.databricks.utils import (
@@ -967,17 +970,21 @@ def parse_columns_and_constraints(
967970
return enriched_columns, parsed_constraints
968971

969972
@available.parse(lambda *a, **k: {})
970-
def get_relation_config(self, relation: DatabricksRelation) -> DatabricksRelationConfigBase:
973+
def get_relation_config(
974+
self,
975+
relation: DatabricksRelation,
976+
model_config: Optional[DatabricksRelationConfigBase] = None,
977+
) -> DatabricksRelationConfigBase:
971978
if relation.type == DatabricksRelationType.MaterializedView:
972-
return MaterializedViewAPI.get_from_relation(self, relation)
979+
return MaterializedViewAPI.get_from_relation(self, relation, model_config)
973980
elif relation.type == DatabricksRelationType.StreamingTable:
974-
return StreamingTableAPI.get_from_relation(self, relation)
981+
return StreamingTableAPI.get_from_relation(self, relation, model_config)
975982
elif relation.type == DatabricksRelationType.Table:
976-
return IncrementalTableAPI.get_from_relation(self, relation)
983+
return IncrementalTableAPI.get_from_relation(self, relation, model_config)
977984
elif relation.type == DatabricksRelationType.View:
978-
return ViewAPI.get_from_relation(self, relation)
985+
return ViewAPI.get_from_relation(self, relation, model_config)
979986
elif relation.type == DatabricksRelationType.MetricView:
980-
return MetricViewAPI.get_from_relation(self, relation)
987+
return MetricViewAPI.get_from_relation(self, relation, model_config)
981988
else:
982989
raise NotImplementedError(f"Relation type {relation.type} is not supported.")
983990

@@ -1064,12 +1071,15 @@ def config_type(cls) -> type[DatabricksRelationConfig]:
10641071

10651072
@classmethod
10661073
def get_from_relation(
1067-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1074+
cls,
1075+
adapter: DatabricksAdapter,
1076+
relation: DatabricksRelation,
1077+
model_config: Optional[DatabricksRelationConfigBase] = None,
10681078
) -> DatabricksRelationConfig:
10691079
"""Get the relation config from the relation."""
10701080

10711081
assert relation.type == cls.relation_type
1072-
results = cls._describe_relation(adapter, relation)
1082+
results = cls._describe_relation(adapter, relation, model_config)
10731083
return cls.config_type().from_results(results)
10741084

10751085
@classmethod
@@ -1081,7 +1091,10 @@ def get_from_relation_config(cls, relation_config: RelationConfig) -> Databricks
10811091
@classmethod
10821092
@abstractmethod
10831093
def _describe_relation(
1084-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1094+
cls,
1095+
adapter: DatabricksAdapter,
1096+
relation: DatabricksRelation,
1097+
model_config: Optional[DatabricksRelationConfigBase] = None,
10851098
) -> RelationResults:
10861099
"""Describe the relation and return the results."""
10871100

@@ -1091,11 +1104,14 @@ def _describe_relation(
10911104
class DeltaLiveTableAPIBase(RelationAPIBase[DatabricksRelationConfig]):
10921105
@classmethod
10931106
def get_from_relation(
1094-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1107+
cls,
1108+
adapter: DatabricksAdapter,
1109+
relation: DatabricksRelation,
1110+
model_config: Optional[DatabricksRelationConfigBase] = None,
10951111
) -> DatabricksRelationConfig:
10961112
"""Get the relation config from the relation."""
10971113

1098-
relation_config = super().get_from_relation(adapter, relation)
1114+
relation_config = super().get_from_relation(adapter, relation, model_config)
10991115

11001116
# Ensure any current refreshes are completed before returning the relation config
11011117
tblproperties = cast(TblPropertiesConfig, relation_config.config["tblproperties"])
@@ -1115,7 +1131,10 @@ def config_type(cls) -> type[MaterializedViewConfig]:
11151131

11161132
@classmethod
11171133
def _describe_relation(
1118-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1134+
cls,
1135+
adapter: DatabricksAdapter,
1136+
relation: DatabricksRelation,
1137+
model_config: Optional[DatabricksRelationConfigBase] = None,
11191138
) -> RelationResults:
11201139
kwargs = {"table_name": relation}
11211140
results: RelationResults = dict()
@@ -1127,9 +1146,18 @@ def _describe_relation(
11271146
results["information_schema.views"] = get_first_row(
11281147
adapter.execute_macro("get_view_description", kwargs=kwargs)
11291148
)
1130-
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
11311149
results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs)
1150+
1151+
# To be backward compatible model_config can be None. In that case, tags should be fetched
1152+
# to maintain backward compatibility.
1153+
table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None
1154+
if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff():
1155+
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
1156+
else:
1157+
results["information_schema.tags"] = None
1158+
11321159
results["row_filters"] = adapter.execute_macro("fetch_row_filters", kwargs=kwargs)
1160+
11331161
return results
11341162

11351163

@@ -1142,7 +1170,10 @@ def config_type(cls) -> type[StreamingTableConfig]:
11421170

11431171
@classmethod
11441172
def _describe_relation(
1145-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1173+
cls,
1174+
adapter: DatabricksAdapter,
1175+
relation: DatabricksRelation,
1176+
model_config: Optional[DatabricksRelationConfigBase] = None,
11461177
) -> RelationResults:
11471178
kwargs = {"table_name": relation}
11481179
results: RelationResults = dict()
@@ -1166,16 +1197,37 @@ def config_type(cls) -> type[IncrementalTableConfig]:
11661197

11671198
@classmethod
11681199
def _describe_relation(
1169-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1200+
cls,
1201+
adapter: DatabricksAdapter,
1202+
relation: DatabricksRelation,
1203+
model_config: Optional[DatabricksRelationConfigBase] = None,
11701204
) -> RelationResults:
11711205
results = {}
11721206
kwargs = {"relation": relation}
11731207

11741208
if not relation.is_hive_metastore():
1175-
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
1176-
results["information_schema.column_tags"] = adapter.execute_macro(
1177-
"fetch_column_tags", kwargs=kwargs
1209+
# To be backward compatible model_config can be None. In that case, tags should be
1210+
# fetched to maintain backward compatibility.
1211+
table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None
1212+
if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff():
1213+
results["information_schema.tags"] = adapter.execute_macro(
1214+
"fetch_tags", kwargs=kwargs
1215+
)
1216+
else:
1217+
results["information_schema.tags"] = None
1218+
1219+
# To be backward compatible model_config can be None. In that case, tags should be
1220+
# fetched to maintain backward compatibility.
1221+
column_tag_config = (
1222+
model_config.config.get(ColumnTagsProcessor.name) if model_config else None
11781223
)
1224+
if column_tag_config is None or column_tag_config.requires_server_metadata_for_diff():
1225+
results["information_schema.column_tags"] = adapter.execute_macro(
1226+
"fetch_column_tags", kwargs=kwargs
1227+
)
1228+
else:
1229+
results["information_schema.column_tags"] = None
1230+
11791231
results["non_null_constraint_columns"] = adapter.execute_macro(
11801232
"fetch_non_null_constraint_columns", kwargs=kwargs
11811233
)
@@ -1205,15 +1257,26 @@ def config_type(cls) -> type[ViewConfig]:
12051257

12061258
@classmethod
12071259
def _describe_relation(
1208-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1260+
cls,
1261+
adapter: DatabricksAdapter,
1262+
relation: DatabricksRelation,
1263+
model_config: Optional[DatabricksRelationConfigBase] = None,
12091264
) -> RelationResults:
12101265
results = {}
12111266
kwargs = {"relation": relation}
12121267

12131268
results["information_schema.views"] = get_first_row(
12141269
adapter.execute_macro("get_view_description", kwargs=kwargs)
12151270
)
1216-
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
1271+
1272+
# To be backward compatible model_config can be None. In that case, tags should be fetched
1273+
# to maintain backward compatibility.
1274+
table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None
1275+
if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff():
1276+
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
1277+
else:
1278+
results["information_schema.tags"] = None
1279+
12171280
results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs)
12181281

12191282
kwargs = {"table_name": relation}
@@ -1232,14 +1295,25 @@ def config_type(cls) -> type[MetricViewConfig]:
12321295

12331296
@classmethod
12341297
def _describe_relation(
1235-
cls, adapter: DatabricksAdapter, relation: DatabricksRelation
1298+
cls,
1299+
adapter: DatabricksAdapter,
1300+
relation: DatabricksRelation,
1301+
model_config: Optional[DatabricksRelationConfigBase] = None,
12361302
) -> RelationResults:
12371303
results = {}
12381304
kwargs = {"relation": relation}
1239-
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
12401305
results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs)
1306+
1307+
kwargs = {"relation": relation}
1308+
table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None
1309+
if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff():
1310+
results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs)
1311+
else:
1312+
results["information_schema.tags"] = None
1313+
12411314
kwargs = {"table_name": relation}
12421315
results["describe_extended"] = adapter.execute_macro(
12431316
DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs=kwargs
12441317
)
1318+
12451319
return results

dbt/adapters/databricks/relation_configs/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ def get_diff(self, other: Self) -> Optional[Self]:
3434
return self
3535
return None
3636

37+
def requires_server_metadata_for_diff(self) -> bool:
38+
"""
39+
Indicates whether server metadata is required to compute the diff for this component.
40+
"""
41+
return True
42+
3743

3844
class DatabricksRelationChangeSet(BaseModel):
3945
"""Class for encapsulating the changes that need to be applied to a Databricks relation."""

dbt/adapters/databricks/relation_configs/column_tags.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ def get_diff(self, other: "ColumnTagsConfig") -> Optional["ColumnTagsConfig"]:
4141
return ColumnTagsConfig(set_column_tags=set_column_tags)
4242
return None
4343

44+
def requires_server_metadata_for_diff(self) -> bool:
45+
"""
46+
Indicates whether server metadata is required to compute the diff for this component.
47+
"""
48+
return self.set_column_tags is not None and len(self.set_column_tags) > 0
49+
4450

4551
class ColumnTagsProcessor(DatabricksComponentProcessor[ColumnTagsConfig]):
4652
name: ClassVar[str] = "column_tags"

dbt/adapters/databricks/relation_configs/tags.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ def get_diff(self, other: "TagsConfig") -> Optional["TagsConfig"]:
2222
return TagsConfig(set_tags=self.set_tags)
2323
return None
2424

25+
def requires_server_metadata_for_diff(self) -> bool:
26+
"""
27+
Indicates whether server metadata is required to compute the diff for this component.
28+
"""
29+
return self.set_tags is not None and len(self.set_tags) > 0
30+
2531

2632
class TagsProcessor(DatabricksComponentProcessor[TagsConfig]):
2733
name: ClassVar[str] = "tags"

dbt/include/databricks/macros/materializations/incremental/incremental.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@
133133
{{ set_overwrite_mode('DYNAMIC') }}
134134
{%- endif -%}
135135
{#-- Relation must be merged --#}
136-
{%- set _existing_config = adapter.get_relation_config(existing_relation) -%}
137136
{%- set model_config = adapter.get_config_from_model(config.model) -%}
137+
{%- set _existing_config = adapter.get_relation_config(existing_relation, model_config) -%}
138138
{%- set _configuration_changes = model_config.get_changeset(_existing_config) -%}
139139
{%- call statement('create_temp_relation', language=language) -%}
140140
{{ create_table_as(True, temp_relation, compiled_code, language) }}
@@ -241,8 +241,8 @@
241241
{% macro process_config_changes(target_relation) %}
242242
{% set apply_config_changes = config.get('incremental_apply_config_changes', True) | as_bool %}
243243
{% if apply_config_changes %}
244-
{%- set existing_config = adapter.get_relation_config(target_relation) -%}
245244
{%- set model_config = adapter.get_config_from_model(config.model) -%}
245+
{%- set existing_config = adapter.get_relation_config(target_relation, model_config) -%}
246246
{%- set configuration_changes = model_config.get_changeset(existing_config) -%}
247247
{{ apply_config_changeset(target_relation, model, configuration_changes) }}
248248
{% endif %}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{%- macro get_configuration_changes(existing_relation) -%}
2-
{%- set existing_config = adapter.get_relation_config(existing_relation) -%}
32
{%- set model_config = adapter.get_config_from_model(config.model) -%}
3+
{%- set existing_config = adapter.get_relation_config(existing_relation, model_config) -%}
44
{%- set configuration_changes = model_config.get_changeset(existing_config) -%}
55
{% do return(configuration_changes) %}
66
{%- endmacro -%}

tests/functional/adapter/fixtures.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,21 @@
11
import pytest
22

3+
fail_if_tag_fetch_called_macros = """
4+
{% macro fetch_tags(relation) %}
5+
{{ exceptions.raise_compiler_error("fetch_tags should not be called") }}
6+
{% endmacro %}
7+
"""
8+
9+
fail_if_tag_and_column_tag_fetch_called_macros = """
10+
{% macro fetch_tags(relation) %}
11+
{{ exceptions.raise_compiler_error("fetch_tags should not be called") }}
12+
{% endmacro %}
13+
14+
{% macro fetch_column_tags(relation) %}
15+
{{ exceptions.raise_compiler_error("fetch_column_tags should not be called") }}
16+
{% endmacro %}
17+
"""
18+
319

420
class MaterializationV1Mixin:
521
@pytest.fixture(scope="class")

tests/functional/adapter/incremental/fixtures.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,47 @@
7676
- name: color
7777
"""
7878

79+
metadata_fetch_incremental_sql = """
80+
{{ config(
81+
materialized = 'incremental',
82+
unique_key = 'id',
83+
) }}
84+
85+
select cast(1 as bigint) as id
86+
"""
87+
88+
metadata_fetch_no_tags_schema = """
89+
version: 2
90+
91+
models:
92+
- name: metadata_fetch_incremental
93+
columns:
94+
- name: id
95+
"""
96+
97+
metadata_fetch_table_tags_schema = """
98+
version: 2
99+
100+
models:
101+
- name: metadata_fetch_incremental
102+
config:
103+
databricks_tags:
104+
classification: internal
105+
columns:
106+
- name: id
107+
"""
108+
109+
metadata_fetch_column_tags_schema = """
110+
version: 2
111+
112+
models:
113+
- name: metadata_fetch_incremental
114+
columns:
115+
- name: id
116+
databricks_tags:
117+
classification: internal
118+
"""
119+
79120
tblproperties_a = """
80121
version: 2
81122

0 commit comments

Comments
 (0)