diff --git a/CHANGELOG.md b/CHANGELOG.md index c05a046d4..2f50eb22c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Features - Add catalogs.yml v2 support (requires `use_catalogs_v2: true` in dbt-core) ([1440](https://github.com/databricks/dbt-databricks/pull/1440)) +- Add `skip_optimize` model config to opt out of the post-materialization `OPTIMIZE` call without dropping `zorder` / `liquid_clustered_by` / `auto_liquid_cluster` from the table definition. Useful when `OPTIMIZE` is delegated to Predictive Optimization or scheduled out of band. Complements the existing run-wide `DATABRICKS_SKIP_OPTIMIZE` var by allowing project-, folder-, or model-level opt-out via standard dbt config inheritance ([#703](https://github.com/databricks/dbt-databricks/issues/703)). ### Fixes - Apply column-level `databricks_tags` for incremental models on the V1 materialization path (`use_materialization_v2: false`, the default). They were silently dropped at create and on subsequent tag changes; the V1 incremental materialization now applies them, matching the `table` materialization and the V2 path. 
([#1520](https://github.com/databricks/dbt-databricks/pull/1520) closes [#1307](https://github.com/databricks/dbt-databricks/issues/1307))
diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
index a2fe37500..a32f5a97f 100644
--- a/dbt/adapters/databricks/impl.py
+++ b/dbt/adapters/databricks/impl.py
@@ -205,6 +205,8 @@ class DatabricksConfig(AdapterConfig):
     query_tags: Optional[str] = None
     tblproperties: Optional[dict[str, str]] = None
     zorder: Optional[Union[list[str], str]] = None
+    # When true, skip the post-materialization OPTIMIZE; clustering settings are kept.
+    skip_optimize: Optional[bool] = None
     unique_tmp_table_suffix: bool = False
     skip_non_matched_step: Optional[bool] = None
     skip_matched_step: Optional[bool] = None
diff --git a/dbt/include/databricks/macros/relations/optimize.sql b/dbt/include/databricks/macros/relations/optimize.sql
index 0f8fcfd89..318c41217 100644
--- a/dbt/include/databricks/macros/relations/optimize.sql
+++ b/dbt/include/databricks/macros/relations/optimize.sql
@@ -3,7 +3,9 @@
 {% endmacro %}
 
 {%- macro databricks__optimize(relation) -%}
-  {%- if var('DATABRICKS_SKIP_OPTIMIZE', 'false')|lower != 'true' and
+  {#- Normalize with `|lower` (as for the vars below) so True and 'true' both opt out. -#}
+  {%- if config.get('skip_optimize', false)|lower == 'true' -%}
+  {%- elif var('DATABRICKS_SKIP_OPTIMIZE', 'false')|lower != 'true' and
         var('databricks_skip_optimize', 'false')|lower != 'true' and
         adapter.resolve_file_format(config) == 'delta' -%}
     {%- if (config.get('zorder', False) or config.get('liquid_clustered_by', False)) or config.get('auto_liquid_cluster', False) -%}
diff --git a/tests/unit/macros/relations/test_optimize_macros.py b/tests/unit/macros/relations/test_optimize_macros.py
index ef452ed9f..efcf5032e 100644
--- a/tests/unit/macros/relations/test_optimize_macros.py
+++ b/tests/unit/macros/relations/test_optimize_macros.py
@@ -41,3 +41,22 @@ def test_macros_optimize_with_skip(self, key_val, var, template_bundle):
         r = self.render_bundle(template_bundle, "optimize")
 
         assert r == ""
+
+    @pytest.mark.parametrize("skip_val", [True, "true"])
+    @pytest.mark.parametrize(
+        "cluster_key,cluster_val",
+        [
+            ("zorder", "foo"),
+            ("liquid_clustered_by", ["foo"]),
+            ("auto_liquid_cluster", True),
+        ],
+    )
+    def test_macros_optimize_with_skip_optimize_config(
+        self, cluster_key, cluster_val, skip_val, config, template_bundle
+    ):
+        """OPTIMIZE is skipped for every clustering style when skip_optimize is set."""
+        config[cluster_key] = cluster_val
+        config["skip_optimize"] = skip_val
+        r = self.render_bundle(template_bundle, "optimize")
+
+        assert r == ""