Skip to content

Commit f40989e

Browse files
authored
test: cover materialized view in-place ALTER (no rebuild) on updateable change (#1522)
## What

Adds functional coverage asserting that an **updateable-component** change to a materialized view — `databricks_tags` or the refresh `schedule` — is applied via an **in-place `ALTER`** and does **not** rebuild the MV. This is the `requires_full_refresh=False` branch of `MaterializedViewConfig.get_changeset` (`updateable_component_keys = ["refresh", "tags", "row_filter"]`).

## Why

The existing suite proves this branch only via debug-log substrings (`assert_message_in_logs("Applying ALTER" / "Applying REPLACE")`), which is not server-observable, and the unit test (`test_get_changeset__some_changes`) pins only the `requires_full_refresh=True` case. So "an updateable change does not rebuild the MV" had no server-side coverage.

## How it's proven

Source-row staleness: a row is inserted into the source **after** the MV is materialized. The test confirms the row landed in the source (`count == 3`) yet the MV stays at `2` across each change — an in-place `ALTER` neither recomputes the query nor issues a `REFRESH`, whereas a `CREATE OR REPLACE` (the misclassification this guards against) would pull the new row in.

One class walks a single MV through a tags change, then a refresh-schedule change. To keep the staleness assertion sound, the MV starts `MANUAL` (no schedule) and the refresh step moves it to `EVERY 4 WEEKS`, so the server never auto-refreshes it during the run.

## Testing

`pytest tests/functional/adapter/materialized_view_tests/test_mv_alter_no_rebuild.py` → **1 passed** (profile `databricks_uc_sql_endpoint`). Section collects cleanly; `ruff` / `ruff format` / `mypy` pass.

Test-only change — no changelog entry, consistent with #1511 / #1512 / #1514.
1 parent cc40cea commit f40989e

2 files changed

Lines changed: 137 additions & 0 deletions

File tree

tests/functional/adapter/materialized_view_tests/fixtures.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,40 @@ def materialized_view_with_every(every_value: str) -> str:
220220
) }}
221221
select * from {{ ref('mv_metadata_fetch_seed') }}
222222
"""
223+
224+
225+
# Two-row seed backing the no-rebuild materialized view. The leading newline that a
# triple-quoted literal would otherwise carry is removed by .lstrip().
mv_norebuild_seed_csv = """id,value
1,100
2,200
""".lstrip()

# Updateable-change / no-rebuild fixtures: an identical query whose only difference
# step-to-step is exactly one updateable component (tags, then refresh schedule).
# The MV starts MANUAL (no schedule) and the refresh step moves it to EVERY 4 WEEKS.

# Step 1 (baseline): a single tag, no schedule.
mv_norebuild_v1 = """
{{ config(
    materialized='materialized_view',
    on_configuration_change='apply',
    databricks_tags={'lifecycle': 'a'},
) }}
select * from {{ ref('mv_norebuild_seed') }}
"""

# Step 2: only the tags differ from v1 (adds 'extra': 'b').
mv_norebuild_v2_tag_changed = """
{{ config(
    materialized='materialized_view',
    on_configuration_change='apply',
    databricks_tags={'lifecycle': 'a', 'extra': 'b'},
) }}
select * from {{ ref('mv_norebuild_seed') }}
"""

# Step 3: only the refresh schedule differs from v2 (adds EVERY 4 WEEKS).
mv_norebuild_v3_refresh_changed = """
{{ config(
    materialized='materialized_view',
    on_configuration_change='apply',
    databricks_tags={'lifecycle': 'a', 'extra': 'b'},
    schedule={'every': '4 WEEKS'},
) }}
select * from {{ ref('mv_norebuild_seed') }}
"""
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""An updateable-component change (databricks_tags, refresh schedule) is applied via
2+
an in-place ALTER and does not rebuild the materialized view.
3+
4+
Proven against live state via source-row staleness: a row inserted into the source
5+
after the MV is materialized stays invisible across an updateable-only change. An
6+
in-place ALTER neither recomputes the query nor issues a REFRESH; a full CREATE OR
7+
REPLACE (or a REFRESH) would pull the new row in.
8+
"""
9+
10+
import pytest
11+
from dbt.adapters.base import BaseRelation
12+
from dbt.tests import util
13+
14+
from dbt.adapters.databricks.relation import DatabricksRelationType
15+
from dbt.adapters.databricks.relation_configs.materialized_view import (
16+
MaterializedViewConfig,
17+
)
18+
from tests.functional.adapter.materialized_view_tests import fixtures
19+
20+
21+
@pytest.mark.dlt
@pytest.mark.skip_profile("databricks_cluster", "databricks_uc_cluster")
class TestMaterializedViewUpdateableChangeAltersWithoutRebuild:
    """Walk one MV through each updateable component (tags, then refresh schedule),
    asserting each change applies via in-place ALTER and leaves the MV's data untouched."""

    @pytest.fixture(scope="class", autouse=True)
    def seeds(self):
        """Provide the seed CSV that the materialized view selects from."""
        yield {"mv_norebuild_seed.csv": fixtures.mv_norebuild_seed_csv}

    @pytest.fixture(scope="class", autouse=True)
    def models(self):
        """Provide the initial (v1) model; later steps overwrite the file in place."""
        yield {"mv_norebuild.sql": fixtures.mv_norebuild_v1}

    @staticmethod
    def _mv(project) -> BaseRelation:
        """Return the relation for the `mv_norebuild` model, typed as a materialized view."""
        return project.adapter.Relation.create(
            identifier="mv_norebuild",
            schema=project.test_schema,
            database=project.database,
            type=DatabricksRelationType.MaterializedView,
        )

    @staticmethod
    def _seed(project) -> BaseRelation:
        """Return the relation for the `mv_norebuild_seed` seed in the test schema."""
        return project.adapter.Relation.create(
            identifier="mv_norebuild_seed",
            schema=project.test_schema,
            database=project.database,
        )

    @staticmethod
    def _row_count(project, relation: BaseRelation) -> int:
        """Return `select count(*)` for `relation`."""
        return project.run_sql(f"select count(*) from {relation}", fetch="one")[0]

    @staticmethod
    def _refresh_config(project, mv: BaseRelation):
        """Fetch the MV's relation config and return its "refresh" component."""
        # get_relation_config also polls the DLT pipeline until any in-flight refresh
        # completes, so it doubles as a settle point before re-querying row counts.
        with util.get_connection(project.adapter):
            cfg = project.adapter.get_relation_config(mv)
        assert isinstance(cfg, MaterializedViewConfig)
        return cfg.config["refresh"]

    def test_updateable_change_does_not_rebuild(self, project):
        """Tags-only and schedule-only changes must leave the MV's stale row count intact."""
        util.run_dbt(["seed"])
        util.run_dbt(["run", "--models", "mv_norebuild"])
        mv = self._mv(project)
        seed = self._seed(project)
        assert self._row_count(project, mv) == 2
        # MANUAL to start: the server never auto-refreshes it (no schedule).
        assert self._refresh_config(project, mv).mode.value == "manual"

        # Mutate the source AFTER the MV is materialized.
        project.run_sql(f"insert into {seed} values (3, 300)")
        # The insert really landed in the source (so a rebuild/refresh WOULD show 3)...
        assert self._row_count(project, seed) == 3
        # ...yet the MV is still stale at 2 (not auto-refreshed).
        assert self._row_count(project, mv) == 2

        # 1) tags change -> in-place ALTER SET TAGS, no rebuild.
        util.write_file(fixtures.mv_norebuild_v2_tag_changed, "models", "mv_norebuild.sql")
        util.run_dbt(["run", "--models", "mv_norebuild"])
        self._refresh_config(project, mv)  # settle any in-flight op
        assert self._row_count(project, mv) == 2, (
            "MV picked up the post-create source insert on a tags-only change; an"
            " updateable change must apply via in-place ALTER, not CREATE OR REPLACE/REFRESH"
        )

        # 2) refresh-schedule change (MANUAL -> EVERY 4 WEEKS) -> in-place ALTER, no rebuild.
        util.write_file(fixtures.mv_norebuild_v3_refresh_changed, "models", "mv_norebuild.sql")
        util.run_dbt(["run", "--models", "mv_norebuild"])
        refresh = self._refresh_config(project, mv)
        # Effect: the schedule changed.
        assert refresh.mode.value == "every"
        # No rebuild: the post-create source insert is still invisible.
        assert self._row_count(project, mv) == 2, (
            "MV picked up the post-create source insert on a refresh-schedule-only change;"
            " an updateable change must apply via in-place ALTER, not CREATE OR REPLACE"
        )

0 commit comments

Comments
 (0)