Skip to content

Commit 33ed712

Browse files
Make StarRocks AMVs not recreate themselves during sqlmesh run
Signed-off-by: Mateusz Jukiewicz <mateusz@marketer.tech>
1 parent f258b5e commit 33ed712

6 files changed

Lines changed: 90 additions & 1 deletion

File tree

docs/integrations/engines/starrocks.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,11 @@ You can specify `partitioning`, `distribution`, `order by` and `properties` the
491491

492492
**Notes:**
493493

494-
* If you create materialized views with `replace=true`, SQLMesh may drop and recreate the MV. When an MV is dropped, its data is removed and the MV must be refreshed/rebuilt again.
494+
* SQLMesh does not recreate materialized views on every `sqlmesh run`. Once an MV exists, SQLMesh leaves it in place and lets StarRocks keep it current. This is intentional:
495+
* StarRocks async MVs revalidate themselves automatically, even when the underlying data is dropped, so a periodic drop-and-recreate is unnecessary.
496+
* StarRocks async MVs either refresh automatically (per their `refresh_scheme`) or can be refreshed explicitly with `REFRESH MATERIALIZED VIEW`, which also enables partition-level (incremental) refresh. A SQLMesh-driven recreate would instead force a full rebuild.
497+
498+
The MV is (re)built only when it does not yet exist — for example when you first deploy it, or when a model change produces a new version. To change a materialized view's definition, update the model and run `sqlmesh plan`.
495499
* There are some restrictions on `partitioning`; refer to the StarRocks documentation for the MV partitioning specification.
496500
* StarRocks MV schema supports a column list but does **not** support explicit data types in that list. Column data types come from the `AS SELECT ...` query.
497501
* If you create MVs from a dataframe via the Python API, provide `target_columns_to_types` (a `Dict[str, exp.DataType]`). If you don't care about exact types, you can set all columns to `VARCHAR` as a fallback:

sqlmesh/core/engine_adapter/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class EngineAdapter:
114114
SCHEMA_DIFFER_KWARGS: t.Dict[str, t.Any] = {}
115115
SUPPORTS_TUPLE_IN = True
116116
HAS_VIEW_BINDING = False
117+
RECREATE_MATERIALIZED_VIEW_ON_EVALUATION = True
117118
SUPPORTS_REPLACE_TABLE = True
118119
SUPPORTS_GRANTS = False
119120
DEFAULT_CATALOG_TYPE = DIALECT

sqlmesh/core/engine_adapter/starrocks.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,13 @@ class StarRocksEngineAdapter(
16911691
implement custom MV schema rendering in create_view/_create_materialized_view.
16921692
"""
16931693

1694+
RECREATE_MATERIALIZED_VIEW_ON_EVALUATION = False
1695+
"""
1696+
StarRocks async materialized views maintain themselves: they revalidate automatically even if the
1697+
underlying data is dropped, and the data is kept current either by StarRocks' automatic refresh or
1698+
by an explicit `REFRESH MATERIALIZED VIEW` (which also enables partition-level incremental refresh).
1699+
"""
1700+
16941701
SUPPORTS_REPLACE_TABLE = False
16951702
"""No REPLACE TABLE syntax; use DROP + CREATE instead"""
16961703

sqlmesh/core/snapshot/evaluator.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2734,6 +2734,19 @@ def insert(
27342734
is_materialized_view and is_first_insert
27352735
)
27362736

2737+
# Some engines (e.g. StarRocks) maintain materialized views automatically (via auto/scheduled
2738+
# REFRESH) and can only recreate them via a destructive DROP + CREATE, which deletes the
2739+
# materialized data and forces a full rebuild. For those, an existing MV must not be recreated
2740+
# on routine evaluation (e.g. every `sqlmesh run`); only build it on the first insert (a new
2741+
# version) or when a rebuild is explicitly forced (intervals cleared by `should_force_rebuild`,
2742+
# which sets `is_first_insert`).
2743+
if (
2744+
is_materialized_view
2745+
and not is_first_insert
2746+
and not self.adapter.RECREATE_MATERIALIZED_VIEW_ON_EVALUATION
2747+
):
2748+
must_recreate_view = False
2749+
27372750
if self.adapter.table_exists(table_name) and not must_recreate_view:
27382751
logger.info("Skipping creation of the view '%s'", table_name)
27392752
return

tests/core/engine_adapter/test_starrocks.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,15 @@ def test_create_materialized_view_without_refresh_raises(
350350
view_properties={"replication_num": exp.Literal.string("1")},
351351
)
352352

353+
def test_does_not_recreate_materialized_view_on_evaluation(self):
354+
"""StarRocks async MVs maintain themselves, so SQLMesh must not recreate them on every run.
355+
356+
The adapter opts out of per-evaluation recreation via
357+
RECREATE_MATERIALIZED_VIEW_ON_EVALUATION = False, which the evaluator's ViewStrategy honors
358+
for materialized views that already exist.
359+
"""
360+
assert StarRocksEngineAdapter.RECREATE_MATERIALIZED_VIEW_ON_EVALUATION is False
361+
353362
def test_delete_where_true_optimization(
354363
self, make_mocked_engine_adapter: t.Callable[..., StarRocksEngineAdapter]
355364
):

tests/core/test_snapshot_evaluator.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,61 @@ def test_evaluate_materialized_view(
707707
assert adapter_mock.create_view.call_count == 1
708708

709709

710+
@pytest.mark.parametrize(
711+
"view_exists, has_intervals, expected_create_view_calls",
712+
[
713+
# MV already exists and has intervals -> routine evaluation (e.g. `sqlmesh run`): do NOT recreate
714+
(True, True, 0),
715+
# MV does not exist yet -> first build: create it
716+
(False, False, 1),
717+
# MV missing but intervals present -> still a first insert (table gone): rebuild it
718+
(False, True, 1),
719+
],
720+
)
721+
def test_evaluate_materialized_view_not_recreated_on_evaluation(
722+
adapter_mock,
723+
make_snapshot,
724+
view_exists: bool,
725+
has_intervals: bool,
726+
expected_create_view_calls: int,
727+
):
728+
# Engines that maintain MVs themselves (e.g. StarRocks) opt out of recreating an existing MV on
729+
# every evaluation by setting RECREATE_MATERIALIZED_VIEW_ON_EVALUATION = False.
730+
adapter_mock.RECREATE_MATERIALIZED_VIEW_ON_EVALUATION = False
731+
adapter_mock.table_exists.return_value = view_exists
732+
evaluator = SnapshotEvaluator(adapter_mock)
733+
734+
model = load_sql_based_model(
735+
parse( # type: ignore
736+
"""
737+
MODEL (
738+
name test_schema.test_model,
739+
kind VIEW (
740+
materialized true
741+
)
742+
);
743+
744+
SELECT a::int FROM tbl;
745+
"""
746+
),
747+
)
748+
749+
snapshot = make_snapshot(model)
750+
snapshot.categorize_as(SnapshotChangeCategory.BREAKING)
751+
if has_intervals:
752+
snapshot.add_interval("2023-01-01", "2023-01-01")
753+
754+
evaluator.evaluate(
755+
snapshot,
756+
start="2020-01-01",
757+
end="2020-01-02",
758+
execution_time="2020-01-02",
759+
snapshots={},
760+
)
761+
762+
assert adapter_mock.create_view.call_count == expected_create_view_calls
763+
764+
710765
def test_evaluate_materialized_view_with_partitioned_by_cluster_by(
711766
mocker: MockerFixture, adapter_mock, make_snapshot
712767
):

0 commit comments

Comments
 (0)