Skip to content

Commit cc2f49d

Browse files
authored
Merge pull request #673 from Benjamin-Knight/feat/#641-dml-table-refresh
Add DML table refresh method for table materializations (#641).
2 parents 5090436 + a2b3b1f commit cc2f49d

3 files changed

Lines changed: 773 additions & 15 deletions

File tree

dbt/include/sqlserver/macros/materializations/models/table/table.sql

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,19 @@
1717
-- grab current table's grants config for comparison later on
1818
{% set grant_config = config.get('grants') %}
1919

20+
{%- set table_refresh_method = config.get('table_refresh_method', 'rename') -%}
21+
{%- if table_refresh_method not in ['rename', 'dml'] -%}
22+
{{ exceptions.raise_compiler_error(
23+
"Invalid table_refresh_method '" ~ table_refresh_method ~ "'. "
24+
"Valid values are: 'rename' (default), 'dml'."
25+
) }}
26+
{%- endif -%}
27+
{%- set use_dml_refresh = (
28+
table_refresh_method == 'dml'
29+
and existing_relation is not none
30+
and existing_relation.type == 'table'
31+
) -%}
32+
2033
-- drop the temp relations if they exist already in the database
2134
{{ drop_relation_if_exists(preexisting_intermediate_relation) }}
2235
{{ drop_relation_if_exists(preexisting_backup_relation) }}
@@ -26,24 +39,28 @@
2639
-- `BEGIN` happens here:
2740
{{ run_hooks(pre_hooks, inside_transaction=True) }}
2841

29-
-- build model
30-
{% call statement('main') -%}
31-
{{ get_create_table_as_sql(False, intermediate_relation, sql) }}
32-
{%- endcall %}
42+
{% if use_dml_refresh %}
43+
{{ sqlserver__table_dml_refresh(target_relation, sql) }}
44+
{% else %}
45+
-- build model
46+
{% call statement('main') -%}
47+
{{ get_create_table_as_sql(False, intermediate_relation, sql) }}
48+
{%- endcall %}
3349

34-
-- cleanup
35-
{% if existing_relation is not none %}
36-
/* Do the equivalent of rename_if_exists. 'existing_relation' could have been dropped
37-
since the variable was first set. */
38-
{% set existing_relation = load_cached_relation(existing_relation) %}
50+
-- cleanup
3951
{% if existing_relation is not none %}
40-
{{ adapter.rename_relation(existing_relation, backup_relation) }}
52+
/* Do the equivalent of rename_if_exists. 'existing_relation' could have been dropped
53+
since the variable was first set. */
54+
{% set existing_relation = load_cached_relation(existing_relation) %}
55+
{% if existing_relation is not none %}
56+
{{ adapter.rename_relation(existing_relation, backup_relation) }}
57+
{% endif %}
4158
{% endif %}
42-
{% endif %}
4359

44-
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
60+
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
4561

46-
{% do create_indexes(target_relation) %}
62+
{% do create_indexes(target_relation) %}
63+
{% endif %}
4764

4865
{{ run_hooks(post_hooks, inside_transaction=True) }}
4966

@@ -55,8 +72,10 @@
5572
-- `COMMIT` happens here
5673
{{ adapter.commit() }}
5774

58-
-- finally, drop the existing/backup relation after the commit
59-
{{ drop_relation_if_exists(backup_relation) }}
75+
{% if not use_dml_refresh %}
76+
-- finally, drop the existing/backup relation after the commit
77+
{{ drop_relation_if_exists(backup_relation) }}
78+
{% endif %}
6079

6180
{{ run_hooks(post_hooks, inside_transaction=False) }}
6281

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{% macro sqlserver__table_dml_refresh(target_relation, sql) %}
  {#
    DML-only table refresh for use under RCSI.

    Instead of rename-swap (which uses DDL and creates a window where the
    table name doesn't resolve), this macro:
      1. Builds new data into a scratch table via SELECT INTO
      2. Compares schemas — if columns changed, falls back to rename-swap
      3. Swaps data via DELETE + INSERT inside an explicit transaction
         (RCSI ensures concurrent readers see old data until COMMIT)
      4. Cleans up the scratch table

    The scratch table is a regular table with a __dbt_refresh suffix,
    not a global temp table. Leftovers from a failed run are removed by the
    DROP ... IF EXISTS statements at the start of the next run.

    Args:
      target_relation: the existing table relation to refresh in place.
      sql: the compiled model SELECT used to build the new data.

    Emits the statements 'dml_refresh_cleanup_pre', 'dml_refresh_create_view',
    'main', 'dml_refresh_drop_view' and, when the schema is unchanged,
    'dml_refresh_swap' and 'dml_refresh_cleanup_post'.
  #}

  {%- set refresh_relation = target_relation.incorporate(
      path={"identifier": target_relation.identifier ~ '__dbt_refresh'}
  ) -%}
  {%- set tmp_vw_relation = refresh_relation.incorporate(
      path={"identifier": refresh_relation.identifier ~ '__dbt_tmp_vw'}
  ) -%}

  {# Clean up any leftovers from a prior failed run #}
  {% call statement('dml_refresh_cleanup_pre') -%}
    DROP VIEW IF EXISTS {{ tmp_vw_relation.include(database=False) }};
    DROP TABLE IF EXISTS {{ refresh_relation }};
  {%- endcall %}

  {# Wrap the model SQL in a temporary view so that model SQL containing CTEs #}
  {# can still be used as the source of SELECT ... INTO. #}
  {% call statement('dml_refresh_create_view') -%}
    {{ get_create_view_as_sql(tmp_vw_relation, sql) }}
  {%- endcall %}

  {# Named 'main' because dbt requires a statement('main') call in every materialization #}
  {% call statement('main') -%}
    SELECT * INTO {{ refresh_relation }} FROM {{ tmp_vw_relation }};
  {%- endcall %}

  {% call statement('dml_refresh_drop_view') -%}
    DROP VIEW IF EXISTS {{ tmp_vw_relation.include(database=False) }};
  {%- endcall %}

  {# Compare schemas: if columns differ, fall back to rename-swap #}
  {%- set schema_changes = check_for_schema_changes(refresh_relation, target_relation) -%}
  {%- set schema_match = not schema_changes['schema_changed'] -%}

  {% if schema_match %}
    {# Use the target's physical column order for both INSERT and SELECT. #}
    {# The scratch table has the same columns but possibly in a different order, #}
    {# so naming columns explicitly makes the swap order-independent. #}
    {%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}
    {%- set column_list = target_columns | map(attribute='quoted') | join(', ') -%}

    {# Atomic DML swap — RCSI protects concurrent readers. #}
    {# dbt-sqlserver uses autocommit=True and add_begin_query/add_commit_query #}
    {# are no-ops, so this creates a simple (non-nested) transaction. #}
    {# DELETE (not TRUNCATE) is deliberate: the whole table is replaced using DML only. #}
    {# TRY/CATCH guards against data loss: without it, a statement-level error in the #}
    {# INSERT would let the batch continue to COMMIT and persist the DELETE alone. #}
    {# On any error the transaction is rolled back and the error is re-raised. #}
    {% call statement('dml_refresh_swap') -%}
      BEGIN TRY
        BEGIN TRANSACTION;
        DELETE FROM {{ target_relation }};
        INSERT INTO {{ target_relation }} ({{ column_list }})
        SELECT {{ column_list }} FROM {{ refresh_relation }};
        COMMIT TRANSACTION;
      END TRY
      BEGIN CATCH
        IF @@TRANCOUNT > 0 ROLLBACK TRANSACTION;
        THROW;
      END CATCH
    {%- endcall %}

    {# Cleanup scratch table #}
    {% call statement('dml_refresh_cleanup_post') -%}
      DROP TABLE IF EXISTS {{ refresh_relation }};
    {%- endcall %}

  {% else %}
    {# Schema changed — fall back to rename-swap for this run #}
    {{ log("Schema change detected for " ~ target_relation ~ " — falling back to rename-swap", info=true) }}

    {%- set backup_relation_type = target_relation.type -%}
    {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}
    {{ drop_relation_if_exists(backup_relation) }}

    {# Move the current target out of the way; it may have been dropped since it was cached #}
    {% set existing_relation = load_cached_relation(target_relation) %}
    {% if existing_relation is not none %}
      {{ adapter.rename_relation(existing_relation, backup_relation) }}
    {% endif %}

    {# Rename scratch table into position #}
    {{ adapter.rename_relation(refresh_relation, target_relation) }}

    {% do create_indexes(target_relation) %}

    {{ drop_relation_if_exists(backup_relation) }}

    {# scratch table is now the target, nothing to drop #}
  {% endif %}

{% endmacro %}

0 commit comments

Comments
 (0)