Skip to content

Commit e8f41bf

Browse files
authored
fix!: remove most fields from dbt target (#1601)
1 parent 3a68f9c commit e8f41bf

3 files changed

Lines changed: 90 additions & 2 deletions

File tree

sqlmesh/dbt/target.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@
4242
t.Type[IncrementalUnmanagedKind],
4343
]
4444

45+
# We only serialize a subset of fields in order to avoid persisting sensitive information
46+
SERIALIZABLE_FIELDS = {
47+
"type",
48+
"name",
49+
"database",
50+
"schema_",
51+
}
52+
4553

4654
class TargetConfig(abc.ABC, DbtConfig):
4755
"""
@@ -99,7 +107,7 @@ def to_sqlmesh(self) -> ConnectionConfig:
99107
raise NotImplementedError
100108

101109
def attribute_dict(self) -> AttributeDict:
102-
fields = self.dict().copy()
110+
fields = self.dict(include=SERIALIZABLE_FIELDS).copy()
103111
fields["target_name"] = self.name
104112
return AttributeDict(fields)
105113

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""Remove dbt target fields from snapshots outside of limited list of approved fields"""
2+
import json
3+
4+
import pandas as pd
5+
from sqlglot import exp
6+
7+
from sqlmesh.utils.migration import index_text_type
8+
9+
10+
def migrate(state_sync):  # type: ignore
    """Strip non-approved dbt target fields from every stored snapshot.

    Reads all rows of the ``_snapshots`` table. For each snapshot whose
    ``node.jinja_macros.global_objs.target`` is a mapping with a truthy
    ``target_name``, the target is replaced by a dict holding only ``type``,
    ``name``, ``schema``, ``database`` and ``target_name``. The table is
    rewritten (delete everything, then re-insert) only when at least one such
    target was found.

    Args:
        state_sync: Object exposing ``engine_adapter`` and ``schema``.
    """
    engine_adapter = state_sync.engine_adapter
    schema = state_sync.schema
    snapshots_table = "_snapshots"
    if schema:
        snapshots_table = f"{schema}.{snapshots_table}"

    new_snapshots = []
    found_dbt_target = False
    for name, identifier, version, snapshot, kind_name in engine_adapter.fetchall(
        exp.select("name", "identifier", "version", "snapshot", "kind_name").from_(snapshots_table),
        quote_identifiers=True,
    ):
        parsed_snapshot = json.loads(snapshot)
        node = parsed_snapshot["node"]
        # Any level of this path may be missing, null, or (for a user-defined
        # `target` global unrelated to dbt) not a mapping at all, so guard each
        # step instead of chaining `.get` calls that would raise AttributeError.
        global_objs = _as_dict(_as_dict(node.get("jinja_macros")).get("global_objs"))
        dbt_target = global_objs.get("target")
        # Double check that `target_name` exists as a field since we know that all dbt targets have `target_name`
        # We do this in case someone has a target macro defined that is not related to dbt
        if isinstance(dbt_target, dict) and dbt_target.get("target_name"):
            found_dbt_target = True
            # `global_objs` is the dict stored inside `parsed_snapshot`, so this
            # assignment updates the payload that is serialized below.
            global_objs["target"] = {
                "type": dbt_target.get("type", "None"),
                "name": dbt_target.get("name", "None"),
                "schema": dbt_target.get("schema", "None"),
                "database": dbt_target.get("database", "None"),
                "target_name": dbt_target["target_name"],
            }

        new_snapshots.append(
            {
                "name": name,
                "identifier": identifier,
                "version": version,
                "snapshot": json.dumps(parsed_snapshot),
                "kind_name": kind_name,
            }
        )

    # Leave the table untouched when no snapshot carried a dbt target.
    if found_dbt_target:
        engine_adapter.delete_from(snapshots_table, "TRUE")

        text_type = index_text_type(engine_adapter.dialect)

        engine_adapter.insert_append(
            snapshots_table,
            pd.DataFrame(new_snapshots),
            columns_to_types={
                "name": exp.DataType.build(text_type),
                "identifier": exp.DataType.build(text_type),
                "version": exp.DataType.build(text_type),
                "snapshot": exp.DataType.build("text"),
                "kind_name": exp.DataType.build(text_type),
            },
            contains_json=True,
        )


def _as_dict(value):  # type: ignore
    """Return ``value`` itself when it is a dict, otherwise a new empty dict."""
    return value if isinstance(value, dict) else {}

tests/dbt/test_transformation.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import typing as t
23
from pathlib import Path
34

@@ -19,6 +20,7 @@
1920
SqlModel,
2021
ViewKind,
2122
)
23+
from sqlmesh.core.state_sync.engine_adapter import _snapshot_to_json
2224
from sqlmesh.dbt.builtin import _relation_info_to_relation
2325
from sqlmesh.dbt.column import (
2426
ColumnConfig,
@@ -236,7 +238,9 @@ def test_target_jinja(sushi_test_project: Project):
236238
assert context.render("{{ target.name }}") == "in_memory"
237239
assert context.render("{{ target.schema }}") == "sushi"
238240
assert context.render("{{ target.type }}") == "duckdb"
239-
assert context.render("{{ target.profile_name }}") == "sushi"
241+
# Path and Profile name are not included in serializable fields
242+
assert context.render("{{ target.path }}") == "None"
243+
assert context.render("{{ target.profile_name }}") == "None"
240244

241245

242246
def test_project_name_jinja(sushi_test_project: Project):
@@ -650,3 +654,15 @@ def test_bigquery_table_properties(sushi_test_project: Project, mocker: MockerFi
650654
).table_properties == {
651655
"partition_expiration_days": exp.convert(7),
652656
}
657+
658+
659+
def test_snapshot_json_payload():
    """The serialized snapshot's dbt target holds exactly the expected five fields."""
    context = Context(paths=["tests/fixtures/dbt/sushi_test"])
    top_waiters = context.snapshots["sushi.top_waiters"]
    payload = json.loads(_snapshot_to_json(top_waiters))

    serialized_target = payload["node"]["jinja_macros"]["global_objs"]["target"]
    expected_target = {
        "type": "duckdb",
        "name": "in_memory",
        "schema": "sushi",
        "database": "memory",
        "target_name": "in_memory",
    }
    assert serialized_target == expected_target

0 commit comments

Comments
 (0)