Skip to content

Commit a7e021f

Browse files
authored
Fix!: use dialect when generating types for mapping schema (#1531)
* Fix: use dialect when generating types for mapping schema * Add migration script * PR feedback
1 parent b5f8f0b commit a7e021f

3 files changed

Lines changed: 74 additions & 4 deletions

File tree

sqlmesh/core/model/definition.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ def update_schema(
504504
nested_set(
505505
self.mapping_schema,
506506
tuple(str(part) for part in table.parts),
507-
{k: str(v) for k, v in mapping_schema.items()},
507+
{k: v.sql(dialect=self.dialect) for k, v in mapping_schema.items()}, # type: ignore
508508
)
509509
else:
510510
# Reset the entire mapping if at least one upstream dependency is missing from the mapping
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""Generate mapping schema data types using the corresponding model's dialect."""
2+
import json
3+
4+
import pandas as pd
5+
from sqlglot import exp, parse_one
6+
7+
from sqlmesh.utils.migration import index_text_type
8+
9+
10+
def migrate(state_sync):  # type: ignore
    """Rewrite stored snapshots so mapping schema types use each model's dialect.

    Reads every row from the state `_snapshots` table, converts the data types
    in each node's `mapping_schema` via the node's own dialect, then replaces
    the table contents with the migrated rows.
    """
    adapter = state_sync.engine_adapter
    schema = state_sync.schema

    # Qualify the snapshots table with the state schema when one is configured.
    table_name = "_snapshots"
    if schema:
        table_name = f"{schema}.{table_name}"

    query = exp.select("name", "identifier", "version", "snapshot", "kind_name").from_(table_name)

    migrated_rows = []
    for name, identifier, version, raw_snapshot, kind_name in adapter.fetchall(
        query,
        quote_identifiers=True,
    ):
        payload = json.loads(raw_snapshot)
        node = payload["node"]

        if node.get("mapping_schema"):
            # Re-render every column type using the model's dialect.
            node["mapping_schema"] = _convert_schema_types(node["mapping_schema"], node["dialect"])

        migrated_rows.append(
            {
                "name": name,
                "identifier": identifier,
                "version": version,
                "snapshot": json.dumps(payload),
                "kind_name": kind_name,
            }
        )

    if not migrated_rows:
        return

    # Replace the table contents wholesale with the migrated snapshots.
    adapter.delete_from(table_name, "TRUE")

    index_type = index_text_type(adapter.dialect)
    adapter.insert_append(
        table_name,
        pd.DataFrame(migrated_rows),
        columns_to_types={
            "name": exp.DataType.build(index_type),
            "identifier": exp.DataType.build(index_type),
            "version": exp.DataType.build(index_type),
            "snapshot": exp.DataType.build("text"),
            "kind_name": exp.DataType.build(index_type),
        },
        contains_json=True,
    )
57+
58+
def _convert_schema_types(schema, dialect):  # type: ignore
    """Recursively re-render the type strings of a mapping schema in *dialect*.

    Nested dicts (table/catalog levels) are descended into; leaf values are
    parsed as SQL type expressions and re-rendered. Mutates *schema* in place
    and returns it.
    """
    if not schema:
        return schema

    for key in schema:
        value = schema[key]
        schema[key] = (
            _convert_schema_types(value, dialect)
            if isinstance(value, dict)
            else parse_one(value).sql(dialect=dialect)
        )

    return schema

tests/core/test_context.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pytest
77
from pytest_mock.plugin import MockerFixture
8-
from sqlglot import MappingSchema, parse_one
8+
from sqlglot import MappingSchema, exp, parse_one
99
from sqlglot.errors import SchemaError
1010

1111
import sqlmesh.core.constants
@@ -474,9 +474,11 @@ def test_default_schema_and_config(sushi_context_pre_scheduling) -> None:
474474
context.upsert_model(c)
475475

476476
c.update_schema(
477-
MappingSchema({"a": {"col": "int"}}), default_schema="schema", default_catalog="catalog"
477+
MappingSchema({"a": {"col": exp.DataType.build("int")}}),
478+
default_schema="schema",
479+
default_catalog="catalog",
478480
)
479-
assert c.mapping_schema == {"catalog": {"schema": {"a": {"col": "int"}}}}
481+
assert c.mapping_schema == {"catalog": {"schema": {"a": {"col": "INT"}}}}
480482

481483

482484
def test_gateway_macro(sushi_context: Context) -> None:

0 commit comments

Comments (0)