Feat: Introduce format flag for models and audits (#4203)

VaggelisD · web-flow · commit f2f7cdeeb30e · 2025-04-21T18:58:07.000+03:00
diff --git a/docs/concepts/models/overview.md b/docs/concepts/models/overview.md
@@ -457,6 +457,9 @@ to `false` causes SQLMesh to disable query canonicalization & simplification. Th
 ### ignored_rules
 : Specifies which linter rules should be ignored/excluded for this model.
 
+### formatting
+:   Whether the model is formatted by `sqlmesh format`. All models are formatted by default; setting this to `false` causes `sqlmesh format` to skip this model.
+
 ## Incremental Model Properties
 
 These properties can be specified in an incremental model's `kind` definition.
diff --git a/docs/reference/model_configuration.md b/docs/reference/model_configuration.md
@@ -40,7 +40,7 @@ Configuration options for SQLMesh model properties. Supported by all model kinds
 | `gateway`             | Specifies the gateway to use for the execution of this model. When not specified, the default gateway is used.                                                                                                                                                                                                       |       str        |    N     |
 | `optimize_query`             | Whether the model's query should be optimized. This attribute is `true` by default. Setting it to `false` causes SQLMesh to disable query canonicalization & simplification. This should be turned off only if the optimized query leads to errors such as surpassing text limit.                                                                                                                                                                                                      |       bool        |    N     |
 | `ignored_rules`             |  A list of linter rule names (or "ALL") to be ignored/excluded for this model                                                                                                                                                                                             |       str \| array[str]        |    N     |
-
+| `formatting`             | Whether the model will be formatted. All models are formatted by default. Setting this to `false` causes SQLMesh to ignore this model during `sqlmesh format`.                                                                                                                                                                                                      |       bool        |    N     |
 ### Model defaults
 
 The SQLMesh project-level configuration must contain the `model_defaults` key and must specify a value for its `dialect` key. Other values are set automatically unless explicitly overridden in the model definition. Learn more about project-level configuration in the [configuration guide](../guides/configuration.md).
diff --git a/sqlmesh/core/audit/definition.py b/sqlmesh/core/audit/definition.py
@@ -71,6 +71,7 @@ class AuditMixin(AuditCommonMetaMixin):
     defaults: t.Dict[str, exp.Expression]
     expressions_: t.Optional[t.List[exp.Expression]]
     jinja_macros: JinjaMacroRegistry
+    formatting: t.Optional[bool] = Field(default=None, exclude=True)
 
     @property
     def expressions(self) -> t.List[exp.Expression]:
diff --git a/sqlmesh/core/config/model.py b/sqlmesh/core/config/model.py
@@ -60,6 +60,7 @@ class ModelDefaultsConfig(BaseConfig):
     allow_partials: t.Optional[t.Union[str, bool]] = None
     interval_unit: t.Optional[t.Union[str, IntervalUnit]] = None
     enabled: t.Optional[t.Union[str, bool]] = None
+    formatting: t.Optional[t.Union[str, bool]] = None
 
     _model_kind_validator = model_kind_validator
     _on_destructive_change_validator = on_destructive_change_validator
diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
@@ -1091,9 +1091,10 @@ def format(
 
         for target in filtered_targets:
             if (
-                target._path is None
+                target._path is None or target.formatting is False
             ):  # introduced to satisfy type checker as still want to pull filter out as many targets as possible before loop
                 continue
+
             with open(target._path, "r+", encoding="utf-8") as file:
                 before = file.read()
                 expressions = parse(before, default_dialect=self.config_for_node(target).dialect)
diff --git a/sqlmesh/core/model/common.py b/sqlmesh/core/model/common.py
@@ -386,6 +386,7 @@ def sorted_python_env_payloads(python_env: t.Dict[str, Executable]) -> t.List[st
     "allow_partials",
     "enabled",
     "optimize_query",
+    "formatting",
     mode="before",
     check_fields=False,
 )(parse_bool)
diff --git a/sqlmesh/core/model/meta.py b/sqlmesh/core/model/meta.py
@@ -80,6 +80,7 @@ class ModelMeta(_Node):
     ignored_rules_: t.Optional[t.Set[str]] = Field(
         default=None, exclude=True, alias="ignored_rules"
     )
+    formatting: t.Optional[bool] = Field(default=None, exclude=True)
 
     _bool_validator = bool_validator
     _model_kind_validator = model_kind_validator
diff --git a/tests/core/test_audit.py b/tests/core/test_audit.py
@@ -1,3 +1,4 @@
+import json
 import pytest
 from sqlglot import exp, parse_one
 
@@ -959,3 +960,31 @@ def test_multiple_audits_with_same_name():
     # Testing that audit arguments are identical for second and third audit
     # This establishes that identical audits are preserved
     assert model.audits[1][1] == model.audits[2][1]
+
+
+def test_audit_formatting_flag_serde():
+    expressions = parse(
+        """
+        AUDIT (
+            name my_audit,
+            dialect bigquery,
+            formatting false,
+        );
+
+        SELECT * FROM db.table WHERE col = @VAR('test_var')
+    """
+    )
+
+    audit = load_audit(
+        expressions,
+        path="/path/to/audit",
+        dialect="bigquery",
+        variables={"test_var": "test_val", "test_var_unused": "unused_val"},
+    )
+
+    audit_json = audit.json()
+
+    assert "formatting" not in json.loads(audit_json)
+
+    deserialized_audit = ModelAudit.parse_raw(audit_json)
+    assert deserialized_audit.dict() == audit.dict()
diff --git a/tests/core/test_format.py b/tests/core/test_format.py
@@ -8,6 +8,7 @@
 from sqlmesh.core.model import SqlModel, load_sql_based_model
 from tests.utils.test_filesystem import create_temp_file
 from unittest.mock import call
+from sqlmesh.core.config import ModelDefaultsConfig
 
 
 def test_format_files(tmp_path: pathlib.Path, mocker: MockerFixture):
@@ -100,3 +101,46 @@ def test_format_files(tmp_path: pathlib.Path, mocker: MockerFixture):
         upd4
         == "MODEL (\n  name audit.model,\n  audits (\n    inline_audit\n  )\n);\n\nSELECT\n  3 AS item_id;\n\nAUDIT (\n  name inline_audit\n);\n\nSELECT\n  *\nFROM @this_model\nWHERE\n  item_id < 0"
     )
+
+
+def test_ignore_formating_files(tmp_path: pathlib.Path):
+    models_dir = pathlib.Path("models")
+    audits_dir = pathlib.Path("audits")
+
+    # Case 1: Model and Audit are not formatted if the flag is set to false (overriding defaults)
+    model1_text = "MODEL(name this.model1, dialect 'duckdb', formatting false); SELECT 1 col"
+    model1 = create_temp_file(tmp_path, pathlib.Path(models_dir, "model_1.sql"), model1_text)
+
+    audit1_text = "AUDIT(name audit1, dialect 'duckdb', formatting false); SELECT col1 col2 FROM @this_model WHERE     foo < 0;"
+    audit1 = create_temp_file(tmp_path, pathlib.Path(audits_dir, "audit_1.sql"), audit1_text)
+
+    audit2_text = "AUDIT(name audit2, dialect 'duckdb', standalone true, formatting false); SELECT col1 col2 FROM @this_model WHERE     foo < 0;"
+    audit2 = create_temp_file(tmp_path, pathlib.Path(audits_dir, "audit_2.sql"), audit2_text)
+
+    Context(
+        paths=tmp_path, config=Config(model_defaults=ModelDefaultsConfig(formatting=True))
+    ).format()
+
+    assert model1.read_text(encoding="utf-8") == model1_text
+    assert audit1.read_text(encoding="utf-8") == audit1_text
+    assert audit2.read_text(encoding="utf-8") == audit2_text
+
+    # Case 2: Model is formatted (or not) based on its flag and the defaults flag
+    model2_text = "MODEL(name this.model2, dialect 'duckdb'); SELECT 1 col"
+    model2 = create_temp_file(tmp_path, pathlib.Path(models_dir, "model_2.sql"), model2_text)
+
+    model3_text = "MODEL(name this.model3, dialect 'duckdb', formatting true); SELECT 1 col"
+    model3 = create_temp_file(tmp_path, pathlib.Path(models_dir, "model_3.sql"), model3_text)
+
+    Context(
+        paths=tmp_path, config=Config(model_defaults=ModelDefaultsConfig(formatting=False))
+    ).format()
+
+    # Case 2.1: Model is not formatted if the defaults flag is set to false
+    assert model2.read_text(encoding="utf-8") == model2_text
+
+    # Case 2.2: Model is formatted if its flag is set to true, overriding defaults
+    assert (
+        model3.read_text(encoding="utf-8")
+        == "MODEL (\n  name this.model3,\n  dialect 'duckdb',\n  formatting TRUE\n);\n\nSELECT\n  1 AS col"
+    )
diff --git a/tests/core/test_model.py b/tests/core/test_model.py
@@ -9029,3 +9029,25 @@ def test_var_in_def(assert_exp_eq):
         SELECT '1970-01-01' AS "ds"
         """,
     )
+
+
+def test_formatting_flag_serde():
+    expressions = d.parse(
+        """
+        MODEL(
+            name test_model,
+            formatting False,
+        );
+        SELECT * FROM tbl;
+    """,
+        default_dialect="duckdb",
+    )
+
+    model = load_sql_based_model(expressions)
+
+    model_json = model.json()
+
+    assert "formatting" not in json.loads(model_json)
+
+    deserialized_model = SqlModel.parse_raw(model_json)
+    assert deserialized_model.dict() == model.dict()

Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,7 @@ class ModelMeta(_Node):`
`80`	`80`	`ignored_rules_: t.Optional[t.Set[str]] = Field(`
`81`	`81`	`default=None, exclude=True, alias="ignored_rules"`
`82`	`82`	`)`
	`83`	`+ formatting: t.Optional[bool] = Field(default=None, exclude=True)`
`83`	`84`
`84`	`85`	`_bool_validator = bool_validator`
`85`	`86`	`_model_kind_validator = model_kind_validator`