11from __future__ import annotations
22
33import ast
4+ import json
45import logging
56import sys
67import types
2324from sqlmesh .core import dialect as d
2425from sqlmesh .core .macros import MacroRegistry , macro
2526from sqlmesh .core .model .common import expression_validator
26- from sqlmesh .core .model .kind import ModelKindName , SeedKind , _Incremental
27+ from sqlmesh .core .model .kind import (
28+ IncrementalByTimeRangeKind ,
29+ IncrementalByUniqueKeyKind ,
30+ ModelKindName ,
31+ SeedKind ,
32+ _Incremental ,
33+ )
2734from sqlmesh .core .model .meta import ModelMeta
2835from sqlmesh .core .model .seed import Seed , create_seed
2936from sqlmesh .core .renderer import ExpressionRenderer , QueryRenderer
3037from sqlmesh .utils import str_to_bool
3138from sqlmesh .utils .date import TimeLike , make_inclusive , to_datetime
3239from sqlmesh .utils .errors import ConfigError , SQLMeshError , raise_config_error
40+ from sqlmesh .utils .hashing import hash_data
3341from sqlmesh .utils .jinja import JinjaMacroRegistry , extract_macro_references
3442from sqlmesh .utils .metaprogramming import (
3543 Executable ,
4856 from sqlmesh .utils .jinja import MacroReference
4957
5058if sys .version_info >= (3 , 9 ):
51- from typing import Annotated , Literal
59+ from typing import Literal
5260else :
53- from typing_extensions import Annotated , Literal
61+ from typing_extensions import Literal
5462
5563logger = logging .getLogger (__name__ )
5664
@@ -628,6 +636,103 @@ def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
628636 """
629637 raise NotImplementedError
630638
@property
def data_hash(self) -> str:
    """Return the hash of this node's data-related fields.

    Returns:
        The value produced by `hash_data` for `_data_hash_fields`.
    """
    fields = self._data_hash_fields
    return hash_data(fields)
648+
@property
def _data_hash_fields(self) -> t.List[str]:
    """Collect the values that `data_hash` feeds into `hash_data`.

    The list is built in a fixed order: python env, kind name, cron,
    storage format, lookback, partition expressions, clustering keys and
    stamp; then every column name / type pair; then kind-specific settings.

    Returns:
        A newly built flat list of values. `cron`, `storage_format` and
        `stamp` are appended as-is rather than converted with `str`.
    """
    fields = [
        str(self.sorted_python_env),
        self.kind.name,
        self.cron,
        self.storage_format,
        str(self.lookback),
    ]
    fields.extend(expr.sql() for expr in self.partitioned_by or [])
    fields.extend(self.clustered_by or [])
    fields.append(self.stamp)

    # Declared columns contribute both their name and their SQL type.
    for name, dtype in (self.columns_to_types_ or {}).items():
        fields.extend((name, dtype.sql()))

    kind = self.kind
    if isinstance(kind, IncrementalByTimeRangeKind):
        time_column = kind.time_column
        fields.extend((time_column.column, time_column.format))
    elif isinstance(kind, IncrementalByUniqueKeyKind):
        fields.extend(kind.unique_key)

    return fields  # type: ignore
673+
def metadata_hash(self, audits: t.Dict[str, Audit]) -> str:
    """Compute the metadata hash for the node.

    Args:
        audits: Available audits by name; used to look up every audit
            referenced by this node that is not a built-in one.

    Returns:
        The value produced by `hash_data` for the collected metadata.

    Raises:
        SQLMeshError: If a referenced audit is neither built-in nor
            present in `audits`.
    """
    from sqlmesh.core.audit import BUILT_IN_AUDITS

    parts = [self.dialect, self.owner, self.description]
    parts.append(str(self.start) if self.start else None)
    parts.append(str(self.retention) if self.retention else None)
    parts.append(None if self.batch_size is None else str(self.batch_size))
    parts.append(json.dumps(self.mapping_schema, sort_keys=True))
    parts.extend(sorted(self.tags))
    parts.extend(sorted(self.grain))
    parts.extend((str(self.forward_only), str(self.disable_restatement)))

    # Audits are visited in name order.
    for audit_name, audit_args in sorted(self.audits, key=lambda pair: pair[0]):
        parts.append(audit_name)

        if audit_name in BUILT_IN_AUDITS:
            # Built-in audits contribute only their argument names and values.
            for arg_name, arg_value in audit_args.items():
                parts.extend((arg_name, arg_value.sql(comments=True)))
            continue

        if audit_name not in audits:
            raise SQLMeshError(f"Unexpected audit name '{ audit_name } '.")

        audit = audits[audit_name]
        if self.hash_raw_query:
            audit_query = audit.query
        else:
            rendered = audit.render_query(self, **t.cast(t.Dict[str, t.Any], audit_args))
            # Fall back to the raw query when rendering yields nothing.
            audit_query = rendered or audit.query
        parts.extend(
            (
                audit_query.sql(comments=True),
                audit.dialect,
                str(audit.skip),
                str(audit.blocking),
            )
        )

    # Add comments from the query.
    model_query = self.render_query() if self.is_sql else None
    if model_query:
        for node, _, _ in model_query.walk():
            parts.extend(node.comments or ())

    return hash_data(parts)
735+
631736
632737class _SqlBasedModel (_Model ):
633738 pre_statements_ : t .Optional [t .List [exp .Expression ]] = Field (
@@ -728,6 +833,20 @@ def _statement_renderer(self, expression: exp.Expression) -> ExpressionRenderer:
728833 )
729834 return self .__statement_renderers [expression_key ]
730835
@property
def _data_hash_fields(self) -> t.List[str]:
    """Extend the parent's data hash fields with statement SQL.

    With `hash_raw_query` set, the raw pre/post statements and the macro
    definitions are used. Otherwise the rendered pre/post statements are
    used and macro definitions are left out.

    Returns:
        A new list: the parent's fields followed by the comment-free SQL
        of each statement.
    """
    if self.hash_raw_query:
        pre = self.pre_statements
        post = self.post_statements
        macros = self.macro_definitions
    else:
        pre = self.render_pre_statements()
        post = self.render_post_statements()
        macros = []

    fields = [*super()._data_hash_fields]
    for group in (pre, post, macros):
        fields.extend(statement.sql(comments=False) for statement in group)
    return fields
849+
731850
732851class SqlModel (_SqlBasedModel ):
733852 """The model definition which relies on a SQL query to fetch the data.
@@ -926,6 +1045,24 @@ def _query_renderer(self) -> QueryRenderer:
9261045 )
9271046 return self .__query_renderer
9281047
@property
def _data_hash_fields(self) -> t.List[str]:
    """Extend the parent's data hash fields with the query and Jinja macros.

    Returns:
        The list obtained from the parent, appended in place with the
        comment-free query SQL, then the name and definition of every root
        macro, then of every package macro. Root macros are sorted by their
        items; packages and their macros are sorted by name.
    """
    fields = super()._data_hash_fields

    if self.hash_raw_query:
        query = self.query
    else:
        # Fall back to the raw query when rendering yields nothing.
        query = self.render_query() or self.query
    fields.append(query.sql(comments=False))

    for name, root_macro in sorted(self.jinja_macros.root_macros.items()):
        fields.extend((name, root_macro.definition))

    # Package names only determine the order; they are not added to the list.
    for _, package in sorted(self.jinja_macros.packages.items(), key=lambda item: item[0]):
        for name, package_macro in sorted(package.items(), key=lambda item: item[0]):
            fields.extend((name, package_macro.definition))

    return fields
1065+
def __repr__(self) -> str:
    """Return a debug string with the name and the first 30 characters of the query SQL."""
    name = self.name
    query_preview = self.query.sql(dialect=self.dialect)[0:30]
    return f"Model<name: {name} , query: {query_preview} >"
9311068
@@ -1087,6 +1224,14 @@ def _ensure_hydrated(self) -> None:
10871224 if not self .is_hydrated :
10881225 raise SQLMeshError (f"Seed model '{ self .name } ' is not hydrated." )
10891226
@property
def _data_hash_fields(self) -> t.List[str]:
    """Extend the parent's data hash fields with the per-column hashes.

    Returns:
        The list obtained from the parent, appended in place with each
        column name followed by its hash, in `column_hashes` order.
    """
    fields = super()._data_hash_fields
    for name, digest in self.column_hashes.items():
        fields.extend((name, digest))
    return fields
1234+
def __repr__(self) -> str:
    """Return a debug string with the name and the path of the seed kind."""
    name = self.name
    seed_path = self.kind.path
    return f"Model<name: {name} , seed: {seed_path} >"
10921237
@@ -1139,6 +1284,12 @@ def is_python(self) -> bool:
def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
    """Return ``None`` for any `previous` model.

    Args:
        previous: The model to compare against; not inspected here.

    Returns:
        Always ``None``.
    """
    return None
11411286
@property
def _data_hash_fields(self) -> t.List[str]:
    """Extend the parent's data hash fields with the entrypoint.

    Returns:
        The list obtained from the parent, with `entrypoint` appended in place.
    """
    fields = super()._data_hash_fields
    fields += [self.entrypoint]
    return fields
1292+
def __repr__(self) -> str:
    """Return a debug string with the name and the entrypoint."""
    name = self.name
    entrypoint = self.entrypoint
    return f"Model<name: {name} , entrypoint: {entrypoint} >"
11441295
@@ -1156,9 +1307,7 @@ def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
11561307 return None
11571308
11581309
1159- Model = Annotated [
1160- t .Union [SqlModel , SeedModel , PythonModel , ExternalModel ], Field (discriminator = "source_type" )
1161- ]
# Type alias: any one of the four model classes listed below.
Model = t.Union[
    SqlModel,
    SeedModel,
    PythonModel,
    ExternalModel,
]
11621311
11631312
11641313def load_model (
0 commit comments