11from __future__ import annotations
22
33import typing as t
4- from datetime import timedelta
54from enum import Enum
65
76from pydantic import Field , root_validator , validator
87from sqlglot import exp
8+ from sqlglot .helper import ensure_list
99
1010from sqlmesh .core import dialect as d
1111from sqlmesh .core .model .kind import (
1515 ViewKind ,
1616 _Incremental ,
1717)
18- from sqlmesh .utils import unique
1918from sqlmesh .utils .cron import CroniterCache
2019from sqlmesh .utils .date import TimeLike , to_datetime
2120from sqlmesh .utils .errors import ConfigError
class IntervalUnit(str, Enum):
    """IntervalUnit is the inferred granularity of an incremental model.

    IntervalUnit can be one of 5 types: YEAR, MONTH, DAY, HOUR, MINUTE. The unit is inferred
    based on the cron schedule of a model. The minimum time delta between a sample set of dates
    is used to determine which unit a model's schedule is.
    """

    YEAR = "year"
    MONTH = "month"
    DAY = "day"
    HOUR = "hour"
    MINUTE = "minute"

    @property
    def is_date_granularity(self) -> bool:
        """Whether this unit is one of YEAR, MONTH or DAY (as opposed to HOUR or MINUTE)."""
        return self in (IntervalUnit.YEAR, IntervalUnit.MONTH, IntervalUnit.DAY)
41+
3742
# A reference to an audit: a string paired with a mapping of string keys to SQL expressions.
# NOTE(review): presumably (audit name, audit arguments) -- confirm against the audits validator.
AuditReference = t.Tuple[str, t.Dict[str, exp.Expression]]
3944
@@ -51,7 +56,7 @@ class ModelMeta(PydanticModel):
5156 start : t .Optional [TimeLike ]
5257 retention : t .Optional [int ] # not implemented yet
5358 storage_format : t .Optional [str ]
54- partitioned_by_ : t .List [str ] = Field (default = [], alias = "partitioned_by" )
59+ partitioned_by_ : t .List [exp . Expression ] = Field (default = [], alias = "partitioned_by" )
5560 depends_on_ : t .Optional [t .Set [str ]] = Field (default = None , alias = "depends_on" )
5661 columns_to_types_ : t .Optional [t .Dict [str , exp .DataType ]] = Field (default = None , alias = "columns" )
5762 column_descriptions_ : t .Optional [t .Dict [str , str ]]
@@ -110,7 +115,7 @@ def extract(v: exp.Expression) -> t.Tuple[str, t.Dict[str, str]]:
110115 ]
111116 return v
112117
113- @validator ("partitioned_by_" , " tags" , "grain" , pre = True )
118+ @validator ("tags" , "grain" , pre = True )
114119 def _value_or_tuple_validator (cls , v : t .Any ) -> t .Any :
115120 if isinstance (v , (exp .Tuple , exp .Array )):
116121 return [e .name for e in v .expressions ]
@@ -136,6 +141,39 @@ def _cron_validator(cls, v: t.Any) -> t.Optional[str]:
136141 raise ConfigError (f"Invalid cron expression '{ cron } '" )
137142 return cron
138143
@validator("partitioned_by_", pre=True)
def _partition_by_validator(
    cls, v: t.Any, values: t.Dict[str, t.Any]
) -> t.List[exp.Expression]:
    """Normalize the raw `partitioned_by` value into a list of expressions.

    Args:
        v: The raw value. Either a tuple/array expression (its child expressions are used),
            a single expression, or a string / list of strings and expressions.
        values: The previously validated fields. Only "dialect" is read, and it is used to
            parse string entries.

    Returns:
        The partition expressions, with bare identifiers converted to columns.

    Raises:
        ConfigError: If a partition expression contains no column or more than one column.
    """
    partitions: t.List[exp.Expression]
    if isinstance(v, (exp.Tuple, exp.Array)):
        partitions = v.expressions
    elif isinstance(v, exp.Expression):
        partitions = [v]
    else:
        dialect = values.get("dialect")
        partitions = [
            d.parse_one(entry, dialect=dialect) if isinstance(entry, str) else entry
            for entry in ensure_list(v)
        ]

    # A bare identifier is promoted to a column reference.
    # NOTE(review): applied to the result of every branch above -- confirm this matches the
    # intended nesting.
    partitions = [
        exp.to_column(expr.name) if isinstance(expr, exp.Identifier) else expr
        for expr in partitions
    ]

    # Each partition expression must reference exactly one column.
    for partition in partitions:
        num_cols = sum(1 for _ in partition.find_all(exp.Column))
        if num_cols == 1:
            continue
        error_msg = "does not contain a column" if num_cols == 0 else "contains multiple columns"
        raise ConfigError(f"partitioned_by field '{partition}' {error_msg}")

    return partitions
176+
139177 @validator ("columns_to_types_" , pre = True )
140178 def _columns_validator (
141179 cls , v : t .Any , values : t .Dict [str , t .Any ]
@@ -194,9 +232,12 @@ def unique_key(self) -> t.List[str]:
194232 return []
195233
@property
def partitioned_by(self) -> t.List[exp.Expression]:
    """The partition expressions, including the time column when one is set.

    If the model has a time column whose name does not match any column referenced by the
    explicitly configured partition expressions, it is prepended as a column expression.
    Otherwise the configured expressions are returned as-is.
    """
    if self.time_column:
        time_column_name = self.time_column.column
        if all(col.name != time_column_name for col in self._partition_by_columns):
            return [exp.to_column(time_column_name), *self.partitioned_by_]
    return self.partitioned_by_
200241
201242 @property
202243 def column_descriptions (self ) -> t .Dict [str , str ]:
@@ -208,18 +249,13 @@ def lookback(self) -> int:
208249 """The incremental lookback window."""
209250 return (self .kind .lookback if isinstance (self .kind , _Incremental ) else 0 ) or 0
210251
211- @property
212- def lookback_delta (self ) -> timedelta :
213- """The incremental lookback time delta."""
214- if isinstance (self .kind , _Incremental ):
215- interval_unit = self .interval_unit ()
216- if interval_unit == IntervalUnit .DAY :
217- return timedelta (days = self .lookback )
218- if interval_unit == IntervalUnit .HOUR :
219- return timedelta (hours = self .lookback )
220- if interval_unit == IntervalUnit .MINUTE :
221- return timedelta (minutes = self .lookback )
222- return timedelta ()
def lookback_start(self, start: TimeLike) -> TimeLike:
    """Move `start` back by the model's lookback window.

    Args:
        start: The timestamp to step back from.

    Returns:
        The result of applying `cron_prev` to `start` `self.lookback` times. When the
        lookback is 0, `start` is returned unchanged.
    """
    # range(0) is empty, so a zero lookback needs no special case.
    for _ in range(self.lookback):
        start = self.cron_prev(start)
    return start
223259
224260 @property
225261 def batch_size (self ) -> t .Optional [int ]:
@@ -241,7 +277,11 @@ def interval_unit(self, sample_size: int = 10) -> IntervalUnit:
241277 croniter = CroniterCache (self .cron )
242278 samples = [croniter .get_next () for _ in range (sample_size )]
243279 min_interval = min (b - a for a , b in zip (samples , samples [1 :]))
244- if min_interval >= 86400 :
280+ if min_interval >= 31536000 :
281+ self ._interval_unit = IntervalUnit .YEAR
282+ elif min_interval >= 2419200 :
283+ self ._interval_unit = IntervalUnit .MONTH
284+ elif min_interval >= 86400 :
245285 self ._interval_unit = IntervalUnit .DAY
246286 elif min_interval >= 3600 :
247287 self ._interval_unit = IntervalUnit .HOUR
@@ -252,8 +292,8 @@ def interval_unit(self, sample_size: int = 10) -> IntervalUnit:
252292 def normalized_cron (self ) -> str :
253293 """Returns the UTC normalized cron based on sampling heuristics.
254294
255- SQLMesh supports 3 interval units, daily, hourly, and minutes. If a job is scheduled
256- daily at 1PM, the actual intervals are shifted back to midnight UTC.
295+ SQLMesh supports 5 interval units, yearly, monthly, daily, hourly, and minutes. If a
296+ job is scheduled daily at 1PM, the actual intervals are shifted back to midnight UTC.
257297
258298 Returns:
259299 The cron string representing either daily, hourly, or minutes.
@@ -265,6 +305,10 @@ def normalized_cron(self) -> str:
265305 return "0 * * * *"
266306 if unit == IntervalUnit .DAY :
267307 return "0 0 * * *"
308+ if unit == IntervalUnit .MONTH :
309+ return "0 0 1 * *"
310+ if unit == IntervalUnit .YEAR :
311+ return "0 0 1 1 *"
268312 return ""
269313
270314 def croniter (self , value : TimeLike ) -> CroniterCache :
@@ -309,3 +353,7 @@ def cron_floor(self, value: TimeLike) -> TimeLike:
309353 The timestamp floor.
310354 """
311355 return self .croniter (self .cron_next (value )).get_prev ()
356+
@property
def _partition_by_columns(self) -> t.List[exp.Column]:
    """Every column found in the `partitioned_by_` expressions, flattened into one list."""
    return [col for expr in self.partitioned_by_ for col in expr.find_all(exp.Column)]
0 commit comments