Skip to content

Commit 94f11ab

Browse files
authored
Fix: Improve Relative Date Categorical Expressions (#1523)
* fix: improve relative expressions * fix: improve relative expressions
1 parent 4a23b49 commit 94f11ab

4 files changed

Lines changed: 89 additions & 4 deletions

File tree

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"dbt-core",
6868
"dbt-duckdb>=1.4.2",
6969
"Faker",
70+
"freezegun",
7071
"google-auth",
7172
"isort==5.10.1",
7273
"mkdocs-include-markdown-plugin==4.0.3",

sqlmesh/utils/date.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import re
34
import time
45
import typing as t
56
import warnings
@@ -11,6 +12,8 @@
1112
from datetime import date, datetime, timedelta, timezone
1213

1314
import dateparser
15+
from dateparser import freshness_date_parser as freshness_date_parser_module
16+
from dateparser.freshness_date_parser import freshness_date_parser
1417
from sqlglot import exp
1518

1619
UTC = timezone.utc
@@ -22,6 +25,14 @@
2225
from sqlmesh.core.scheduler import Interval
2326

2427

28+
# dateparser's freshness pattern does not accept plural unit names, so it is
# rebuilt here with an optional trailing `s` after the unit alternation.
freshness_date_parser_module.PATTERN = re.compile(
    rf"(\d+[.,]?\d*)\s*({freshness_date_parser_module._UNITS})s?\b",  # type: ignore
    flags=re.I | re.S | re.U,
)
# Single-word expressions that always refer to a whole calendar day.
DAY_SHORTCUT_EXPRESSIONS = {"today", "yesterday", "tomorrow"}
# Units finer than a day; an expression using any of them keeps its time of day.
TIME_UNITS = {"hours", "minutes", "seconds"}
34+
35+
2536
def now(minute_floor: bool = True) -> datetime:
2637
"""
2738
Current utc datetime with optional minute level accuracy / granularity.
@@ -134,7 +145,11 @@ def to_datetime(value: TimeLike, relative_base: t.Optional[datetime] = None) ->
134145
epoch = None
135146

136147
if epoch is None:
137-
dt = dateparser.parse(str(value), settings={"RELATIVE_BASE": relative_base or now()})
148+
relative_base = relative_base or now()
149+
expression = str(value)
150+
if is_catagorical_relative_expression(expression):
151+
relative_base = relative_base.replace(hour=0, minute=0, second=0, microsecond=0)
152+
dt = dateparser.parse(expression, settings={"RELATIVE_BASE": relative_base})
138153
else:
139154
try:
140155
dt = datetime.strptime(str(value), DATE_INT_FMT)
@@ -279,3 +294,12 @@ def time_like_to_str(time_like: TimeLike) -> str:
279294
if is_date(time_like):
280295
return to_ds(time_like)
281296
return to_ts(time_like)
297+
298+
299+
def is_catagorical_relative_expression(expression: str) -> bool:
    """
    Tells whether a relative date expression only uses day-or-coarser units.

    An expression is categorical when it is one of DAY_SHORTCUT_EXPRESSIONS
    ("today", "yesterday", "tomorrow") or when the freshness parser extracts at
    least one unit from it and none of those units is in TIME_UNITS. Strings the
    freshness parser extracts nothing from (e.g. "2023-01-01") are not categorical.

    Args:
        expression: The date expression to inspect.

    Returns:
        True if the expression is categorical, False otherwise.
    """
    # Normalize once and reuse it for both checks. The freshness pattern is
    # compiled case-insensitively, and get_kwargs builds its keys from the text
    # as matched, so an un-normalized "1 HOUR ago" would produce a key that
    # never equals the lowercase entries of TIME_UNITS.
    normalized = expression.strip().lower()
    if normalized in DAY_SHORTCUT_EXPRESSIONS:
        return True
    grain_kwargs = freshness_date_parser.get_kwargs(normalized)
    if not grain_kwargs:
        return False
    # Categorical only when no extracted unit is an hour/minute/second unit.
    return TIME_UNITS.isdisjoint(grain_kwargs)

tests/core/test_integration.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ def test_incremental_time_self_reference(
698698
SnapshotIntervals(
699699
snapshot_name="sushi.customer_revenue_lifetime",
700700
intervals=[
701+
(to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))),
701702
(to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))),
702703
(to_timestamp(to_date("5 days ago")), to_timestamp(to_date("4 days ago"))),
703704
(to_timestamp(to_date("4 days ago")), to_timestamp(to_date("3 days ago"))),
@@ -709,6 +710,7 @@ def test_incremental_time_self_reference(
709710
SnapshotIntervals(
710711
snapshot_name="sushi.customer_revenue_by_day",
711712
intervals=[
713+
(to_timestamp(to_date("7 days ago")), to_timestamp(to_date("6 days ago"))),
712714
(to_timestamp(to_date("6 days ago")), to_timestamp(to_date("5 days ago"))),
713715
],
714716
),
@@ -720,9 +722,9 @@ def test_incremental_time_self_reference(
720722
num_batch_calls = Counter(
721723
[x[0][0] for x in sushi_context.console.update_snapshot_evaluation_progress.call_args_list] # type: ignore
722724
)
723-
# Validate that we made 6 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot
725+
# Validate that we made 7 calls to the customer_revenue_lifetime snapshot and 1 call to the customer_revenue_by_day snapshot
724726
assert num_batch_calls == {
725-
sushi_context.snapshots["sushi.customer_revenue_lifetime"]: 6,
727+
sushi_context.snapshots["sushi.customer_revenue_lifetime"]: 7,
726728
sushi_context.snapshots["sushi.customer_revenue_by_day"]: 1,
727729
}
728730
# Validate that the results are the same as before the restate

tests/utils/test_date.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
from datetime import date, datetime
22

33
import pytest
4+
from freezegun import freeze_time
45

5-
from sqlmesh.utils.date import UTC, make_inclusive, to_datetime, to_timestamp
6+
from sqlmesh.utils.date import (
7+
UTC,
8+
is_catagorical_relative_expression,
9+
make_inclusive,
10+
to_datetime,
11+
to_timestamp,
12+
)
613

714

815
def test_to_datetime() -> None:
@@ -21,6 +28,31 @@ def test_to_datetime() -> None:
2128
assert to_datetime("0") == datetime(1970, 1, 1, tzinfo=UTC)
2229

2330

31+
@pytest.mark.parametrize(
    "expression, result",
    [
        # Expressions with hour/minute/second units keep the time of day.
        ("1 second ago", datetime(2023, 1, 20, 12, 29, 59, tzinfo=UTC)),
        ("1 minute ago", datetime(2023, 1, 20, 12, 29, 0, tzinfo=UTC)),
        ("1 hour ago", datetime(2023, 1, 20, 11, 30, 0, tzinfo=UTC)),
        # Day-or-coarser expressions resolve to midnight.
        ("1 day ago", datetime(2023, 1, 19, 0, 0, 0, tzinfo=UTC)),
        ("1 week ago", datetime(2023, 1, 13, 0, 0, 0, tzinfo=UTC)),
        ("1 month ago", datetime(2022, 12, 20, 0, 0, 0, tzinfo=UTC)),
        ("1 year ago", datetime(2022, 1, 20, 0, 0, 0, tzinfo=UTC)),
        ("1 decade ago", datetime(2013, 1, 20, 0, 0, 0, tzinfo=UTC)),
        # Mixed and multi-unit expressions.
        ("3 days 2 hours ago", datetime(2023, 1, 17, 10, 30, 0, tzinfo=UTC)),
        ("2 years 5 second ago", datetime(2021, 1, 20, 12, 29, 55, tzinfo=UTC)),
        ("24 hours ago", datetime(2023, 1, 19, 12, 30, 0, tzinfo=UTC)),
        ("1 year 5 days ago", datetime(2022, 1, 15, 0, 0, 0, tzinfo=UTC)),
        # Day shortcuts.
        ("yesterday", datetime(2023, 1, 19, 0, 0, 0, tzinfo=UTC)),
        ("today", datetime(2023, 1, 20, 0, 0, 0, tzinfo=UTC)),
        ("tomorrow", datetime(2023, 1, 21, 0, 0, 0, tzinfo=UTC)),
    ],
)
def test_to_datetime_with_expressions(expression: str, result: datetime) -> None:
    """Checks how to_datetime resolves relative expressions at a frozen clock."""
    with freeze_time("2023-01-20 12:30:30"):
        parsed = to_datetime(expression)
        assert parsed == result
54+
55+
2456
def test_to_timestamp() -> None:
2557
assert to_timestamp("2020-01-01") == 1577836800000
2658

@@ -55,3 +87,29 @@ def test_make_inclusive(start_in, end_in, start_out, end_out) -> None:
5587
to_datetime(start_out),
5688
to_datetime(end_out),
5789
)
90+
91+
92+
@pytest.mark.parametrize(
    "expression, result",
    [
        # Single sub-day units are not categorical.
        ("1 second ago", False),
        ("1 minute ago", False),
        ("1 hour ago", False),
        # Single day-or-coarser units are categorical.
        ("1 day ago", True),
        ("1 week ago", True),
        ("1 month ago", True),
        ("1 year ago", True),
        ("1 decade ago", True),
        # Any hour/minute component makes the expression non-categorical.
        ("3 hours ago", False),
        ("24 hours ago", False),
        ("1 day 5 hours ago", False),
        ("1 year 5 minutes ago", False),
        # Absolute dates and timestamps are not relative expressions.
        ("2023-01-01", False),
        ("2023-01-01 12:00:00", False),
        # Day shortcuts.
        ("yesterday", True),
        ("today", True),
        ("tomorrow", True),
    ],
)
def test_is_catagorical_relative_expression(expression: str, result: bool) -> None:
    """Checks the categorical/non-categorical classification of each expression."""
    verdict = is_catagorical_relative_expression(expression)
    assert verdict is result

0 commit comments

Comments
 (0)