Skip to content

Commit c133ff1

Browse files
Add data_freshness_sla and volume_threshold tests
Add two new Elementary tests: - data_freshness_sla: checks if data was updated before a specified SLA deadline - volume_threshold: monitors row count changes with configurable warn/error thresholds, using Elementary's metric caching to avoid redundant computation Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent d33eb04 commit c133ff1

2 files changed

Lines changed: 477 additions & 0 deletions

File tree

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
{#
2+
Test: data_freshness_sla
3+
4+
Verifies that data in a model was updated before a specified SLA deadline time.
5+
Checks the max timestamp value of a specified column in the data itself.
6+
7+
Use case: "Is the data fresh?" / "Was the data updated on time?"
8+
9+
Parameters:
10+
timestamp_column (required): Column name containing timestamps to check for freshness
11+
sla_time (required): Deadline time. Supports formats like "07:00", "7am", "2:30pm", "14:30"
12+
timezone (required): IANA timezone name (e.g., "America/Los_Angeles", "Europe/London")
13+
day_of_week (optional): Day(s) to check. String or list: "Monday", ["Monday", "Wednesday"]
14+
day_of_month (optional): Day(s) of month to check. Integer or list: 1, [1, 15]
15+
where_expression (optional): Additional WHERE clause filter for the data query
16+
17+
Schedule behavior:
18+
- If neither day_of_week nor day_of_month is set: check every day (default)
19+
- If day_of_week is set: only check on those days
20+
- If day_of_month is set: only check on those days
21+
- If both are set: check if today matches EITHER filter (OR logic)
22+
23+
Example usage:
24+
models:
25+
- name: my_model
26+
tests:
27+
- elementary.data_freshness_sla:
28+
timestamp_column: updated_at
29+
sla_time: "07:00"
30+
timezone: "America/Los_Angeles"
31+
32+
- name: daily_events
33+
tests:
34+
- elementary.data_freshness_sla:
35+
timestamp_column: event_timestamp
36+
sla_time: "6am"
37+
timezone: "Europe/Amsterdam"
38+
where_expression: "event_type = 'completed'"
39+
40+
- name: weekly_report_data
41+
tests:
42+
- elementary.data_freshness_sla:
43+
timestamp_column: report_date
44+
sla_time: "09:00"
45+
timezone: "Asia/Tokyo"
46+
day_of_week: ["Monday"]
47+
48+
Test passes if:
49+
- Today is not a scheduled check day (based on day_of_week/day_of_month)
50+
- OR the max timestamp in the data is from today (before or after deadline)
51+
- OR the SLA deadline for today hasn't passed yet
52+
53+
Test fails if:
54+
- Today is a scheduled check day AND the deadline has passed AND:
55+
- No data exists in the table
56+
- The max timestamp is from a previous day (data not updated today)
57+
#}
58+
59+
{% test data_freshness_sla(model, timestamp_column, sla_time, timezone, day_of_week=none, day_of_month=none, where_expression=none) %}
    {{ config(tags=['elementary-tests']) }}

    {#-
        The real check only runs when `execute` is set and both
        elementary.is_test_command() and elementary.is_elementary_enabled()
        are truthy. In every other case the test renders a query that
        returns no rows.
    -#}
    {%- if not (execute and elementary.is_test_command() and elementary.is_elementary_enabled()) %}
        {{ elementary.no_results_query() }}
    {%- else %}

        {# --- Argument validation (order matters: first failure wins) --- #}
        {% if not timestamp_column %}
            {% do exceptions.raise_compiler_error("The 'timestamp_column' parameter is required. Example: timestamp_column: 'updated_at'") %}
        {% endif %}
        {% if not sla_time %}
            {% do exceptions.raise_compiler_error("The 'sla_time' parameter is required. Example: sla_time: '07:00'") %}
        {% endif %}
        {% do elementary.validate_timezone(timezone) %}

        {# --- Schedule filters, normalized by the elementary helpers --- #}
        {% set weekdays_to_check = elementary.normalize_day_of_week(day_of_week) %}
        {% set month_days_to_check = elementary.normalize_day_of_month(day_of_month) %}

        {# --- Resolve the tested relation; a falsy result is reported as unsupported --- #}
        {% set relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
        {% if not relation %}
            {% do exceptions.raise_compiler_error("Unsupported model: " ~ model ~ " (this might happen if you override 'ref' or 'source')") %}
        {% endif %}

        {# --- The freshness column must be recognized as a timestamp column --- #}
        {% set column_type = elementary.find_normalized_data_type_for_column(relation, timestamp_column) %}
        {% set column_is_timestamp = elementary.is_column_timestamp(relation, timestamp_column, column_type) %}
        {% if not column_is_timestamp %}
            {% do exceptions.raise_compiler_error("Column '" ~ timestamp_column ~ "' is not a timestamp type. The timestamp_column must be a timestamp or datetime column.") %}
        {% endif %}

        {# --- Parse the SLA time and derive today's deadline (plus current-day info) --- #}
        {% set sla_clock = elementary.parse_sla_time(sla_time) %}
        {% set sla_label = elementary.format_sla_time(sla_clock) %}
        {% set deadline = elementary.calculate_sla_deadline_utc(sla_clock.hour, sla_clock.minute, timezone) %}

        {# --- Decide whether today is one of the scheduled check days --- #}
        {% set is_check_day = elementary.should_check_sla_today(deadline.day_of_week, deadline.day_of_month, weekdays_to_check, month_days_to_check) %}

        {% if is_check_day %}
            {% set run_message = 'Running data_freshness_sla test for ' ~ relation.identifier ~ ' with SLA ' ~ sla_label ~ ' ' ~ timezone %}
            {{ elementary.edr_log(run_message) }}

            {# Render the freshness query; it returns a row only when the SLA was missed #}
            {{ elementary.get_data_freshness_sla_query(
                model_relation=relation,
                timestamp_column=timestamp_column,
                sla_deadline_utc=deadline.sla_deadline_utc,
                target_date=deadline.target_date,
                target_date_start_utc=deadline.target_date_start_utc,
                target_date_end_utc=deadline.target_date_end_utc,
                deadline_passed=deadline.deadline_passed,
                formatted_sla_time=sla_label,
                timezone=timezone,
                where_expression=where_expression
            ) }}
        {% else %}
            {# Not a scheduled day: log the skip and pass with an empty result set #}
            {% set skip_message = 'Skipping data_freshness_sla test for ' ~ relation.identifier ~ ' - not a scheduled check day (' ~ deadline.day_of_week ~ ', day ' ~ deadline.day_of_month ~ ')' %}
            {{ elementary.edr_log(skip_message) }}
            {{ elementary.no_results_query() }}
        {% endif %}

    {%- endif %}

{% endtest %}
136+
137+
138+
{#
    Build the SQL query that reports a missed data-freshness SLA.

    The query returns at most one row, and only when the SLA was missed
    (dbt treats any returned row as a test failure).

    Parameters:
        model_relation        Relation to query.
        timestamp_column      Column (or expression) whose MAX is used as the last-update time.
        sla_deadline_utc      Deadline literal, rendered into the query and cast to a timestamp.
        target_date           Date being checked; rendered as a string literal for reporting.
        target_date_start_utc Start of the target day; rendered and cast to a timestamp.
        target_date_end_utc   Accepted for interface compatibility; no longer used in the
                              comparison (see the note on future timestamps below).
        deadline_passed       Truthy when the deadline has already passed. Evaluated at
                              compile time, not in SQL.
        formatted_sla_time    Human-readable SLA time used in the result description.
        timezone              Timezone name used in the result description.
        where_expression      Optional SQL filter applied when computing the max timestamp.

    Logic:
        - MAX(timestamp_column) is NULL                 -> NO_DATA
        - MAX(timestamp_column) >= start of target day  -> DATA_FRESH (never a failure)
        - otherwise                                     -> DATA_STALE
        - NO_DATA / DATA_STALE only fail once the deadline has passed.

    Notes:
        - The timestamp column is cast with edr_cast_as_timestamp and compared directly
          against the UTC boundaries; no timezone conversion is applied here, so the
          column is assumed to hold UTC values (TODO confirm against callers).
        - A max timestamp later than the end of the target day (future-dated rows) is
          treated as fresh. Previously it fell through to DATA_STALE and was reported
          as "before today", which was incorrect.
        - where_expression is escaped with dbt.escape_single_quotes before being embedded
          in the description string literal; otherwise a filter such as
          event_type = 'completed' terminates the literal and breaks the query.
#}
{% macro get_data_freshness_sla_query(model_relation, timestamp_column, sla_deadline_utc, target_date, target_date_start_utc, target_date_end_utc, deadline_passed, formatted_sla_time, timezone, where_expression) %}

    with

    {# Single-row CTE holding the compile-time SLA boundaries #}
    sla_deadline as (
        select
            {{ elementary.edr_cast_as_timestamp("'" ~ sla_deadline_utc ~ "'") }} as deadline_utc,
            {{ elementary.edr_cast_as_timestamp("'" ~ target_date_start_utc ~ "'") }} as target_date_start_utc,
            '{{ target_date }}' as target_date
    ),

    {# Latest timestamp present in the (optionally filtered) data; NULL when no rows match #}
    max_data_timestamp as (
        select
            max({{ elementary.edr_cast_as_timestamp(timestamp_column) }}) as max_timestamp_utc
        from {{ model_relation }}
        {% if where_expression %}
        where {{ where_expression }}
        {% endif %}
    ),

    {# Classify the data as NO_DATA / DATA_FRESH / DATA_STALE #}
    freshness_result as (
        select
            sd.target_date,
            sd.deadline_utc as sla_deadline_utc,
            mdt.max_timestamp_utc,
            case
                when mdt.max_timestamp_utc is null then 'NO_DATA'
                {# No upper bound: future-dated rows are not older than today #}
                when mdt.max_timestamp_utc >= sd.target_date_start_utc then 'DATA_FRESH'
                else 'DATA_STALE'
            end as freshness_status
        from sla_deadline sd
        cross join max_data_timestamp mdt
    ),

    final_result as (
        select
            '{{ model_relation.identifier }}' as model_name,
            target_date,
            '{{ formatted_sla_time }}' as sla_time,
            '{{ timezone }}' as timezone,
            cast(sla_deadline_utc as {{ elementary.edr_type_string() }}) as sla_deadline_utc,
            freshness_status,
            cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) as max_timestamp_utc,
            case
                when freshness_status = 'DATA_FRESH' then false
                {# deadline_passed is resolved at compile time: missing/stale data only fails after the deadline #}
                {% if deadline_passed %}
                else true
                {% else %}
                else false
                {% endif %}
            end as is_failure,
            case
                when freshness_status = 'NO_DATA' then
                    'No data found in "{{ model_relation.identifier }}"' ||
                    {% if where_expression %}
                    ' (with filter: {{ dbt.escape_single_quotes(where_expression) }})' ||
                    {% endif %}
                    '. Expected data to be updated before {{ formatted_sla_time }} {{ timezone }}.'
                when freshness_status = 'DATA_STALE' then
                    'Data in "{{ model_relation.identifier }}" is stale. Last update was at ' ||
                    cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) ||
                    ' UTC, which is before today. Expected fresh data before {{ formatted_sla_time }} {{ timezone }}.'
                else
                    'Data in "{{ model_relation.identifier }}" is fresh - last update at ' ||
                    cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) ||
                    ' UTC (before SLA deadline {{ formatted_sla_time }} {{ timezone }}).'
            end as result_description
        from freshness_result
    )

    {# Only failing rows are returned #}
    select
        model_name,
        target_date,
        sla_time,
        timezone,
        sla_deadline_utc,
        freshness_status,
        max_timestamp_utc,
        result_description
    from final_result
    where is_failure = true

{% endmacro %}

0 commit comments

Comments
 (0)