Skip to content

Commit 3e2a068

Browse files
Address PR feedback: revert dimension anomalies change and refactor test with parametrization
Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
1 parent b4de512 commit 3e2a068

2 files changed

Lines changed: 29 additions & 49 deletions

File tree

integration_tests/tests/test_all_columns_anomalies.py

Lines changed: 27 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,28 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity(
157157

158158
# Anomalies currently not supported on ClickHouse
159159
@pytest.mark.skip_targets(["clickhouse"])
160-
def test_exclude_detection_from_training_all_columns(
161-
test_id: str, dbt_project: DbtProject
160+
@pytest.mark.parametrize(
161+
"exclude_detection,expected_status",
162+
[
163+
(False, "pass"),
164+
(True, "fail"),
165+
],
166+
ids=["without_exclusion", "with_exclusion"],
167+
)
168+
def test_anomaly_in_detection_period(
169+
test_id: str,
170+
dbt_project: DbtProject,
171+
exclude_detection: bool,
172+
expected_status: str,
162173
):
163174
"""
164175
Test the exclude_detection_period_from_training flag functionality for column anomalies.
165176
166177
Scenario:
167178
- 30 days of normal data with consistent null_count pattern (2 nulls per day)
168179
- 7 days of anomalous data (10 nulls per day) in detection period
169-
- Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
170-
- With exclusion: anomaly excluded from training, test fails (detects anomaly)
180+
- Without exclusion (exclude_detection=False): anomaly gets included in training baseline, test passes
181+
- With exclusion (exclude_detection=True): anomaly excluded from training, test fails (detects anomaly)
171182
"""
172183
utc_now = datetime.utcnow()
173184

@@ -207,67 +218,37 @@ def test_exclude_detection_from_training_all_columns(
207218
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
208219
"superhero": "Superman" if i % 2 == 0 else "Batman",
209220
}
210-
for _ in range(0) # No non-null values to keep total similar
221+
for _ in range(0)
211222
]
212223
)
213224

214225
all_data = normal_data + anomalous_data
215226

216-
# Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
217-
test_args_without_exclusion = {
227+
test_args = {
218228
"timestamp_column": TIMESTAMP_COLUMN,
219229
"column_anomalies": ["null_count"],
220230
"training_period": {"period": "day", "count": 30},
221231
"detection_period": {"period": "day", "count": 7},
222232
"time_bucket": {"period": "day", "count": 1},
223-
"sensitivity": 5, # Higher sensitivity to allow anomaly to be absorbed
224-
# exclude_detection_period_from_training is not set (defaults to False/None)
233+
"sensitivity": 5,
225234
}
226235

227-
test_results_without_exclusion = dbt_project.test(
228-
test_id + "_without_exclusion",
229-
DBT_TEST_NAME,
230-
test_args_without_exclusion,
231-
data=all_data,
232-
multiple_results=True,
233-
)
234-
235-
# This should PASS because the anomaly is included in training, making it part of the baseline
236-
superhero_result = next(
237-
(
238-
res
239-
for res in test_results_without_exclusion
240-
if res["column_name"].lower() == "superhero"
241-
),
242-
None,
243-
)
244-
assert (
245-
superhero_result and superhero_result["status"] == "pass"
246-
), "Test should pass when anomaly is included in training"
247-
248-
# Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
249-
test_args_with_exclusion = {
250-
**test_args_without_exclusion,
251-
"exclude_detection_period_from_training": True,
252-
}
236+
if exclude_detection:
237+
test_args["exclude_detection_period_from_training"] = True
253238

254-
test_results_with_exclusion = dbt_project.test(
255-
test_id + "_with_exclusion",
239+
test_results = dbt_project.test(
240+
test_id,
256241
DBT_TEST_NAME,
257-
test_args_with_exclusion,
242+
test_args,
258243
data=all_data,
259244
multiple_results=True,
260245
)
261246

262-
# This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
263247
superhero_result = next(
264-
(
265-
res
266-
for res in test_results_with_exclusion
267-
if res["column_name"].lower() == "superhero"
268-
),
248+
(res for res in test_results if res["column_name"].lower() == "superhero"),
269249
None,
270250
)
251+
assert superhero_result is not None, "superhero column result not found"
271252
assert (
272-
superhero_result and superhero_result["status"] == "fail"
273-
), "Test should fail when anomaly is excluded from training"
253+
superhero_result["status"] == expected_status
254+
), f"Expected status '{expected_status}' but got '{superhero_result['status']}' (exclude_detection={exclude_detection})"

macros/edr/tests/test_dimension_anomalies.sql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %}
1+
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %}
22
{{ config(tags = ['elementary-tests']) }}
33
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
44
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -39,8 +39,7 @@
3939
anomaly_exclude_metrics=anomaly_exclude_metrics,
4040
detection_period=detection_period,
4141
training_period=training_period,
42-
exclude_final_results=exclude_final_results,
43-
exclude_detection_period_from_training=exclude_detection_period_from_training) %}
42+
exclude_final_results=exclude_final_results) %}
4443

4544
{%- if not test_configuration %}
4645
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}

0 commit comments

Comments
 (0)