@@ -157,17 +157,28 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity(
157157
158158# Anomalies currently not supported on ClickHouse
159159@pytest .mark .skip_targets (["clickhouse" ])
160- def test_exclude_detection_from_training_all_columns (
161- test_id : str , dbt_project : DbtProject
160+ @pytest .mark .parametrize (
161+ "exclude_detection,expected_status" ,
162+ [
163+ (False , "pass" ),
164+ (True , "fail" ),
165+ ],
166+ ids = ["without_exclusion" , "with_exclusion" ],
167+ )
168+ def test_anomaly_in_detection_period (
169+ test_id : str ,
170+ dbt_project : DbtProject ,
171+ exclude_detection : bool ,
172+ expected_status : str ,
162173):
163174 """
164175 Test the exclude_detection_period_from_training flag functionality for column anomalies.
165176
166177 Scenario:
167178 - 30 days of normal data with consistent null_count pattern (2 nulls per day)
168179 - 7 days of anomalous data (10 nulls per day) in detection period
169- - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
170- - With exclusion: anomaly excluded from training, test fails (detects anomaly)
180+ - Without exclusion (exclude_detection=False) : anomaly gets included in training baseline, test passes
181+ - With exclusion (exclude_detection=True) : anomaly excluded from training, test fails (detects anomaly)
171182 """
172183 utc_now = datetime .utcnow ()
173184
@@ -207,67 +218,37 @@ def test_exclude_detection_from_training_all_columns(
207218 TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ),
208219 "superhero" : "Superman" if i % 2 == 0 else "Batman" ,
209220 }
210- for _ in range (0 ) # No non-null values to keep total similar
221+ for _ in range (0 )
211222 ]
212223 )
213224
214225 all_data = normal_data + anomalous_data
215226
216- # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
217- test_args_without_exclusion = {
227+ test_args = {
218228 "timestamp_column" : TIMESTAMP_COLUMN ,
219229 "column_anomalies" : ["null_count" ],
220230 "training_period" : {"period" : "day" , "count" : 30 },
221231 "detection_period" : {"period" : "day" , "count" : 7 },
222232 "time_bucket" : {"period" : "day" , "count" : 1 },
223- "sensitivity" : 5 , # Higher sensitivity to allow anomaly to be absorbed
224- # exclude_detection_period_from_training is not set (defaults to False/None)
233+ "sensitivity" : 5 ,
225234 }
226235
227- test_results_without_exclusion = dbt_project .test (
228- test_id + "_without_exclusion" ,
229- DBT_TEST_NAME ,
230- test_args_without_exclusion ,
231- data = all_data ,
232- multiple_results = True ,
233- )
234-
235- # This should PASS because the anomaly is included in training, making it part of the baseline
236- superhero_result = next (
237- (
238- res
239- for res in test_results_without_exclusion
240- if res ["column_name" ].lower () == "superhero"
241- ),
242- None ,
243- )
244- assert (
245- superhero_result and superhero_result ["status" ] == "pass"
246- ), "Test should pass when anomaly is included in training"
247-
248- # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
249- test_args_with_exclusion = {
250- ** test_args_without_exclusion ,
251- "exclude_detection_period_from_training" : True ,
252- }
236+ if exclude_detection :
237+ test_args ["exclude_detection_period_from_training" ] = True
253238
254- test_results_with_exclusion = dbt_project .test (
255- test_id + "_with_exclusion" ,
239+ test_results = dbt_project .test (
240+ test_id ,
256241 DBT_TEST_NAME ,
257- test_args_with_exclusion ,
242+ test_args ,
258243 data = all_data ,
259244 multiple_results = True ,
260245 )
261246
262- # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
263247 superhero_result = next (
264- (
265- res
266- for res in test_results_with_exclusion
267- if res ["column_name" ].lower () == "superhero"
268- ),
248+ (res for res in test_results if res ["column_name" ].lower () == "superhero" ),
269249 None ,
270250 )
251+ assert superhero_result is not None , "superhero column result not found"
271252 assert (
272- superhero_result and superhero_result ["status" ] == "fail"
273- ), "Test should fail when anomaly is excluded from training "
253+ superhero_result ["status" ] == expected_status
254+ ), f"Expected status ' { expected_status } ' but got ' { superhero_result [ 'status' ] } ' (exclude_detection= { exclude_detection } ) "
0 commit comments