@@ -164,21 +164,23 @@ def test_exclude_detection_from_training_all_columns(
164164 Test the exclude_detection_period_from_training flag functionality for column anomalies.
165165
166166 Scenario:
167- - 30 days of normal data with consistent null_count pattern (2 nulls per day)
168- - 7 days of anomalous data (10 nulls per day) in detection period
167+ - 30 days of normal data with variance in null_count pattern (8, 10, 12 nulls per day)
168+ - 7 days of anomalous data (20 nulls per day) in detection period
169169 - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
170170 - With exclusion: anomaly excluded from training, test fails (detects anomaly)
171171 """
172172 utc_now = datetime .utcnow ()
173173
174- # Generate 30 days of normal data with consistent null_count (2 nulls per day)
174+ # Generate 30 days of normal data with variance in null_count (8, 10, 12 pattern)
175+ normal_pattern = [8 , 10 , 12 ]
175176 normal_data = []
176177 for i in range (30 ):
177178 date = utc_now - timedelta (days = 37 - i )
179+ null_count = normal_pattern [i % 3 ]
178180 normal_data .extend (
179181 [
180182 {TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ), "superhero" : None }
181- for _ in range (2 )
183+ for _ in range (null_count )
182184 ]
183185 )
184186 normal_data .extend (
@@ -187,18 +189,18 @@ def test_exclude_detection_from_training_all_columns(
187189 TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ),
188190 "superhero" : "Superman" if i % 2 == 0 else "Batman" ,
189191 }
190- for _ in range (8 )
192+ for _ in range (40 - null_count )
191193 ]
192194 )
193195
194- # Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period
196+ # Generate 7 days of anomalous data (20 nulls per day) - 100% increase from mean
195197 anomalous_data = []
196198 for i in range (7 ):
197199 date = utc_now - timedelta (days = 7 - i )
198200 anomalous_data .extend (
199201 [
200202 {TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ), "superhero" : None }
201- for _ in range (10 )
203+ for _ in range (20 )
202204 ]
203205 )
204206 anomalous_data .extend (
@@ -207,7 +209,7 @@ def test_exclude_detection_from_training_all_columns(
207209 TIMESTAMP_COLUMN : date .strftime (DATE_FORMAT ),
208210 "superhero" : "Superman" if i % 2 == 0 else "Batman" ,
209211 }
210- for _ in range (0 ) # No non-null values to keep total similar
212+ for _ in range (20 ) # Keep total rows similar
211213 ]
212214 )
213215
@@ -225,7 +227,7 @@ def test_exclude_detection_from_training_all_columns(
225227 }
226228
227229 test_results_without_exclusion = dbt_project .test (
228- test_id + "_without_exclusion " ,
230+ test_id + "_f " ,
229231 DBT_TEST_NAME ,
230232 test_args_without_exclusion ,
231233 data = all_data ,
@@ -252,7 +254,7 @@ def test_exclude_detection_from_training_all_columns(
252254 }
253255
254256 test_results_with_exclusion = dbt_project .test (
255- test_id + "_with_exclusion " ,
257+ test_id + "_t " ,
256258 DBT_TEST_NAME ,
257259 test_args_with_exclusion ,
258260 data = all_data ,
0 commit comments