Skip to content

Commit 80380a8

Browse files
Fix test_exclude_detection_from_training_all_columns: shorten test ID suffixes and adjust test data for proper anomaly detection
Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
1 parent 36878be commit 80380a8

1 file changed

Lines changed: 12 additions & 10 deletions

File tree

integration_tests/tests/test_all_columns_anomalies.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -164,21 +164,23 @@ def test_exclude_detection_from_training_all_columns(
164164
Test the exclude_detection_period_from_training flag functionality for column anomalies.
165165
166166
Scenario:
167-
- 30 days of normal data with consistent null_count pattern (2 nulls per day)
168-
- 7 days of anomalous data (10 nulls per day) in detection period
167+
- 30 days of normal data with variance in null_count pattern (8, 10, 12 nulls per day)
168+
- 7 days of anomalous data (20 nulls per day) in detection period
169169
- Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
170170
- With exclusion: anomaly excluded from training, test fails (detects anomaly)
171171
"""
172172
utc_now = datetime.utcnow()
173173

174-
# Generate 30 days of normal data with consistent null_count (2 nulls per day)
174+
# Generate 30 days of normal data with variance in null_count (8, 10, 12 pattern)
175+
normal_pattern = [8, 10, 12]
175176
normal_data = []
176177
for i in range(30):
177178
date = utc_now - timedelta(days=37 - i)
179+
null_count = normal_pattern[i % 3]
178180
normal_data.extend(
179181
[
180182
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
181-
for _ in range(2)
183+
for _ in range(null_count)
182184
]
183185
)
184186
normal_data.extend(
@@ -187,18 +189,18 @@ def test_exclude_detection_from_training_all_columns(
187189
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
188190
"superhero": "Superman" if i % 2 == 0 else "Batman",
189191
}
190-
for _ in range(8)
192+
for _ in range(40 - null_count)
191193
]
192194
)
193195

194-
# Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period
196+
# Generate 7 days of anomalous data (20 nulls per day) - 100% increase from mean
195197
anomalous_data = []
196198
for i in range(7):
197199
date = utc_now - timedelta(days=7 - i)
198200
anomalous_data.extend(
199201
[
200202
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
201-
for _ in range(10)
203+
for _ in range(20)
202204
]
203205
)
204206
anomalous_data.extend(
@@ -207,7 +209,7 @@ def test_exclude_detection_from_training_all_columns(
207209
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
208210
"superhero": "Superman" if i % 2 == 0 else "Batman",
209211
}
210-
for _ in range(0) # No non-null values to keep total similar
212+
for _ in range(20) # Keep total rows similar
211213
]
212214
)
213215

@@ -225,7 +227,7 @@ def test_exclude_detection_from_training_all_columns(
225227
}
226228

227229
test_results_without_exclusion = dbt_project.test(
228-
test_id + "_without_exclusion",
230+
test_id + "_f",
229231
DBT_TEST_NAME,
230232
test_args_without_exclusion,
231233
data=all_data,
@@ -252,7 +254,7 @@ def test_exclude_detection_from_training_all_columns(
252254
}
253255

254256
test_results_with_exclusion = dbt_project.test(
255-
test_id + "_with_exclusion",
257+
test_id + "_t",
256258
DBT_TEST_NAME,
257259
test_args_with_exclusion,
258260
data=all_data,

0 commit comments

Comments
 (0)