Skip to content

Commit 812d9c2

Browse files
committed
improved tests
1 parent 8cc4050 commit 812d9c2

1 file changed

Lines changed: 46 additions & 78 deletions

File tree

integration_tests/tests/test_column_pii_sampling.py

Lines changed: 46 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
4343
"enable_elementary_test_materialization": True,
4444
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
4545
"disable_samples_on_pii_tags": True,
46-
"pii_tags": ["pii"],
4746
},
4847
)
4948
assert test_result["status"] == "fail"
@@ -92,6 +91,43 @@ def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
9291
assert samples[0]["n_records"] == 10
9392

9493

94+
@pytest.mark.skip_targets(["clickhouse"])
def test_column_pii_default_tag_override(test_id: str, dbt_project: DbtProject):
    """Check that setting ``pii_tags`` replaces the default PII tag.

    The sensitive column is tagged ``pii`` while ``pii_tags`` is set to
    ``["sensitive"]``. The failing ``unique`` test is then expected to
    still store its single sample row.
    """
    # Ten identical rows: the unique test fails with one duplicate group.
    rows = [
        {SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for _ in range(10)
    ]
    column_specs = [{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}}]
    sampling_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        # Custom tag list that does not contain the column's "pii" tag.
        "pii_tags": ["sensitive"],
    }

    test_result = dbt_project.test(
        test_id,
        "unique",
        test_args={"column_name": SENSITIVE_COLUMN},
        data=rows,
        columns=column_specs,
        test_vars=sampling_vars,
    )
    assert test_result["status"] == "fail"

    sample_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(row["result_row"]) for row in sample_rows]

    # Expected: [{'unique_field': 'user@example.com', 'n_records': 10}]
    assert len(samples) == 1
    sample = samples[0]
    assert "unique_field" in sample
    assert sample["unique_field"] == "user@example.com"
    assert "n_records" in sample
    assert sample["n_records"] == 10
129+
130+
95131
@pytest.mark.skip_targets(["clickhouse"])
96132
def test_column_pii_sampling_tags_exist_but_flag_disabled(
97133
test_id: str, dbt_project: DbtProject
@@ -105,15 +141,14 @@ def test_column_pii_sampling_tags_exist_but_flag_disabled(
105141
test_args=dict(column_name=SAFE_COLUMN),
106142
data=data,
107143
columns=[
108-
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
144+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pIi"]}},
109145
{"name": SAFE_COLUMN},
110146
],
111147
test_column=None,
112148
test_vars={
113149
"enable_elementary_test_materialization": True,
114150
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
115151
"disable_samples_on_pii_tags": False, # Flag is disabled
116-
"pii_tags": ["pii"],
117152
},
118153
)
119154
assert test_result["status"] == "fail"
@@ -167,7 +202,7 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
167202

168203

169204
@pytest.mark.skip_targets(["clickhouse"])
170-
def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
205+
def test_unique_test_custom_tag(test_id: str, dbt_project: DbtProject):
171206
"""Test the unique test when the column's tag matches a custom entry in pii_tags"""
172207
data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
173208

@@ -177,14 +212,14 @@ def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
177212
test_args=dict(column_name=SENSITIVE_COLUMN),
178213
data=data,
179214
columns=[
180-
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
215+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["custom_tag"]}},
181216
{"name": SAFE_COLUMN},
182217
],
183218
test_vars={
184219
"enable_elementary_test_materialization": True,
185220
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
186221
"disable_samples_on_pii_tags": True,
187-
"pii_tags": ["pii"],
222+
"pii_tags": ["custom_tag"],
188223
},
189224
)
190225
assert test_result["status"] == "fail"
@@ -198,7 +233,7 @@ def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
198233

199234

200235
@pytest.mark.skip_targets(["clickhouse"])
201-
def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProject):
236+
def test_accepted_values_multi_tags(test_id: str, dbt_project: DbtProject):
202237
"""Test the accepted_values test when the column carries multiple tags ("pii" and "custom_tag")"""
203238
data = [{SENSITIVE_COLUMN: "invalid_value", SAFE_COLUMN: i} for i in range(10)]
204239

@@ -208,7 +243,7 @@ def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProje
208243
test_args=dict(column_name=SENSITIVE_COLUMN, values=["valid1", "valid2"]),
209244
data=data,
210245
columns=[
211-
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
246+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii", "custom_tag"]}},
212247
{"name": SAFE_COLUMN},
213248
],
214249
test_vars={
@@ -229,7 +264,7 @@ def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProje
229264

230265

231266
@pytest.mark.skip_targets(["clickhouse"])
232-
def test_not_null_test_column_mapping(test_id: str, dbt_project: DbtProject):
267+
def test_not_null_test_multi_matched_tags(test_id: str, dbt_project: DbtProject):
233268
"""Test the not_null test when several of the column's tags match the configured pii_tags"""
234269
data = [{SENSITIVE_COLUMN: None, SAFE_COLUMN: i} for i in range(10)]
235270

@@ -239,14 +274,14 @@ def test_not_null_test_column_mapping(test_id: str, dbt_project: DbtProject):
239274
test_args=dict(column_name=SENSITIVE_COLUMN),
240275
data=data,
241276
columns=[
242-
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
277+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii", "sensitive"]}},
243278
{"name": SAFE_COLUMN},
244279
],
245280
test_vars={
246281
"enable_elementary_test_materialization": True,
247282
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
248283
"disable_samples_on_pii_tags": True,
249-
"pii_tags": ["pii"],
284+
"pii_tags": ["pii", "sensitive"],
250285
},
251286
)
252287
assert test_result["status"] == "fail"
@@ -353,70 +388,3 @@ def test_meta_tags_and_accepted_values(test_id: str, dbt_project: DbtProject):
353388
]
354389

355390
assert len(samples) == 0
356-
357-
358-
@pytest.mark.skip_targets(["clickhouse"])
359-
def test_custom_sql_test_with_multiple_pii_columns(
360-
test_id: str, dbt_project: DbtProject
361-
):
362-
"""Test that custom SQL tests with multiple PII columns are handled correctly"""
363-
data = [
364-
{SENSITIVE_COLUMN: "user@example.com", "phone": "123-456-7890", SAFE_COLUMN: i}
365-
for i in range(10)
366-
]
367-
368-
# Test with unique to simulate complex multi-column scenarios
369-
test_result = dbt_project.test(
370-
test_id,
371-
"unique",
372-
test_args=dict(column_name=SENSITIVE_COLUMN),
373-
data=data,
374-
columns=[
375-
{"name": SENSITIVE_COLUMN, "tags": ["pii"]},
376-
{"name": "phone", "tags": ["pii"]},
377-
{"name": SAFE_COLUMN},
378-
],
379-
test_vars={
380-
"enable_elementary_test_materialization": True,
381-
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
382-
"disable_samples_on_pii_tags": True,
383-
"pii_tags": ["pii"],
384-
},
385-
)
386-
assert test_result["status"] == "fail"
387-
388-
samples = [
389-
json.loads(row["result_row"])
390-
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
391-
]
392-
assert len(samples) == 0
393-
394-
395-
@pytest.mark.skip_targets(["clickhouse"])
396-
def test_custom_sql_test_with_subquery_and_pii(test_id: str, dbt_project: DbtProject):
397-
"""Test that custom SQL tests with subqueries and PII columns work correctly"""
398-
data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
399-
400-
# Test with not_null to simulate subquery-like scenarios
401-
test_result = dbt_project.test(
402-
test_id,
403-
"not_null",
404-
test_args=dict(column_name=SENSITIVE_COLUMN),
405-
data=data,
406-
columns=[
407-
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
408-
{"name": SAFE_COLUMN},
409-
],
410-
test_vars={
411-
"enable_elementary_test_materialization": True,
412-
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
413-
"disable_samples_on_pii_tags": True,
414-
"pii_tags": ["pii"],
415-
},
416-
)
417-
assert test_result["status"] == "pass"
418-
samples = [
419-
json.loads(row["result_row"])
420-
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
421-
]
422-
assert len(samples) == 0

0 commit comments

Comments
 (0)