@@ -43,7 +43,6 @@ def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
4343 "enable_elementary_test_materialization" : True ,
4444 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
4545 "disable_samples_on_pii_tags" : True ,
46- "pii_tags" : ["pii" ],
4746 },
4847 )
4948 assert test_result ["status" ] == "fail"
@@ -92,6 +91,43 @@ def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
9291 assert samples [0 ]["n_records" ] == 10
9392
9493
@pytest.mark.skip_targets(["clickhouse"])
def test_column_pii_default_tag_override(test_id: str, dbt_project: DbtProject):
    """Check that a custom ``pii_tags`` var overrides the default PII tag.

    The sensitive column is tagged "pii" while ``pii_tags`` is set to
    ["sensitive"]; the assertions below expect the failing ``unique`` test to
    still store its sample row.
    """
    # Ten rows sharing one value, so `unique` on the sensitive column fails.
    duplicate_rows = [
        {SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for _ in range(10)
    ]
    tagged_columns = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
    ]
    # "pii" is deliberately absent from the overriding tag list.
    overridden_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["sensitive"],
    }

    test_result = dbt_project.test(
        test_id,
        "unique",
        test_args={"column_name": SENSITIVE_COLUMN},
        data=duplicate_rows,
        columns=tagged_columns,
        test_vars=overridden_vars,
    )
    assert test_result["status"] == "fail"

    stored_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(stored["result_row"]) for stored in stored_rows]

    # Expected sample: {'unique_field': 'user@example.com', 'n_records': 10}
    assert len(samples) == 1
    sample = samples[0]
    assert "unique_field" in sample
    assert sample["unique_field"] == "user@example.com"
    assert "n_records" in sample
    assert sample["n_records"] == 10
130+
95131@pytest .mark .skip_targets (["clickhouse" ])
96132def test_column_pii_sampling_tags_exist_but_flag_disabled (
97133 test_id : str , dbt_project : DbtProject
@@ -105,15 +141,14 @@ def test_column_pii_sampling_tags_exist_but_flag_disabled(
105141 test_args = dict (column_name = SAFE_COLUMN ),
106142 data = data ,
107143 columns = [
108- {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii " ]}},
144+ {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pIi " ]}},
109145 {"name" : SAFE_COLUMN },
110146 ],
111147 test_column = None ,
112148 test_vars = {
113149 "enable_elementary_test_materialization" : True ,
114150 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
115151 "disable_samples_on_pii_tags" : False , # Flag is disabled
116- "pii_tags" : ["pii" ],
117152 },
118153 )
119154 assert test_result ["status" ] == "fail"
@@ -167,7 +202,7 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
167202
168203
169204@pytest .mark .skip_targets (["clickhouse" ])
170- def test_unique_test_column_mapping (test_id : str , dbt_project : DbtProject ):
205+ def test_unique_test_custom_tag (test_id : str , dbt_project : DbtProject ):
171206 """Test that column mapping correctly maps unique test columns"""
172207 data = [{SENSITIVE_COLUMN : "user@example.com" , SAFE_COLUMN : i } for i in range (10 )]
173208
@@ -177,14 +212,14 @@ def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
177212 test_args = dict (column_name = SENSITIVE_COLUMN ),
178213 data = data ,
179214 columns = [
180- {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii " ]}},
215+ {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["custom_tag " ]}},
181216 {"name" : SAFE_COLUMN },
182217 ],
183218 test_vars = {
184219 "enable_elementary_test_materialization" : True ,
185220 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
186221 "disable_samples_on_pii_tags" : True ,
187- "pii_tags" : ["pii " ],
222+ "pii_tags" : ["custom_tag " ],
188223 },
189224 )
190225 assert test_result ["status" ] == "fail"
@@ -198,7 +233,7 @@ def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
198233
199234
200235@pytest .mark .skip_targets (["clickhouse" ])
201- def test_accepted_values_test_column_mapping (test_id : str , dbt_project : DbtProject ):
236+ def test_accepted_values_multi_tags (test_id : str , dbt_project : DbtProject ):
202237 """Test that column mapping correctly maps accepted_values test columns"""
203238 data = [{SENSITIVE_COLUMN : "invalid_value" , SAFE_COLUMN : i } for i in range (10 )]
204239
@@ -208,7 +243,7 @@ def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProje
208243 test_args = dict (column_name = SENSITIVE_COLUMN , values = ["valid1" , "valid2" ]),
209244 data = data ,
210245 columns = [
211- {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
246+ {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" , "custom_tag" ]}},
212247 {"name" : SAFE_COLUMN },
213248 ],
214249 test_vars = {
@@ -229,7 +264,7 @@ def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProje
229264
230265
231266@pytest .mark .skip_targets (["clickhouse" ])
232- def test_not_null_test_column_mapping (test_id : str , dbt_project : DbtProject ):
267+ def test_not_null_test_multi_matched_tags (test_id : str , dbt_project : DbtProject ):
233268 """Test that column mapping correctly handles not_null test columns"""
234269 data = [{SENSITIVE_COLUMN : None , SAFE_COLUMN : i } for i in range (10 )]
235270
@@ -239,14 +274,14 @@ def test_not_null_test_column_mapping(test_id: str, dbt_project: DbtProject):
239274 test_args = dict (column_name = SENSITIVE_COLUMN ),
240275 data = data ,
241276 columns = [
242- {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
277+ {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" , "sensitive" ]}},
243278 {"name" : SAFE_COLUMN },
244279 ],
245280 test_vars = {
246281 "enable_elementary_test_materialization" : True ,
247282 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
248283 "disable_samples_on_pii_tags" : True ,
249- "pii_tags" : ["pii" ],
284+ "pii_tags" : ["pii" , "sensitive" ],
250285 },
251286 )
252287 assert test_result ["status" ] == "fail"
@@ -353,70 +388,3 @@ def test_meta_tags_and_accepted_values(test_id: str, dbt_project: DbtProject):
353388 ]
354389
355390 assert len (samples ) == 0
356-
357-
358- @pytest .mark .skip_targets (["clickhouse" ])
359- def test_custom_sql_test_with_multiple_pii_columns (
360- test_id : str , dbt_project : DbtProject
361- ):
362- """Test that custom SQL tests with multiple PII columns are handled correctly"""
363- data = [
364- {SENSITIVE_COLUMN : "user@example.com" , "phone" : "123-456-7890" , SAFE_COLUMN : i }
365- for i in range (10 )
366- ]
367-
368- # Test with unique to simulate complex multi-column scenarios
369- test_result = dbt_project .test (
370- test_id ,
371- "unique" ,
372- test_args = dict (column_name = SENSITIVE_COLUMN ),
373- data = data ,
374- columns = [
375- {"name" : SENSITIVE_COLUMN , "tags" : ["pii" ]},
376- {"name" : "phone" , "tags" : ["pii" ]},
377- {"name" : SAFE_COLUMN },
378- ],
379- test_vars = {
380- "enable_elementary_test_materialization" : True ,
381- "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
382- "disable_samples_on_pii_tags" : True ,
383- "pii_tags" : ["pii" ],
384- },
385- )
386- assert test_result ["status" ] == "fail"
387-
388- samples = [
389- json .loads (row ["result_row" ])
390- for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
391- ]
392- assert len (samples ) == 0
393-
394-
395- @pytest .mark .skip_targets (["clickhouse" ])
396- def test_custom_sql_test_with_subquery_and_pii (test_id : str , dbt_project : DbtProject ):
397- """Test that custom SQL tests with subqueries and PII columns work correctly"""
398- data = [{SENSITIVE_COLUMN : "user@example.com" , SAFE_COLUMN : i } for i in range (10 )]
399-
400- # Test with not_null to simulate subquery-like scenarios
401- test_result = dbt_project .test (
402- test_id ,
403- "not_null" ,
404- test_args = dict (column_name = SENSITIVE_COLUMN ),
405- data = data ,
406- columns = [
407- {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
408- {"name" : SAFE_COLUMN },
409- ],
410- test_vars = {
411- "enable_elementary_test_materialization" : True ,
412- "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
413- "disable_samples_on_pii_tags" : True ,
414- "pii_tags" : ["pii" ],
415- },
416- )
417- assert test_result ["status" ] == "pass"
418- samples = [
419- json .loads (row ["result_row" ])
420- for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
421- ]
422- assert len (samples ) == 0
0 commit comments