Skip to content

Commit f521f09

Browse files
Add disable_samples configuration flag
- Add a table-level `disable_samples` flag that overrides PII detection.
- The flag is set via meta configuration: `meta: {disable_samples: true}`.
- It works independently of PII tags and takes precedence over them.
- Add comprehensive integration tests for all scenarios.
- Addresses the additional requirements of Linear issue ELE-4833.

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
1 parent c56e916 commit f521f09

2 files changed

Lines changed: 99 additions & 1 deletion

File tree

integration_tests/tests/test_sampling_pii.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,96 @@ def test_sampling_pii_feature_disabled(test_id: str, dbt_project: DbtProject):
108108
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
109109
]
110110
assert len(samples) == TEST_SAMPLE_ROW_COUNT
111+
112+
113+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_disable_samples_flag(test_id: str, dbt_project: DbtProject):
    """Verify that ``meta.disable_samples: true`` leaves no sample rows.

    The model is tagged ``normal`` (not one of ``pii_table_tags``) and
    ``disable_samples_on_pii_tables`` is off, so the only sampling-related
    setting that differs from a plain run is the ``disable_samples`` flag.
    """
    # Fifty rows with a null column value, so the not_null test fails.
    failing_rows = [{COLUMN_NAME: None} for _ in range(50)]

    model_settings = {
        "config": {"meta": {"disable_samples": True}, "tags": ["normal"]}
    }
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tables": False,
        "pii_table_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=failing_rows,
        as_model=True,
        model_config=model_settings,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    # Decode every row returned by the samples query for this test.
    stored_samples = []
    for record in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id)):
        stored_samples.append(json.loads(record["result_row"]))
    assert len(stored_samples) == 0
142+
143+
144+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_disable_samples_overrides_pii(test_id: str, dbt_project: DbtProject):
    """Verify that no samples are stored when both mechanisms are active.

    The model sets ``meta.disable_samples: true`` and is also tagged ``pii``
    while ``disable_samples_on_pii_tables`` is enabled; the samples query is
    expected to return nothing.
    """
    # Fifty rows with a null column value, so the not_null test fails.
    failing_rows = [{COLUMN_NAME: None} for _ in range(50)]

    model_settings = {"config": {"meta": {"disable_samples": True}, "tags": ["pii"]}}
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tables": True,
        "pii_table_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=failing_rows,
        as_model=True,
        model_config=model_settings,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    # Decode every row returned by the samples query for this test.
    stored_samples = []
    for record in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id)):
        stored_samples.append(json.loads(record["result_row"]))
    assert len(stored_samples) == 0
171+
172+
173+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_disable_samples_false_allows_samples(
    test_id: str, dbt_project: DbtProject
):
    """Verify that ``meta.disable_samples: false`` still yields samples.

    With the flag explicitly false, a non-PII tag and the PII feature var
    off, the samples query should return ``TEST_SAMPLE_ROW_COUNT`` rows.
    """
    # Fifty rows with a null column value, so the not_null test fails.
    failing_rows = [{COLUMN_NAME: None} for _ in range(50)]

    model_settings = {
        "config": {"meta": {"disable_samples": False}, "tags": ["normal"]}
    }
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tables": False,
        "pii_table_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=failing_rows,
        as_model=True,
        model_config=model_settings,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    # Decode every row returned by the samples query for this test.
    stored_samples = []
    for record in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id)):
        stored_samples.append(json.loads(record["result_row"]))
    assert len(stored_samples) == TEST_SAMPLE_ROW_COUNT

macros/edr/materializations/test/test.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,14 @@
5151
{% macro handle_dbt_test(flattened_test, materialization_macro) %}
5252
{% set result = materialization_macro() %}
5353
{% set sample_limit = elementary.get_config_var('test_sample_row_count') %}
54-
{% if elementary.is_pii_table(flattened_test) %}
54+
55+
{% set disable_samples = elementary.insensitive_get_dict_value(flattened_test, 'meta', {}).get('disable_samples', false) %}
56+
{% if disable_samples %}
57+
{% set sample_limit = 0 %}
58+
{% elif elementary.is_pii_table(flattened_test) %}
5559
{% set sample_limit = 0 %}
5660
{% endif %}
61+
5762
{% set result_rows = elementary.query_test_result_rows(sample_limit=sample_limit,
5863
ignore_passed_tests=true) %}
5964
{% set elementary_test_results_row = elementary.get_dbt_test_result_row(flattened_test, result_rows) %}

0 commit comments

Comments
 (0)