diff --git a/integration_tests/tests/dbt_project.py b/integration_tests/tests/dbt_project.py
index a871f0f53..6aea795b8 100644
--- a/integration_tests/tests/dbt_project.py
+++ b/integration_tests/tests/dbt_project.py
@@ -149,6 +149,7 @@ def test(
         test_vars: Optional[dict] = None,
         elementary_enabled: bool = True,
         model_config: Optional[Dict[str, Any]] = None,
+        column_config: Optional[Dict[str, Any]] = None,
         *,
         multiple_results: bool = False,
     ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
@@ -173,9 +174,10 @@ def test(
         if test_column is None:
             table_yaml["tests"] = [{dbt_test_name: test_args}]
         else:
-            table_yaml["columns"] = [
-                {"name": test_column, "tests": [{dbt_test_name: test_args}]}
-            ]
+            column_def = {"name": test_column, "tests": [{dbt_test_name: test_args}]}
+            if column_config:
+                column_def["config"] = column_config
+            table_yaml["columns"] = [column_def]
 
         temp_table_ctx: Any
         if as_model:
diff --git a/integration_tests/tests/test_column_pii_sampling.py b/integration_tests/tests/test_column_pii_sampling.py
new file mode 100644
index 000000000..dff983da6
--- /dev/null
+++ b/integration_tests/tests/test_column_pii_sampling.py
@@ -0,0 +1,390 @@
+import json
+
+import pytest
+from dbt_project import DbtProject
+
+SENSITIVE_COLUMN = "email"
+SAFE_COLUMN = "order_count"
+
+SAMPLES_QUERY = """
+    with latest_elementary_test_result as (
+        select id
+        from {{{{ ref("elementary_test_results") }}}}
+        where lower(table_name) = lower('{test_id}')
+        order by created_at desc
+        limit 1
+    )
+
+    select result_row
+    from {{{{ ref("test_result_rows") }}}}
+    where elementary_test_results_id in (select * from latest_elementary_test_result)
+"""
+
+TEST_SAMPLE_ROW_COUNT = 5
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
+    """Test that no samples are stored when the tested column is PII-tagged and protection is on"""
+    data = [
+        {SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for _ in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
+    """Test that all columns are included when column-level PII protection is disabled"""
+    data = [
+        {SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for _ in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": False,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    # sample should be {'unique_field': 'user@example.com', 'n_records': 10}
+    assert len(samples) == 1
+    assert "unique_field" in samples[0]
+    assert samples[0]["unique_field"] == "user@example.com"
+    assert "n_records" in samples[0]
+    assert samples[0]["n_records"] == 10
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_pii_default_tag_override(test_id: str, dbt_project: DbtProject):
+    """Test that default PII tag can be overridden with a custom tag"""
+    data = [
+        {SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for _ in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["sensitive"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    # sample should be {'unique_field': 'user@example.com', 'n_records': 10}
+    assert len(samples) == 1
+    assert "unique_field" in samples[0]
+    assert samples[0]["unique_field"] == "user@example.com"
+    assert "n_records" in samples[0]
+    assert samples[0]["n_records"] == 10
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_pii_sampling_tags_exist_but_flag_disabled(
+    test_id: str, dbt_project: DbtProject
+):
+    """Test that when PII tags exist but disable_samples_on_pii_tags is false, samples are collected normally"""
+    data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: 1} for _ in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SAFE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pIi"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_column=None,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": False,  # Flag is disabled
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    # When flag is disabled, we get the full sample (not limited by PII filtering)
+    assert len(samples) == 1
+    assert "unique_field" in samples[0]
+    assert samples[0]["unique_field"] == 1
+    assert "n_records" in samples[0]
+    assert samples[0]["n_records"] == 10
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProject):
+    """Test behavior when all columns are tagged as PII"""
+    data = [
+        {SENSITIVE_COLUMN: f"user{i}@example.com", SAFE_COLUMN: i} for i in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        test_args=dict(column_name=SAFE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+            {"name": SAFE_COLUMN, "config": {"tags": ["pii"]}},
+        ],
+        test_column=None,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii"],
+        },
+    )
+    assert test_result["status"] == "pass"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    # When all columns are PII, no samples should be collected
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_unique_test_custom_tag(test_id: str, dbt_project: DbtProject):
+    """Test that a custom tag listed in pii_tags disables samples for a unique test"""
+    data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["custom_tag"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["custom_tag"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_accepted_values_multi_tags(test_id: str, dbt_project: DbtProject):
+    """Test that accepted_values samples are disabled when one of several column tags is a PII tag"""
+    data = [{SENSITIVE_COLUMN: "invalid_value", SAFE_COLUMN: i} for i in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "accepted_values",
+        test_args=dict(column_name=SENSITIVE_COLUMN, values=["valid1", "valid2"]),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii", "custom_tag"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_not_null_test_multi_matched_tags(test_id: str, dbt_project: DbtProject):
+    """Test that not_null samples are disabled when several column tags match pii_tags"""
+    data = [{SENSITIVE_COLUMN: None, SAFE_COLUMN: i} for i in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii", "sensitive"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii", "sensitive"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_multiple_pii_columns_mapping(test_id: str, dbt_project: DbtProject):
+    """Test that samples are disabled when several columns of the model are tagged as PII"""
+    data = [
+        {SENSITIVE_COLUMN: "user@example.com", "phone": "123-456-7890", SAFE_COLUMN: i}
+        for i in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+            {"name": "phone", "config": {"tags": ["pii"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_custom_sql_test_with_pii_column_simple(test_id: str, dbt_project: DbtProject):
+    """Test that unique-test samples are disabled for a PII column when pii_tags is set explicitly"""
+    data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "unique",
+        test_args=dict(column_name=SENSITIVE_COLUMN),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_meta_tags_and_accepted_values(test_id: str, dbt_project: DbtProject):
+    data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
+
+    # The PII tag is declared under the column's meta.tags (not config.tags) here
+    test_result = dbt_project.test(
+        test_id,
+        "accepted_values",
+        test_args=dict(column_name=SENSITIVE_COLUMN, values=["invalid@example.com"]),
+        data=data,
+        columns=[
+            {"name": SENSITIVE_COLUMN, "meta": {"tags": ["pii"]}},
+            {"name": SAFE_COLUMN},
+        ],
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
diff --git a/integration_tests/tests/test_disable_samples_config.py b/integration_tests/tests/test_disable_samples_config.py
new file mode 100644
index 000000000..849672a98
--- /dev/null
+++ b/integration_tests/tests/test_disable_samples_config.py
@@ -0,0 +1,162 @@
+import json
+
+import pytest
+from dbt_project import DbtProject
+
+COLUMN_NAME = "sensitive_data"
+
+SAMPLES_QUERY = """
+    with latest_elementary_test_result as (
+        select id
+        from {{{{ ref("elementary_test_results") }}}}
+        where lower(table_name) = lower('{test_id}')
+        order by created_at desc
+        limit 1
+    )
+
+    select result_row
+    from {{{{ ref("test_result_rows") }}}}
+    where elementary_test_results_id in (select * from latest_elementary_test_result)
+"""
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_disable_samples_config_prevents_sampling(
+    test_id: str, dbt_project: DbtProject
+):
+    null_count = 20
+    data = [{COLUMN_NAME: None} for _ in range(null_count)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(column_name=COLUMN_NAME, meta={"disable_test_samples": True}),
+        data=data,
+        as_model=True,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 5,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_disable_samples_false_allows_sampling(test_id: str, dbt_project: DbtProject):
+    null_count = 20
+    data = [{COLUMN_NAME: None} for _ in range(null_count)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(column_name=COLUMN_NAME, meta={"disable_test_samples": False}),
+        data=data,
+        as_model=True,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 5,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+    assert len(samples) == 5
+    for sample in samples:
+        assert COLUMN_NAME in sample
+        assert sample[COLUMN_NAME] is None
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_disable_samples_config_overrides_pii_tags(
+    test_id: str, dbt_project: DbtProject
+):
+    null_count = 20
+    data = [{COLUMN_NAME: None} for _ in range(null_count)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(
+            column_name=COLUMN_NAME,
+            meta={"disable_test_samples": True, "tags": ["pii"]},
+        ),
+        data=data,
+        as_model=True,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 5,
+            "disable_samples_on_pii_tags": True,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_disable_samples_and_pii_interaction(test_id: str, dbt_project: DbtProject):
+    """Test that no samples are stored when disable_test_samples and PII tag protection are both on"""
+    data = [
+        {"col1": None, "col2": f"pii{i}", "col3": f"disabled{i}"} for i in range(10)
+    ]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(column_name="col1", meta={"disable_test_samples": True}),
+        data=data,
+        as_model=True,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 5,
+            "disable_samples_on_pii_tags": True,
+            "pii_tags": ["pii"],
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_disable_samples_with_multiple_columns(test_id: str, dbt_project: DbtProject):
+    """Test that disable_test_samples suppresses all samples even when the model has other columns"""
+    data = [{"col1": None, "col2": f"value{i}"} for i in range(10)]
+
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(column_name="col1", meta={"disable_test_samples": True}),
+        data=data,
+        as_model=True,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 5,
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    samples = [
+        json.loads(row["result_row"])
+        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
+    ]
+
+    assert len(samples) == 0
diff --git a/macros/edr/materializations/test/test.sql b/macros/edr/materializations/test/test.sql
index c0cb48ce9..9452978d4 100644
--- a/macros/edr/materializations/test/test.sql
+++ b/macros/edr/materializations/test/test.sql
@@ -61,10 +61,11 @@
     {% set sample_limit = 0 %}
   {% elif elementary.is_pii_table(flattened_test) %}
     {% set sample_limit = 0 %}
+  {% elif elementary.should_disable_sampling_for_pii(flattened_test) %}
+    {% set sample_limit = 0 %}
   {% endif %}
 
-  {% set result_rows = elementary.query_test_result_rows(sample_limit=sample_limit,
-                                                         ignore_passed_tests=true) %}
+  {% set result_rows = elementary.query_test_result_rows(sample_limit=sample_limit, ignore_passed_tests=true) %}
   {% set elementary_test_results_row = elementary.get_dbt_test_result_row(flattened_test, result_rows) %}
   {% do elementary.cache_elementary_test_results_rows([elementary_test_results_row]) %}
   {% do return(result) %}
@@ -124,6 +125,7 @@
     {% do elementary.debug_log("Skipping sample query because the test passed.") %}
     {% do return([]) %}
   {% endif %}
+
   {% set query %}
     with test_results as (
       {{ sql }}
@@ -133,6 +135,89 @@
   {% do return(elementary.agate_to_dicts(elementary.run_query(query))) %}
 {% endmacro %}
 
+{# Returns the column names to keep out of stored test samples: the parent model's
+   PII-tagged columns (when disable_samples_on_pii_tags is set) plus the tested column
+   when sampling is disabled in that column's config. #}
+{% macro get_columns_to_exclude_from_sampling(flattened_test) %}
+  {% set columns_to_exclude = [] %}
+
+  {% if not flattened_test %}
+    {% do return(columns_to_exclude) %}
+  {% endif %}
+
+  {% if elementary.get_config_var('disable_samples_on_pii_tags') %}
+    {% set pii_columns = elementary.get_pii_columns_from_parent_model(flattened_test) %}
+    {% set columns_to_exclude = columns_to_exclude + pii_columns %}
+  {% endif %}
+
+  {% if elementary.is_sampling_disabled_for_column(flattened_test) %}
+    {% set test_column_name = elementary.insensitive_get_dict_value(flattened_test, 'test_column_name') %}
+    {% if test_column_name and test_column_name not in columns_to_exclude %}
+      {% do columns_to_exclude.append(test_column_name) %}
+    {% endif %}
+  {% endif %}
+
+  {% do return(columns_to_exclude) %}
+{% endmacro %}
+
+{# Returns true when sampling must be disabled entirely: PII protection is on, the parent
+   model has PII-tagged columns, and the compiled test query uses "*" or mentions one. #}
+{% macro should_disable_sampling_for_pii(flattened_test) %}
+  {% if not elementary.get_config_var('disable_samples_on_pii_tags') %}
+    {% do return(false) %}
+  {% endif %}
+
+  {% set pii_columns = elementary.get_pii_columns_from_parent_model(flattened_test) %}
+  {% if not pii_columns %}
+    {% do return(false) %}
+  {% endif %}
+
+  {# Get the compiled test query #}
+  {% set test_query = elementary.get_compiled_code(flattened_test) %}
+  {# A missing compiled query cannot be inspected, so conservatively disable sampling #}
+  {% if not test_query %}
+    {% do return(true) %}
+  {% endif %}
+  {% set test_query_lower = test_query.lower() %}
+
+  {# Check if query uses * (select all columns) #}
+  {# Note: This is intentionally conservative and may over-censor in cases like
+     "SELECT * FROM other_table" in CTEs, but it's better to be safe with PII data #}
+  {% if '*' in test_query_lower %}
+    {% do return(true) %}
+  {% endif %}
+
+  {# Check if any PII column name appears in the test query (plain substring match) #}
+  {% for pii_column in pii_columns %}
+    {% if pii_column.lower() in test_query_lower %}
+      {% do return(true) %}
+    {% endif %}
+  {% endfor %}
+
+  {% do return(false) %}
+{% endmacro %}
+
+{# Returns the disable_test_samples value from the tested column's config on the parent
+   model (default false); false when the test has no column name or parent model. #}
+{% macro is_sampling_disabled_for_column(flattened_test) %}
+  {% set test_column_name = elementary.insensitive_get_dict_value(flattened_test, 'test_column_name') %}
+  {% set parent_model_unique_id = elementary.insensitive_get_dict_value(flattened_test, 'parent_model_unique_id') %}
+
+  {% if not test_column_name or not parent_model_unique_id %}
+    {% do return(false) %}
+  {% endif %}
+
+  {% set parent_model = elementary.get_node(parent_model_unique_id) %}
+  {% if parent_model and parent_model.get('columns') %}
+    {% set column_config = parent_model.get('columns', {}).get(test_column_name, {}).get('config', {}) %}
+    {% set disable_test_samples = elementary.safe_get_with_default(column_config, 'disable_test_samples', false) %}
+    {% do return(disable_test_samples) %}
+  {% endif %}
+
+  {% do return(false) %}
+{% endmacro %}
+
+
 {% macro cache_elementary_test_results_rows(elementary_test_results_rows) %}
   {% do elementary.get_cache("elementary_test_results").update({model.unique_id: elementary_test_results_rows}) %}
 {% endmacro %}
diff --git a/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql b/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql
new file mode 100644
index 000000000..b60d374c1
--- /dev/null
+++ b/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql
@@ -0,0 +1,60 @@
+{# Collects a column's tags from its top-level, config and meta "tags" entries and returns
+   them lower-cased and de-duplicated. Null config/meta/tags entries are treated as empty. #}
+{% macro get_column_tags(column_node) %}
+  {% set _tags_sources = [
+    column_node.get('tags') or [],
+    (column_node.get('config') or {}).get('tags') or [],
+    (column_node.get('meta') or {}).get('tags') or [],
+  ] %}
+
+  {% set all_column_tags = [] %}
+  {% for src in _tags_sources %}
+    {% set tags_list = src if src is iterable and not (src is string) else [src] %}
+    {% do all_column_tags.extend(tags_list) %}
+  {% endfor %}
+
+  {% do return(all_column_tags | map('lower') | unique | list) %}
+{% endmacro %}
+
+{# Returns the names of the parent model's columns that carry any tag listed in the pii_tags
+   config var (case-insensitive); empty when disable_samples_on_pii_tags is off. #}
+{% macro get_pii_columns_from_parent_model(flattened_test) %}
+  {% set pii_columns = [] %}
+
+  {% if not elementary.get_config_var('disable_samples_on_pii_tags') %}
+    {% do return(pii_columns) %}
+  {% endif %}
+
+  {% set parent_model_unique_id = elementary.insensitive_get_dict_value(flattened_test, 'parent_model_unique_id') %}
+  {% set parent_model = elementary.get_node(parent_model_unique_id) %}
+
+  {% if not parent_model %}
+    {% do return(pii_columns) %}
+  {% endif %}
+
+  {% set raw_pii_tags = elementary.get_config_var('pii_tags') %}
+  {% if raw_pii_tags is string %}
+    {% set pii_tags = [raw_pii_tags|lower] %}
+  {% else %}
+    {% set pii_tags = (raw_pii_tags or []) | map('lower') | list %}
+  {% endif %}
+
+  {# Check individual columns for PII tags #}
+  {% set column_nodes = parent_model.get("columns") %}
+  {% if not column_nodes %}
+    {% do return(pii_columns) %}
+  {% endif %}
+
+  {% for column_node in column_nodes.values() %}
+    {% set all_column_tags_lower = elementary.get_column_tags(column_node) %}
+
+    {% for pii_tag in pii_tags %}
+      {% if pii_tag in all_column_tags_lower %}
+        {% do pii_columns.append(column_node.get('name')) %}
+        {% break %}
+      {% endif %}
+    {% endfor %}
+  {% endfor %}
+
+  {% do return(pii_columns) %}
+{% endmacro %}