|
5 | 5 |
|
6 | 6 |
|
7 | 7 | @pytest.mark.skip_profile("databricks_cluster") |
8 | | -class TestV1IncrementalColumnTags: |
9 | | - """Test that V1 incremental path applies column tag changes via process_config_changes.""" |
| 8 | +class TestV1IncrementalSkipConfigChanges: |
| 9 | + """Test that incremental_apply_config_changes=false skips metadata fetch queries in V1 path.""" |
10 | 10 |
|
11 | 11 | @pytest.fixture(scope="class") |
12 | 12 | def models(self): |
13 | 13 | return { |
14 | | - "v1_config_changes_sql.sql": fixtures.v1_config_changes_sql, |
15 | | - "schema.yml": fixtures.v1_column_tags_a, |
16 | | - } |
17 | | - |
18 | | - def test_changing_column_tags(self, project): |
19 | | - # First run creates the table |
20 | | - util.run_dbt(["run"]) |
21 | | - |
22 | | - # Update column tags |
23 | | - util.write_file(fixtures.v1_column_tags_b, "models", "schema.yml") |
24 | | - util.run_dbt(["run"]) |
25 | | - |
26 | | - # Verify column tags were applied |
27 | | - results = project.run_sql( |
28 | | - f""" |
29 | | - select column_name, tag_name, tag_value |
30 | | - from `system`.`information_schema`.`column_tags` |
31 | | - where schema_name = '{project.test_schema}' |
32 | | - and table_name = 'v1_config_changes_sql' |
33 | | - order by column_name, tag_name |
34 | | - """, |
35 | | - fetch="all", |
36 | | - ) |
37 | | - |
38 | | - tags_dict = {} |
39 | | - for row in results: |
40 | | - col = row.column_name |
41 | | - if col not in tags_dict: |
42 | | - tags_dict[col] = {} |
43 | | - tags_dict[col][row.tag_name] = row.tag_value |
44 | | - |
45 | | - # Verify expected final state |
46 | | - expected_tags = { |
47 | | - "id": {"pii": "true"}, |
48 | | - "msg": {"source": "app"}, |
| 14 | + "v1_skip_config_changes_sql.sql": fixtures.v1_skip_config_changes_sql, |
49 | 15 | } |
50 | | - assert tags_dict == expected_tags |
51 | | - |
52 | | - |
53 | | -@pytest.mark.skip_profile("databricks_cluster") |
54 | | -class TestV1IncrementalColumnMasks: |
55 | | - """Test that V1 incremental path applies column mask changes via process_config_changes.""" |
56 | 16 |
|
57 | 17 | @pytest.fixture(scope="class") |
58 | | - def models(self): |
| 18 | + def macros(self): |
59 | 19 | return { |
60 | | - "column_mask_sql.sql": fixtures.column_mask_sql, |
61 | | - "schema.yml": fixtures.column_mask_base, |
| 20 | + "fail_if_metadata_fetched.sql": fixtures.fail_if_metadata_fetched_macros, |
62 | 21 | } |
63 | 22 |
|
64 | | - def test_changing_column_masks(self, project): |
65 | | - # Create mask functions |
66 | | - project.run_sql( |
67 | | - f""" |
68 | | - CREATE OR REPLACE FUNCTION |
69 | | - {project.database}.{project.test_schema}.full_mask(password STRING) |
70 | | - RETURNS STRING |
71 | | - RETURN '*****'; |
72 | | - """ |
73 | | - ) |
74 | | - project.run_sql( |
75 | | - f""" |
76 | | - CREATE OR REPLACE FUNCTION |
77 | | - {project.database}.{project.test_schema}.email_mask(value STRING) |
78 | | - RETURNS STRING |
79 | | - RETURN CONCAT( |
80 | | - REPEAT('*', POSITION('@' IN value) - 1), |
81 | | - SUBSTR(value, POSITION('@' IN value)) |
82 | | - ); |
83 | | - """ |
84 | | - ) |
85 | | - |
86 | | - # First run with initial masks |
| 23 | + def test_incremental_run_skips_metadata_queries(self, project): |
| 24 | + # First run creates the table |
87 | 25 | util.run_dbt(["run"]) |
88 | | - masks = project.run_sql( |
89 | | - "SELECT id, name, email, password FROM column_mask_sql", |
90 | | - fetch="all", |
91 | | - ) |
92 | | - assert len(masks) == 1 |
93 | | - assert masks[0][1] == "*****" # name (masked) |
94 | | - assert masks[0][3] == "password123" # password (unmasked) |
95 | | - |
96 | | - # Update masks and verify changes |
97 | | - util.write_file(fixtures.column_mask_valid_mask_updates, "models", "schema.yml") |
| 26 | + # Second run exercises the incremental merge path. |
| 27 | + # If metadata fetch macros are called, they will raise errors and the run will fail. |
98 | 28 | util.run_dbt(["run"]) |
99 | 29 |
|
100 | | - result = project.run_sql( |
101 | | - "SELECT id, name, email, password FROM column_mask_sql", fetch="all" |
102 | | - ) |
103 | | - assert len(result) == 1 |
104 | | - assert result[0][1] == "hello" # name (unmasked) |
105 | | - assert result[0][3] == "*****" # password (masked) |
106 | | - |
107 | 30 |
|
108 | 31 | @pytest.mark.skip_profile("databricks_cluster") |
109 | | -class TestV1IncrementalSkipConfigChanges: |
110 | | - """Test that incremental_apply_config_changes=false skips metadata fetch queries.""" |
| 32 | +class TestV1IncrementalColumnMasksNotApplied: |
| 33 | + """Test that column masks are NOT applied in V1 incremental path. |
| 34 | +
|
| 35 | + Column masks must only be applied in V2 where the empty table is created before data |
| 36 | + arrives. In V1 (CTAS), data is written immediately, so applying masks after the fact |
| 37 | + would leave a window where data is unmasked — a security/privacy vulnerability. |
| 38 | + """ |
111 | 39 |
|
112 | 40 | @pytest.fixture(scope="class") |
113 | 41 | def models(self): |
114 | 42 | return { |
115 | | - "v1_skip_config_changes_sql.sql": fixtures.v1_skip_config_changes_sql, |
| 43 | + "v1_column_mask_model_sql.sql": fixtures.v1_column_mask_model_sql, |
| 44 | + "schema.yml": fixtures.v1_column_mask_schema, |
116 | 45 | } |
117 | 46 |
|
118 | 47 | @pytest.fixture(scope="class") |
119 | 48 | def macros(self): |
120 | 49 | return { |
121 | | - "fail_if_metadata_fetched.sql": fixtures.fail_if_metadata_fetched_macros, |
| 50 | + "fail_if_column_masks_applied.sql": fixtures.fail_if_column_masks_applied_macro, |
122 | 51 | } |
123 | 52 |
|
124 | | - def test_incremental_run_skips_metadata_queries(self, project): |
| 53 | + def test_column_masks_not_applied_in_v1(self, project): |
| 54 | + # Create the mask function so the model config is valid |
| 55 | + project.run_sql( |
| 56 | + f""" |
| 57 | + CREATE OR REPLACE FUNCTION |
| 58 | + {project.database}.{project.test_schema}.full_mask(val STRING) |
| 59 | + RETURNS STRING |
| 60 | + RETURN '*****'; |
| 61 | + """ |
| 62 | + ) |
| 63 | + |
125 | 64 | # First run creates the table |
126 | 65 | util.run_dbt(["run"]) |
127 | | - # Second run exercises the incremental merge path. |
128 | | - # If metadata fetch macros are called, they will raise errors and the run will fail. |
| 66 | + # Second run exercises the incremental merge path with config change detection. |
| 67 | + # If apply_column_masks is called, the overridden macro raises an error. |
129 | 68 | util.run_dbt(["run"]) |
0 commit comments