|
| 1 | +# Copyright 2024 The Feast Authors |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +from unittest.mock import MagicMock, patch |
| 16 | + |
| 17 | +import pandas as pd |
| 18 | +import pytest |
| 19 | + |
| 20 | +from feast.entity import Entity |
| 21 | +from feast.infra.registry.snowflake import SnowflakeRegistry, SnowflakeRegistryConfig |
| 22 | + |
| 23 | + |
| 24 | +@pytest.fixture |
| 25 | +def registry(): |
| 26 | + config = SnowflakeRegistryConfig( |
| 27 | + database="TEST_DB", |
| 28 | + schema="PUBLIC", |
| 29 | + account="test_account", |
| 30 | + user="test_user", |
| 31 | + password="test_password", # pragma: allowlist secret |
| 32 | + ) |
| 33 | + mock_conn = MagicMock() |
| 34 | + mock_conn.__enter__ = MagicMock(return_value=MagicMock()) |
| 35 | + mock_conn.__exit__ = MagicMock(return_value=False) |
| 36 | + |
| 37 | + # execute_snowflake_statement during __init__ creates tables; return empty cursor |
| 38 | + mock_cursor = MagicMock() |
| 39 | + mock_cursor.fetch_pandas_all.return_value = pd.DataFrame() |
| 40 | + |
| 41 | + with ( |
| 42 | + patch( |
| 43 | + "feast.infra.registry.snowflake.GetSnowflakeConnection", |
| 44 | + return_value=mock_conn, |
| 45 | + ), |
| 46 | + patch( |
| 47 | + "feast.infra.registry.snowflake.execute_snowflake_statement", |
| 48 | + return_value=mock_cursor, |
| 49 | + ), |
| 50 | + ): |
| 51 | + reg = SnowflakeRegistry(config, "test_project", None) |
| 52 | + return reg |
| 53 | + |
| 54 | + |
| 55 | +def test_apply_object_update_includes_project_id_in_where_clause(registry): |
| 56 | + """ |
| 57 | + Regression test for feast-dev/feast#6208. |
| 58 | +
|
| 59 | + _apply_object UPDATE path was missing project_id in the WHERE clause, |
| 60 | + allowing a same-named object in one project to silently overwrite the |
| 61 | + same-named object in a different project when a Snowflake registry is shared. |
| 62 | +
|
| 63 | + The SELECT path correctly scopes by project_id; the UPDATE must do the same. |
| 64 | + """ |
| 65 | + entity = Entity(name="driver", join_keys=["driver_id"]) |
| 66 | + project = "project_b" |
| 67 | + |
| 68 | + # Non-empty DataFrame tells _apply_object the row already exists → UPDATE path |
| 69 | + existing_row = pd.DataFrame({"project_id": [project]}) |
| 70 | + |
| 71 | + captured_queries = [] |
| 72 | + |
| 73 | + def capture_and_respond(conn, query): |
| 74 | + normalised = " ".join(query.split()) |
| 75 | + captured_queries.append(normalised) |
| 76 | + cursor = MagicMock() |
| 77 | + cursor.fetch_pandas_all.return_value = ( |
| 78 | + existing_row if "SELECT" in normalised else pd.DataFrame() |
| 79 | + ) |
| 80 | + return cursor |
| 81 | + |
| 82 | + mock_conn = MagicMock() |
| 83 | + mock_conn.__enter__ = MagicMock(return_value=MagicMock()) |
| 84 | + mock_conn.__exit__ = MagicMock(return_value=False) |
| 85 | + |
| 86 | + with ( |
| 87 | + patch( |
| 88 | + "feast.infra.registry.snowflake.GetSnowflakeConnection", |
| 89 | + return_value=mock_conn, |
| 90 | + ), |
| 91 | + patch( |
| 92 | + "feast.infra.registry.snowflake.execute_snowflake_statement", |
| 93 | + side_effect=capture_and_respond, |
| 94 | + ), |
| 95 | + patch.object(registry, "_maybe_init_project_metadata"), |
| 96 | + patch.object(registry, "_initialize_project_if_not_exists"), |
| 97 | + patch.object(registry, "get_project", return_value=MagicMock()), |
| 98 | + patch.object(registry, "apply_project"), |
| 99 | + patch.object(registry, "_set_last_updated_metadata"), |
| 100 | + ): |
| 101 | + registry._apply_object( |
| 102 | + "ENTITIES", |
| 103 | + project, |
| 104 | + "ENTITY_NAME", |
| 105 | + entity, |
| 106 | + "ENTITY_PROTO", |
| 107 | + ) |
| 108 | + |
| 109 | + update_queries = [q for q in captured_queries if q.startswith("UPDATE")] |
| 110 | + assert len(update_queries) == 1, ( |
| 111 | + f"Expected exactly 1 UPDATE query, got {len(update_queries)}: {update_queries}" |
| 112 | + ) |
| 113 | + |
| 114 | + update_query = update_queries[0] |
| 115 | + assert f"project_id = '{project}'" in update_query, ( |
| 116 | + f"feast#6208: UPDATE WHERE clause is missing project_id filter — " |
| 117 | + f"cross-project overwrites are possible in shared Snowflake registries.\n" |
| 118 | + f"Query was: {update_query}" |
| 119 | + ) |
| 120 | + |
| 121 | + |
| 122 | +def test_apply_object_does_not_overwrite_sibling_project(registry): |
| 123 | + """ |
| 124 | + Cross-project isolation: applying an object in project_b must not overwrite |
| 125 | + the same-named object in project_a when sharing a Snowflake registry. |
| 126 | +
|
| 127 | + Regression coverage for feast-dev/feast#6208: the UPDATE path must scope by |
| 128 | + project_id so writes in one project cannot bleed into a sibling project. |
| 129 | + """ |
| 130 | + entity = Entity(name="driver", join_keys=["driver_id"]) |
| 131 | + project_a, project_b = "project_a", "project_b" |
| 132 | + |
| 133 | + # Simulate both projects having a pre-existing "driver" entity row |
| 134 | + row_b = pd.DataFrame({"project_id": [project_b]}) |
| 135 | + |
| 136 | + update_queries = [] |
| 137 | + |
| 138 | + def simulated_snowflake(conn, query): |
| 139 | + normalised = " ".join(query.split()) |
| 140 | + cursor = MagicMock() |
| 141 | + if "SELECT" in normalised: |
| 142 | + # Scope the response to the project in the query; no row for project_a |
| 143 | + if f"project_id = '{project_b}'" in normalised: |
| 144 | + cursor.fetch_pandas_all.return_value = row_b |
| 145 | + else: |
| 146 | + cursor.fetch_pandas_all.return_value = pd.DataFrame() |
| 147 | + elif "UPDATE" in normalised: |
| 148 | + update_queries.append(normalised) |
| 149 | + cursor.fetch_pandas_all.return_value = pd.DataFrame() |
| 150 | + else: |
| 151 | + cursor.fetch_pandas_all.return_value = pd.DataFrame() |
| 152 | + return cursor |
| 153 | + |
| 154 | + mock_conn = MagicMock() |
| 155 | + mock_conn.__enter__ = MagicMock(return_value=MagicMock()) |
| 156 | + mock_conn.__exit__ = MagicMock(return_value=False) |
| 157 | + |
| 158 | + with ( |
| 159 | + patch( |
| 160 | + "feast.infra.registry.snowflake.GetSnowflakeConnection", |
| 161 | + return_value=mock_conn, |
| 162 | + ), |
| 163 | + patch( |
| 164 | + "feast.infra.registry.snowflake.execute_snowflake_statement", |
| 165 | + side_effect=simulated_snowflake, |
| 166 | + ), |
| 167 | + patch.object(registry, "_maybe_init_project_metadata"), |
| 168 | + patch.object(registry, "_initialize_project_if_not_exists"), |
| 169 | + patch.object(registry, "get_project", return_value=MagicMock()), |
| 170 | + patch.object(registry, "apply_project"), |
| 171 | + patch.object(registry, "_set_last_updated_metadata"), |
| 172 | + ): |
| 173 | + registry._apply_object( |
| 174 | + "ENTITIES", project_b, "ENTITY_NAME", entity, "ENTITY_PROTO" |
| 175 | + ) |
| 176 | + |
| 177 | + assert len(update_queries) == 1, ( |
| 178 | + f"Expected exactly 1 UPDATE, got {len(update_queries)}: {update_queries}" |
| 179 | + ) |
| 180 | + update_query = update_queries[0] |
| 181 | + assert f"project_id = '{project_b}'" in update_query, ( |
| 182 | + f"feast#6208: UPDATE is not scoped to {project_b!r} — cross-project overwrite possible.\n" |
| 183 | + f"Query: {update_query}" |
| 184 | + ) |
| 185 | + assert f"project_id = '{project_a}'" not in update_query, ( |
| 186 | + f"feast#6208: UPDATE WHERE clause references {project_a!r} — unintended cross-project write.\n" |
| 187 | + f"Query: {update_query}" |
| 188 | + ) |
0 commit comments