|
| 1 | +# Copyright 2025 Collate |
| 2 | +# Licensed under the Collate Community License, Version 1.0 (the "License"); |
| 3 | +# you may not use this file except in compliance with the License. |
| 4 | +# You may obtain a copy of the License at |
| 5 | +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE |
| 6 | +# Unless required by applicable law or agreed to in writing, software |
| 7 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 8 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 9 | +# See the License for the specific language governing permissions and |
| 10 | +# limitations under the License. |
| 11 | + |
"""
Regression tests for the BigQuery test-connection fix:

- multi-project test connection scopes each probe to its own project
- ``_test_connection`` invokes the test function with the migrated signature
  (``metadata`` only) so a service-connection object can never leak into
  ``timeout_seconds`` again
- the engine built for the test connection is always disposed
"""
| 21 | + |
| 22 | +from unittest.mock import MagicMock, patch |
| 23 | + |
| 24 | +from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import ( |
| 25 | + BigQueryConnection as BigQueryConnectionConfig, |
| 26 | +) |
| 27 | +from metadata.ingestion.source.database.bigquery.connection import BigQueryConnection |
| 28 | +from metadata.ingestion.source.database.bigquery.helper import ( |
| 29 | + clone_connection_for_project, |
| 30 | +) |
| 31 | +from metadata.ingestion.source.database.bigquery.metadata import BigquerySource |
| 32 | + |
# Dotted module paths; ``_CONNECTION_MODULE`` prefixes the ``patch`` targets
# used by the tests in this file.
_BIGQUERY_PACKAGE = "metadata.ingestion.source.database.bigquery"
_CONNECTION_MODULE = f"{_BIGQUERY_PACKAGE}.connection"
_METADATA_MODULE = f"{_BIGQUERY_PACKAGE}.metadata"

# Dummy service-account payload. ``projectId`` is only a placeholder:
# ``_bq_config`` replaces it with the project id(s) each test needs.
_GCP_CONFIG = {
    "type": "service_account",
    "projectId": "placeholder",
    "privateKeyId": "key-id",
    "privateKey": "private-key",
    "clientEmail": "user@example.com",
    "clientId": "1234",
    "authUri": "https://accounts.google.com/o/oauth2/auth",
    "tokenUri": "https://oauth2.googleapis.com/token",
    "authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
    "clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs",
}
| 48 | + |
| 49 | + |
def _bq_config(project_id) -> BigQueryConnectionConfig:
    """Return a validated BigQuery connection config using *project_id*.

    ``project_id`` replaces the placeholder ``projectId`` of ``_GCP_CONFIG``
    and is passed through as given; the tests in this file supply either a
    single project id string or a list of project ids.
    """
    gcp_config = dict(_GCP_CONFIG, projectId=project_id)
    payload = {"type": "BigQuery", "credentials": {"gcpConfig": gcp_config}}
    return BigQueryConnectionConfig.model_validate(payload)
| 54 | + |
| 55 | + |
def test_clone_connection_scopes_to_single_project():
    """A clone made for one project holds only that project id.

    The multi-project connection it was cloned from must keep both ids.
    """
    multi_project = _bq_config(["proj-a", "proj-b"])

    single_project = clone_connection_for_project("proj-a", multi_project)

    cloned_project_id = single_project.credentials.gcpConfig.projectId.root
    assert cloned_project_id == "proj-a"

    # the original multi-project connection must stay untouched (deepcopy isolation)
    source_project_ids = multi_project.credentials.gcpConfig.projectId.root
    assert source_project_ids == ["proj-a", "proj-b"]
| 64 | + |
| 65 | + |
@patch(f"{_CONNECTION_MODULE}.test_connection_steps")
@patch(f"{_CONNECTION_MODULE}.create_generic_db_connection")
@patch(f"{_CONNECTION_MODULE}.set_google_credentials")
def test_test_connection_probes_each_project_with_a_valid_timeout(mock_creds, mock_create, mock_steps):
    """``_test_connection`` runs the step runner once per project with an int timeout.

    Only the external boundaries (engine factory, credentials, step runner)
    are stubbed; ``clone_connection_for_project`` and the test-connection
    function lookup run for real.
    """
    project_ids = ["proj-a", "proj-b"]
    mock_create.return_value = MagicMock()

    # Allocate the source without running __init__ and set the attributes
    # this test relies on by hand.
    source = object.__new__(BigquerySource)
    source.metadata = MagicMock()
    source.service_connection = _bq_config(list(project_ids))
    source.project_ids = project_ids
    source.temp_credentials_file_path = []

    BigquerySource._test_connection(source)

    probes = mock_steps.call_args_list
    assert len(probes) == 2
    for probe in probes:
        step_kwargs = probe.kwargs
        assert step_kwargs["service_type"] == "BigQuery"
        # Regression guard: the original bug passed the service connection
        # positionally into timeout_seconds, which then reached signal.alarm().
        # A valid timeout reaching the step runner proves the signature is right.
        assert isinstance(step_kwargs["timeout_seconds"], int)
| 88 | + |
| 89 | + |
@patch(f"{_CONNECTION_MODULE}.test_connection_steps")
@patch(f"{_CONNECTION_MODULE}.create_generic_db_connection")
@patch(f"{_CONNECTION_MODULE}.set_google_credentials")
def test_test_connection_disposes_engine(mock_creds, mock_create, mock_steps):
    """The engine created for a test connection is disposed exactly once."""
    fake_engine = MagicMock()
    mock_create.return_value = fake_engine

    BigQueryConnection(_bq_config("proj-a")).test_connection(metadata=MagicMock())

    # One engine is built through the stubbed factory ...
    mock_create.assert_called_once()
    # ... and released again. Both halves of the fix matter here: _get_client
    # registers engine.dispose AND test_connection calls self.close(), so the
    # engine is always released.
    fake_engine.dispose.assert_called_once()
0 commit comments