diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py index 986a4414cc..4632a16a8f 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py @@ -82,7 +82,7 @@ def __init__( mappings: dict[str, Any] | None = None, settings: dict[str, Any] | None = DEFAULT_SETTINGS, create_index: bool = True, - http_auth: Any = ( + http_auth: tuple[Secret, Secret] | tuple[str, str] | list[str] | str | AWSAuth | None = ( Secret.from_env_var("OPENSEARCH_USERNAME", strict=False), # noqa: B008 Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False), # noqa: B008 ), @@ -138,17 +138,6 @@ def __init__( self._mappings = mappings or self._get_default_mappings() self._settings = settings self._create_index = create_index - self._http_auth_are_secrets = False - - # Handle authentication - if isinstance(http_auth, (tuple, list)) and len(http_auth) == 2: # noqa: PLR2004 - username, password = http_auth - if isinstance(username, Secret) and isinstance(password, Secret): - self._http_auth_are_secrets = True - username_val = username.resolve_value() - password_val = password.resolve_value() - http_auth = [username_val, password_val] if username_val and password_val else None - self._http_auth = http_auth self._use_ssl = use_ssl self._verify_certs = verify_certs @@ -218,17 +207,16 @@ def to_dict(self) -> dict[str, Any]: Dictionary with serialized data. """ # Handle http_auth serialization - http_auth: list[dict[str, Any]] | dict[str, Any] | tuple[str, str] | list[str] | str = "" - if isinstance(self._http_auth, list) and self._http_auth_are_secrets: - # Recreate the Secret objects for serialization - http_auth = [ - Secret.from_env_var("OPENSEARCH_USERNAME", strict=False).to_dict(), - Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False).to_dict(), - ] + http_auth: list[dict[str, Any]] | dict[str, Any] | None = None + if ( + isinstance(self._http_auth, (tuple, list)) + and len(self._http_auth) == 2 # noqa: PLR2004 + and isinstance(self._http_auth[0], Secret) + and isinstance(self._http_auth[1], Secret) + ): + http_auth = [self._http_auth[0].to_dict(), self._http_auth[1].to_dict()] elif isinstance(self._http_auth, AWSAuth): http_auth = self._http_auth.to_dict() - else: - http_auth = self._http_auth return default_to_dict( self, @@ -267,12 +255,31 @@ def from_dict(cls, data: dict[str, Any]) -> "OpenSearchDocumentStore": init_params["http_auth"] = [Secret.from_dict(item) for item in http_auth] if are_secrets else http_auth return default_from_dict(cls, data) + def _resolve_http_auth(self) -> tuple[str, str] | list[str] | str | AWSAuth | None: + """Resolves Secret objects in http_auth to their plain values.""" + if isinstance(self._http_auth, (tuple, list)) and len(self._http_auth) == 2: # noqa: PLR2004 + first, second = self._http_auth + if isinstance(first, Secret) and isinstance(second, Secret): + username = first.resolve_value() + password = second.resolve_value() + if username and password: + return [username, password] + if not username and not password: + return None + msg = "http_auth requires both username and password to be set, but only one was provided." + raise DocumentStoreError(msg) + if isinstance(first, str) and isinstance(second, str): + return (first, second) + if isinstance(self._http_auth, (str, AWSAuth)): + return self._http_auth + return None + def _ensure_initialized(self): # Ideally, we have a warm-up stage for document stores as well as components. if not self._client: self._client = OpenSearch( hosts=self._hosts, - http_auth=self._http_auth, + http_auth=self._resolve_http_auth(), use_ssl=self._use_ssl, verify_certs=self._verify_certs, timeout=self._timeout, @@ -284,7 +291,8 @@ def _ensure_initialized(self): async def _ensure_initialized_async(self): if not self._async_client: - async_http_auth = AsyncAWSAuth(self._http_auth) if isinstance(self._http_auth, AWSAuth) else self._http_auth + resolved_auth = self._resolve_http_auth() + async_http_auth = AsyncAWSAuth(resolved_auth) if isinstance(resolved_auth, AWSAuth) else resolved_auth self._async_client = AsyncOpenSearch( hosts=self._hosts, http_auth=async_http_auth, diff --git a/integrations/opensearch/tests/test_auth.py b/integrations/opensearch/tests/test_auth.py index 984d251bae..26e231620a 100644 --- a/integrations/opensearch/tests/test_auth.py +++ b/integrations/opensearch/tests/test_auth.py @@ -254,7 +254,7 @@ def test_ds_from_dict_basic_auth(self, _mock_opensearch_client): document_store._ensure_initialized() assert document_store._client _mock_opensearch_client.assert_called_once() - assert _mock_opensearch_client.call_args[1]["http_auth"] == ["user", "pw"] + assert _mock_opensearch_client.call_args[1]["http_auth"] == ("user", "pw") @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_ds_from_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.MonkeyPatch): @@ -304,7 +304,7 @@ def test_ds_to_dict_basic_auth(self, _mock_opensearch_client): "settings": {"index.knn": True}, "return_embedding": False, "create_index": True, - "http_auth": ("user", "pw"), + "http_auth": None, "use_ssl": None, "verify_certs": None, "timeout": None, diff --git a/integrations/opensearch/tests/test_bm25_retriever.py b/integrations/opensearch/tests/test_bm25_retriever.py index d6bb6350e4..7d1d302430 100644 --- a/integrations/opensearch/tests/test_bm25_retriever.py +++ b/integrations/opensearch/tests/test_bm25_retriever.py @@ -56,7 +56,10 @@ def test_to_dict(_mock_opensearch_client): "settings": {"index.knn": True}, "return_embedding": False, "create_index": True, - "http_auth": None, + "http_auth": [ + {"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False}, + {"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False}, + ], "use_ssl": None, "verify_certs": None, "timeout": None, diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index 5b55307cea..fac751035f 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -38,7 +38,10 @@ def test_to_dict(_mock_opensearch_client): "settings": {"index.knn": True}, "return_embedding": False, "create_index": True, - "http_auth": None, + "http_auth": [ + {"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False}, + {"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False}, + ], "use_ssl": None, "verify_certs": None, "timeout": None, @@ -46,6 +49,16 @@ def test_to_dict(_mock_opensearch_client): } +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_to_dict_with_http_auth_str(_mock_opensearch_client): + """ + Verify that plain strings secrets are not serialized. + """ + document_store = OpenSearchDocumentStore(hosts="some hosts", http_auth=("admin", "admin")) + res = document_store.to_dict() + assert res["init_parameters"]["http_auth"] is None + + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(_mock_opensearch_client): data = { @@ -93,6 +106,54 @@ def test_from_dict(_mock_opensearch_client): assert document_store._timeout == 60 +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_from_dict_with_http_auth_str(_mock_opensearch_client): + """ + Verify that serialized plain strings secrets can be properly deserialized. + """ + data = { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "http_auth": ("admin", "admin"), + }, + } + document_store = OpenSearchDocumentStore.from_dict(data) + assert document_store._http_auth == ("admin", "admin") + + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_resolve_http_auth_with_secrets(_mock_opensearch_client, monkeypatch): + monkeypatch.setenv("OPENSEARCH_USERNAME", "admin") + monkeypatch.setenv("OPENSEARCH_PASSWORD", "secret") + store = OpenSearchDocumentStore(hosts="testhost") + assert store._resolve_http_auth() == ["admin", "secret"] + + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_resolve_http_auth_with_no_secrets(_mock_opensearch_client, monkeypatch): + monkeypatch.delenv("OPENSEARCH_USERNAME", raising=False) + monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False) + store = OpenSearchDocumentStore(hosts="testhost") + assert store._resolve_http_auth() is None + + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_resolve_http_auth_with_partial_secrets(_mock_opensearch_client, monkeypatch): + monkeypatch.setenv("OPENSEARCH_USERNAME", "admin") + monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False) + store = OpenSearchDocumentStore(hosts="testhost") + with pytest.raises(DocumentStoreError, match="http_auth requires both username and password"): + store._resolve_http_auth() + + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_resolve_http_auth_with_plain_strings(_mock_opensearch_client): + store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin")) + assert store._resolve_http_auth() == ("admin", "admin") + + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_init_is_lazy(_mock_opensearch_client): OpenSearchDocumentStore(hosts="testhost") @@ -110,16 +171,19 @@ def test_get_default_mappings(_mock_opensearch_client): } +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") @patch("haystack_integrations.document_stores.opensearch.document_store.bulk") -def test_routing_extracted_from_metadata(mock_bulk, document_store): +def test_routing_extracted_from_metadata(mock_bulk, _mock_opensearch_client): """Test routing extraction from document metadata""" mock_bulk.return_value = (2, []) + store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin")) + docs = [ Document(id="1", content="Doc", meta={"_routing": "user_a", "other": "data"}), Document(id="2", content="Doc"), ] - document_store.write_documents(docs) + store.write_documents(docs) actions = list(mock_bulk.call_args.kwargs["actions"]) @@ -135,13 +199,16 @@ def test_routing_extracted_from_metadata(mock_bulk, document_store): assert "_routing" not in actions[1]["_source"].get("meta", {}) +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") @patch("haystack_integrations.document_stores.opensearch.document_store.bulk") -def test_routing_in_delete(mock_bulk, document_store): +def test_routing_in_delete(mock_bulk, _mock_opensearch_client): """Test routing parameter in delete operations""" mock_bulk.return_value = (2, []) + store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin")) + routing_map = {"1": "user_a", "2": "user_b"} - document_store.delete_documents(["1", "2", "3"], routing=routing_map) + store.delete_documents(["1", "2", "3"], routing=routing_map) actions = list(mock_bulk.call_args.kwargs["actions"]) diff --git a/integrations/opensearch/tests/test_embedding_retriever.py b/integrations/opensearch/tests/test_embedding_retriever.py index af7384c549..2d33cbd308 100644 --- a/integrations/opensearch/tests/test_embedding_retriever.py +++ b/integrations/opensearch/tests/test_embedding_retriever.py @@ -72,7 +72,10 @@ def test_to_dict(_mock_opensearch_client): }, "return_embedding": False, "create_index": True, - "http_auth": None, + "http_auth": [ + {"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False}, + {"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False}, + ], "use_ssl": None, "verify_certs": None, "timeout": None, diff --git a/integrations/opensearch/tests/test_open_search_hybrid_retriever.py b/integrations/opensearch/tests/test_open_search_hybrid_retriever.py index 38da9ca16f..110b1ccf2d 100644 --- a/integrations/opensearch/tests/test_open_search_hybrid_retriever.py +++ b/integrations/opensearch/tests/test_open_search_hybrid_retriever.py @@ -46,7 +46,10 @@ class TestOpenSearchHybridRetriever: "settings": {"index.knn": True}, "create_index": True, "return_embedding": False, - "http_auth": None, + "http_auth": [ + {"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False}, + {"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False}, + ], "use_ssl": None, "verify_certs": None, "timeout": None,