Skip to content

Commit cef4925

Browse files
authored
fix: OpenSearch - do not serialize string Secrets + authentication refactoring (#2967)
* fix: OpenSearch - do not serialize string Secrets + refactoring * test _resolve_http_auth * stricter type
1 parent 0c470d1 commit cef4925

6 files changed

Lines changed: 117 additions & 33 deletions

File tree

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def __init__(
8282
mappings: dict[str, Any] | None = None,
8383
settings: dict[str, Any] | None = DEFAULT_SETTINGS,
8484
create_index: bool = True,
85-
http_auth: Any = (
85+
http_auth: tuple[Secret, Secret] | tuple[str, str] | list[str] | str | AWSAuth | None = (
8686
Secret.from_env_var("OPENSEARCH_USERNAME", strict=False), # noqa: B008
8787
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False), # noqa: B008
8888
),
@@ -138,17 +138,6 @@ def __init__(
138138
self._mappings = mappings or self._get_default_mappings()
139139
self._settings = settings
140140
self._create_index = create_index
141-
self._http_auth_are_secrets = False
142-
143-
# Handle authentication
144-
if isinstance(http_auth, (tuple, list)) and len(http_auth) == 2: # noqa: PLR2004
145-
username, password = http_auth
146-
if isinstance(username, Secret) and isinstance(password, Secret):
147-
self._http_auth_are_secrets = True
148-
username_val = username.resolve_value()
149-
password_val = password.resolve_value()
150-
http_auth = [username_val, password_val] if username_val and password_val else None
151-
152141
self._http_auth = http_auth
153142
self._use_ssl = use_ssl
154143
self._verify_certs = verify_certs
@@ -218,17 +207,16 @@ def to_dict(self) -> dict[str, Any]:
218207
Dictionary with serialized data.
219208
"""
220209
# Handle http_auth serialization
221-
http_auth: list[dict[str, Any]] | dict[str, Any] | tuple[str, str] | list[str] | str = ""
222-
if isinstance(self._http_auth, list) and self._http_auth_are_secrets:
223-
# Recreate the Secret objects for serialization
224-
http_auth = [
225-
Secret.from_env_var("OPENSEARCH_USERNAME", strict=False).to_dict(),
226-
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False).to_dict(),
227-
]
210+
http_auth: list[dict[str, Any]] | dict[str, Any] | None = None
211+
if (
212+
isinstance(self._http_auth, (tuple, list))
213+
and len(self._http_auth) == 2 # noqa: PLR2004
214+
and isinstance(self._http_auth[0], Secret)
215+
and isinstance(self._http_auth[1], Secret)
216+
):
217+
http_auth = [self._http_auth[0].to_dict(), self._http_auth[1].to_dict()]
228218
elif isinstance(self._http_auth, AWSAuth):
229219
http_auth = self._http_auth.to_dict()
230-
else:
231-
http_auth = self._http_auth
232220

233221
return default_to_dict(
234222
self,
@@ -267,12 +255,31 @@ def from_dict(cls, data: dict[str, Any]) -> "OpenSearchDocumentStore":
267255
init_params["http_auth"] = [Secret.from_dict(item) for item in http_auth] if are_secrets else http_auth
268256
return default_from_dict(cls, data)
269257

258+
def _resolve_http_auth(self) -> tuple[str, str] | list[str] | str | AWSAuth | None:
259+
"""Resolves Secret objects in http_auth to their plain values."""
260+
if isinstance(self._http_auth, (tuple, list)) and len(self._http_auth) == 2: # noqa: PLR2004
261+
first, second = self._http_auth
262+
if isinstance(first, Secret) and isinstance(second, Secret):
263+
username = first.resolve_value()
264+
password = second.resolve_value()
265+
if username and password:
266+
return [username, password]
267+
if not username and not password:
268+
return None
269+
msg = "http_auth requires both username and password to be set, but only one was provided."
270+
raise DocumentStoreError(msg)
271+
if isinstance(first, str) and isinstance(second, str):
272+
return (first, second)
273+
if isinstance(self._http_auth, (str, AWSAuth)):
274+
return self._http_auth
275+
return None
276+
270277
def _ensure_initialized(self):
271278
# Ideally, we have a warm-up stage for document stores as well as components.
272279
if not self._client:
273280
self._client = OpenSearch(
274281
hosts=self._hosts,
275-
http_auth=self._http_auth,
282+
http_auth=self._resolve_http_auth(),
276283
use_ssl=self._use_ssl,
277284
verify_certs=self._verify_certs,
278285
timeout=self._timeout,
@@ -284,7 +291,8 @@ def _ensure_initialized(self):
284291

285292
async def _ensure_initialized_async(self):
286293
if not self._async_client:
287-
async_http_auth = AsyncAWSAuth(self._http_auth) if isinstance(self._http_auth, AWSAuth) else self._http_auth
294+
resolved_auth = self._resolve_http_auth()
295+
async_http_auth = AsyncAWSAuth(resolved_auth) if isinstance(resolved_auth, AWSAuth) else resolved_auth
288296
self._async_client = AsyncOpenSearch(
289297
hosts=self._hosts,
290298
http_auth=async_http_auth,

integrations/opensearch/tests/test_auth.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def test_ds_from_dict_basic_auth(self, _mock_opensearch_client):
254254
document_store._ensure_initialized()
255255
assert document_store._client
256256
_mock_opensearch_client.assert_called_once()
257-
assert _mock_opensearch_client.call_args[1]["http_auth"] == ["user", "pw"]
257+
assert _mock_opensearch_client.call_args[1]["http_auth"] == ("user", "pw")
258258

259259
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
260260
def test_ds_from_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.MonkeyPatch):
@@ -304,7 +304,7 @@ def test_ds_to_dict_basic_auth(self, _mock_opensearch_client):
304304
"settings": {"index.knn": True},
305305
"return_embedding": False,
306306
"create_index": True,
307-
"http_auth": ("user", "pw"),
307+
"http_auth": None,
308308
"use_ssl": None,
309309
"verify_certs": None,
310310
"timeout": None,

integrations/opensearch/tests/test_bm25_retriever.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ def test_to_dict(_mock_opensearch_client):
5656
"settings": {"index.knn": True},
5757
"return_embedding": False,
5858
"create_index": True,
59-
"http_auth": None,
59+
"http_auth": [
60+
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
61+
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
62+
],
6063
"use_ssl": None,
6164
"verify_certs": None,
6265
"timeout": None,

integrations/opensearch/tests/test_document_store.py

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,27 @@ def test_to_dict(_mock_opensearch_client):
3838
"settings": {"index.knn": True},
3939
"return_embedding": False,
4040
"create_index": True,
41-
"http_auth": None,
41+
"http_auth": [
42+
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
43+
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
44+
],
4245
"use_ssl": None,
4346
"verify_certs": None,
4447
"timeout": None,
4548
},
4649
}
4750

4851

52+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
53+
def test_to_dict_with_http_auth_str(_mock_opensearch_client):
54+
"""
55+
Verify that plain strings secrets are not serialized.
56+
"""
57+
document_store = OpenSearchDocumentStore(hosts="some hosts", http_auth=("admin", "admin"))
58+
res = document_store.to_dict()
59+
assert res["init_parameters"]["http_auth"] is None
60+
61+
4962
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
5063
def test_from_dict(_mock_opensearch_client):
5164
data = {
@@ -93,6 +106,54 @@ def test_from_dict(_mock_opensearch_client):
93106
assert document_store._timeout == 60
94107

95108

109+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
110+
def test_from_dict_with_http_auth_str(_mock_opensearch_client):
111+
"""
112+
Verify that serialized plain strings secrets can be properly deserialized.
113+
"""
114+
data = {
115+
"type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
116+
"init_parameters": {
117+
"hosts": "some hosts",
118+
"index": "default",
119+
"http_auth": ("admin", "admin"),
120+
},
121+
}
122+
document_store = OpenSearchDocumentStore.from_dict(data)
123+
assert document_store._http_auth == ("admin", "admin")
124+
125+
126+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
127+
def test_resolve_http_auth_with_secrets(_mock_opensearch_client, monkeypatch):
128+
monkeypatch.setenv("OPENSEARCH_USERNAME", "admin")
129+
monkeypatch.setenv("OPENSEARCH_PASSWORD", "secret")
130+
store = OpenSearchDocumentStore(hosts="testhost")
131+
assert store._resolve_http_auth() == ["admin", "secret"]
132+
133+
134+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
135+
def test_resolve_http_auth_with_no_secrets(_mock_opensearch_client, monkeypatch):
136+
monkeypatch.delenv("OPENSEARCH_USERNAME", raising=False)
137+
monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False)
138+
store = OpenSearchDocumentStore(hosts="testhost")
139+
assert store._resolve_http_auth() is None
140+
141+
142+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
143+
def test_resolve_http_auth_with_partial_secrets(_mock_opensearch_client, monkeypatch):
144+
monkeypatch.setenv("OPENSEARCH_USERNAME", "admin")
145+
monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False)
146+
store = OpenSearchDocumentStore(hosts="testhost")
147+
with pytest.raises(DocumentStoreError, match="http_auth requires both username and password"):
148+
store._resolve_http_auth()
149+
150+
151+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
152+
def test_resolve_http_auth_with_plain_strings(_mock_opensearch_client):
153+
store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
154+
assert store._resolve_http_auth() == ("admin", "admin")
155+
156+
96157
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
97158
def test_init_is_lazy(_mock_opensearch_client):
98159
OpenSearchDocumentStore(hosts="testhost")
@@ -110,16 +171,19 @@ def test_get_default_mappings(_mock_opensearch_client):
110171
}
111172

112173

174+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
113175
@patch("haystack_integrations.document_stores.opensearch.document_store.bulk")
114-
def test_routing_extracted_from_metadata(mock_bulk, document_store):
176+
def test_routing_extracted_from_metadata(mock_bulk, _mock_opensearch_client):
115177
"""Test routing extraction from document metadata"""
116178
mock_bulk.return_value = (2, [])
117179

180+
store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
181+
118182
docs = [
119183
Document(id="1", content="Doc", meta={"_routing": "user_a", "other": "data"}),
120184
Document(id="2", content="Doc"),
121185
]
122-
document_store.write_documents(docs)
186+
store.write_documents(docs)
123187

124188
actions = list(mock_bulk.call_args.kwargs["actions"])
125189

@@ -135,13 +199,16 @@ def test_routing_extracted_from_metadata(mock_bulk, document_store):
135199
assert "_routing" not in actions[1]["_source"].get("meta", {})
136200

137201

202+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
138203
@patch("haystack_integrations.document_stores.opensearch.document_store.bulk")
139-
def test_routing_in_delete(mock_bulk, document_store):
204+
def test_routing_in_delete(mock_bulk, _mock_opensearch_client):
140205
"""Test routing parameter in delete operations"""
141206
mock_bulk.return_value = (2, [])
142207

208+
store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
209+
143210
routing_map = {"1": "user_a", "2": "user_b"}
144-
document_store.delete_documents(["1", "2", "3"], routing=routing_map)
211+
store.delete_documents(["1", "2", "3"], routing=routing_map)
145212

146213
actions = list(mock_bulk.call_args.kwargs["actions"])
147214

integrations/opensearch/tests/test_embedding_retriever.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@ def test_to_dict(_mock_opensearch_client):
7272
},
7373
"return_embedding": False,
7474
"create_index": True,
75-
"http_auth": None,
75+
"http_auth": [
76+
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
77+
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
78+
],
7679
"use_ssl": None,
7780
"verify_certs": None,
7881
"timeout": None,

integrations/opensearch/tests/test_open_search_hybrid_retriever.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ class TestOpenSearchHybridRetriever:
4646
"settings": {"index.knn": True},
4747
"create_index": True,
4848
"return_embedding": False,
49-
"http_auth": None,
49+
"http_auth": [
50+
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
51+
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
52+
],
5053
"use_ssl": None,
5154
"verify_certs": None,
5255
"timeout": None,

0 commit comments

Comments
 (0)