Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __init__(
mappings: dict[str, Any] | None = None,
settings: dict[str, Any] | None = DEFAULT_SETTINGS,
create_index: bool = True,
http_auth: Any = (
http_auth: tuple[Secret, Secret] | tuple[str, str] | list[str] | str | AWSAuth | None = (
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that's fine to streamline, to be 100% accurrate better would be to also allow a callable/prototype with this signature (same as AWSAuth's call)

    def __call__(
        self,
        method: str,
        url: str,
        body: Any = None,
        headers: dict[str, str] | None = None,
    ) -> dict[str, str]:

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, but we only support the explicitly handled http_auth types via _resolve_http_auth().

Adding a generic callable would be misleading unless we also pass it through at runtime (which we don't do in the current implementation), so I'd keep the type strict for now

Secret.from_env_var("OPENSEARCH_USERNAME", strict=False), # noqa: B008
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False), # noqa: B008
),
Expand Down Expand Up @@ -138,17 +138,6 @@ def __init__(
self._mappings = mappings or self._get_default_mappings()
self._settings = settings
self._create_index = create_index
self._http_auth_are_secrets = False

# Handle authentication
if isinstance(http_auth, (tuple, list)) and len(http_auth) == 2: # noqa: PLR2004
username, password = http_auth
if isinstance(username, Secret) and isinstance(password, Secret):
self._http_auth_are_secrets = True
username_val = username.resolve_value()
password_val = password.resolve_value()
http_auth = [username_val, password_val] if username_val and password_val else None

self._http_auth = http_auth
self._use_ssl = use_ssl
self._verify_certs = verify_certs
Expand Down Expand Up @@ -218,17 +207,16 @@ def to_dict(self) -> dict[str, Any]:
Dictionary with serialized data.
"""
# Handle http_auth serialization
http_auth: list[dict[str, Any]] | dict[str, Any] | tuple[str, str] | list[str] | str = ""
if isinstance(self._http_auth, list) and self._http_auth_are_secrets:
# Recreate the Secret objects for serialization
http_auth = [
Secret.from_env_var("OPENSEARCH_USERNAME", strict=False).to_dict(),
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False).to_dict(),
]
http_auth: list[dict[str, Any]] | dict[str, Any] | None = None
if (
isinstance(self._http_auth, (tuple, list))
and len(self._http_auth) == 2 # noqa: PLR2004
and isinstance(self._http_auth[0], Secret)
and isinstance(self._http_auth[1], Secret)
):
http_auth = [self._http_auth[0].to_dict(), self._http_auth[1].to_dict()]
elif isinstance(self._http_auth, AWSAuth):
http_auth = self._http_auth.to_dict()
else:
http_auth = self._http_auth

return default_to_dict(
self,
Expand Down Expand Up @@ -267,12 +255,31 @@ def from_dict(cls, data: dict[str, Any]) -> "OpenSearchDocumentStore":
init_params["http_auth"] = [Secret.from_dict(item) for item in http_auth] if are_secrets else http_auth
return default_from_dict(cls, data)

def _resolve_http_auth(self) -> tuple[str, str] | list[str] | str | AWSAuth | None:
"""Resolves Secret objects in http_auth to their plain values."""
if isinstance(self._http_auth, (tuple, list)) and len(self._http_auth) == 2: # noqa: PLR2004
first, second = self._http_auth
if isinstance(first, Secret) and isinstance(second, Secret):
username = first.resolve_value()
password = second.resolve_value()
if username and password:
return [username, password]
if not username and not password:
return None
msg = "http_auth requires both username and password to be set, but only one was provided."
raise DocumentStoreError(msg)
if isinstance(first, str) and isinstance(second, str):
return (first, second)
if isinstance(self._http_auth, (str, AWSAuth)):
return self._http_auth
return None

def _ensure_initialized(self):
# Ideally, we have a warm-up stage for document stores as well as components.
if not self._client:
self._client = OpenSearch(
hosts=self._hosts,
http_auth=self._http_auth,
http_auth=self._resolve_http_auth(),
use_ssl=self._use_ssl,
verify_certs=self._verify_certs,
timeout=self._timeout,
Expand All @@ -284,7 +291,8 @@ def _ensure_initialized(self):

async def _ensure_initialized_async(self):
if not self._async_client:
async_http_auth = AsyncAWSAuth(self._http_auth) if isinstance(self._http_auth, AWSAuth) else self._http_auth
resolved_auth = self._resolve_http_auth()
async_http_auth = AsyncAWSAuth(resolved_auth) if isinstance(resolved_auth, AWSAuth) else resolved_auth
self._async_client = AsyncOpenSearch(
hosts=self._hosts,
http_auth=async_http_auth,
Expand Down
4 changes: 2 additions & 2 deletions integrations/opensearch/tests/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def test_ds_from_dict_basic_auth(self, _mock_opensearch_client):
document_store._ensure_initialized()
assert document_store._client
_mock_opensearch_client.assert_called_once()
assert _mock_opensearch_client.call_args[1]["http_auth"] == ["user", "pw"]
assert _mock_opensearch_client.call_args[1]["http_auth"] == ("user", "pw")

@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_ds_from_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.MonkeyPatch):
Expand Down Expand Up @@ -304,7 +304,7 @@ def test_ds_to_dict_basic_auth(self, _mock_opensearch_client):
"settings": {"index.knn": True},
"return_embedding": False,
"create_index": True,
"http_auth": ("user", "pw"),
"http_auth": None,
"use_ssl": None,
"verify_certs": None,
"timeout": None,
Expand Down
5 changes: 4 additions & 1 deletion integrations/opensearch/tests/test_bm25_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ def test_to_dict(_mock_opensearch_client):
"settings": {"index.knn": True},
"return_embedding": False,
"create_index": True,
"http_auth": None,
"http_auth": [
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
],
"use_ssl": None,
"verify_certs": None,
"timeout": None,
Expand Down
77 changes: 72 additions & 5 deletions integrations/opensearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,27 @@ def test_to_dict(_mock_opensearch_client):
"settings": {"index.knn": True},
"return_embedding": False,
"create_index": True,
"http_auth": None,
"http_auth": [
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
],
"use_ssl": None,
"verify_certs": None,
"timeout": None,
},
}


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_to_dict_with_http_auth_str(_mock_opensearch_client):
"""
Verify that plain strings secrets are not serialized.
"""
document_store = OpenSearchDocumentStore(hosts="some hosts", http_auth=("admin", "admin"))
res = document_store.to_dict()
assert res["init_parameters"]["http_auth"] is None


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_from_dict(_mock_opensearch_client):
data = {
Expand Down Expand Up @@ -93,6 +106,54 @@ def test_from_dict(_mock_opensearch_client):
assert document_store._timeout == 60


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_from_dict_with_http_auth_str(_mock_opensearch_client):
"""
Verify that serialized plain strings secrets can be properly deserialized.
"""
data = {
"type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
"init_parameters": {
"hosts": "some hosts",
"index": "default",
"http_auth": ("admin", "admin"),
},
}
document_store = OpenSearchDocumentStore.from_dict(data)
assert document_store._http_auth == ("admin", "admin")


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_resolve_http_auth_with_secrets(_mock_opensearch_client, monkeypatch):
monkeypatch.setenv("OPENSEARCH_USERNAME", "admin")
monkeypatch.setenv("OPENSEARCH_PASSWORD", "secret")
store = OpenSearchDocumentStore(hosts="testhost")
assert store._resolve_http_auth() == ["admin", "secret"]


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_resolve_http_auth_with_no_secrets(_mock_opensearch_client, monkeypatch):
monkeypatch.delenv("OPENSEARCH_USERNAME", raising=False)
monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False)
store = OpenSearchDocumentStore(hosts="testhost")
assert store._resolve_http_auth() is None


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_resolve_http_auth_with_partial_secrets(_mock_opensearch_client, monkeypatch):
monkeypatch.setenv("OPENSEARCH_USERNAME", "admin")
monkeypatch.delenv("OPENSEARCH_PASSWORD", raising=False)
store = OpenSearchDocumentStore(hosts="testhost")
with pytest.raises(DocumentStoreError, match="http_auth requires both username and password"):
store._resolve_http_auth()


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_resolve_http_auth_with_plain_strings(_mock_opensearch_client):
store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
assert store._resolve_http_auth() == ("admin", "admin")


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
def test_init_is_lazy(_mock_opensearch_client):
OpenSearchDocumentStore(hosts="testhost")
Expand All @@ -110,16 +171,19 @@ def test_get_default_mappings(_mock_opensearch_client):
}


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
@patch("haystack_integrations.document_stores.opensearch.document_store.bulk")
def test_routing_extracted_from_metadata(mock_bulk, document_store):
def test_routing_extracted_from_metadata(mock_bulk, _mock_opensearch_client):
"""Test routing extraction from document metadata"""
mock_bulk.return_value = (2, [])

store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))

docs = [
Document(id="1", content="Doc", meta={"_routing": "user_a", "other": "data"}),
Document(id="2", content="Doc"),
]
document_store.write_documents(docs)
store.write_documents(docs)

actions = list(mock_bulk.call_args.kwargs["actions"])

Expand All @@ -135,13 +199,16 @@ def test_routing_extracted_from_metadata(mock_bulk, document_store):
assert "_routing" not in actions[1]["_source"].get("meta", {})


@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
@patch("haystack_integrations.document_stores.opensearch.document_store.bulk")
def test_routing_in_delete(mock_bulk, document_store):
def test_routing_in_delete(mock_bulk, _mock_opensearch_client):
"""Test routing parameter in delete operations"""
mock_bulk.return_value = (2, [])

store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))

routing_map = {"1": "user_a", "2": "user_b"}
document_store.delete_documents(["1", "2", "3"], routing=routing_map)
store.delete_documents(["1", "2", "3"], routing=routing_map)

actions = list(mock_bulk.call_args.kwargs["actions"])

Expand Down
5 changes: 4 additions & 1 deletion integrations/opensearch/tests/test_embedding_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ def test_to_dict(_mock_opensearch_client):
},
"return_embedding": False,
"create_index": True,
"http_auth": None,
"http_auth": [
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
],
"use_ssl": None,
"verify_certs": None,
"timeout": None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ class TestOpenSearchHybridRetriever:
"settings": {"index.knn": True},
"create_index": True,
"return_embedding": False,
"http_auth": None,
"http_auth": [
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
],
"use_ssl": None,
"verify_certs": None,
"timeout": None,
Expand Down
Loading