OpenSearchDocumentStore: load index mappings correctly when `index` is an alias

`indices.get_mapping` keys its response by concrete index name, so looking the
response up by an alias raised KeyError. The mapping properties of every index
in the response are now merged before nested fields are populated, which also
covers aliases that resolve to several backing indices and an empty response.
Unit tests cover the sync and async code paths; integration tests exercise a
real alias end to end.

Note: the post-image blob hashes on the `index` lines have not been regenerated.

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index df9be9a2a9..8e95e1d47f 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -400,7 +400,13 @@ async def _ensure_index_exists_async(self) -> None:
                 index=self._index,
             )
             mapping = await self._async_client.indices.get_mapping(index=self._index)
-            properties = mapping[self._index]["mappings"].get("properties", {})
+            # get_mapping keys the response by concrete index name, never by alias, and an alias may
+            # resolve to several backing indices, so merge the properties of every index returned.
+            properties = {
+                field: definition
+                for index_mapping in mapping.values()
+                for field, definition in index_mapping["mappings"].get("properties", {}).items()
+            }
             self._populate_nested_fields_from_mapping(properties)
         elif self._create_index:
             # Create the index if it doesn't exist
@@ -417,7 +423,13 @@ def _ensure_index_exists(self) -> None:
                 index=self._index,
             )
             mapping = self._client.indices.get_mapping(index=self._index)
-            properties = mapping[self._index]["mappings"].get("properties", {})
+            # get_mapping keys the response by concrete index name, never by alias, and an alias may
+            # resolve to several backing indices, so merge the properties of every index returned.
+            properties = {
+                field: definition
+                for index_mapping in mapping.values()
+                for field, definition in index_mapping["mappings"].get("properties", {}).items()
+            }
             self._populate_nested_fields_from_mapping(properties)
         elif self._create_index:
             # Create the index if it doesn't exist
diff --git a/integrations/opensearch/tests/test_auth.py b/integrations/opensearch/tests/test_auth.py
index bc5e26ed90..d9b3813840 100644
--- a/integrations/opensearch/tests/test_auth.py
+++ b/integrations/opensearch/tests/test_auth.py
@@ -226,6 +226,7 @@ def mock_boto3_session(self):
 
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_init_with_basic_auth(self, _mock_opensearch_client):
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         document_store = OpenSearchDocumentStore(hosts="testhost", http_auth=("user", "pw"))
         document_store._ensure_initialized()
         assert document_store._client
@@ -234,6 +235,7 @@ def test_ds_init_with_basic_auth(self, _mock_opensearch_client):
 
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_init_without_auth(self, _mock_opensearch_client):
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         document_store = OpenSearchDocumentStore(hosts="testhost")
         document_store._ensure_initialized()
         assert document_store._client
@@ -242,6 +244,7 @@ def test_ds_init_without_auth(self, _mock_opensearch_client):
 
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_init_aws_auth(self, _mock_opensearch_client):
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         document_store = OpenSearchDocumentStore(
             hosts="testhost",
             http_auth=AWSAuth(aws_region_name=Secret.from_token("dummy-region")),
@@ -257,6 +260,7 @@ def test_ds_init_aws_auth(self, _mock_opensearch_client):
 
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_from_dict_basic_auth(self, _mock_opensearch_client):
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         document_store = OpenSearchDocumentStore.from_dict(
             {
                 "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
@@ -275,6 +279,7 @@ def test_ds_from_dict_basic_auth(self, _mock_opensearch_client):
 
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_from_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.MonkeyPatch):
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         monkeypatch.setenv("AWS_DEFAULT_REGION", "dummy-region")
         document_store = OpenSearchDocumentStore.from_dict(
             {
@@ -379,6 +384,7 @@ def test_ds_to_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_init_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch):
         """Test the default initialization using environment variables"""
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         monkeypatch.setenv("OPENSEARCH_USERNAME", "user")
         monkeypatch.setenv("OPENSEARCH_PASSWORD", "pass")
 
@@ -391,6 +397,7 @@ def test_ds_init_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_init_with_missing_env_vars(self, _mock_opensearch_client):
         """Test that auth is None when environment variables are missing"""
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         document_store = OpenSearchDocumentStore(hosts="testhost")
         document_store._ensure_initialized()
         assert document_store._client
@@ -419,6 +426,7 @@ def test_ds_to_dict_with_env_var_secrets(self, _mock_opensearch_client, monkeypa
     @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
     def test_ds_from_dict_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch):
         """Test deserialization with environment variables"""
+        _mock_opensearch_client.return_value.indices.exists.return_value = False
         # Set environment variables so the secrets resolve properly
         monkeypatch.setenv("OPENSEARCH_USERNAME", "user")
         monkeypatch.setenv("OPENSEARCH_PASSWORD", "pass")
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 37b8a69c4b..0053037319 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -185,6 +185,7 @@ def test_get_default_mappings(_mock_opensearch_client):
 def test_routing_extracted_from_metadata(mock_bulk, _mock_opensearch_client):
     """Test routing extraction from document metadata"""
     mock_bulk.return_value = (2, [])
+    _mock_opensearch_client.return_value.indices.exists.return_value = False
 
     store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
 
@@ -213,6 +214,7 @@ def test_routing_extracted_from_metadata(mock_bulk, _mock_opensearch_client):
 def test_routing_in_delete(mock_bulk, _mock_opensearch_client):
     """Test routing parameter in delete operations"""
     mock_bulk.return_value = (2, [])
+    _mock_opensearch_client.return_value.indices.exists.return_value = False
 
     store = OpenSearchDocumentStore(hosts="testhost", http_auth=("admin", "admin"))
 
@@ -553,6 +555,143 @@ def test_explicit_nested_fields_no_detection_on_write(mock_bulk, _mock_opensearc
     store._client.indices.put_mapping.assert_not_called()
 
 
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_ensure_index_exists_direct_index(_mock_opensearch_client):
+    """When an index exists and is referenced directly, mappings are loaded without error."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="my-index", http_auth=("a", "b"))
+    mock_client = MagicMock()
+    store._client = mock_client
+    mock_client.indices.exists.return_value = True
+    mock_client.indices.get_mapping.return_value = {
+        "my-index": {"mappings": {"properties": {"content": {"type": "text"}}}}
+    }
+
+    store._ensure_index_exists()
+
+    mock_client.indices.exists.assert_called_once_with(index="my-index")
+    mock_client.indices.get_mapping.assert_called_once_with(index="my-index")
+    mock_client.indices.create.assert_not_called()
+
+
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_ensure_index_exists_with_alias(_mock_opensearch_client):
+    """When self._index is an alias, get_mapping keys the response by the real index name.
+
+    Before the fix this raised KeyError because mapping[self._index] was used directly.
+    """
+    store = OpenSearchDocumentStore(hosts="testhost", index="my-alias", http_auth=("a", "b"))
+    mock_client = MagicMock()
+    store._client = mock_client
+    mock_client.indices.exists.return_value = True
+    mock_client.indices.get_mapping.return_value = {
+        "my-real-index-v1": {"mappings": {"properties": {"content": {"type": "text"}}}}
+    }
+
+    store._ensure_index_exists()  # must not raise KeyError
+
+    mock_client.indices.get_mapping.assert_called_once_with(index="my-alias")
+    mock_client.indices.create.assert_not_called()
+
+
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_ensure_index_exists_creates_index_when_not_exists(_mock_opensearch_client):
+    """When the index does not exist and create_index=True, the index is created."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="new-index", http_auth=("a", "b"))
+    mock_client = MagicMock()
+    store._client = mock_client
+    mock_client.indices.exists.return_value = False
+
+    store._ensure_index_exists()
+
+    mock_client.indices.create.assert_called_once()
+    mock_client.indices.get_mapping.assert_not_called()
+
+
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_ensure_index_exists_no_create_when_disabled(_mock_opensearch_client):
+    """When the index does not exist and create_index=False, no index is created."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="new-index", create_index=False, http_auth=("a", "b"))
+    mock_client = MagicMock()
+    store._client = mock_client
+    mock_client.indices.exists.return_value = False
+
+    store._ensure_index_exists()
+
+    mock_client.indices.create.assert_not_called()
+    mock_client.indices.get_mapping.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("haystack_integrations.document_stores.opensearch.document_store.AsyncOpenSearch")
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+async def test_ensure_index_exists_async_direct_index(_mock_sync_client, _mock_async_client):
+    """Async: When an index exists and is referenced directly, mappings are loaded without error."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="my-index", http_auth=("a", "b"))
+    mock_client = AsyncMock()
+    store._async_client = mock_client
+    mock_client.indices.exists = AsyncMock(return_value=True)
+    mock_client.indices.get_mapping = AsyncMock(
+        return_value={"my-index": {"mappings": {"properties": {"content": {"type": "text"}}}}}
+    )
+
+    await store._ensure_index_exists_async()
+
+    mock_client.indices.exists.assert_called_once_with(index="my-index")
+    mock_client.indices.get_mapping.assert_called_once_with(index="my-index")
+    mock_client.indices.create.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("haystack_integrations.document_stores.opensearch.document_store.AsyncOpenSearch")
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+async def test_ensure_index_exists_async_with_alias(_mock_sync_client, _mock_async_client):
+    """Async: When self._index is an alias, get_mapping keys by real index name; no KeyError."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="my-alias", http_auth=("a", "b"))
+    mock_client = AsyncMock()
+    store._async_client = mock_client
+    mock_client.indices.exists = AsyncMock(return_value=True)
+    mock_client.indices.get_mapping = AsyncMock(
+        return_value={"my-real-index-v1": {"mappings": {"properties": {"content": {"type": "text"}}}}}
+    )
+
+    await store._ensure_index_exists_async()  # must not raise KeyError
+
+    mock_client.indices.get_mapping.assert_called_once_with(index="my-alias")
+    mock_client.indices.create.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("haystack_integrations.document_stores.opensearch.document_store.AsyncOpenSearch")
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+async def test_ensure_index_exists_async_creates_index_when_not_exists(_mock_sync_client, _mock_async_client):
+    """Async: When the index does not exist and create_index=True, the index is created."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="new-index", http_auth=("a", "b"))
+    mock_client = AsyncMock()
+    store._async_client = mock_client
+    mock_client.indices.exists = AsyncMock(return_value=False)
+
+    await store._ensure_index_exists_async()
+
+    mock_client.indices.create.assert_called_once()
+    mock_client.indices.get_mapping.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("haystack_integrations.document_stores.opensearch.document_store.AsyncOpenSearch")
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+async def test_ensure_index_exists_async_no_create_when_disabled(_mock_sync_client, _mock_async_client):
+    """Async: When the index does not exist and create_index=False, no index is created."""
+    store = OpenSearchDocumentStore(hosts="testhost", index="new-index", create_index=False, http_auth=("a", "b"))
+    mock_client = AsyncMock()
+    store._async_client = mock_client
+    mock_client.indices.exists = AsyncMock(return_value=False)
+
+    await store._ensure_index_exists_async()
+
+    mock_client.indices.create.assert_not_called()
+    mock_client.indices.get_mapping.assert_not_called()
+
+
 @pytest.mark.integration
 class TestDocumentStore(
     OpenSearchDocumentStoreTestMixin,
@@ -1487,3 +1626,26 @@ def test_nested_fields_different_paths_filter(self, document_store_nested: OpenS
         )
         assert len(results) == 1
         assert results[0].content == "both"
+
+    def test_document_store_with_alias(self, document_store: OpenSearchDocumentStore):
+        """Initializing the document store with an alias instead of a concrete index name must not raise KeyError."""
+        alias_name = f"alias_for_{document_store._index}"
+        client = document_store._client
+        client.indices.put_alias(index=document_store._index, name=alias_name)
+        try:
+            alias_store = OpenSearchDocumentStore(
+                hosts=["https://localhost:9200"],
+                http_auth=("admin", "SecureHaystack!2026"),
+                verify_certs=False,
+                index=alias_name,
+                embedding_dim=768,
+            )
+            alias_store._ensure_initialized()
+
+            docs = [Document(content="doc via alias")]
+            assert alias_store.write_documents(docs) == 1
+            results = alias_store.filter_documents()
+            assert len(results) == 1
+            assert results[0].content == "doc via alias"
+        finally:
+            client.indices.delete_alias(index=document_store._index, name=alias_name)
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 3aeb6d0686..08234f53dd 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -627,3 +627,31 @@ async def test_query_sql_async_pagination_flow(self, document_store: OpenSearchD
 
         for row in result["datarows"]:
             assert len(row) == 3
+
+    @pytest.mark.asyncio
+    async def test_document_store_async_with_alias(self, document_store: OpenSearchDocumentStore):
+        """Async init with an alias as the index name must not raise KeyError."""
+        alias_name = f"alias_for_{document_store._index}"
+        client = document_store._client
+        client.indices.put_alias(index=document_store._index, name=alias_name)
+        # Bind before `try` so the `finally` cleanup never hits an unbound name if construction fails.
+        alias_store = None
+        try:
+            alias_store = OpenSearchDocumentStore(
+                hosts=["https://localhost:9200"],
+                http_auth=("admin", "SecureHaystack!2026"),
+                verify_certs=False,
+                index=alias_name,
+                embedding_dim=768,
+            )
+            await alias_store._ensure_initialized_async()
+
+            docs = [Document(content="async doc via alias")]
+            assert await alias_store.write_documents_async(docs) == 1
+            results = await alias_store.filter_documents_async()
+            assert len(results) == 1
+            assert results[0].content == "async doc via alias"
+        finally:
+            client.indices.delete_alias(index=document_store._index, name=alias_name)
+            if alias_store is not None and alias_store._async_client:
+                await alias_store._async_client.close()