Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion integrations/opensearch/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
services:
opensearch:
image: "opensearchproject/opensearch:2.11.0"
image: "opensearchproject/opensearch:3.5.0"
ports:
- 9200:9200
- 9600:9600
restart: on-failure
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
- "OPENSEARCH_INITIAL_ADMIN_PASSWORD=SecureHaystack!2026"
healthcheck:
# Probe with the password configured via OPENSEARCH_INITIAL_ADMIN_PASSWORD: OpenSearch 2.12+ no longer ships the admin:admin default, so the old credentials are rejected and curl --fail would mark the container unhealthy.
test: curl --fail https://localhost:9200/_cat/health -ku 'admin:SecureHaystack!2026' || exit 1
interval: 10s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2003,12 +2003,9 @@ def _query_sql(self, query: str, fetch_size: int | None = None) -> dict[str, Any
if fetch_size is not None:
body["fetch_size"] = fetch_size

params = {"format": "json"}

response_data = self._client.transport.perform_request(
method="POST",
url="/_plugins/_sql",
params=params,
body=body,
)

Expand Down Expand Up @@ -2039,12 +2036,9 @@ async def _query_sql_async(self, query: str, fetch_size: int | None = None) -> d
if fetch_size is not None:
body["fetch_size"] = fetch_size

params = {"format": "json"}

response_data = await self._async_client.transport.perform_request(
method="POST",
url="/_plugins/_sql",
params=params,
body=body,
)

Expand Down
18 changes: 9 additions & 9 deletions integrations/opensearch/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def document_store():
store = OpenSearchDocumentStore(
hosts=hosts,
index=index,
http_auth=("admin", "admin"),
http_auth=("admin", "SecureHaystack!2026"),
verify_certs=False,
embedding_dim=768,
return_embedding=True,
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
Comment thread
anakin87 marked this conversation as resolved.
)
store._ensure_initialized()
yield store
Expand All @@ -52,11 +52,11 @@ def document_store_2():
store = OpenSearchDocumentStore(
hosts=hosts,
index=index,
http_auth=("admin", "admin"),
http_auth=("admin", "SecureHaystack!2026"),
verify_certs=False,
embedding_dim=768,
return_embedding=False,
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
)
yield store

Expand All @@ -81,10 +81,10 @@ def document_store_readonly():
store = OpenSearchDocumentStore(
hosts=hosts,
index=index,
http_auth=("admin", "admin"),
http_auth=("admin", "SecureHaystack!2026"),
verify_certs=False,
embedding_dim=768,
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
create_index=False,
)
store._ensure_initialized()
Expand All @@ -109,11 +109,11 @@ def document_store_embedding_dim_4_no_emb_returned():
store = OpenSearchDocumentStore(
hosts=hosts,
index=index,
http_auth=("admin", "admin"),
http_auth=("admin", "SecureHaystack!2026"),
verify_certs=False,
embedding_dim=4,
return_embedding=False,
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
)
yield store

Expand All @@ -133,7 +133,7 @@ def document_store_embedding_dim_4_no_emb_returned_faiss():
store = OpenSearchDocumentStore(
hosts=hosts,
index=index,
http_auth=("admin", "admin"),
http_auth=("admin", "SecureHaystack!2026"),
verify_certs=False,
embedding_dim=4,
method={"space_type": "innerproduct", "engine": "faiss", "name": "hnsw"},
Expand Down
49 changes: 26 additions & 23 deletions integrations/opensearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,24 @@ def document_store(self, document_store):

def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
"""
The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set.
We don't want to compare the score, so we set it to None before comparing the documents.
The OpenSearchDocumentStore.filter_documents() method returns documents with their score set.

We don't want to compare the score, so we set it to None before comparing.

Embeddings are not exactly the same when retrieved from OpenSearch (float round-trip),
so we compare them approximately and then set both to None for the final equality check.
"""
for doc in received:
doc.score = None
assert received == expected
assert len(received) == len(expected)
Comment thread
anakin87 marked this conversation as resolved.
received = sorted(received, key=lambda x: x.id)
expected = sorted(expected, key=lambda x: x.id)
for received_doc, expected_doc in zip(received, expected, strict=True):
received_doc.score = None
if received_doc.embedding is None:
assert expected_doc.embedding is None
else:
assert received_doc.embedding == pytest.approx(expected_doc.embedding)
received_doc.embedding, expected_doc.embedding = None, None
assert received_doc == expected_doc

def test_write_documents(self, document_store: OpenSearchDocumentStore):
docs = [Document(id="1")]
Expand Down Expand Up @@ -983,24 +995,15 @@ def test_query_sql(self, document_store: OpenSearchDocumentStore):

# Verify raw JSON response structure
assert isinstance(result, dict)
assert "hits" in result
assert "hits" in result["hits"]
assert len(result["hits"]["hits"]) == 2 # Two documents with category A

# Extract _source from each hit
hits = result["hits"]["hits"]
assert all(isinstance(hit, dict) and "_source" in hit for hit in hits)

categories = [hit["_source"].get("category") for hit in hits]
assert all(cat == "A" for cat in categories)

# verify all expected fields are present in _source
for hit in hits:
source = hit["_source"]
assert "content" in source
assert "category" in source
assert "status" in source
assert "priority" in source
assert "schema" in result
assert "datarows" in result
assert "size" in result
assert "status" in result
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "status", "priority"]
assert len(result["datarows"]) == 2 # Two documents with category A

categories = [row[1] for row in result["datarows"]]
assert all(category == "A" for category in categories)

# error handling for invalid SQL query
invalid_query = "SELECT * FROM non_existent_index"
Expand Down
57 changes: 27 additions & 30 deletions integrations/opensearch/tests/test_document_store_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,24 +827,15 @@ async def test_query_sql(self, document_store: OpenSearchDocumentStore):

# Verify raw JSON response structure
assert isinstance(result, dict)
assert "hits" in result
assert "hits" in result["hits"]
assert len(result["hits"]["hits"]) == 2 # Two documents with category A
assert "schema" in result
assert "datarows" in result
assert "size" in result
assert "status" in result
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "status", "priority"]
assert len(result["datarows"]) == 2 # Two documents with category A

# Extract _source from each hit
hits = result["hits"]["hits"]
assert all(isinstance(hit, dict) and "_source" in hit for hit in hits)

categories = [hit["_source"].get("category") for hit in hits]
assert all(cat == "A" for cat in categories)

# verify all expected fields are present in _source
for hit in hits:
source = hit["_source"]
assert "content" in source
assert "category" in source
assert "status" in source
assert "priority" in source
categories = [row[1] for row in result["datarows"]]
assert all(category == "A" for category in categories)

# error handling for invalid SQL query
invalid_query = "SELECT * FROM non_existent_index"
Expand All @@ -869,9 +860,14 @@ async def test_query_sql_async_with_fetch_size(self, document_store: OpenSearchD

# Should return raw JSON response (exact count depends on OpenSearch behavior)
assert isinstance(result, dict)
assert "hits" in result
assert "hits" in result["hits"]
assert all(isinstance(hit, dict) and "_source" in hit for hit in result["hits"]["hits"])
assert "schema" in result
assert "datarows" in result
assert "size" in result
assert "status" in result
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "index"]
assert len(result["datarows"]) > 0
assert len(result["datarows"]) <= 5
assert result.get("cursor") is not None

@pytest.mark.integration
@pytest.mark.asyncio
Expand All @@ -889,13 +885,14 @@ async def test_query_sql_async_pagination_flow(self, document_store: OpenSearchD
# Query with small fetch_size to test pagination
result = await document_store._query_sql_async(sql_query, fetch_size=10)
assert isinstance(result, dict)
assert "hits" in result
assert "hits" in result["hits"]
assert len(result["hits"]["hits"]) > 0

# Verify all results have expected fields in _source
for hit in result["hits"]["hits"]:
source = hit["_source"]
assert "content" in source
assert "category" in source
assert "index" in source
assert "schema" in result
assert "datarows" in result
assert "size" in result
assert "status" in result
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "index"]
assert len(result["datarows"]) > 0
assert len(result["datarows"]) <= 10

# Verify all results contain expected row columns
for row in result["datarows"]:
assert len(row) == 3
20 changes: 15 additions & 5 deletions integrations/opensearch/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,19 @@ def test_normalize_ranges():
class TestFilters(FilterDocumentsTest):
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
"""
The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set.
We don't want to compare the score, so we set it to None before comparing the documents.
The OpenSearchDocumentStore.filter_documents() method returns documents with their score set.
We don't want to compare the score, so we set it to None before comparing.
Embeddings are not exactly the same when retrieved from OpenSearch (float round-trip),
so we compare them approximately and then set both to None for the final equality check.
"""
for doc in received:
doc.score = None
assert received == expected
assert len(received) == len(expected)
received = sorted(received, key=lambda x: x.id)
expected = sorted(expected, key=lambda x: x.id)
for received_doc, expected_doc in zip(received, expected, strict=True):
received_doc.score = None
if received_doc.embedding is None:
assert expected_doc.embedding is None
else:
assert received_doc.embedding == pytest.approx(expected_doc.embedding)
received_doc.embedding, expected_doc.embedding = None, None
assert received_doc == expected_doc
Loading