Skip to content

Commit c8a60e1

Browse files
fix: OpenSearch-3.x SQL stopped supporting DSL-format SQL JSON (#2876)
* updating tests due to removal of format=json
* updating async tests due to removal of format=json
* removing strict = True from zip
* reverting: removing strict = True from zip
1 parent 4d3295b commit c8a60e1

7 files changed

Lines changed: 188 additions & 159 deletions

File tree

integrations/opensearch/docker-compose.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
services:
22
opensearch:
3-
image: "opensearchproject/opensearch:2.11.0"
3+
image: "opensearchproject/opensearch:3.5.0"
44
ports:
55
- 9200:9200
66
- 9600:9600
77
restart: on-failure
88
environment:
99
- discovery.type=single-node
1010
- "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
11+
- "OPENSEARCH_INITIAL_ADMIN_PASSWORD=SecureHaystack!2026"
1112
healthcheck:
1213
test: curl --fail https://localhost:9200/_cat/health -ku admin:admin || exit 1
1314
interval: 10s

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,12 +2003,9 @@ def _query_sql(self, query: str, fetch_size: int | None = None) -> dict[str, Any
20032003
if fetch_size is not None:
20042004
body["fetch_size"] = fetch_size
20052005

2006-
params = {"format": "json"}
2007-
20082006
response_data = self._client.transport.perform_request(
20092007
method="POST",
20102008
url="/_plugins/_sql",
2011-
params=params,
20122009
body=body,
20132010
)
20142011

@@ -2039,12 +2036,9 @@ async def _query_sql_async(self, query: str, fetch_size: int | None = None) -> d
20392036
if fetch_size is not None:
20402037
body["fetch_size"] = fetch_size
20412038

2042-
params = {"format": "json"}
2043-
20442039
response_data = await self._async_client.transport.perform_request(
20452040
method="POST",
20462041
url="/_plugins/_sql",
2047-
params=params,
20482042
body=body,
20492043
)
20502044

integrations/opensearch/tests/conftest.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ def document_store():
2828
store = OpenSearchDocumentStore(
2929
hosts=hosts,
3030
index=index,
31-
http_auth=("admin", "admin"),
31+
http_auth=("admin", "SecureHaystack!2026"),
3232
verify_certs=False,
3333
embedding_dim=768,
3434
return_embedding=True,
35-
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
35+
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
3636
)
3737
store._ensure_initialized()
3838
yield store
@@ -52,11 +52,11 @@ def document_store_2():
5252
store = OpenSearchDocumentStore(
5353
hosts=hosts,
5454
index=index,
55-
http_auth=("admin", "admin"),
55+
http_auth=("admin", "SecureHaystack!2026"),
5656
verify_certs=False,
5757
embedding_dim=768,
5858
return_embedding=False,
59-
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
59+
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
6060
)
6161
yield store
6262

@@ -81,10 +81,10 @@ def document_store_readonly():
8181
store = OpenSearchDocumentStore(
8282
hosts=hosts,
8383
index=index,
84-
http_auth=("admin", "admin"),
84+
http_auth=("admin", "SecureHaystack!2026"),
8585
verify_certs=False,
8686
embedding_dim=768,
87-
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
87+
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
8888
create_index=False,
8989
)
9090
store._ensure_initialized()
@@ -109,11 +109,11 @@ def document_store_embedding_dim_4_no_emb_returned():
109109
store = OpenSearchDocumentStore(
110110
hosts=hosts,
111111
index=index,
112-
http_auth=("admin", "admin"),
112+
http_auth=("admin", "SecureHaystack!2026"),
113113
verify_certs=False,
114114
embedding_dim=4,
115115
return_embedding=False,
116-
method={"space_type": "cosinesimil", "engine": "nmslib", "name": "hnsw"},
116+
method={"space_type": "cosinesimil", "engine": "lucene", "name": "hnsw"},
117117
)
118118
yield store
119119

@@ -133,7 +133,7 @@ def document_store_embedding_dim_4_no_emb_returned_faiss():
133133
store = OpenSearchDocumentStore(
134134
hosts=hosts,
135135
index=index,
136-
http_auth=("admin", "admin"),
136+
http_auth=("admin", "SecureHaystack!2026"),
137137
verify_certs=False,
138138
embedding_dim=4,
139139
method={"space_type": "innerproduct", "engine": "faiss", "name": "hnsw"},

integrations/opensearch/tests/test_document_store.py

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -164,12 +164,24 @@ def document_store(self, document_store):
164164

165165
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
166166
"""
167-
The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set.
168-
We don't want to compare the score, so we set it to None before comparing the documents.
167+
The OpenSearchDocumentStore.filter_documents() method returns documents with their score set.
168+
169+
We don't want to compare the score, so we set it to None before comparing.
170+
171+
Embeddings are not exactly the same when retrieved from OpenSearch (float round-trip),
172+
so we compare them approximately and then set both to None for the final equality check.
169173
"""
170-
for doc in received:
171-
doc.score = None
172-
assert received == expected
174+
assert len(received) == len(expected)
175+
received = sorted(received, key=lambda x: x.id)
176+
expected = sorted(expected, key=lambda x: x.id)
177+
for received_doc, expected_doc in zip(received, expected, strict=True):
178+
received_doc.score = None
179+
if received_doc.embedding is None:
180+
assert expected_doc.embedding is None
181+
else:
182+
assert received_doc.embedding == pytest.approx(expected_doc.embedding)
183+
received_doc.embedding, expected_doc.embedding = None, None
184+
assert received_doc == expected_doc
173185

174186
def test_write_documents(self, document_store: OpenSearchDocumentStore):
175187
docs = [Document(id="1")]
@@ -983,24 +995,15 @@ def test_query_sql(self, document_store: OpenSearchDocumentStore):
983995

984996
# Verify raw JSON response structure
985997
assert isinstance(result, dict)
986-
assert "hits" in result
987-
assert "hits" in result["hits"]
988-
assert len(result["hits"]["hits"]) == 2 # Two documents with category A
989-
990-
# Extract _source from each hit
991-
hits = result["hits"]["hits"]
992-
assert all(isinstance(hit, dict) and "_source" in hit for hit in hits)
993-
994-
categories = [hit["_source"].get("category") for hit in hits]
995-
assert all(cat == "A" for cat in categories)
996-
997-
# verify all expected fields are present in _source
998-
for hit in hits:
999-
source = hit["_source"]
1000-
assert "content" in source
1001-
assert "category" in source
1002-
assert "status" in source
1003-
assert "priority" in source
998+
assert "schema" in result
999+
assert "datarows" in result
1000+
assert "size" in result
1001+
assert "status" in result
1002+
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "status", "priority"]
1003+
assert len(result["datarows"]) == 2 # Two documents with category A
1004+
1005+
categories = [row[1] for row in result["datarows"]]
1006+
assert all(category == "A" for category in categories)
10041007

10051008
# error handling for invalid SQL query
10061009
invalid_query = "SELECT * FROM non_existent_index"

integrations/opensearch/tests/test_document_store_async.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -827,24 +827,15 @@ async def test_query_sql(self, document_store: OpenSearchDocumentStore):
827827

828828
# Verify raw JSON response structure
829829
assert isinstance(result, dict)
830-
assert "hits" in result
831-
assert "hits" in result["hits"]
832-
assert len(result["hits"]["hits"]) == 2 # Two documents with category A
830+
assert "schema" in result
831+
assert "datarows" in result
832+
assert "size" in result
833+
assert "status" in result
834+
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "status", "priority"]
835+
assert len(result["datarows"]) == 2 # Two documents with category A
833836

834-
# Extract _source from each hit
835-
hits = result["hits"]["hits"]
836-
assert all(isinstance(hit, dict) and "_source" in hit for hit in hits)
837-
838-
categories = [hit["_source"].get("category") for hit in hits]
839-
assert all(cat == "A" for cat in categories)
840-
841-
# verify all expected fields are present in _source
842-
for hit in hits:
843-
source = hit["_source"]
844-
assert "content" in source
845-
assert "category" in source
846-
assert "status" in source
847-
assert "priority" in source
837+
categories = [row[1] for row in result["datarows"]]
838+
assert all(category == "A" for category in categories)
848839

849840
# error handling for invalid SQL query
850841
invalid_query = "SELECT * FROM non_existent_index"
@@ -869,9 +860,14 @@ async def test_query_sql_async_with_fetch_size(self, document_store: OpenSearchD
869860

870861
# Should return raw JSON response (exact count depends on OpenSearch behavior)
871862
assert isinstance(result, dict)
872-
assert "hits" in result
873-
assert "hits" in result["hits"]
874-
assert all(isinstance(hit, dict) and "_source" in hit for hit in result["hits"]["hits"])
863+
assert "schema" in result
864+
assert "datarows" in result
865+
assert "size" in result
866+
assert "status" in result
867+
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "index"]
868+
assert len(result["datarows"]) > 0
869+
assert len(result["datarows"]) <= 5
870+
assert result.get("cursor") is not None
875871

876872
@pytest.mark.integration
877873
@pytest.mark.asyncio
@@ -889,13 +885,14 @@ async def test_query_sql_async_pagination_flow(self, document_store: OpenSearchD
889885
# Query with small fetch_size to test pagination
890886
result = await document_store._query_sql_async(sql_query, fetch_size=10)
891887
assert isinstance(result, dict)
892-
assert "hits" in result
893-
assert "hits" in result["hits"]
894-
assert len(result["hits"]["hits"]) > 0
895-
896-
# Verify all results have expected fields in _source
897-
for hit in result["hits"]["hits"]:
898-
source = hit["_source"]
899-
assert "content" in source
900-
assert "category" in source
901-
assert "index" in source
888+
assert "schema" in result
889+
assert "datarows" in result
890+
assert "size" in result
891+
assert "status" in result
892+
assert [entry["name"] for entry in result["schema"]] == ["content", "category", "index"]
893+
assert len(result["datarows"]) > 0
894+
assert len(result["datarows"]) <= 10
895+
896+
# Verify all results contain expected row columns
897+
for row in result["datarows"]:
898+
assert len(row) == 3

integrations/opensearch/tests/test_filters.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,19 @@ def test_normalize_ranges():
229229
class TestFilters(FilterDocumentsTest):
230230
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
231231
"""
232-
The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set.
233-
We don't want to compare the score, so we set it to None before comparing the documents.
232+
The OpenSearchDocumentStore.filter_documents() method returns documents with their score set.
233+
We don't want to compare the score, so we set it to None before comparing.
234+
Embeddings are not exactly the same when retrieved from OpenSearch (float round-trip),
235+
so we compare them approximately and then set both to None for the final equality check.
234236
"""
235-
for doc in received:
236-
doc.score = None
237-
assert received == expected
237+
assert len(received) == len(expected)
238+
received = sorted(received, key=lambda x: x.id)
239+
expected = sorted(expected, key=lambda x: x.id)
240+
for received_doc, expected_doc in zip(received, expected, strict=True):
241+
received_doc.score = None
242+
if received_doc.embedding is None:
243+
assert expected_doc.embedding is None
244+
else:
245+
assert received_doc.embedding == pytest.approx(expected_doc.embedding)
246+
received_doc.embedding, expected_doc.embedding = None, None
247+
assert received_doc == expected_doc

0 commit comments

Comments
 (0)