Skip to content

Commit 47f72c0

Browse files
test: replace DocumentStore-specific tests with the generalised ones from haystack.testing.document_store (#2812)
* removing test_delete_all_documents from PgVector - already in haystack.testing * removing duplicated tests from OpenSearchDocumentStore * removing duplicated tests from ElasticStoreDocumentTest * removing test_delete_all_documents from weaviate * removing test_delete_all_documents from qdrant * removing delete_all_documents tests from Pinecone * removing delete_all_documents tests from MongoDBDocumentStore * adding check for empty list for delete_all_documents on Chroma * removing delete_all_documents tests from AzureAISearchDocumentStore * fixing Pinecone tests * cleaning up tests in qdrant and weaviate * removing more weaviate delete_by_filter tests * cleaning up tests in pgvector * updating OpenSearch: delete_by_filter_async refresh=True as default * updating OpenSearch: update_by_filter refresh=True as default * removing standard test from MongoDBDocumentStore * reverting default refresh param in OpenSearch * removing standard test from Chroma * updating AzureAISearchDocumentStore to make use of base tests * removing duplicated tests from Astra * adding UpdateByFilterTest to Weaviate * fixing Azure linting issues * formatting * formatting * temporarily using haystack branch with new standard tests * temporarily using haystack branch with new standard tests * formatting * removing duplicated tests from Pinecone * updating all tests to use new test class from haystack core * fixing imports for 3.10 * fixing linting issues * fixing azure_ai_search overrides due to class changes in haystack core * fixing linting * changing imports due to MRO errors + changing document_stores fixtures when needed due to new method from ExtendedTests * fixing weaviate tests and imports * fixing Qdrant tests and imports * fixing PGVector tests * fixing Chroma tests + imports * pointing all pyproject.toml to main * fixing elasticsearch 3.10 arg not used * fixing mongodb imports * fixing chroma formatting * fixing weaviate tests and imports * fixing pinecone imports * attending PR 
comments * removing white space * adding filterable docs fixture to azure_ai_search * fixing linter * updating pyproject.toml to reference pypi instead of main branch * updating pyproject.toml to reference latest pypi instead of main branch * Update integrations/qdrant/pyproject.toml Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> * Update integrations/pgvector/pyproject.toml Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> * Update integrations/mongodb_atlas/pyproject.toml Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> * Update integrations/weaviate/pyproject.toml Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> --------- Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
1 parent 591853a commit 47f72c0

26 files changed

Lines changed: 209 additions & 1062 deletions

File tree

integrations/astra/pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@ classifiers = [
2222
"Programming Language :: Python :: Implementation :: CPython",
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
25-
dependencies = ["haystack-ai>=2.22.0", "pydantic", "typing_extensions", "astrapy>=1.5.0,<2.0"]
25+
dependencies = [
26+
"astrapy>=1.5.0,<2.0",
27+
"haystack-ai>=2.24.0",
28+
"pydantic",
29+
"typing_extensions",
30+
]
2631

2732
[project.urls]
2833
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/astra#readme"

integrations/astra/tests/test_document_store.py

Lines changed: 3 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from haystack import Document
1111
from haystack.document_stores.errors import MissingDocumentError
1212
from haystack.document_stores.types import DuplicatePolicy
13-
from haystack.testing.document_store import DocumentStoreBaseTests
13+
from haystack.testing.document_store import DocumentStoreBaseExtendedTests
1414

1515
from haystack_integrations.document_stores.astra import AstraDocumentStore
1616

@@ -48,9 +48,9 @@ def test_to_dict(mock_auth): # noqa
4848
os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "") == "", reason="ASTRA_DB_APPLICATION_TOKEN env var not set"
4949
)
5050
@pytest.mark.skipif(os.environ.get("ASTRA_DB_API_ENDPOINT", "") == "", reason="ASTRA_DB_API_ENDPOINT env var not set")
51-
class TestDocumentStore(DocumentStoreBaseTests):
51+
class TestDocumentStore(DocumentStoreBaseExtendedTests):
5252
"""
53-
Common test cases will be provided by `DocumentStoreBaseTests` but
53+
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
5454
you can add more to this class.
5555
"""
5656

@@ -204,74 +204,6 @@ def test_filter_documents_by_in_operator(self, document_store):
204204
self.assert_documents_are_equal([result[0]], [docs[0]])
205205
self.assert_documents_are_equal([result[1]], [docs[1]])
206206

207-
def test_delete_all_documents(self, document_store: AstraDocumentStore):
208-
"""
209-
Test delete_all_documents() on an Astra.
210-
"""
211-
document_store.delete_all_documents()
212-
assert document_store.count_documents() == 0
213-
214-
def test_delete_by_filter(self, document_store: AstraDocumentStore, filterable_docs):
215-
document_store.write_documents(filterable_docs)
216-
initial_count = document_store.count_documents()
217-
assert initial_count > 0
218-
219-
# count documents that match the filter before deletion
220-
matching_docs = [d for d in filterable_docs if d.meta.get("chapter") == "intro"]
221-
expected_deleted_count = len(matching_docs)
222-
223-
# delete all documents with chapter="intro"
224-
deleted_count = document_store.delete_by_filter(
225-
filters={"field": "meta.chapter", "operator": "==", "value": "intro"}
226-
)
227-
228-
assert deleted_count == expected_deleted_count
229-
assert document_store.count_documents() == initial_count - deleted_count
230-
231-
# remaining documents don't have chapter="intro"
232-
remaining_docs = document_store.filter_documents()
233-
for doc in remaining_docs:
234-
assert doc.meta.get("chapter") != "intro"
235-
236-
# all documents with chapter="intro" were deleted
237-
intro_docs = document_store.filter_documents(
238-
filters={"field": "meta.chapter", "operator": "==", "value": "intro"}
239-
)
240-
assert len(intro_docs) == 0
241-
242-
def test_update_by_filter(self, document_store: AstraDocumentStore, filterable_docs):
243-
document_store.write_documents(filterable_docs)
244-
initial_count = document_store.count_documents()
245-
assert initial_count > 0
246-
247-
# count documents that match the filter before update
248-
matching_docs = [d for d in filterable_docs if d.meta.get("chapter") == "intro"]
249-
expected_updated_count = len(matching_docs)
250-
251-
# update all documents with chapter="intro" to have status="updated"
252-
updated_count = document_store.update_by_filter(
253-
filters={"field": "meta.chapter", "operator": "==", "value": "intro"},
254-
meta={"status": "updated"},
255-
)
256-
257-
assert updated_count == expected_updated_count
258-
assert document_store.count_documents() == initial_count
259-
260-
# verify the updated documents have the new metadata
261-
updated_docs = document_store.filter_documents(
262-
filters={"field": "meta.status", "operator": "==", "value": "updated"}
263-
)
264-
assert len(updated_docs) == expected_updated_count
265-
for doc in updated_docs:
266-
assert doc.meta.get("chapter") == "intro"
267-
assert doc.meta.get("status") == "updated"
268-
269-
# verify other documents weren't affected
270-
all_docs = document_store.filter_documents()
271-
for doc in all_docs:
272-
if doc.meta.get("chapter") != "intro":
273-
assert doc.meta.get("status") != "updated"
274-
275207
@pytest.mark.skip(reason="Unsupported filter operator not.")
276208
def test_not_operator(self, document_store, filterable_docs):
277209
pass

integrations/azure_ai_search/pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@ classifiers = [
2222
"Programming Language :: Python :: Implementation :: CPython",
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
25-
dependencies = ["haystack-ai>=2.22.0", "azure-search-documents>=11.5", "azure-identity"]
25+
dependencies = [
26+
"haystack-ai>=2.24.0",
27+
"azure-search-documents>=11.5",
28+
"azure-identity"
29+
]
2630

2731
[project.urls]
2832
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/azure_ai_search#readme"

integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/filters.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,19 @@ def _parse_comparison_condition(condition: dict[str, Any]) -> str:
6868

6969

7070
def _eq(field: str, value: Any) -> str:
71-
return f"{field} eq '{value}'" if isinstance(value, str) and value != "null" else f"{field} eq {value}"
71+
if isinstance(value, str) and value != "null":
72+
return f"{field} eq '{value}'"
73+
if isinstance(value, bool):
74+
return f"{field} eq {str(value).lower()}"
75+
return f"{field} eq {value}"
7276

7377

7478
def _ne(field: str, value: Any) -> str:
75-
return f"not ({field} eq '{value}')" if isinstance(value, str) and value != "null" else f"not ({field} eq {value})"
79+
if isinstance(value, str) and value != "null":
80+
return f"not ({field} eq '{value}')"
81+
if isinstance(value, bool):
82+
return f"not ({field} eq {str(value).lower()})"
83+
return f"not ({field} eq {value})"
7684

7785

7886
def _in(field: str, value: Any) -> str:

integrations/azure_ai_search/tests/conftest.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ def delete_by_filter_and_wait(filters):
7070
time.sleep(SLEEP_TIME_IN_SECONDS)
7171
return deleted_count
7272

73-
def update_by_filter_and_wait(filters, fields):
74-
updated_count = original_update_by_filter(filters, fields)
73+
def update_by_filter_and_wait(filters, meta=None, fields=None):
74+
updates = meta if meta is not None else fields or {}
75+
updated_count = original_update_by_filter(filters, updates)
7576
time.sleep(SLEEP_TIME_IN_SECONDS)
7677
return updated_count
7778

integrations/azure_ai_search/tests/test_document_store.py

Lines changed: 61 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
from haystack.errors import FilterError
1414
from haystack.testing.document_store import (
1515
CountDocumentsTest,
16+
DeleteAllTest,
17+
DeleteByFilterTest,
1618
DeleteDocumentsTest,
19+
FilterableDocsFixtureMixin,
1720
FilterDocumentsTest,
21+
UpdateByFilterTest,
1822
WriteDocumentsTest,
1923
)
2024
from haystack.utils.auth import EnvVarSecret, Secret
@@ -256,7 +260,15 @@ def _assert_documents_are_equal(received: list[Document], expected: list[Documen
256260
not os.environ.get("AZURE_AI_SEARCH_ENDPOINT", None) and not os.environ.get("AZURE_AI_SEARCH_API_KEY", None),
257261
reason="Missing AZURE_AI_SEARCH_ENDPOINT or AZURE_AI_SEARCH_API_KEY.",
258262
)
259-
class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
263+
class TestDocumentStore(
264+
CountDocumentsTest,
265+
DeleteDocumentsTest,
266+
DeleteAllTest,
267+
DeleteByFilterTest,
268+
FilterableDocsFixtureMixin,
269+
WriteDocumentsTest,
270+
UpdateByFilterTest,
271+
):
260272
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
261273
_assert_documents_are_equal(received, expected)
262274

@@ -290,120 +302,80 @@ def test_write_documents_duplicate_fail(self, document_store: AzureAISearchDocum
290302
@pytest.mark.skip(reason="Azure AI search index overwrites duplicate documents by default")
291303
def test_write_documents_duplicate_skip(self, document_store: AzureAISearchDocumentStore): ...
292304

293-
def test_delete_all_documents(self, document_store: AzureAISearchDocumentStore):
294-
docs = [Document(content="first doc"), Document(content="second doc")]
295-
document_store.write_documents(docs)
296-
assert document_store.count_documents() == 2
297-
document_store.delete_all_documents()
298-
assert document_store.count_documents() == 0
299-
300-
def test_delete_all_documents_empty_index(self, document_store: AzureAISearchDocumentStore):
301-
assert document_store.count_documents() == 0
302-
document_store.delete_all_documents()
303-
assert document_store.count_documents() == 0
304-
305305
@pytest.mark.parametrize(
306306
"document_store",
307307
[{"metadata_fields": {"category": str}}],
308308
indirect=True,
309309
)
310310
def test_delete_by_filter(self, document_store: AzureAISearchDocumentStore):
311-
docs = [
312-
Document(content="Doc 1", meta={"category": "A"}),
313-
Document(content="Doc 2", meta={"category": "B"}),
314-
Document(content="Doc 3", meta={"category": "A"}),
315-
]
316-
document_store.write_documents(docs)
317-
assert document_store.count_documents() == 3
318-
319-
# Delete documents with category="A"
320-
deleted_count = document_store.delete_by_filter(
321-
filters={"field": "meta.category", "operator": "==", "value": "A"}
322-
)
323-
assert deleted_count == 2
324-
assert document_store.count_documents() == 1
325-
326-
# Verify only category B remains
327-
remaining_docs = document_store.filter_documents()
328-
assert len(remaining_docs) == 1
329-
assert remaining_docs[0].meta["category"] == "B"
311+
"""Override to use a document_store with category metadata field."""
312+
DeleteByFilterTest.test_delete_by_filter(document_store)
330313

331314
@pytest.mark.parametrize(
332315
"document_store",
333316
[{"metadata_fields": {"category": str}}],
334317
indirect=True,
335318
)
336319
def test_delete_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore):
337-
docs = [
338-
Document(content="Doc 1", meta={"category": "A"}),
339-
Document(content="Doc 2", meta={"category": "B"}),
340-
]
341-
document_store.write_documents(docs)
342-
assert document_store.count_documents() == 2
343-
344-
# Try to delete documents with category="C" (no matches)
345-
deleted_count = document_store.delete_by_filter(
346-
filters={"field": "meta.category", "operator": "==", "value": "C"}
347-
)
348-
assert deleted_count == 0
349-
assert document_store.count_documents() == 2
320+
"""Override to use a document_store with category metadata field."""
321+
DeleteByFilterTest.test_delete_by_filter_no_matches(document_store)
350322

351323
@pytest.mark.parametrize(
352324
"document_store",
353-
[{"metadata_fields": {"category": str, "status": str}}],
325+
[{"metadata_fields": {"category": str, "year": int, "status": str}}],
354326
indirect=True,
355327
)
356-
def test_update_by_filter(self, document_store: AzureAISearchDocumentStore):
357-
docs = [
358-
Document(content="Doc 1", meta={"category": "A", "status": "draft"}),
359-
Document(content="Doc 2", meta={"category": "B", "status": "draft"}),
360-
Document(content="Doc 3", meta={"category": "A", "status": "draft"}),
361-
]
362-
document_store.write_documents(docs)
363-
assert document_store.count_documents() == 3
328+
def test_delete_by_filter_advanced_filters(self, document_store: AzureAISearchDocumentStore):
329+
"""Override to use a document_store with category, year, status metadata fields."""
330+
DeleteByFilterTest.test_delete_by_filter_advanced_filters(document_store)
331+
332+
# Metadata fields required by haystack UpdateByFilterTest filterable_docs (chapter, name, page, number, date, etc.)
333+
_FILTERABLE_DOCS_METADATA = { # noqa: RUF012
334+
"name": str,
335+
"page": str,
336+
"chapter": str,
337+
"number": int,
338+
"date": str,
339+
"no_embedding": bool,
340+
"updated": bool,
341+
"extra_field": str,
342+
}
364343

365-
# Update status for category="A" documents
366-
updated_count = document_store.update_by_filter(
367-
filters={"field": "meta.category", "operator": "==", "value": "A"},
368-
fields={"status": "published"},
369-
)
370-
assert updated_count == 2
344+
@pytest.mark.parametrize(
345+
"document_store",
346+
[{"metadata_fields": _FILTERABLE_DOCS_METADATA}],
347+
indirect=True,
348+
)
349+
def test_update_by_filter(self, document_store: AzureAISearchDocumentStore, filterable_docs):
350+
"""Override to use a document_store with metadata fields for filterable_docs."""
351+
UpdateByFilterTest.test_update_by_filter(document_store, filterable_docs)
371352

372-
# Verify the updates
373-
published_docs = document_store.filter_documents(
374-
filters={"field": "meta.status", "operator": "==", "value": "published"}
375-
)
376-
assert len(published_docs) == 2
377-
for doc in published_docs:
378-
assert doc.meta["category"] == "A"
379-
assert doc.meta["status"] == "published"
380-
381-
# Verify category B still has draft status
382-
draft_docs = document_store.filter_documents(
383-
filters={"field": "meta.status", "operator": "==", "value": "draft"}
384-
)
385-
assert len(draft_docs) == 1
386-
assert draft_docs[0].meta["category"] == "B"
353+
@pytest.mark.parametrize(
354+
"document_store",
355+
[{"metadata_fields": _FILTERABLE_DOCS_METADATA}],
356+
indirect=True,
357+
)
358+
def test_update_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore, filterable_docs):
359+
"""Override to use a document_store with metadata fields for filterable_docs."""
360+
UpdateByFilterTest.test_update_by_filter_no_matches(document_store, filterable_docs)
387361

388362
@pytest.mark.parametrize(
389363
"document_store",
390-
[{"metadata_fields": {"category": str, "status": str}}],
364+
[{"metadata_fields": _FILTERABLE_DOCS_METADATA}],
391365
indirect=True,
392366
)
393-
def test_update_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore):
394-
docs = [
395-
Document(content="Doc 1", meta={"category": "A", "status": "draft"}),
396-
Document(content="Doc 2", meta={"category": "B", "status": "draft"}),
397-
]
398-
document_store.write_documents(docs)
399-
assert document_store.count_documents() == 2
367+
def test_update_by_filter_multiple_fields(self, document_store: AzureAISearchDocumentStore, filterable_docs):
368+
"""Override to use a document_store with metadata fields for filterable_docs."""
369+
UpdateByFilterTest.test_update_by_filter_multiple_fields(document_store, filterable_docs)
400370

401-
# Try to update documents with category="C" (no matches)
402-
updated_count = document_store.update_by_filter(
403-
filters={"field": "meta.category", "operator": "==", "value": "C"},
404-
fields={"status": "published"},
405-
)
406-
assert updated_count == 0
371+
@pytest.mark.parametrize(
372+
"document_store",
373+
[{"metadata_fields": {"category": str, "year": int, "status": str, "featured": bool}}],
374+
indirect=True,
375+
)
376+
def test_update_by_filter_advanced_filters(self, document_store: AzureAISearchDocumentStore):
377+
"""Override to use a document_store with category, year, status, featured metadata fields."""
378+
UpdateByFilterTest.test_update_by_filter_advanced_filters(document_store)
407379

408380
@pytest.mark.parametrize(
409381
"document_store",

integrations/chroma/pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ classifiers = [
2222
"Programming Language :: Python :: Implementation :: CPython",
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
25-
dependencies = ["haystack-ai>=2.22.0", "chromadb>=1.0.2"]
25+
dependencies = [
26+
"haystack-ai>=2.24.0",
27+
"chromadb>=1.0.2"
28+
]
2629

2730
[project.urls]
2831
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/chroma#readme"

0 commit comments

Comments
 (0)