|
13 | 13 | from haystack.errors import FilterError |
14 | 14 | from haystack.testing.document_store import ( |
15 | 15 | CountDocumentsTest, |
| 16 | + DeleteAllTest, |
| 17 | + DeleteByFilterTest, |
16 | 18 | DeleteDocumentsTest, |
| 19 | + FilterableDocsFixtureMixin, |
17 | 20 | FilterDocumentsTest, |
| 21 | + UpdateByFilterTest, |
18 | 22 | WriteDocumentsTest, |
19 | 23 | ) |
20 | 24 | from haystack.utils.auth import EnvVarSecret, Secret |
@@ -256,7 +260,15 @@ def _assert_documents_are_equal(received: list[Document], expected: list[Documen |
256 | 260 | not os.environ.get("AZURE_AI_SEARCH_ENDPOINT", None) and not os.environ.get("AZURE_AI_SEARCH_API_KEY", None), |
257 | 261 | reason="Missing AZURE_AI_SEARCH_ENDPOINT or AZURE_AI_SEARCH_API_KEY.", |
258 | 262 | ) |
259 | | -class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): |
| 263 | +class TestDocumentStore( |
| 264 | + CountDocumentsTest, |
| 265 | + DeleteDocumentsTest, |
| 266 | + DeleteAllTest, |
| 267 | + DeleteByFilterTest, |
| 268 | + FilterableDocsFixtureMixin, |
| 269 | + WriteDocumentsTest, |
| 270 | + UpdateByFilterTest, |
| 271 | +): |
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
    """Compare ``received`` against ``expected`` via the module-level ``_assert_documents_are_equal`` helper."""
    _assert_documents_are_equal(received=received, expected=expected)
262 | 274 |
|
@@ -290,120 +302,80 @@ def test_write_documents_duplicate_fail(self, document_store: AzureAISearchDocum |
@pytest.mark.skip(reason="Azure AI search index overwrites duplicate documents by default")
def test_write_documents_duplicate_skip(self, document_store: AzureAISearchDocumentStore):
    """Intentionally empty: the skip marker above disables this test for the reason it states."""
292 | 304 |
|
293 | | - def test_delete_all_documents(self, document_store: AzureAISearchDocumentStore): |
294 | | - docs = [Document(content="first doc"), Document(content="second doc")] |
295 | | - document_store.write_documents(docs) |
296 | | - assert document_store.count_documents() == 2 |
297 | | - document_store.delete_all_documents() |
298 | | - assert document_store.count_documents() == 0 |
299 | | - |
300 | | - def test_delete_all_documents_empty_index(self, document_store: AzureAISearchDocumentStore): |
301 | | - assert document_store.count_documents() == 0 |
302 | | - document_store.delete_all_documents() |
303 | | - assert document_store.count_documents() == 0 |
304 | | - |
@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str}}], indirect=True)
def test_delete_by_filter(self, document_store: AzureAISearchDocumentStore):
    """
    Run the ``DeleteByFilterTest`` delete-by-filter case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly so that it declares a
    ``category`` (str) metadata field.
    """
    inherited_case = DeleteByFilterTest.test_delete_by_filter
    inherited_case(document_store)
330 | 313 |
|
@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str}}], indirect=True)
def test_delete_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore):
    """
    Run the ``DeleteByFilterTest`` no-matches case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly so that it declares a
    ``category`` (str) metadata field.
    """
    inherited_case = DeleteByFilterTest.test_delete_by_filter_no_matches
    inherited_case(document_store)
350 | 322 |
|
@pytest.mark.parametrize(
    "document_store",
    [{"metadata_fields": {"category": str, "year": int, "status": str}}],
    indirect=True,
)
def test_delete_by_filter_advanced_filters(self, document_store: AzureAISearchDocumentStore):
    """
    Run the ``DeleteByFilterTest`` advanced-filters case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly so that it declares the
    ``category`` (str), ``year`` (int) and ``status`` (str) metadata fields.
    """
    inherited_case = DeleteByFilterTest.test_delete_by_filter_advanced_filters
    inherited_case(document_store)
| 331 | + |
# Metadata field name -> Python type mapping handed to the ``document_store`` fixture as
# ``metadata_fields`` by the update-by-filter tests that consume ``filterable_docs``
# (test_update_by_filter, test_update_by_filter_no_matches, test_update_by_filter_multiple_fields).
_FILTERABLE_DOCS_METADATA = {  # noqa: RUF012
    "name": str,
    "page": str,
    "chapter": str,
    "number": int,
    "date": str,
    "no_embedding": bool,
    "updated": bool,
    "extra_field": str,
}
364 | 343 |
|
365 | | - # Update status for category="A" documents |
366 | | - updated_count = document_store.update_by_filter( |
367 | | - filters={"field": "meta.category", "operator": "==", "value": "A"}, |
368 | | - fields={"status": "published"}, |
369 | | - ) |
370 | | - assert updated_count == 2 |
@pytest.mark.parametrize("document_store", [{"metadata_fields": _FILTERABLE_DOCS_METADATA}], indirect=True)
def test_update_by_filter(self, document_store: AzureAISearchDocumentStore, filterable_docs):
    """
    Run the ``UpdateByFilterTest`` update-by-filter case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly with
    ``_FILTERABLE_DOCS_METADATA`` as its ``metadata_fields``; ``filterable_docs`` is
    forwarded unchanged.
    """
    inherited_case = UpdateByFilterTest.test_update_by_filter
    inherited_case(document_store, filterable_docs)
371 | 352 |
|
372 | | - # Verify the updates |
373 | | - published_docs = document_store.filter_documents( |
374 | | - filters={"field": "meta.status", "operator": "==", "value": "published"} |
375 | | - ) |
376 | | - assert len(published_docs) == 2 |
377 | | - for doc in published_docs: |
378 | | - assert doc.meta["category"] == "A" |
379 | | - assert doc.meta["status"] == "published" |
380 | | - |
381 | | - # Verify category B still has draft status |
382 | | - draft_docs = document_store.filter_documents( |
383 | | - filters={"field": "meta.status", "operator": "==", "value": "draft"} |
384 | | - ) |
385 | | - assert len(draft_docs) == 1 |
386 | | - assert draft_docs[0].meta["category"] == "B" |
@pytest.mark.parametrize("document_store", [{"metadata_fields": _FILTERABLE_DOCS_METADATA}], indirect=True)
def test_update_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore, filterable_docs):
    """
    Run the ``UpdateByFilterTest`` no-matches case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly with
    ``_FILTERABLE_DOCS_METADATA`` as its ``metadata_fields``; ``filterable_docs`` is
    forwarded unchanged.
    """
    inherited_case = UpdateByFilterTest.test_update_by_filter_no_matches
    inherited_case(document_store, filterable_docs)
387 | 361 |
|
@pytest.mark.parametrize("document_store", [{"metadata_fields": _FILTERABLE_DOCS_METADATA}], indirect=True)
def test_update_by_filter_multiple_fields(self, document_store: AzureAISearchDocumentStore, filterable_docs):
    """
    Run the ``UpdateByFilterTest`` multiple-fields case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly with
    ``_FILTERABLE_DOCS_METADATA`` as its ``metadata_fields``; ``filterable_docs`` is
    forwarded unchanged.
    """
    inherited_case = UpdateByFilterTest.test_update_by_filter_multiple_fields
    inherited_case(document_store, filterable_docs)
400 | 370 |
|
401 | | - # Try to update documents with category="C" (no matches) |
402 | | - updated_count = document_store.update_by_filter( |
403 | | - filters={"field": "meta.category", "operator": "==", "value": "C"}, |
404 | | - fields={"status": "published"}, |
405 | | - ) |
406 | | - assert updated_count == 0 |
@pytest.mark.parametrize(
    "document_store",
    [{"metadata_fields": {"category": str, "year": int, "status": str, "featured": bool}}],
    indirect=True,
)
def test_update_by_filter_advanced_filters(self, document_store: AzureAISearchDocumentStore):
    """
    Run the ``UpdateByFilterTest`` advanced-filters case from ``haystack.testing.document_store``.

    The ``document_store`` fixture is parametrized indirectly so that it declares the
    ``category`` (str), ``year`` (int), ``status`` (str) and ``featured`` (bool) metadata fields.
    """
    inherited_case = UpdateByFilterTest.test_update_by_filter_advanced_filters
    inherited_case(document_store)
407 | 379 |
|
408 | 380 | @pytest.mark.parametrize( |
409 | 381 | "document_store", |
|
0 commit comments