|
9 | 9 | from haystack.dataclasses.document import Document |
10 | 10 | from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError |
11 | 11 | from haystack.document_stores.types import DuplicatePolicy |
12 | | -from haystack.testing.document_store import DocumentStoreBaseExtendedTests |
| 12 | +from haystack.testing.document_store import ( |
| 13 | + CountDocumentsByFilterTest, |
| 14 | + CountUniqueMetadataByFilterTest, |
| 15 | + DocumentStoreBaseExtendedTests, |
| 16 | + GetMetadataFieldMinMaxTest, |
| 17 | + GetMetadataFieldsInfoTest, |
| 18 | + GetMetadataFieldUniqueValuesTest, |
| 19 | +) |
13 | 20 | from opensearchpy.exceptions import RequestError |
14 | 21 |
|
15 | 22 | from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore |
@@ -218,10 +225,16 @@ def test_routing_in_delete(mock_bulk, _mock_opensearch_client): |
218 | 225 |
|
219 | 226 |
|
220 | 227 | @pytest.mark.integration |
221 | | -class TestDocumentStore(DocumentStoreBaseExtendedTests): |
| 228 | +class TestDocumentStore( |
| 229 | + CountDocumentsByFilterTest, |
| 230 | + CountUniqueMetadataByFilterTest, |
| 231 | + DocumentStoreBaseExtendedTests, |
| 232 | + GetMetadataFieldsInfoTest, |
| 233 | + GetMetadataFieldMinMaxTest, |
| 234 | + GetMetadataFieldUniqueValuesTest, |
| 235 | +): |
222 | 236 | """ |
223 | | - Common test cases will be provided by `DocumentStoreBaseExtendedTests` but |
224 | | - you can add more to this class. |
| 237 | + Common test cases are provided by `DocumentStoreBaseExtendedTests` and the other inherited test mixins, but you can add more to this class. |
225 | 238 | """ |
226 | 239 |
|
227 | 240 | @pytest.fixture |
@@ -607,158 +620,6 @@ def test_delete_all_documents_index_recreation(self, document_store: OpenSearchD |
607 | 620 | assert len(results) == 1 |
608 | 621 | assert results[0].content == "New document after delete all" |
609 | 622 |
|
610 | | - def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore): |
611 | | - docs = [ |
612 | | - Document(content="Doc 1", meta={"category": "A", "status": "active"}), |
613 | | - Document(content="Doc 2", meta={"category": "B", "status": "active"}), |
614 | | - Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), |
615 | | - Document(content="Doc 4", meta={"category": "A", "status": "active"}), |
616 | | - ] |
617 | | - document_store.write_documents(docs) |
618 | | - assert document_store.count_documents() == 4 |
619 | | - |
620 | | - count_a = document_store.count_documents_by_filter( |
621 | | - filters={"field": "meta.category", "operator": "==", "value": "A"} |
622 | | - ) |
623 | | - assert count_a == 3 |
624 | | - |
625 | | - count_a_active = document_store.count_documents_by_filter( |
626 | | - filters={ |
627 | | - "operator": "AND", |
628 | | - "conditions": [ |
629 | | - {"field": "meta.category", "operator": "==", "value": "A"}, |
630 | | - {"field": "meta.status", "operator": "==", "value": "active"}, |
631 | | - ], |
632 | | - } |
633 | | - ) |
634 | | - assert count_a_active == 2 |
635 | | - |
636 | | - def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumentStore): |
637 | | - docs = [ |
638 | | - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), |
639 | | - Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), |
640 | | - Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), |
641 | | - Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), |
642 | | - Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}), |
643 | | - ] |
644 | | - document_store.write_documents(docs) |
645 | | - assert document_store.count_documents() == 5 |
646 | | - |
647 | | - # Count distinct values for all documents |
648 | | - distinct_counts = document_store.count_unique_metadata_by_filter( |
649 | | - filters={}, metadata_fields=["category", "status", "priority"] |
650 | | - ) |
651 | | - assert distinct_counts["category"] == 3 # A, B, C |
652 | | - assert distinct_counts["status"] == 2 # active, inactive |
653 | | - assert distinct_counts["priority"] == 3 # 1, 2, 3 |
654 | | - |
655 | | - # Count distinct values for documents with category="A" |
656 | | - distinct_counts_a = document_store.count_unique_metadata_by_filter( |
657 | | - filters={"field": "meta.category", "operator": "==", "value": "A"}, |
658 | | - metadata_fields=["category", "status", "priority"], |
659 | | - ) |
660 | | - assert distinct_counts_a["category"] == 1 # Only A |
661 | | - assert distinct_counts_a["status"] == 2 # active, inactive |
662 | | - assert distinct_counts_a["priority"] == 2 # 1, 3 |
663 | | - |
664 | | - # Count distinct values for documents with status="active" |
665 | | - distinct_counts_active = document_store.count_unique_metadata_by_filter( |
666 | | - filters={"field": "meta.status", "operator": "==", "value": "active"}, |
667 | | - metadata_fields=["category", "status", "priority"], |
668 | | - ) |
669 | | - assert distinct_counts_active["category"] == 3 # A, B, C |
670 | | - assert distinct_counts_active["status"] == 1 # Only active |
671 | | - assert distinct_counts_active["priority"] == 3 # 1, 2, 3 |
672 | | - |
673 | | - # Count distinct values with complex filter (category="A" AND status="active") |
674 | | - distinct_counts_a_active = document_store.count_unique_metadata_by_filter( |
675 | | - filters={ |
676 | | - "operator": "AND", |
677 | | - "conditions": [ |
678 | | - {"field": "meta.category", "operator": "==", "value": "A"}, |
679 | | - {"field": "meta.status", "operator": "==", "value": "active"}, |
680 | | - ], |
681 | | - }, |
682 | | - metadata_fields=["category", "status", "priority"], |
683 | | - ) |
684 | | - assert distinct_counts_a_active["category"] == 1 # Only A |
685 | | - assert distinct_counts_a_active["status"] == 1 # Only active |
686 | | - assert distinct_counts_a_active["priority"] == 2 # 1, 3 |
687 | | - |
688 | | - # Test with only a subset of fields |
689 | | - distinct_counts_subset = document_store.count_unique_metadata_by_filter( |
690 | | - filters={}, metadata_fields=["category", "status"] |
691 | | - ) |
692 | | - assert distinct_counts_subset["category"] == 3 |
693 | | - assert distinct_counts_subset["status"] == 2 |
694 | | - assert "priority" not in distinct_counts_subset |
695 | | - |
696 | | - # Test field name normalization (with "meta." prefix) |
697 | | - distinct_counts_normalized = document_store.count_unique_metadata_by_filter( |
698 | | - filters={}, metadata_fields=["meta.category", "status", "meta.priority"] |
699 | | - ) |
700 | | - assert distinct_counts_normalized["category"] == 3 |
701 | | - assert distinct_counts_normalized["status"] == 2 |
702 | | - assert distinct_counts_normalized["priority"] == 3 |
703 | | - |
704 | | - # Test error handling when field doesn't exist |
705 | | - with pytest.raises(ValueError, match="Fields not found in index mapping"): |
706 | | - document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["nonexistent_field"]) |
707 | | - |
708 | | - def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore): |
709 | | - docs = [ |
710 | | - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), |
711 | | - Document(content="Doc 2", meta={"category": "B", "status": "inactive"}), |
712 | | - ] |
713 | | - document_store.write_documents(docs) |
714 | | - |
715 | | - fields_info = document_store.get_metadata_fields_info() |
716 | | - |
717 | | - # Verify that fields_info contains expected fields |
718 | | - assert "category" in fields_info |
719 | | - assert "status" in fields_info |
720 | | - assert "priority" in fields_info |
721 | | - |
722 | | - assert fields_info["category"]["type"] == "keyword" |
723 | | - assert fields_info["status"]["type"] == "keyword" |
724 | | - assert fields_info["priority"]["type"] == "long" |
725 | | - |
726 | | - def test_get_metadata_field_min_max(self, document_store: OpenSearchDocumentStore): |
727 | | - # Test with integer values |
728 | | - docs = [ |
729 | | - Document(content="Doc 1", meta={"priority": 1, "age": 10}), |
730 | | - Document(content="Doc 2", meta={"priority": 5, "age": 20}), |
731 | | - Document(content="Doc 3", meta={"priority": 3, "age": 15}), |
732 | | - Document(content="Doc 4", meta={"priority": 10, "age": 5}), |
733 | | - Document(content="Doc 6", meta={"rating": 10.5}), |
734 | | - Document(content="Doc 7", meta={"rating": 20.3}), |
735 | | - Document(content="Doc 8", meta={"rating": 15.7}), |
736 | | - Document(content="Doc 9", meta={"rating": 5.2}), |
737 | | - ] |
738 | | - document_store.write_documents(docs) |
739 | | - |
740 | | - # Test with "meta." prefix for integer field |
741 | | - min_max_priority = document_store.get_metadata_field_min_max("meta.priority") |
742 | | - assert min_max_priority["min"] == 1 |
743 | | - assert min_max_priority["max"] == 10 |
744 | | - |
745 | | - # Test with "meta." prefix for another integer field |
746 | | - min_max_rating = document_store.get_metadata_field_min_max("meta.age") |
747 | | - assert min_max_rating["min"] == 5 |
748 | | - assert min_max_rating["max"] == 20 |
749 | | - |
750 | | - # Test with single value |
751 | | - single_doc = [Document(content="Doc 5", meta={"single_value": 42})] |
752 | | - document_store.write_documents(single_doc) |
753 | | - min_max_single = document_store.get_metadata_field_min_max("meta.single_value") |
754 | | - assert min_max_single["min"] == 42 |
755 | | - assert min_max_single["max"] == 42 |
756 | | - |
757 | | - # Test with float values |
758 | | - min_max_score = document_store.get_metadata_field_min_max("meta.rating") |
759 | | - assert min_max_score["min"] == pytest.approx(5.2) |
760 | | - assert min_max_score["max"] == pytest.approx(20.3) |
761 | | - |
762 | 623 | def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocumentStore): |
763 | 624 | # Test with string values |
764 | 625 | docs = [ |
|
0 commit comments