Skip to content

Commit 7de3e79

Browse files
test: use Mixin tests for PgvectorDocumentStore (#3003)
* overwriting some mixin tests due to different internal implementation
* chore: update haystack-ai dependency to >=2.26.1
  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* removing duplicated tests

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9eabba4 commit 7de3e79

2 files changed

Lines changed: 25 additions & 144 deletions

File tree

integrations/pgvector/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.24.0",
26+
"haystack-ai>=2.26.1",
2727
"pgvector>=0.3.0",
2828
"psycopg[binary]"
2929
]

integrations/pgvector/tests/test_document_store.py

Lines changed: 24 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@
1010
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1111
from haystack.document_stores.types import DuplicatePolicy
1212
from haystack.testing.document_store import (
13+
CountDocumentsByFilterTest,
1314
CountDocumentsTest,
15+
CountUniqueMetadataByFilterTest,
1416
DeleteAllTest,
1517
DeleteByFilterTest,
1618
DeleteDocumentsTest,
1719
FilterableDocsFixtureMixin,
20+
GetMetadataFieldMinMaxTest,
21+
GetMetadataFieldsInfoTest,
22+
GetMetadataFieldUniqueValuesTest,
1823
UpdateByFilterTest,
1924
WriteDocumentsTest,
2025
)
@@ -32,7 +37,26 @@ class TestDocumentStore(
3237
FilterableDocsFixtureMixin,
3338
UpdateByFilterTest,
3439
WriteDocumentsTest,
40+
CountDocumentsByFilterTest,
41+
CountUniqueMetadataByFilterTest,
42+
GetMetadataFieldsInfoTest,
43+
GetMetadataFieldMinMaxTest,
44+
GetMetadataFieldUniqueValuesTest,
3545
):
46+
def test_get_metadata_fields_info_empty_collection(self, document_store: PgvectorDocumentStore):
47+
"""PgvectorDocumentStore always includes 'content' in fields info, even for empty stores."""
48+
assert document_store.count_documents() == 0
49+
50+
fields_info = document_store.get_metadata_fields_info()
51+
assert fields_info == {"content": {"type": "text"}}
52+
53+
def test_get_metadata_field_min_max_empty_collection(self, document_store: PgvectorDocumentStore):
54+
"""PgvectorDocumentStore raises ValueError when the field doesn't exist in the store."""
55+
assert document_store.count_documents() == 0
56+
57+
with pytest.raises(ValueError, match="not found in document store"):
58+
document_store.get_metadata_field_min_max("priority")
59+
3660
def test_write_documents(self, document_store: PgvectorDocumentStore):
3761
docs = [Document(id="1")]
3862
assert document_store.write_documents(docs) == 1
@@ -280,151 +304,8 @@ def test_update_by_filter_empty_meta_raises_error(document_store: PgvectorDocume
280304
document_store.update_by_filter(filters={"field": "meta.category", "operator": "==", "value": "A"}, meta={})
281305

282306

283-
@pytest.mark.integration
284-
def test_count_documents_by_filter(document_store: PgvectorDocumentStore):
285-
docs = [
286-
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
287-
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
288-
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
289-
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
290-
]
291-
document_store.write_documents(docs)
292-
293-
count_a = document_store.count_documents_by_filter(
294-
filters={"field": "meta.category", "operator": "==", "value": "A"}
295-
)
296-
assert count_a == 3
297-
298-
count_a_active = document_store.count_documents_by_filter(
299-
filters={
300-
"operator": "AND",
301-
"conditions": [
302-
{"field": "meta.category", "operator": "==", "value": "A"},
303-
{"field": "meta.status", "operator": "==", "value": "active"},
304-
],
305-
}
306-
)
307-
assert count_a_active == 2
308-
309-
310-
@pytest.mark.integration
311-
def test_count_unique_metadata_by_filter(document_store: PgvectorDocumentStore):
312-
docs = [
313-
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
314-
Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
315-
Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
316-
Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
317-
Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
318-
]
319-
document_store.write_documents(docs)
320-
321-
distinct_counts = document_store.count_unique_metadata_by_filter(
322-
filters={}, metadata_fields=["category", "status", "priority"]
323-
)
324-
assert distinct_counts["category"] == 3 # A, B, C
325-
assert distinct_counts["status"] == 2 # active, inactive
326-
assert distinct_counts["priority"] == 3 # 1, 2, 3
327-
328-
# distinct values for documents with category="A"
329-
distinct_counts_a = document_store.count_unique_metadata_by_filter(
330-
filters={"field": "meta.category", "operator": "==", "value": "A"},
331-
metadata_fields=["category", "status", "priority"],
332-
)
333-
assert distinct_counts_a["category"] == 1 # Only A
334-
assert distinct_counts_a["status"] == 2 # active, inactive
335-
assert distinct_counts_a["priority"] == 2 # 1, 3
336-
337-
# distinct values with complex filter (category="A" AND status="active")
338-
distinct_counts_a_active = document_store.count_unique_metadata_by_filter(
339-
filters={
340-
"operator": "AND",
341-
"conditions": [
342-
{"field": "meta.category", "operator": "==", "value": "A"},
343-
{"field": "meta.status", "operator": "==", "value": "active"},
344-
],
345-
},
346-
metadata_fields=["category", "status", "priority"],
347-
)
348-
assert distinct_counts_a_active["category"] == 1 # Only A
349-
assert distinct_counts_a_active["status"] == 1 # Only active
350-
assert distinct_counts_a_active["priority"] == 2 # 1, 3
351-
352-
# with only a subset of fields
353-
distinct_counts_subset = document_store.count_unique_metadata_by_filter(
354-
filters={}, metadata_fields=["category", "status"]
355-
)
356-
assert distinct_counts_subset["category"] == 3
357-
assert distinct_counts_subset["status"] == 2
358-
assert "priority" not in distinct_counts_subset
359-
360-
# with field name normalization (with "meta." prefix)
361-
distinct_counts_normalized = document_store.count_unique_metadata_by_filter(
362-
filters={}, metadata_fields=["meta.category", "status", "meta.priority"]
363-
)
364-
assert distinct_counts_normalized["category"] == 3
365-
assert distinct_counts_normalized["status"] == 2
366-
assert distinct_counts_normalized["priority"] == 3
367-
368-
369-
@pytest.mark.integration
370-
def test_get_metadata_fields_info(document_store: PgvectorDocumentStore):
371-
docs = [
372-
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
373-
Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
374-
]
375-
document_store.write_documents(docs)
376-
377-
fields_info = document_store.get_metadata_fields_info()
378-
379-
# Verify that fields_info contains expected fields
380-
assert "content" in fields_info
381-
assert "category" in fields_info
382-
assert "status" in fields_info
383-
assert "priority" in fields_info
384-
385-
assert fields_info["content"]["type"] == "text"
386-
assert fields_info["category"]["type"] == "text"
387-
assert fields_info["status"]["type"] == "text"
388-
assert fields_info["priority"]["type"] == "integer"
389-
390-
391307
@pytest.mark.integration
392308
def test_get_metadata_field_min_max(document_store: PgvectorDocumentStore):
393-
# Test with integer values
394-
docs = [
395-
Document(content="Doc 1", meta={"priority": 1, "age": 10}),
396-
Document(content="Doc 2", meta={"priority": 5, "age": 20}),
397-
Document(content="Doc 3", meta={"priority": 3, "age": 15}),
398-
Document(content="Doc 4", meta={"priority": 10, "age": 5}),
399-
Document(content="Doc 6", meta={"rating": 10.5}),
400-
Document(content="Doc 7", meta={"rating": 20.3}),
401-
Document(content="Doc 8", meta={"rating": 15.7}),
402-
Document(content="Doc 9", meta={"rating": 5.2}),
403-
]
404-
document_store.write_documents(docs)
405-
406-
# Test with "meta." prefix for integer field
407-
min_max_priority = document_store.get_metadata_field_min_max("meta.priority")
408-
assert min_max_priority["min"] == 1
409-
assert min_max_priority["max"] == 10
410-
411-
# Test with "meta." prefix for another integer field
412-
min_max_age = document_store.get_metadata_field_min_max("meta.age")
413-
assert min_max_age["min"] == 5
414-
assert min_max_age["max"] == 20
415-
416-
# Test with single value
417-
single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
418-
document_store.write_documents(single_doc)
419-
min_max_single = document_store.get_metadata_field_min_max("meta.single_value")
420-
assert min_max_single["min"] == 42
421-
assert min_max_single["max"] == 42
422-
423-
# Test with float values
424-
min_max_rating = document_store.get_metadata_field_min_max("meta.rating")
425-
assert min_max_rating["min"] == pytest.approx(5.2)
426-
assert min_max_rating["max"] == pytest.approx(20.3)
427-
428309
# Test with text/string values - lexicographic comparison
429310
text_docs = [
430311
Document(content="Doc 1", meta={"category": "Zebra", "status": "active"}),

0 commit comments

Comments
 (0)