Skip to content

Commit 2f7b2e5

Browse files
test: PineconeDocumentStore use Mixin tests (#3020)
* adding Mixin tests
* removing redundant/duplicated test and small fixes to make other tests pass
* chore: update haystack-ai dependency to >=2.26.1

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* isort
* removing duplicated tests
* removing duplicated tests

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e97481a commit 2f7b2e5

3 files changed

Lines changed: 20 additions & 116 deletions

File tree

integrations/pinecone/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
"Programming Language :: Python :: Implementation :: PyPy",
2525
]
2626
dependencies = [
27-
"haystack-ai>=2.24.0",
27+
"haystack-ai>=2.26.1",
2828
"pinecone[asyncio]>=7.0.0",
2929
]
3030

integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -852,10 +852,11 @@ def _get_metadata_fields_info_impl(documents: list[Document]) -> dict[str, dict[
852852
@staticmethod
853853
def _get_metadata_field_min_max_impl(documents: list[Document], metadata_field: str) -> dict[str, Any]:
854854
"""Helper method to get min/max values for a metadata field (supports numeric, boolean, and string types)."""
855+
field_name = metadata_field.removeprefix("meta.")
855856
values: list[bool | int | float | str] = []
856857
for doc in documents:
857-
if doc.meta and metadata_field in doc.meta:
858-
value = doc.meta[metadata_field]
858+
if doc.meta and field_name in doc.meta:
859+
value = doc.meta[field_name]
859860
# Note: bool check must come before numeric because bool is subclass of int
860861
if isinstance(value, bool):
861862
values.append(value)

integrations/pinecone/tests/test_document_store.py

Lines changed: 16 additions & 113 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,16 @@
1212
from haystack.components.preprocessors import DocumentSplitter
1313
from haystack.components.retrievers import SentenceWindowRetriever
1414
from haystack.testing.document_store import (
15+
CountDocumentsByFilterTest,
1516
CountDocumentsTest,
17+
CountUniqueMetadataByFilterTest,
1618
DeleteAllTest,
1719
DeleteByFilterTest,
1820
DeleteDocumentsTest,
1921
FilterableDocsFixtureMixin,
22+
GetMetadataFieldMinMaxTest,
23+
GetMetadataFieldsInfoTest,
24+
GetMetadataFieldUniqueValuesTest,
2025
UpdateByFilterTest,
2126
WriteDocumentsTest,
2227
)
@@ -276,6 +281,11 @@ class TestDocumentStore(
276281
UpdateByFilterTest,
277282
DeleteAllTest,
278283
DeleteByFilterTest,
284+
CountDocumentsByFilterTest,
285+
CountUniqueMetadataByFilterTest,
286+
GetMetadataFieldsInfoTest,
287+
GetMetadataFieldMinMaxTest,
288+
GetMetadataFieldUniqueValuesTest,
279289
):
280290
def test_write_documents(self, document_store: PineconeDocumentStore):
281291
docs = [Document(id="1")]
@@ -353,108 +363,6 @@ def test_sentence_window_retriever(self, document_store: PineconeDocumentStore):
353363

354364
assert len(result["context_windows"]) == 1
355365

356-
def test_count_documents_by_filter(self, document_store: PineconeDocumentStore):
357-
docs = [
358-
Document(content="Doc 1", meta={"category": "A", "status": "draft"}),
359-
Document(content="Doc 2", meta={"category": "B", "status": "published"}),
360-
Document(content="Doc 3", meta={"category": "A", "status": "published"}),
361-
Document(content="Doc 4", meta={"category": "A", "status": "draft"}),
362-
]
363-
document_store.write_documents(docs)
364-
365-
# Count documents with category="A"
366-
count = document_store.count_documents_by_filter(
367-
filters={"field": "meta.category", "operator": "==", "value": "A"}
368-
)
369-
assert count == 3
370-
371-
# Count documents with status="published"
372-
count = document_store.count_documents_by_filter(
373-
filters={"field": "meta.status", "operator": "==", "value": "published"}
374-
)
375-
assert count == 2
376-
377-
# Count with complex filter
378-
count = document_store.count_documents_by_filter(
379-
filters={
380-
"operator": "AND",
381-
"conditions": [
382-
{"field": "meta.category", "operator": "==", "value": "A"},
383-
{"field": "meta.status", "operator": "==", "value": "draft"},
384-
],
385-
}
386-
)
387-
assert count == 2
388-
389-
def test_count_unique_metadata_by_filter(self, document_store: PineconeDocumentStore):
390-
docs = [
391-
Document(content="Doc 1", meta={"category": "A", "author": "Alice", "priority": 1}),
392-
Document(content="Doc 2", meta={"category": "B", "author": "Bob", "priority": 2}),
393-
Document(content="Doc 3", meta={"category": "A", "author": "Alice", "priority": 1}),
394-
Document(content="Doc 4", meta={"category": "C", "author": "Charlie", "priority": 3}),
395-
Document(content="Doc 5", meta={"category": "A", "author": "Bob", "priority": 2}),
396-
]
397-
document_store.write_documents(docs)
398-
399-
# Count unique values without filter
400-
counts = document_store.count_unique_metadata_by_filter(
401-
filters={}, metadata_fields=["category", "author", "priority"]
402-
)
403-
assert counts["category"] == 3 # A, B, C
404-
assert counts["author"] == 3 # Alice, Bob, Charlie
405-
assert counts["priority"] == 3 # 1, 2, 3
406-
407-
# Count unique values with filter
408-
counts = document_store.count_unique_metadata_by_filter(
409-
filters={"field": "meta.category", "operator": "==", "value": "A"},
410-
metadata_fields=["author", "priority"],
411-
)
412-
assert counts["author"] == 2 # Alice, Bob
413-
assert counts["priority"] == 2 # 1, 2
414-
415-
def test_get_metadata_fields_info(self, document_store: PineconeDocumentStore):
416-
docs = [
417-
Document(
418-
content="Doc 1",
419-
meta={
420-
"category": "A",
421-
"author": "Alice",
422-
"priority": 1,
423-
"is_published": True,
424-
"tags": ["tag1", "tag2"],
425-
},
426-
),
427-
Document(content="Doc 2", meta={"category": "B", "author": "Bob", "priority": 2, "is_published": False}),
428-
]
429-
document_store.write_documents(docs)
430-
431-
field_info = document_store.get_metadata_fields_info()
432-
433-
# Check content field
434-
assert "content" in field_info
435-
assert field_info["content"]["type"] == "text"
436-
437-
# Check metadata fields
438-
assert "category" in field_info
439-
assert field_info["category"]["type"] == "keyword"
440-
441-
assert "author" in field_info
442-
assert field_info["author"]["type"] == "keyword"
443-
444-
assert "priority" in field_info
445-
assert field_info["priority"]["type"] == "long"
446-
447-
assert "is_published" in field_info
448-
assert field_info["is_published"]["type"] == "boolean"
449-
450-
assert "tags" in field_info
451-
assert field_info["tags"]["type"] == "keyword"
452-
453-
def test_get_metadata_fields_info_empty(self, document_store: PineconeDocumentStore):
454-
# Test with no documents
455-
field_info = document_store.get_metadata_fields_info()
456-
assert field_info == {}
457-
458366
def test_get_metadata_fields_info_consistent_types(self, document_store: PineconeDocumentStore):
459367
# Test that all documents are checked for type consistency
460368
docs = [
@@ -468,7 +376,7 @@ def test_get_metadata_fields_info_consistent_types(self, document_store: Pinecon
468376
assert "score" in field_info
469377
assert field_info["score"]["type"] == "long"
470378

471-
def test_get_metadata_field_min_max(self, document_store: PineconeDocumentStore):
379+
def test_get_metadata_field_min_max_boolean_and_string(self, document_store: PineconeDocumentStore):
472380
docs = [
473381
Document(content="Doc 1", meta={"priority": 1, "score": 85.5, "active": True, "category": "Zebra"}),
474382
Document(content="Doc 2", meta={"priority": 5, "score": 92.3, "active": False, "category": "Alpha"}),
@@ -477,16 +385,6 @@ def test_get_metadata_field_min_max(self, document_store: PineconeDocumentStore)
477385
]
478386
document_store.write_documents(docs)
479387

480-
# Get min/max for numeric field (int)
481-
min_max = document_store.get_metadata_field_min_max("priority")
482-
assert min_max["min"] == 1
483-
assert min_max["max"] == 7
484-
485-
# Get min/max for numeric field (float)
486-
min_max = document_store.get_metadata_field_min_max("score")
487-
assert min_max["min"] == 78.9
488-
assert min_max["max"] == 95.1
489-
490388
# Get min/max for boolean field
491389
min_max = document_store.get_metadata_field_min_max("active")
492390
assert min_max["min"] is False
@@ -497,6 +395,11 @@ def test_get_metadata_field_min_max(self, document_store: PineconeDocumentStore)
497395
assert min_max["min"] == "Alpha"
498396
assert min_max["max"] == "Zebra"
499397

398+
def test_get_metadata_field_min_max_empty_collection(self, document_store: PineconeDocumentStore):
399+
assert document_store.count_documents() == 0
400+
with pytest.raises(ValueError, match="No values found"):
401+
document_store.get_metadata_field_min_max("priority")
402+
500403
def test_get_metadata_field_min_max_no_values(self, document_store: PineconeDocumentStore):
501404
docs = [
502405
Document(content="Doc 1", meta={"tags": ["tag1", "tag2"]}),

0 commit comments

Comments (0)