Skip to content

Commit 59a2f4c

Browse files
authored
test: pinecone - add unit tests (#3218)
1 parent cbb212a commit 59a2f4c

3 files changed

Lines changed: 247 additions & 0 deletions

File tree

integrations/pinecone/tests/test_document_store.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from haystack import Document
1212
from haystack.components.preprocessors import DocumentSplitter
1313
from haystack.components.retrievers import SentenceWindowRetriever
14+
from haystack.dataclasses import ByteStream, SparseEmbedding
15+
from haystack.document_stores.types import DuplicatePolicy
1416
from haystack.testing.document_store import (
1517
CountDocumentsByFilterTest,
1618
CountDocumentsTest,
@@ -230,6 +232,132 @@ def test_convert_meta_to_int():
230232
assert PineconeDocumentStore._convert_meta_to_int(meta_data) == {}
231233

232234

235+
@pytest.mark.parametrize(
    ("documents", "expected", "warning_fragment"),
    [
        # No documents at all: an empty mapping is expected.
        ([], {}, None),
        # String content plus a boolean meta value.
        (
            [Document(content="hello", meta={"flag": True})],
            {"content": {"type": "text"}, "flag": {"type": "boolean"}},
            None,
        ),
        # List of strings is expected to be reported as "keyword".
        (
            [Document(content=None, meta={"tags": ["a", "b"]})],
            {"tags": {"type": "keyword"}},
            None,
        ),
        # List of integers is expected to be reported as "long".
        (
            [Document(content=None, meta={"counts": [1, 2]})],
            {"counts": {"type": "long"}},
            None,
        ),
        # Empty list is expected to be reported as "keyword".
        (
            [Document(content=None, meta={"empty": []})],
            {"empty": {"type": "keyword"}},
            None,
        ),
        # Float value is expected to be reported as "long".
        (
            [Document(content=None, meta={"pi": 3.14})],
            {"pi": {"type": "long"}},
            None,
        ),
        # Same field with an int in one document and a str in another:
        # expected to be reported as "keyword" and to log a "mixed types" warning.
        (
            [
                Document(content=None, meta={"value": 1}),
                Document(content=None, meta={"value": "two"}),
            ],
            {"value": {"type": "keyword"}},
            "mixed types",
        ),
    ],
)
def test_get_metadata_fields_info_impl_type_inference(documents, expected, warning_fragment, caplog):
    """
    Check the field-type mapping returned by `_get_metadata_fields_info_impl`.

    :param documents: Documents handed to the method under test.
    :param expected: Mapping the method is expected to return.
    :param warning_fragment: Text expected in the captured WARNING log, or None when no check is made.
    :param caplog: pytest log-capture fixture.
    """
    with caplog.at_level("WARNING"):
        inferred = PineconeDocumentStore._get_metadata_fields_info_impl(documents)

    assert inferred == expected

    # Only cases that declare a fragment make an assertion about the log output.
    if not warning_fragment:
        return
    assert warning_fragment in caplog.text
280+
281+
282+
def test_get_metadata_field_min_max_impl_strips_meta_prefix_and_errors():
    """
    Check `_get_metadata_field_min_max_impl` for a "meta."-prefixed field and for an absent field.

    The prefixed name "meta.priority" must yield the min/max of the `priority` values,
    and a field that no document carries must raise a `ValueError`.
    """
    low_priority = Document(content="a", meta={"priority": 1})
    high_priority = Document(content="b", meta={"priority": 5})
    documents = [low_priority, high_priority]

    min_max = PineconeDocumentStore._get_metadata_field_min_max_impl(documents, "meta.priority")
    assert min_max == {"min": 1, "max": 5}

    # "missing" is not a meta key on either document.
    with pytest.raises(ValueError, match="No values found"):
        PineconeDocumentStore._get_metadata_field_min_max_impl(documents, "missing")
291+
292+
293+
def test_get_metadata_field_unique_values_impl_pagination_search_and_lists():
    """
    Check `_get_metadata_field_unique_values_impl` on a list-valued meta field.

    Three scenarios are exercised against the same documents: the full listing,
    a paginated slice, and a search-term lookup.
    """
    tagged_docs = [
        Document(content=text, meta={"tags": tags})
        for text, tags in (
            ("a", ["python", "java"]),
            ("b", ["rust", "go"]),
            ("c", ["python"]),
        )
    ]

    def unique_tags(search_term, from_, size):
        # Thin wrapper so each scenario only spells out what varies.
        return PineconeDocumentStore._get_metadata_field_unique_values_impl(
            tagged_docs, "tags", search_term=search_term, from_=from_, size=size
        )

    # Full listing: list items are flattened and de-duplicated ("python" appears twice in the input).
    page, count = unique_tags(None, 0, 10)
    assert count == 4
    assert page == ["go", "java", "python", "rust"]

    # Pagination: the total stays at 4 while only the requested window is returned.
    page, count = unique_tags(None, 1, 2)
    assert count == 4
    assert page == ["java", "python"]

    # Search: the upper-case term "PY" is expected to match "python".
    page, count = unique_tags("PY", 0, 10)
    assert count == 1
    assert page == ["python"]
317+
318+
319+
def test_prepare_documents_for_writing_edge_cases(caplog):
    """
    Check `_prepare_documents_for_writing` with invalid input and with documents that trigger warnings.

    :param caplog: pytest log-capture fixture.
    """
    store = PineconeDocumentStore(api_key=Secret.from_token("fake-api-key"))

    # Anything that is not a Document must be rejected.
    with pytest.raises(ValueError, match="must contain a list of objects of type Document"):
        store._prepare_documents_for_writing(["not-a-document"], policy=DuplicatePolicy.NONE)

    without_embedding = Document(content="no-embedding")
    with_blob = Document(content="with-blob", embedding=[0.1] * 768, blob=ByteStream(data=b"data"))
    with_sparse = Document(
        content="with-sparse",
        embedding=[0.1] * 768,
        sparse_embedding=SparseEmbedding(indices=[0], values=[1.0]),
    )

    with caplog.at_level("WARNING"):
        prepared = store._prepare_documents_for_writing(
            [without_embedding, with_blob, with_sparse], policy=DuplicatePolicy.SKIP
        )

    # All three documents are kept; the one without an embedding gets the store's dummy vector.
    assert len(prepared) == 3
    assert prepared[0][1] == store._dummy_vector

    # One warning fragment is expected for the policy and one per problematic document.
    for fragment in (
        "only supports `DuplicatePolicy.OVERWRITE`",
        "has no embedding",
        "blob",
        "sparse_embedding",
    ):
        assert fragment in caplog.text
343+
344+
345+
@pytest.mark.asyncio
async def test_validation_errors_on_empty_query_and_non_dict_meta():
    """
    Check argument validation in the sync and async retrieval and update methods.

    An empty `query_embedding` and a non-dict `meta` must each raise a `ValueError`,
    in both the synchronous method and its `_async` counterpart.
    """
    store = PineconeDocumentStore(api_key=Secret.from_token("fake-api-key"))
    category_filter = {"field": "meta.category", "operator": "==", "value": "A"}

    empty_query_error = "query_embedding must be a non-empty list"
    bad_meta_error = "meta must be a dictionary"

    # Empty query embedding: sync first, then async.
    with pytest.raises(ValueError, match=empty_query_error):
        store._embedding_retrieval(query_embedding=[])
    with pytest.raises(ValueError, match=empty_query_error):
        await store._embedding_retrieval_async(query_embedding=[])

    # Non-dict meta: sync first, then async.
    with pytest.raises(ValueError, match=bad_meta_error):
        store.update_by_filter(filters=category_filter, meta="not-a-dict")
    with pytest.raises(ValueError, match=bad_meta_error):
        await store.update_by_filter_async(filters=category_filter, meta="not-a-dict")
359+
360+
233361
@pytest.mark.integration
234362
@pytest.mark.skipif(not os.environ.get("PINECONE_API_KEY"), reason="PINECONE_API_KEY not set")
235363
def test_serverless_index_creation_from_scratch(delete_sleep_time):

integrations/pinecone/tests/test_embedding_retriever.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ def test_init_default():
2828
PineconeEmbeddingRetriever(document_store=mock_store, filter_policy="invalid")
2929

3030

31+
def test_init_raises_for_non_pinecone_document_store():
    """Check that the retriever refuses a `document_store` that is not a `PineconeDocumentStore`."""
    expected_message = "document_store must be an instance of PineconeDocumentStore"
    with pytest.raises(ValueError, match=expected_message):
        # A plain string stands in for "any object of the wrong type".
        PineconeEmbeddingRetriever(document_store="not-a-document-store")
34+
35+
3136
@patch("haystack_integrations.document_stores.pinecone.document_store.Pinecone")
3237
def test_to_dict(mock_pinecone, monkeypatch):
3338
monkeypatch.setenv("PINECONE_API_KEY", "env-api-key")

integrations/pinecone/tests/test_filters.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,124 @@
22

33
import pytest
44
from haystack.dataclasses.document import Document
5+
from haystack.errors import FilterError
56
from haystack.testing.document_store import (
67
FilterDocumentsTest,
78
)
89

10+
from haystack_integrations.document_stores.pinecone.filters import (
11+
_normalize_filters,
12+
_validate_filters,
13+
)
14+
15+
16+
def test_normalize_filters_rejects_non_dict():
    """Check that `_normalize_filters` raises `FilterError` when given a string instead of a dict."""
    not_a_mapping = "not-a-dict"
    with pytest.raises(FilterError, match="Filters must be a dictionary"):
        _normalize_filters(not_a_mapping)
19+
20+
21+
@pytest.mark.parametrize(
    ("operator", "value", "expected"),
    [
        # Equality / inequality.
        ("==", "foo", {"field": {"$eq": "foo"}}),
        ("!=", 5, {"field": {"$ne": 5}}),
        # Ordering comparisons with int and float operands.
        (">", 1.5, {"field": {"$gt": 1.5}}),
        (">=", 2, {"field": {"$gte": 2}}),
        ("<", 3, {"field": {"$lt": 3}}),
        ("<=", 4.2, {"field": {"$lte": 4.2}}),
        # Membership tests with list operands.
        ("in", ["a", "b"], {"field": {"$in": ["a", "b"]}}),
        ("not in", [1, 2], {"field": {"$nin": [1, 2]}}),
    ],
)
def test_comparison_operators(operator, value, expected):
    """
    Check the `$`-prefixed operator each comparison operator is normalized to.

    :param operator: Operator string placed in the input condition.
    :param value: Operand placed in the input condition.
    :param expected: Normalized filter `_normalize_filters` is expected to return.
    """
    filter_condition = {"field": "field", "operator": operator, "value": value}
    normalized = _normalize_filters(filter_condition)
    assert normalized == expected
37+
38+
39+
@pytest.mark.parametrize(
    ("operator", "value"),
    [
        # Ordering comparisons paired with a string operand.
        (">", "not-a-number"),
        (">=", "not-a-number"),
        ("<", "not-a-number"),
        ("<=", "not-a-number"),
        # Equality comparisons paired with a list operand.
        ("==", [1, 2]),
        ("!=", [1, 2]),
    ],
)
def test_comparison_rejects_unsupported_value_types(operator, value):
    """
    Check that operator/value pairings with an unsupported value type raise `FilterError`.

    :param operator: Operator string placed in the input condition.
    :param value: Operand whose type is expected to be rejected for that operator.
    """
    filter_condition = {"field": "field", "operator": operator, "value": value}
    with pytest.raises(FilterError, match="Unsupported type"):
        _normalize_filters(filter_condition)
54+
55+
56+
@pytest.mark.parametrize(
    ("operator", "value", "match"),
    [
        # A bare string instead of a list.
        ("in", "not-a-list", "must be a list"),
        ("not in", "not-a-list", "must be a list"),
        # A list whose element is a dict.
        ("in", [{"nested": "dict"}], "Unsupported type"),
        ("not in", [{"nested": "dict"}], "Unsupported type"),
    ],
)
def test_in_and_not_in_errors(operator, value, match):
    """
    Check the errors raised by the "in" / "not in" operators for bad operands.

    :param operator: Either "in" or "not in".
    :param value: Operand expected to be rejected.
    :param match: Pattern the `FilterError` message must match.
    """
    filter_condition = {"field": "field", "operator": operator, "value": value}
    with pytest.raises(FilterError, match=match):
        _normalize_filters(filter_condition)
68+
69+
70+
@pytest.mark.parametrize(
    ("condition", "match"),
    [
        # "conditions" present but no "operator".
        ({"conditions": []}, "'operator' key missing"),
        # "operator" present but no "conditions".
        ({"operator": "AND"}, "'conditions' key missing"),
        # Well-formed structure with an operator that is expected to be rejected.
        (
            {"operator": "XOR", "conditions": [{"field": "a", "operator": "==", "value": 1}]},
            "Unknown logical operator",
        ),
    ],
)
def test_logical_condition_errors(condition, match):
    """
    Check the errors raised for malformed logical (non-comparison) conditions.

    :param condition: Malformed filter passed to `_normalize_filters`.
    :param match: Pattern the `FilterError` message must match.
    """
    with pytest.raises(FilterError, match=match):
        _normalize_filters(condition)
84+
85+
86+
@pytest.mark.parametrize(
    ("condition", "match"),
    [
        # Comparison without an "operator" key.
        ({"field": "a", "value": 1}, "'operator' key missing"),
        # Comparison without a "value" key.
        ({"field": "a", "operator": "=="}, "'value' key missing"),
    ],
)
def test_comparison_condition_errors(condition, match):
    """
    Check the errors raised for comparison conditions that lack a required key.

    :param condition: Incomplete comparison passed to `_normalize_filters`.
    :param match: Pattern the `FilterError` message must match.
    """
    with pytest.raises(FilterError, match=match):
        _normalize_filters(condition)
96+
97+
98+
def test_meta_prefix_is_stripped():
    """Check that a "meta."-prefixed field name appears without the prefix in the normalized filter."""
    prefixed = {"field": "meta.category", "operator": "==", "value": "A"}
    normalized = _normalize_filters(prefixed)
    assert normalized == {"category": {"$eq": "A"}}
101+
102+
103+
def test_nested_logical_conditions_are_parsed():
    """Check that an OR group nested inside an AND group is normalized recursively."""
    # Inner group: two comparisons joined by OR.
    inner_or = {
        "operator": "OR",
        "conditions": [
            {"field": "a", "operator": "==", "value": 1},
            {"field": "b", "operator": ">", "value": 2},
        ],
    }
    # Outer group: AND with the OR group as its only member.
    outer_and = {"operator": "AND", "conditions": [inner_or]}

    expected_or = {"$or": [{"a": {"$eq": 1}}, {"b": {"$gt": 2}}]}
    expected = {"$and": [expected_or]}

    assert _normalize_filters(outer_and) == expected
117+
118+
119+
def test_validate_filters_rejects_invalid_syntax():
    """Check that `_validate_filters` raises `ValueError` for a dict with none of the filter keys."""
    malformed = {"foo": "bar"}
    with pytest.raises(ValueError, match="Invalid filter syntax"):
        _validate_filters(malformed)
122+
9123

10124
@pytest.mark.integration
11125
@pytest.mark.skipif(not os.environ.get("PINECONE_API_KEY"), reason="PINECONE_API_KEY not set")

0 commit comments

Comments
 (0)