Skip to content

Commit 6f82443

Browse files
committed
refactor: trim tests to high-signal only
Remove tests that just verify mock plumbing (count, write, and delete calling the mock client). Keep tests that verify our actual logic:

- Serialization roundtrip (full dict structure)
- Score conversion (cosine + euclidean)
- Filter conversion (pure function with real logic)
- Duplicate policy batch checks (SKIP/NONE)
- Document <-> S3 vector conversion
- Input validation

Before: 49 unit tests (many testing mock behavior)
After: 26 unit tests (all testing our code) + 12 integration tests
1 parent 4ad3787 commit 6f82443

3 files changed

Lines changed: 102 additions & 228 deletions

File tree

integrations/amazon_s3_vectors/tests/test_document_store.py

Lines changed: 41 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -89,64 +89,9 @@ def test_from_dict(_mock_boto3):
8989
assert store.create_bucket_and_index is False
9090

9191

92-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
93-
def test_count_documents_empty(mock_boto3):
94-
client = MagicMock()
95-
client.get_vector_bucket.return_value = {}
96-
client.get_index.return_value = {}
97-
client.list_vectors.return_value = {"vectors": []}
98-
mock_boto3.client.return_value = client
99-
100-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
101-
assert store.count_documents() == 0
102-
103-
104-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
105-
def test_count_documents_pagination(mock_boto3):
106-
client = MagicMock()
107-
client.get_vector_bucket.return_value = {}
108-
client.get_index.return_value = {}
109-
client.list_vectors.side_effect = [
110-
{"vectors": [{"key": "1"}, {"key": "2"}], "nextToken": "tok"},
111-
{"vectors": [{"key": "3"}]},
112-
]
113-
mock_boto3.client.return_value = client
114-
115-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
116-
assert store.count_documents() == 3
117-
118-
119-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
120-
def test_write_documents(mock_boto3):
121-
client = MagicMock()
122-
client.get_vector_bucket.return_value = {}
123-
client.get_index.return_value = {}
124-
client.put_vectors.return_value = {}
125-
mock_boto3.client.return_value = client
126-
127-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
128-
docs = [
129-
Document(id="1", content="Hello", embedding=[0.1] * 4),
130-
Document(id="2", content="World", embedding=[0.2] * 4),
131-
]
132-
assert store.write_documents(docs) == 2
133-
client.put_vectors.assert_called_once()
134-
135-
136-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
137-
def test_write_documents_empty(mock_boto3):
138-
client = MagicMock()
139-
client.get_vector_bucket.return_value = {}
140-
client.get_index.return_value = {}
141-
mock_boto3.client.return_value = client
142-
143-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
144-
assert store.write_documents([]) == 0
145-
client.put_vectors.assert_not_called()
146-
147-
14892
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
14993
def test_write_documents_no_embedding_raises(mock_boto3):
94+
"""S3 Vectors requires embeddings — this tests our validation, not the store."""
15095
client = MagicMock()
15196
client.get_vector_bucket.return_value = {}
15297
client.get_index.return_value = {}
@@ -159,98 +104,82 @@ def test_write_documents_no_embedding_raises(mock_boto3):
159104

160105
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
161106
def test_write_documents_skip_existing(mock_boto3):
107+
"""Tests our batch existence check logic for SKIP policy."""
162108
client = MagicMock()
163109
client.get_vector_bucket.return_value = {}
164110
client.get_index.return_value = {}
165111
client.get_vectors.return_value = {"vectors": [{"key": "1"}]}
166112
mock_boto3.client.return_value = client
167113

168114
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
169-
docs = [Document(id="1", content="Hello", embedding=[0.1] * 4)]
170-
assert store.write_documents(docs, policy=DuplicatePolicy.SKIP) == 0
115+
result = store.write_documents(
116+
[Document(id="1", content="Hello", embedding=[0.1] * 4)],
117+
policy=DuplicatePolicy.SKIP,
118+
)
119+
assert result == 0
120+
client.put_vectors.assert_not_called()
171121

172122

173123
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
174124
def test_write_documents_none_policy_raises(mock_boto3):
125+
"""Tests our batch existence check logic for NONE policy."""
175126
client = MagicMock()
176127
client.get_vector_bucket.return_value = {}
177128
client.get_index.return_value = {}
178129
client.get_vectors.return_value = {"vectors": [{"key": "1"}]}
179130
mock_boto3.client.return_value = client
180131

181132
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
182-
docs = [Document(id="1", content="Hello", embedding=[0.1] * 4)]
183133
with pytest.raises(DocumentStoreError, match="already exist"):
184-
store.write_documents(docs, policy=DuplicatePolicy.NONE)
134+
store.write_documents(
135+
[Document(id="1", content="Hello", embedding=[0.1] * 4)],
136+
policy=DuplicatePolicy.NONE,
137+
)
185138

186139

187140
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
188-
def test_write_documents_metadata(mock_boto3):
189-
client = MagicMock()
190-
client.get_vector_bucket.return_value = {}
191-
client.get_index.return_value = {}
192-
client.put_vectors.return_value = {}
193-
mock_boto3.client.return_value = client
194-
195-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
196-
docs = [Document(id="1", content="Hello", embedding=[0.1] * 4, meta={"category": "test", "year": 2024})]
197-
store.write_documents(docs)
198-
199-
vectors = client.put_vectors.call_args[1]["vectors"]
200-
assert len(vectors) == 1
201-
assert vectors[0]["key"] == "1"
202-
assert vectors[0]["metadata"]["_content"] == "Hello"
203-
assert vectors[0]["metadata"]["category"] == "test"
204-
assert vectors[0]["metadata"]["year"] == 2024
205-
206-
207-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
208-
def test_delete_documents(mock_boto3):
209-
client = MagicMock()
210-
client.get_vector_bucket.return_value = {}
211-
client.get_index.return_value = {}
212-
client.delete_vectors.return_value = {}
213-
mock_boto3.client.return_value = client
214-
215-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
216-
store.delete_documents(["1", "2"])
217-
client.delete_vectors.assert_called_once_with(vectorBucketName="b", indexName="i", keys=["1", "2"])
218-
219-
220-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
221-
def test_delete_documents_empty(mock_boto3):
141+
def test_embedding_retrieval_score_conversion(mock_boto3):
142+
"""Tests our distance-to-score conversion logic — the only non-trivial transform in retrieval."""
222143
client = MagicMock()
223144
client.get_vector_bucket.return_value = {}
224145
client.get_index.return_value = {}
146+
client.query_vectors.return_value = {
147+
"vectors": [{"key": "1", "distance": 0.05, "metadata": {"_content": "Hello", "category": "news"}}],
148+
"distanceMetric": "cosine",
149+
}
225150
mock_boto3.client.return_value = client
226151

227152
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
228-
store.delete_documents([])
229-
client.delete_vectors.assert_not_called()
153+
docs = store._embedding_retrieval(query_embedding=[0.1] * 4, top_k=5)
154+
assert len(docs) == 1
155+
assert docs[0].id == "1"
156+
assert docs[0].content == "Hello"
157+
assert docs[0].score == pytest.approx(0.95) # cosine: 1.0 - 0.05
158+
assert docs[0].meta == {"category": "news"}
230159

231160

232161
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
233-
def test_embedding_retrieval(mock_boto3):
162+
def test_embedding_retrieval_euclidean_score(mock_boto3):
163+
"""Tests euclidean distance-to-score conversion (negated)."""
234164
client = MagicMock()
235165
client.get_vector_bucket.return_value = {}
236166
client.get_index.return_value = {}
237167
client.query_vectors.return_value = {
238-
"vectors": [{"key": "1", "distance": 0.05, "metadata": {"_content": "Hello", "category": "news"}}],
239-
"distanceMetric": "cosine",
168+
"vectors": [{"key": "1", "distance": 1.5, "metadata": {}}],
169+
"distanceMetric": "euclidean",
240170
}
241171
mock_boto3.client.return_value = client
242172

243-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
173+
store = S3VectorsDocumentStore(
174+
vector_bucket_name="b", index_name="i", dimension=4, distance_metric="euclidean", region_name="us-east-1"
175+
)
244176
docs = store._embedding_retrieval(query_embedding=[0.1] * 4, top_k=5)
245-
assert len(docs) == 1
246-
assert docs[0].id == "1"
247-
assert docs[0].content == "Hello"
248-
assert docs[0].score == pytest.approx(0.95) # 1 - 0.05
249-
assert docs[0].meta == {"category": "news"}
177+
assert docs[0].score == pytest.approx(-1.5) # euclidean: negated
250178

251179

252180
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
253-
def test_embedding_retrieval_with_filters(mock_boto3):
181+
def test_embedding_retrieval_passes_filters(mock_boto3):
182+
"""Tests that Haystack filters are converted and passed to query_vectors."""
254183
client = MagicMock()
255184
client.get_vector_bucket.return_value = {}
256185
client.get_index.return_value = {}
@@ -265,19 +194,15 @@ def test_embedding_retrieval_with_filters(mock_boto3):
265194
assert call_args["filter"] == {"$and": [{"category": {"$eq": "news"}}]}
266195

267196

268-
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
269-
def test_embedding_retrieval_empty_embedding_raises(mock_boto3):
270-
client = MagicMock()
271-
client.get_vector_bucket.return_value = {}
272-
client.get_index.return_value = {}
273-
mock_boto3.client.return_value = client
274-
275-
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, region_name="us-east-1")
197+
def test_embedding_retrieval_empty_embedding_raises():
198+
"""Tests our input validation — no mocking needed."""
199+
store = S3VectorsDocumentStore(vector_bucket_name="b", index_name="i", dimension=4, create_bucket_and_index=False)
276200
with pytest.raises(ValueError, match="non-empty"):
277201
store._embedding_retrieval(query_embedding=[])
278202

279203

280204
def test_document_to_s3_vector():
205+
"""Tests our Document → S3 vector conversion (pure function)."""
281206
doc = Document(
282207
id="test-1", content="Hello world", embedding=[0.1, 0.2, 0.3], meta={"category": "test", "year": 2024}
283208
)
@@ -290,6 +215,7 @@ def test_document_to_s3_vector():
290215

291216

292217
def test_s3_vector_to_document():
218+
"""Tests our S3 vector → Document conversion (pure function)."""
293219
vector = {
294220
"key": "test-1",
295221
"data": {"float32": [0.1, 0.2, 0.3]},
@@ -303,6 +229,7 @@ def test_s3_vector_to_document():
303229

304230

305231
def test_document_roundtrip():
232+
"""Tests Document → S3 vector → Document is lossless."""
306233
doc = Document(
307234
id="test-1", content="Hello world", embedding=[0.1, 0.2, 0.3], meta={"category": "test", "year": 2024}
308235
)

integrations/amazon_s3_vectors/tests/test_embedding_retriever.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ def test_init_default():
2727
S3VectorsEmbeddingRetriever(document_store=mock_store, filter_policy="invalid")
2828

2929

30-
def test_init_invalid_store():
31-
with pytest.raises(ValueError, match="must be an instance"):
32-
S3VectorsEmbeddingRetriever(document_store="not a store")
33-
34-
3530
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
3631
def test_to_dict(_mock_boto3):
3732
store = S3VectorsDocumentStore(
@@ -102,6 +97,7 @@ def test_from_dict(_mock_boto3):
10297

10398
@patch("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3")
10499
def test_from_dict_no_filter_policy(_mock_boto3):
100+
"""Pipelines serialized with older versions may not have filter_policy."""
105101
data = {
106102
"type": "haystack_integrations.components.retrievers.amazon_s3_vectors.embedding_retriever.S3VectorsEmbeddingRetriever",
107103
"init_parameters": {
@@ -134,28 +130,3 @@ def test_run():
134130
)
135131
assert len(res["documents"]) == 1
136132
assert res["documents"][0].content == "Test doc"
137-
138-
139-
def test_run_with_top_k_override():
140-
mock_store = Mock(spec=S3VectorsDocumentStore)
141-
mock_store._embedding_retrieval.return_value = []
142-
retriever = S3VectorsEmbeddingRetriever(document_store=mock_store, top_k=10)
143-
retriever.run(query_embedding=[0.1, 0.2], top_k=3)
144-
mock_store._embedding_retrieval.assert_called_once_with(
145-
query_embedding=[0.1, 0.2],
146-
filters={},
147-
top_k=3,
148-
)
149-
150-
151-
def test_run_with_filters():
152-
mock_store = Mock(spec=S3VectorsDocumentStore)
153-
mock_store._embedding_retrieval.return_value = []
154-
retriever = S3VectorsEmbeddingRetriever(document_store=mock_store)
155-
filters = {"operator": "AND", "conditions": [{"field": "meta.category", "operator": "==", "value": "news"}]}
156-
retriever.run(query_embedding=[0.1, 0.2], filters=filters)
157-
mock_store._embedding_retrieval.assert_called_once_with(
158-
query_embedding=[0.1, 0.2],
159-
filters=filters,
160-
top_k=10,
161-
)

0 commit comments

Comments
 (0)