Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from __future__ import annotations

from dataclasses import replace
from typing import Any

from haystack import Document, component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -204,11 +205,12 @@ def run(self, documents: list[Document]) -> dict[str, list[Document] | dict[str,
texts_to_embed = self._prepare_texts_to_embed(documents=documents)
embeddings = self.embedder.embed_documents(texts_to_embed)

new_documents = []
for doc, emb in zip(documents, embeddings, strict=True):
doc.embedding = emb
new_documents.append(replace(doc, embedding=emb))

return {
"documents": documents,
"documents": new_documents,
"meta": {
"model": self.model,
"truncate_input_tokens": self.truncate_input_tokens,
Expand Down
24 changes: 24 additions & 0 deletions integrations/watsonx/tests/test_document_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,30 @@ def test_run_empty_documents(self, mock_watsonx):
"meta": {"model": "ibm/slate-30m-english-rtrvr-v2", "truncate_input_tokens": None, "batch_size": 1000},
}

def test_run_does_not_modify_original_documents(self, mock_watsonx):
    """Verify that ``run`` leaves the input documents without embeddings.

    The mocked embeddings backend returns one vector per document. After
    the call, the documents passed in must still have ``embedding`` unset,
    while the documents in the result must each carry an embedding.
    """
    embedder = WatsonxDocumentEmbedder(project_id=Secret.from_token("fake-project-id"))

    # One canned vector per input document, served by the mocked backend.
    fake_vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
    mock_watsonx["embeddings_instance"].embed_documents.return_value = fake_vectors

    contents = ("I love cheese", "A transformer is a deep learning architecture")
    original_docs = [Document(content=text) for text in contents]

    result = embedder.run(documents=original_docs)

    # The caller's documents must be untouched by the embedding step.
    assert all(source_doc.embedding is None for source_doc in original_docs)

    # The embeddings must be present on the returned documents.
    assert all(embedded_doc.embedding is not None for embedded_doc in result["documents"])


@pytest.mark.integration
class TestWatsonxDocumentEmbedderIntegration:
Expand Down