From a01ec38087d77b7ecef07fc71aba67e6e107a50e Mon Sep 17 00:00:00 2001
From: GunaPalanivel <gp5901@srmist.edu.in>
Date: Tue, 20 Jan 2026 18:44:13 +0530
Subject: [PATCH] fix(watsonx): use dataclass replace to avoid modifying input
 documents

This PR fixes WatsonxDocumentEmbedder so that it no longer mutates the
input Documents when setting embeddings.

Instead of mutating the original documents:
  doc.embedding = emb

We now create new document instances using dataclass replace:
  replace(doc, embedding=emb)

This follows the established pattern from deepset-ai/haystack#9693 and
aligns with other integrations (FastEmbed, Optimum, Nvidia, Bedrock, Cohere,
Google GenAI, Jina).

Related to: deepset-ai/haystack-core-integrations#2174
---
 .../embedders/watsonx/document_embedder.py    |  6 +++--
 .../watsonx/tests/test_document_embedder.py   | 24 +++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py b/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py
index 4b4b536e2e..626cea26ed 100644
--- a/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py
+++ b/integrations/watsonx/src/haystack_integrations/components/embedders/watsonx/document_embedder.py
@@ -4,6 +4,7 @@
 
 from __future__ import annotations
 
+from dataclasses import replace
 from typing import Any
 
 from haystack import Document, component, default_from_dict, default_to_dict
@@ -204,11 +205,12 @@ def run(self, documents: list[Document]) -> dict[str, list[Document] | dict[str,
         texts_to_embed = self._prepare_texts_to_embed(documents=documents)
         embeddings = self.embedder.embed_documents(texts_to_embed)
 
+        new_documents = []
         for doc, emb in zip(documents, embeddings, strict=True):
-            doc.embedding = emb
+            new_documents.append(replace(doc, embedding=emb))
 
         return {
-            "documents": documents,
+            "documents": new_documents,
             "meta": {
                 "model": self.model,
                 "truncate_input_tokens": self.truncate_input_tokens,
diff --git a/integrations/watsonx/tests/test_document_embedder.py b/integrations/watsonx/tests/test_document_embedder.py
index 6a4c50f71a..f968fdf56d 100644
--- a/integrations/watsonx/tests/test_document_embedder.py
+++ b/integrations/watsonx/tests/test_document_embedder.py
@@ -175,6 +175,30 @@ def test_run_empty_documents(self, mock_watsonx):
             "meta": {"model": "ibm/slate-30m-english-rtrvr-v2", "truncate_input_tokens": None, "batch_size": 1000},
         }
 
+    def test_run_does_not_modify_original_documents(self, mock_watsonx):
+        """Test that run() returns new Documents and leaves the input Documents untouched"""
+        embedder = WatsonxDocumentEmbedder(project_id=Secret.from_token("fake-project-id"))
+        original_docs = [
+            Document(content="I love cheese"),
+            Document(content="A transformer is a deep learning architecture"),
+        ]
+
+        # One mocked vector per input document, in input order
+        expected_embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+        mock_watsonx["embeddings_instance"].embed_documents.return_value = expected_embeddings
+
+        result = embedder.run(documents=original_docs)
+        returned_docs = result["documents"]
+
+        # Inputs must stay untouched: no embedding is written back onto them
+        assert all(doc.embedding is None for doc in original_docs)
+
+        # Outputs are distinct copies that carry the mocked vectors and keep the content
+        assert len(returned_docs) == len(original_docs)
+        assert all(new is not old for new, old in zip(returned_docs, original_docs, strict=True))
+        assert [doc.embedding for doc in returned_docs] == expected_embeddings
+        assert [doc.content for doc in returned_docs] == [doc.content for doc in original_docs]
+
 
 @pytest.mark.integration
 class TestWatsonxDocumentEmbedderIntegration: