deepset-ai · sjrl · Jan 9, 2026 · Jan 8, 2026 · Jan 9, 2026 · Jan 9, 2026
@@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work.
 
 To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!)  The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
 
-Copyright [yyyy] [name of copyright owner]
+Copyright 2024 deepset GmbH
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import copy
 import json
 from dataclasses import dataclass

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any

@@ -1,3 +1,8 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from dataclasses import replace
 from typing import Any, Optional, Union
 
 from haystack import Document, component, default_from_dict, default_to_dict
@@ -52,7 +57,7 @@ def __init__(
         progress_bar: bool = True,
         meta_fields_to_embed: Optional[list[str]] = None,
         embedding_separator: str = "\n",
-    ):
+    ) -> None:
         """
         Create an OptimumDocumentEmbedder component.
 
@@ -136,7 +141,7 @@ def __init__(
         self._backend = _EmbedderBackend(params)
         self._initialized = False
 
-    def warm_up(self):
+    def warm_up(self) -> None:
         """
         Initializes the component.
         """
@@ -200,14 +205,12 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
             A list of Documents to embed.
         :returns:
             The updated Documents with their embeddings.
-        :raises RuntimeError:
-            If the component was not initialized.
         :raises TypeError:
             If the input is not a list of Documents.
         """
         if not self._initialized:
-            msg = "The embedding model has not been loaded. Please call warm_up() before running."
-            raise RuntimeError(msg)
+            self.warm_up()
+
         if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
             msg = (
                 "OptimumDocumentEmbedder expects a list of Documents as input."
@@ -221,7 +224,9 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
 
         texts_to_embed = self._prepare_texts_to_embed(documents=documents)
         embeddings = self._backend.embed_texts(texts_to_embed)
+
+        new_documents = []
         for doc, emb in zip(documents, embeddings):
-            doc.embedding = emb
+            new_documents.append(replace(doc, embedding=emb))
 
-        return {"documents": documents}
+        return {"documents": new_documents}
@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from typing import Any, Optional, Union
 
 from haystack import component, default_from_dict, default_to_dict
@@ -162,14 +166,11 @@ def run(self, text: str) -> dict[str, list[float]]:
             The text to embed.
         :returns:
             The embeddings of the text.
-        :raises RuntimeError:
-            If the component was not initialized.
         :raises TypeError:
             If the input is not a string.
         """
         if not self._initialized:
-            msg = "The embedding model has not been loaded. Please call warm_up() before running."
-            raise RuntimeError(msg)
+            self.warm_up()
 
         if not isinstance(text, str):
             msg = (

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from enum import Enum
 
 

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import copy
 import tempfile
 from unittest.mock import MagicMock, patch
@@ -371,7 +375,6 @@ def test_run(self, opt_config, quant_config):
                 optimizer_settings=opt_config,
                 quantizer_settings=quant_config,
             )
-            embedder.warm_up()
 
             result = embedder.run(documents=docs)
             _ = [embedder.run([d]) for d in docs_copy]

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -252,7 +256,6 @@ def test_run(self):
                 suffix=" suffix",
                 pooling_mode=pooling_mode,
             )
-            embedder.warm_up()
 
             result = embedder.run(text="The food was delicious")