From 576ca4b92883bdd2c73e83b05e674e5f5ab8e325 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 8 Jan 2026 11:50:09 +0100 Subject: [PATCH 1/3] Auto call warm_up --- .../embedders/optimum/optimum_document_embedder.py | 6 ++---- .../components/embedders/optimum/optimum_text_embedder.py | 5 +---- .../optimum/tests/test_optimum_document_embedder.py | 1 - integrations/optimum/tests/test_optimum_text_embedder.py | 1 - 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py index c399d6be41..18eae93ec5 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py @@ -200,14 +200,12 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: A list of Documents to embed. :returns: The updated Documents with their embeddings. - :raises RuntimeError: - If the component was not initialized. :raises TypeError: If the input is not a list of Documents. """ if not self._initialized: - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) + self.warm_up() + if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)): msg = ( "OptimumDocumentEmbedder expects a list of Documents as input." 
diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py index 2ae77b20a7..d9c6e3d131 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py @@ -162,14 +162,11 @@ def run(self, text: str) -> dict[str, list[float]]: The text to embed. :returns: The embeddings of the text. - :raises RuntimeError: - If the component was not initialized. :raises TypeError: If the input is not a string. """ if not self._initialized: - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) + self.warm_up() if not isinstance(text, str): msg = ( diff --git a/integrations/optimum/tests/test_optimum_document_embedder.py b/integrations/optimum/tests/test_optimum_document_embedder.py index 5ec10ef2cd..d0cc4545af 100644 --- a/integrations/optimum/tests/test_optimum_document_embedder.py +++ b/integrations/optimum/tests/test_optimum_document_embedder.py @@ -371,7 +371,6 @@ def test_run(self, opt_config, quant_config): optimizer_settings=opt_config, quantizer_settings=quant_config, ) - embedder.warm_up() result = embedder.run(documents=docs) _ = [embedder.run([d]) for d in docs_copy] diff --git a/integrations/optimum/tests/test_optimum_text_embedder.py b/integrations/optimum/tests/test_optimum_text_embedder.py index 24f460281a..e67b6e3df2 100644 --- a/integrations/optimum/tests/test_optimum_text_embedder.py +++ b/integrations/optimum/tests/test_optimum_text_embedder.py @@ -252,7 +252,6 @@ def test_run(self): suffix=" suffix", pooling_mode=pooling_mode, ) - embedder.warm_up() result = embedder.run(text="The food was delicious") From 79650abdbd567457f5f3b55635ea6967756c6dd7 Mon Sep 17 00:00:00 2001 From: Sebastian Husch 
Lee Date: Fri, 9 Jan 2026 09:16:00 +0100 Subject: [PATCH 2/3] Add missing license headers --- integrations/optimum/LICENSE.txt | 2 +- .../components/embedders/optimum/_backend.py | 4 ++++ .../components/embedders/optimum/optimization.py | 4 ++++ .../components/embedders/optimum/optimum_document_embedder.py | 4 ++++ .../components/embedders/optimum/optimum_text_embedder.py | 4 ++++ .../components/embedders/optimum/pooling.py | 4 ++++ .../components/embedders/optimum/quantization.py | 4 ++++ integrations/optimum/tests/test_optimum_document_embedder.py | 4 ++++ integrations/optimum/tests/test_optimum_text_embedder.py | 4 ++++ 9 files changed, 33 insertions(+), 1 deletion(-) diff --git a/integrations/optimum/LICENSE.txt b/integrations/optimum/LICENSE.txt index 137069b823..a0f3c3ec48 100644 --- a/integrations/optimum/LICENSE.txt +++ b/integrations/optimum/LICENSE.txt @@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. -Copyright [yyyy] [name of copyright owner] +Copyright 2024 deepset GmbH Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py index 0896210d86..2dadfe75e3 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + import copy import json from dataclasses import dataclass diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py index a065f796b5..956338c962 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from dataclasses import dataclass from enum import Enum from typing import Any diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py index 18eae93ec5..140ac0022e 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict diff --git 
a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py index d9c6e3d131..2b325c89ef 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/pooling.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/pooling.py index 2c8bbd9678..5e88a29835 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/pooling.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/pooling.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from enum import Enum diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py index fd2c484dc3..ebced48d5e 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from dataclasses import dataclass from enum import Enum from typing import Any diff --git a/integrations/optimum/tests/test_optimum_document_embedder.py b/integrations/optimum/tests/test_optimum_document_embedder.py index 
d0cc4545af..4a34092bd7 100644 --- a/integrations/optimum/tests/test_optimum_document_embedder.py +++ b/integrations/optimum/tests/test_optimum_document_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + import copy import tempfile from unittest.mock import MagicMock, patch diff --git a/integrations/optimum/tests/test_optimum_text_embedder.py b/integrations/optimum/tests/test_optimum_text_embedder.py index e67b6e3df2..f0700e52a7 100644 --- a/integrations/optimum/tests/test_optimum_text_embedder.py +++ b/integrations/optimum/tests/test_optimum_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock, patch import pytest From 334198eef49377f86e0fef9b5d18df70e7c99a7f Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 9 Jan 2026 09:23:35 +0100 Subject: [PATCH 3/3] Don't modify docs in place --- .../embedders/optimum/optimum_document_embedder.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py index 140ac0022e..727f0e7f56 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +from dataclasses import replace from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict @@ -56,7 +57,7 @@ def __init__( progress_bar: bool = True, meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", - ): + ) -> None: """ Create a OptimumDocumentEmbedder 
component. @@ -140,7 +141,7 @@ def __init__( self._backend = _EmbedderBackend(params) self._initialized = False - def warm_up(self): + def warm_up(self) -> None: """ Initializes the component. """ @@ -223,7 +224,9 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: texts_to_embed = self._prepare_texts_to_embed(documents=documents) embeddings = self._backend.embed_texts(texts_to_embed) + + new_documents = [] for doc, emb in zip(documents, embeddings): - doc.embedding = emb + new_documents.append(replace(doc, embedding=emb)) - return {"documents": documents} + return {"documents": new_documents}