From 84a293c10a641ee64de349cfc959ef65424a9e1f Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 9 Jan 2026 08:55:17 +0100 Subject: [PATCH 1/4] Add license headers, don't edit documents in-place, auto-call warm_up --- integrations/fastembed/LICENSE.txt | 2 +- .../embedders/fastembed/__init__.py | 1 + .../fastembed/fastembed_document_embedder.py | 26 +++++++++++------ .../fastembed_sparse_document_embedder.py | 29 ++++++++++++------- .../fastembed_sparse_text_embedder.py | 20 ++++++++----- .../fastembed/fastembed_text_embedder.py | 20 ++++++++----- .../components/rankers/fastembed/__init__.py | 4 +++ .../components/rankers/fastembed/ranker.py | 4 +++ .../fastembed/tests/test_fastembed_backend.py | 4 +++ .../tests/test_fastembed_document_embedder.py | 4 +++ .../fastembed/tests/test_fastembed_ranker.py | 4 +++ ...test_fastembed_sparse_document_embedder.py | 4 +++ .../test_fastembed_sparse_text_embedder.py | 4 +++ .../tests/test_fastembed_text_embedder.py | 4 +++ 14 files changed, 94 insertions(+), 36 deletions(-) diff --git a/integrations/fastembed/LICENSE.txt b/integrations/fastembed/LICENSE.txt index 137069b823..a0f3c3ec48 100644 --- a/integrations/fastembed/LICENSE.txt +++ b/integrations/fastembed/LICENSE.txt @@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. -Copyright [yyyy] [name of copyright owner] +Copyright 2024 deepset GmbH Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py index d73c297663..50c5297ea8 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 + from .fastembed_document_embedder import FastembedDocumentEmbedder from .fastembed_sparse_document_embedder import FastembedSparseDocumentEmbedder from .fastembed_sparse_text_embedder import FastembedSparseTextEmbedder diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py index bcd1a6111d..aed226359f 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py @@ -1,3 +1,8 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import replace from typing import Any, Optional from haystack import Document, component, default_to_dict @@ -68,7 +73,7 @@ def __init__( local_files_only: bool = False, meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", - ): + ) -> None: """ Create an FastembedDocumentEmbedder component. @@ -102,6 +107,7 @@ def __init__( self.local_files_only = local_files_only self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator + self.embedding_backend = None def to_dict(self) -> dict[str, Any]: """ @@ -124,11 +130,11 @@ def to_dict(self) -> dict[str, Any]: embedding_separator=self.embedding_separator, ) - def warm_up(self): + def warm_up(self) -> None: """ Initializes the component. """ - if not hasattr(self, "embedding_backend"): + if self.embedding_backend is None: self.embedding_backend = _FastembedEmbeddingBackendFactory.get_embedding_backend( model_name=self.model_name, cache_dir=self.cache_dir, @@ -157,6 +163,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: :param documents: List of Documents to embed. :returns: A dictionary with the following keys: - `documents`: List of Documents with each Document's `embedding` field set to the computed embeddings. + :raises TypeError: If the input is not a list of Documents. """ if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)): msg = ( @@ -164,19 +171,20 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: "In case you want to embed a list of strings, please use the FastembedTextEmbedder." ) raise TypeError(msg) - if not hasattr(self, "embedding_backend"): - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) + + if self.embedding_backend is None: + self.warm_up() texts_to_embed = self._prepare_texts_to_embed(documents=documents) - embeddings = self.embedding_backend.embed( + embeddings = self.embedding_backend.embed( # type: ignore[union-attr] texts_to_embed, batch_size=self.batch_size, progress_bar=self.progress_bar, parallel=self.parallel, ) + new_documents = [] for doc, emb in zip(documents, embeddings): - doc.embedding = emb + new_documents.append(replace(doc, embedding=emb)) - return {"documents": documents} + return {"documents": new_documents} diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 40137b4ea9..59bb83ea07 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -1,3 +1,8 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import replace from typing import Any, Optional from haystack import Document, component, default_to_dict @@ -63,7 +68,7 @@ def __init__( meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", model_kwargs: Optional[dict[str, Any]] = None, - ): + ) -> None: """ Create an FastembedDocumentEmbedder component. @@ -95,6 +100,7 @@ def __init__( self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator self.model_kwargs = model_kwargs + self.embedding_backend = None def to_dict(self) -> dict[str, Any]: """ @@ -116,11 +122,11 @@ def to_dict(self) -> dict[str, Any]: model_kwargs=self.model_kwargs, ) - def warm_up(self): + def warm_up(self) -> None: """ Initializes the component. """ - if not hasattr(self, "embedding_backend"): + if self.embedding_backend is None: self.embedding_backend = _FastembedSparseEmbeddingBackendFactory.get_embedding_backend( model_name=self.model_name, cache_dir=self.cache_dir, @@ -149,25 +155,28 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: :returns: A dictionary with the following keys: - `documents`: List of Documents with each Document's `sparse_embedding` field set to the computed embeddings. + :raises TypeError: If the input is not a list of Documents. """ if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)): msg = ( "FastembedSparseDocumentEmbedder expects a list of Documents as input. " "In case you want to embed a list of strings, please use the FastembedTextEmbedder." ) - raise TypeError(msg) - if not hasattr(self, "embedding_backend"): - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) + raise TypeError(msg)\ + + if self.embedding_backend is None: + self.warm_up() texts_to_embed = self._prepare_texts_to_embed(documents=documents) - embeddings = self.embedding_backend.embed( + embeddings = self.embedding_backend.embed( # type: ignore[union-attr] texts_to_embed, batch_size=self.batch_size, progress_bar=self.progress_bar, parallel=self.parallel, ) + new_documents = [] for doc, emb in zip(documents, embeddings): - doc.sparse_embedding = emb - return {"documents": documents} + new_documents.append(replace(doc, embedding=emb)) + + return {"documents": new_documents} diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py index cac95f697a..0ce70d8775 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from typing import Any, Optional from haystack import component, default_to_dict @@ -36,7 +40,7 @@ def __init__( parallel: Optional[int] = None, local_files_only: bool = False, model_kwargs: Optional[dict[str, Any]] = None, - ): + ) -> None: """ Create a FastembedSparseTextEmbedder component. @@ -61,6 +65,7 @@ def __init__( self.parallel = parallel self.local_files_only = local_files_only self.model_kwargs = model_kwargs + self.embedding_backend = None def to_dict(self) -> dict[str, Any]: """ @@ -80,11 +85,11 @@ def to_dict(self) -> dict[str, Any]: model_kwargs=self.model_kwargs, ) - def warm_up(self): + def warm_up(self) -> None: """ Initializes the component. """ - if not hasattr(self, "embedding_backend"): + if self.embedding_backend is None: self.embedding_backend = _FastembedSparseEmbeddingBackendFactory.get_embedding_backend( model_name=self.model_name, cache_dir=self.cache_dir, @@ -102,7 +107,6 @@ def run(self, text: str) -> dict[str, SparseEmbedding]: :returns: A dictionary with the following keys: - `embedding`: A list of floats representing the embedding of the input text. :raises TypeError: If the input is not a string. - :raises RuntimeError: If the embedding model has not been loaded. """ if not isinstance(text, str): msg = ( @@ -110,11 +114,11 @@ def run(self, text: str) -> dict[str, SparseEmbedding]: "In case you want to embed a list of Documents, please use the FastembedDocumentEmbedder." ) raise TypeError(msg) - if not hasattr(self, "embedding_backend"): - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) - embedding = self.embedding_backend.embed( + if self.embedding_backend is None: + self.warm_up() + + embedding = self.embedding_backend.embed( # type: ignore[union-attr] [text], progress_bar=self.progress_bar, parallel=self.parallel, diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py index 0c6bb646f3..92f89e771c 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from typing import Any, Optional from haystack import component, default_to_dict @@ -36,7 +40,7 @@ def __init__( progress_bar: bool = True, parallel: Optional[int] = None, local_files_only: bool = False, - ): + ) -> None: """ Create a FastembedTextEmbedder component. @@ -63,6 +67,7 @@ def __init__( self.progress_bar = progress_bar self.parallel = parallel self.local_files_only = local_files_only + self.embedding_backend = None def to_dict(self) -> dict[str, Any]: """ @@ -83,11 +88,11 @@ def to_dict(self) -> dict[str, Any]: local_files_only=self.local_files_only, ) - def warm_up(self): + def warm_up(self) -> None: """ Initializes the component. """ - if not hasattr(self, "embedding_backend"): + if self.embedding_backend is None: self.embedding_backend = _FastembedEmbeddingBackendFactory.get_embedding_backend( model_name=self.model_name, cache_dir=self.cache_dir, @@ -104,7 +109,6 @@ def run(self, text: str) -> dict[str, list[float]]: :returns: A dictionary with the following keys: - `embedding`: A list of floats representing the embedding of the input text. :raises TypeError: If the input is not a string. - :raises RuntimeError: If the embedding model has not been loaded. """ if not isinstance(text, str): msg = ( @@ -112,13 +116,13 @@ def run(self, text: str) -> dict[str, list[float]]: "In case you want to embed a list of Documents, please use the FastembedDocumentEmbedder." ) raise TypeError(msg) - if not hasattr(self, "embedding_backend"): - msg = "The embedding model has not been loaded. Please call warm_up() before running." - raise RuntimeError(msg) + + if self.embedding_backend is None: + self.warm_up() text_to_embed = [self.prefix + text + self.suffix] embedding = list( - self.embedding_backend.embed( + self.embedding_backend.embed( # type: ignore[union-attr] text_to_embed, progress_bar=self.progress_bar, parallel=self.parallel, diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py index ece5e858b9..318a80ddd3 100644 --- a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py +++ b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from .ranker import FastembedRanker __all__ = ["FastembedRanker"] diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py index 9b55fbb4e8..8726c1b637 100644 --- a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py +++ b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from typing import Any, Optional from haystack import Document, component, default_from_dict, default_to_dict, logging diff --git a/integrations/fastembed/tests/test_fastembed_backend.py b/integrations/fastembed/tests/test_fastembed_backend.py index 994a6f8835..f3567e56a7 100644 --- a/integrations/fastembed/tests/test_fastembed_backend.py +++ b/integrations/fastembed/tests/test_fastembed_backend.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import patch from haystack_integrations.components.embedders.fastembed.embedding_backend.fastembed_backend import ( diff --git a/integrations/fastembed/tests/test_fastembed_document_embedder.py b/integrations/fastembed/tests/test_fastembed_document_embedder.py index ad22ab1e97..4aad6268f5 100644 --- a/integrations/fastembed/tests/test_fastembed_document_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_document_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock, patch import numpy as np diff --git a/integrations/fastembed/tests/test_fastembed_ranker.py b/integrations/fastembed/tests/test_fastembed_ranker.py index a5e72536ca..a64958cbc4 100644 --- a/integrations/fastembed/tests/test_fastembed_ranker.py +++ b/integrations/fastembed/tests/test_fastembed_ranker.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock import pytest diff --git a/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py index 7f8d5faee8..59ea9cd249 100644 --- a/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock, patch import numpy as np diff --git a/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py index 9b73f5f3ab..c9e3f77130 100644 --- a/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock, patch import numpy as np diff --git a/integrations/fastembed/tests/test_fastembed_text_embedder.py b/integrations/fastembed/tests/test_fastembed_text_embedder.py index f1b2e21e90..da969dffa8 100644 --- a/integrations/fastembed/tests/test_fastembed_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_text_embedder.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + from unittest.mock import MagicMock, patch import numpy as np From 588ab3b7ecb54a370bbcf360fdffeb299cd3b900 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 9 Jan 2026 08:58:24 +0100 Subject: [PATCH 2/4] Fix bug --- .../fastembed_sparse_document_embedder.py | 2 +- .../fastembed/tests/test_fastembed_ranker.py | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 59bb83ea07..55cd33e7fc 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -177,6 +177,6 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: new_documents = [] for doc, emb in zip(documents, embeddings): - new_documents.append(replace(doc, embedding=emb)) + new_documents.append(replace(doc, sparse_embedding=emb)) return {"documents": new_documents} diff --git a/integrations/fastembed/tests/test_fastembed_ranker.py b/integrations/fastembed/tests/test_fastembed_ranker.py index a64958cbc4..1b1de7ed67 100644 --- a/integrations/fastembed/tests/test_fastembed_ranker.py +++ b/integrations/fastembed/tests/test_fastembed_ranker.py @@ -215,20 +215,6 @@ def test_run_incorrect_input_format(self): ): ranker.run(query=query, documents=list_document, top_k=-3) - def test_run_no_warmup(self): - """ - Test for checking error when calling without a warmup. - """ - ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2") - - query = "query" - list_document = [Document("Document 1")] - - with pytest.raises( - RuntimeError, - ): - ranker.run(query=query, documents=list_document) - def test_run_empty_document_list(self): """ Test for no error when sending no documents. From 6e19fe5192051e5888d7a3ceccc92b8ffea767c3 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 9 Jan 2026 09:03:44 +0100 Subject: [PATCH 3/4] Formatting --- .../embedders/fastembed/fastembed_sparse_document_embedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 55cd33e7fc..aec8a3df0a 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -162,7 +162,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: "FastembedSparseDocumentEmbedder expects a list of Documents as input. " "In case you want to embed a list of strings, please use the FastembedTextEmbedder." ) - raise TypeError(msg)\ + raise TypeError(msg) if self.embedding_backend is None: self.warm_up() From 9681895d62e0b327cf221e325fdc025b7e604c67 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 9 Jan 2026 09:13:24 +0100 Subject: [PATCH 4/4] mypy --- .../embedders/fastembed/fastembed_document_embedder.py | 4 ++-- .../fastembed/fastembed_sparse_document_embedder.py | 7 +++++-- .../embedders/fastembed/fastembed_sparse_text_embedder.py | 7 +++++-- .../embedders/fastembed/fastembed_text_embedder.py | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py index aed226359f..37c5184667 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py @@ -7,7 +7,7 @@ from haystack import Document, component, default_to_dict -from .embedding_backend.fastembed_backend import _FastembedEmbeddingBackendFactory +from .embedding_backend.fastembed_backend import _FastembedEmbeddingBackend, _FastembedEmbeddingBackendFactory @component @@ -107,7 +107,7 @@ def __init__( self.local_files_only = local_files_only self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator - self.embedding_backend = None + self.embedding_backend: Optional[_FastembedEmbeddingBackend] = None def to_dict(self) -> dict[str, Any]: """ diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index aec8a3df0a..62729e814a 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -7,7 +7,10 @@ from haystack import Document, component, default_to_dict -from .embedding_backend.fastembed_backend import _FastembedSparseEmbeddingBackendFactory +from .embedding_backend.fastembed_backend import ( + _FastembedSparseEmbeddingBackend, + _FastembedSparseEmbeddingBackendFactory, +) @component @@ -100,7 +103,7 @@ def __init__( self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator self.model_kwargs = model_kwargs - self.embedding_backend = None + self.embedding_backend: Optional[_FastembedSparseEmbeddingBackend] = None def to_dict(self) -> dict[str, Any]: """ diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py index 0ce70d8775..6d077435de 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py @@ -7,7 +7,10 @@ from haystack import component, default_to_dict from haystack.dataclasses.sparse_embedding import SparseEmbedding -from .embedding_backend.fastembed_backend import _FastembedSparseEmbeddingBackendFactory +from .embedding_backend.fastembed_backend import ( + _FastembedSparseEmbeddingBackend, + _FastembedSparseEmbeddingBackendFactory, +) @component @@ -65,7 +68,7 @@ def __init__( self.parallel = parallel self.local_files_only = local_files_only self.model_kwargs = model_kwargs - self.embedding_backend = None + self.embedding_backend: Optional[_FastembedSparseEmbeddingBackend] = None def to_dict(self) -> dict[str, Any]: """ diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py index 92f89e771c..07daf85dcb 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py @@ -6,7 +6,7 @@ from haystack import component, default_to_dict -from .embedding_backend.fastembed_backend import _FastembedEmbeddingBackendFactory +from .embedding_backend.fastembed_backend import _FastembedEmbeddingBackend, _FastembedEmbeddingBackendFactory @component @@ -67,7 +67,7 @@ def __init__( self.progress_bar = progress_bar self.parallel = parallel self.local_files_only = local_files_only - self.embedding_backend = None + self.embedding_backend: Optional[_FastembedEmbeddingBackend] = None def to_dict(self) -> dict[str, Any]: """