Skip to content

Commit 35525db

Browse files
committed
chore!: fastembed - drop Python 3.9 and use X|Y typing
1 parent 8094d0d commit 35525db

10 files changed

Lines changed: 51 additions & 54 deletions

File tree

integrations/fastembed/pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,22 @@ name = "fastembed-haystack"
77
dynamic = ["version"]
88
description = "Haystack 2.x component to embed strings and Documents using fastembed embedding model"
99
readme = "README.md"
10-
requires-python = ">=3.9"
10+
requires-python = ">=3.10"
1111
license = "Apache-2.0"
1212
keywords = []
1313
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
1414
classifiers = [
1515
"License :: OSI Approved :: Apache Software License",
1616
"Development Status :: 4 - Beta",
1717
"Programming Language :: Python",
18-
"Programming Language :: Python :: 3.9",
1918
"Programming Language :: Python :: 3.10",
2019
"Programming Language :: Python :: 3.11",
2120
"Programming Language :: Python :: 3.12",
2221
"Programming Language :: Python :: 3.13",
2322
"Programming Language :: Python :: Implementation :: CPython",
2423
"Programming Language :: Python :: Implementation :: PyPy",
2524
]
26-
dependencies = ["haystack-ai>=2.0.1", "fastembed>=0.4.2"]
25+
dependencies = ["haystack-ai>=2.22.0", "fastembed>=0.4.2"]
2726

2827
[project.urls]
2928
Source = "https://github.com/deepset-ai/haystack-core-integrations"
@@ -75,7 +74,6 @@ check_untyped_defs = true
7574
disallow_incomplete_defs = true
7675

7776
[tool.ruff]
78-
target-version = "py39"
7977
line-length = 120
8078

8179
[tool.ruff.lint]
@@ -124,10 +122,6 @@ ignore = [
124122
"PLR0913",
125123
"PLR0915",
126124
]
127-
unfixable = [
128-
# Don't touch unused imports
129-
"F401",
130-
]
131125

132126
[tool.ruff.lint.isort]
133127
known-first-party = ["haystack_integrations"]

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/embedding_backend/fastembed_backend.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, ClassVar, Optional
1+
from typing import Any, ClassVar
22

33
from haystack.dataclasses.sparse_embedding import SparseEmbedding
44
from tqdm import tqdm
@@ -17,8 +17,8 @@ class _FastembedEmbeddingBackendFactory:
1717
@staticmethod
1818
def get_embedding_backend(
1919
model_name: str,
20-
cache_dir: Optional[str] = None,
21-
threads: Optional[int] = None,
20+
cache_dir: str | None = None,
21+
threads: int | None = None,
2222
local_files_only: bool = False,
2323
) -> "_FastembedEmbeddingBackend":
2424
embedding_backend_id = f"{model_name}{cache_dir}{threads}"
@@ -41,8 +41,8 @@ class _FastembedEmbeddingBackend:
4141
def __init__(
4242
self,
4343
model_name: str,
44-
cache_dir: Optional[str] = None,
45-
threads: Optional[int] = None,
44+
cache_dir: str | None = None,
45+
threads: int | None = None,
4646
local_files_only: bool = False,
4747
):
4848
self.model = TextEmbedding(
@@ -70,10 +70,10 @@ class _FastembedSparseEmbeddingBackendFactory:
7070
@staticmethod
7171
def get_embedding_backend(
7272
model_name: str,
73-
cache_dir: Optional[str] = None,
74-
threads: Optional[int] = None,
73+
cache_dir: str | None = None,
74+
threads: int | None = None,
7575
local_files_only: bool = False,
76-
model_kwargs: Optional[dict[str, Any]] = None,
76+
model_kwargs: dict[str, Any] | None = None,
7777
) -> "_FastembedSparseEmbeddingBackend":
7878
embedding_backend_id = f"{model_name}{cache_dir}{threads}{local_files_only}{model_kwargs}"
7979

@@ -99,10 +99,10 @@ class _FastembedSparseEmbeddingBackend:
9999
def __init__(
100100
self,
101101
model_name: str,
102-
cache_dir: Optional[str] = None,
103-
threads: Optional[int] = None,
102+
cache_dir: str | None = None,
103+
threads: int | None = None,
104104
local_files_only: bool = False,
105-
model_kwargs: Optional[dict[str, Any]] = None,
105+
model_kwargs: dict[str, Any] | None = None,
106106
):
107107
model_kwargs = model_kwargs or {}
108108

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from dataclasses import replace
6-
from typing import Any, Optional
6+
from typing import Any
77

88
from haystack import Document, component, default_to_dict
99

@@ -63,15 +63,15 @@ class FastembedDocumentEmbedder:
6363
def __init__(
6464
self,
6565
model: str = "BAAI/bge-small-en-v1.5",
66-
cache_dir: Optional[str] = None,
67-
threads: Optional[int] = None,
66+
cache_dir: str | None = None,
67+
threads: int | None = None,
6868
prefix: str = "",
6969
suffix: str = "",
7070
batch_size: int = 256,
7171
progress_bar: bool = True,
72-
parallel: Optional[int] = None,
72+
parallel: int | None = None,
7373
local_files_only: bool = False,
74-
meta_fields_to_embed: Optional[list[str]] = None,
74+
meta_fields_to_embed: list[str] | None = None,
7575
embedding_separator: str = "\n",
7676
) -> None:
7777
"""
@@ -107,7 +107,7 @@ def __init__(
107107
self.local_files_only = local_files_only
108108
self.meta_fields_to_embed = meta_fields_to_embed or []
109109
self.embedding_separator = embedding_separator
110-
self.embedding_backend: Optional[_FastembedEmbeddingBackend] = None
110+
self.embedding_backend: _FastembedEmbeddingBackend | None = None
111111

112112
def to_dict(self) -> dict[str, Any]:
113113
"""
@@ -184,7 +184,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
184184
)
185185

186186
new_documents = []
187-
for doc, emb in zip(documents, embeddings):
187+
for doc, emb in zip(documents, embeddings, strict=True):
188188
new_documents.append(replace(doc, embedding=emb))
189189

190190
return {"documents": new_documents}

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from dataclasses import replace
6-
from typing import Any, Optional
6+
from typing import Any
77

88
from haystack import Document, component, default_to_dict
99

@@ -62,15 +62,15 @@ class FastembedSparseDocumentEmbedder:
6262
def __init__(
6363
self,
6464
model: str = "prithivida/Splade_PP_en_v1",
65-
cache_dir: Optional[str] = None,
66-
threads: Optional[int] = None,
65+
cache_dir: str | None = None,
66+
threads: int | None = None,
6767
batch_size: int = 32,
6868
progress_bar: bool = True,
69-
parallel: Optional[int] = None,
69+
parallel: int | None = None,
7070
local_files_only: bool = False,
71-
meta_fields_to_embed: Optional[list[str]] = None,
71+
meta_fields_to_embed: list[str] | None = None,
7272
embedding_separator: str = "\n",
73-
model_kwargs: Optional[dict[str, Any]] = None,
73+
model_kwargs: dict[str, Any] | None = None,
7474
) -> None:
7575
"""
7676
Create a FastembedSparseDocumentEmbedder component.
@@ -103,7 +103,7 @@ def __init__(
103103
self.meta_fields_to_embed = meta_fields_to_embed or []
104104
self.embedding_separator = embedding_separator
105105
self.model_kwargs = model_kwargs
106-
self.embedding_backend: Optional[_FastembedSparseEmbeddingBackend] = None
106+
self.embedding_backend: _FastembedSparseEmbeddingBackend | None = None
107107

108108
def to_dict(self) -> dict[str, Any]:
109109
"""
@@ -179,7 +179,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
179179
)
180180

181181
new_documents = []
182-
for doc, emb in zip(documents, embeddings):
182+
for doc, emb in zip(documents, embeddings, strict=True):
183183
new_documents.append(replace(doc, sparse_embedding=emb))
184184

185185
return {"documents": new_documents}

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import Any, Optional
5+
from typing import Any
66

77
from haystack import component, default_to_dict
88
from haystack.dataclasses.sparse_embedding import SparseEmbedding
@@ -37,12 +37,12 @@ class FastembedSparseTextEmbedder:
3737
def __init__(
3838
self,
3939
model: str = "prithivida/Splade_PP_en_v1",
40-
cache_dir: Optional[str] = None,
41-
threads: Optional[int] = None,
40+
cache_dir: str | None = None,
41+
threads: int | None = None,
4242
progress_bar: bool = True,
43-
parallel: Optional[int] = None,
43+
parallel: int | None = None,
4444
local_files_only: bool = False,
45-
model_kwargs: Optional[dict[str, Any]] = None,
45+
model_kwargs: dict[str, Any] | None = None,
4646
) -> None:
4747
"""
4848
Create a FastembedSparseTextEmbedder component.
@@ -68,7 +68,7 @@ def __init__(
6868
self.parallel = parallel
6969
self.local_files_only = local_files_only
7070
self.model_kwargs = model_kwargs
71-
self.embedding_backend: Optional[_FastembedSparseEmbeddingBackend] = None
71+
self.embedding_backend: _FastembedSparseEmbeddingBackend | None = None
7272

7373
def to_dict(self) -> dict[str, Any]:
7474
"""

integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_embedder.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import Any, Optional
5+
from typing import Any
66

77
from haystack import component, default_to_dict
88

@@ -33,12 +33,12 @@ class FastembedTextEmbedder:
3333
def __init__(
3434
self,
3535
model: str = "BAAI/bge-small-en-v1.5",
36-
cache_dir: Optional[str] = None,
37-
threads: Optional[int] = None,
36+
cache_dir: str | None = None,
37+
threads: int | None = None,
3838
prefix: str = "",
3939
suffix: str = "",
4040
progress_bar: bool = True,
41-
parallel: Optional[int] = None,
41+
parallel: int | None = None,
4242
local_files_only: bool = False,
4343
) -> None:
4444
"""
@@ -67,7 +67,7 @@ def __init__(
6767
self.progress_bar = progress_bar
6868
self.parallel = parallel
6969
self.local_files_only = local_files_only
70-
self.embedding_backend: Optional[_FastembedEmbeddingBackend] = None
70+
self.embedding_backend: _FastembedEmbeddingBackend | None = None
7171

7272
def to_dict(self) -> dict[str, Any]:
7373
"""

integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import Any, Optional
5+
from typing import Any
66

77
from haystack import Document, component, default_from_dict, default_to_dict, logging
88

@@ -39,12 +39,12 @@ def __init__(
3939
self,
4040
model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2",
4141
top_k: int = 10,
42-
cache_dir: Optional[str] = None,
43-
threads: Optional[int] = None,
42+
cache_dir: str | None = None,
43+
threads: int | None = None,
4444
batch_size: int = 64,
45-
parallel: Optional[int] = None,
45+
parallel: int | None = None,
4646
local_files_only: bool = False,
47-
meta_fields_to_embed: Optional[list[str]] = None,
47+
meta_fields_to_embed: list[str] | None = None,
4848
meta_data_separator: str = "\n",
4949
):
5050
"""
@@ -80,7 +80,7 @@ def __init__(
8080
self.local_files_only = local_files_only
8181
self.meta_fields_to_embed = meta_fields_to_embed or []
8282
self.meta_data_separator = meta_data_separator
83-
self._model: Optional[TextCrossEncoder] = None
83+
self._model: TextCrossEncoder | None = None
8484

8585
def to_dict(self) -> dict[str, Any]:
8686
"""
@@ -144,7 +144,7 @@ def _prepare_fastembed_input_docs(self, documents: list[Document]) -> list[str]:
144144
return concatenated_input_list
145145

146146
@component.output_types(documents=list[Document])
147-
def run(self, query: str, documents: list[Document], top_k: Optional[int] = None) -> dict[str, list[Document]]:
147+
def run(self, query: str, documents: list[Document], top_k: int | None = None) -> dict[str, list[Document]]:
148148
"""
149149
Returns a list of documents ranked by their similarity to the given query, using FastEmbed.
150150
@@ -192,7 +192,7 @@ def run(self, query: str, documents: list[Document], top_k: Optional[int] = None
192192
)
193193

194194
# Combine the two lists into a single list of tuples
195-
doc_scores = list(zip(documents, scores))
195+
doc_scores = list(zip(documents, scores, strict=True))
196196

197197
# Sort the list of tuples by the score in descending order
198198
sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True)

integrations/fastembed/tests/test_fastembed_document_embedder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ def test_embed_metadata(self):
262262
embedding_separator="\n",
263263
)
264264
embedder.embedding_backend = MagicMock()
265+
embedder.embedding_backend.embed.return_value = [np.random.rand(3, 16).tolist() for _ in range(5)]
265266

266267
documents = [Document(content=f"document-number {i}", meta={"meta_field": f"meta_value {i}"}) for i in range(5)]
267268

integrations/fastembed/tests/test_fastembed_ranker.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from unittest.mock import MagicMock
66

7+
import numpy as np
78
import pytest
89
from haystack import Document, default_from_dict
910

@@ -237,6 +238,7 @@ def test_embed_metadata(self):
237238
meta_fields_to_embed=["meta_field"],
238239
)
239240
ranker._model = MagicMock()
241+
ranker._model.rerank.return_value = [np.random.rand(3, 16).tolist() for _ in range(5)]
240242

241243
documents = [Document(content=f"document-number {i}", meta={"meta_field": f"meta_value {i}"}) for i in range(5)]
242244
query = "test"

integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def test_embed_metadata(self):
267267
embedding_separator="\n",
268268
)
269269
embedder.embedding_backend = MagicMock()
270-
270+
embedder.embedding_backend.embed.return_value = [self._generate_mocked_sparse_embedding(3) for _ in range(5)]
271271
documents = [Document(content=f"document-number {i}", meta={"meta_field": f"meta_value {i}"}) for i in range(5)]
272272

273273
embedder.run(documents=documents)

0 commit comments

Comments
 (0)