From ee6e6409ac8091b5b457a7c2e4811e74feb3b933 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 12 Jan 2026 16:41:03 +0100 Subject: [PATCH] chore!: optimum - drop Python 3.9 and use X|Y typing --- integrations/optimum/pyproject.toml | 10 ++------ .../components/embedders/optimum/_backend.py | 24 +++++++++---------- .../optimum/optimum_document_embedder.py | 18 +++++++------- .../optimum/optimum_text_embedder.py | 14 +++++------ 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml index 1406130f72..4d7a45e6f8 100644 --- a/integrations/optimum/pyproject.toml +++ b/integrations/optimum/pyproject.toml @@ -7,7 +7,7 @@ name = "optimum-haystack" dynamic = ["version"] description = "Component to embed strings and Documents using models loaded with the HuggingFace Optimum library. This component is designed to seamlessly inference models using the high speed ONNX runtime." readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = "Apache-2.0" keywords = [] authors = [ @@ -18,7 +18,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -26,7 +25,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai", + "haystack-ai>=2.22.0", "optimum[onnxruntime]>=1.21.0", # The main export function of Optimum into ONNX has hidden dependencies. # It depends on either "sentence-transformers", "diffusers" or "timm", based @@ -99,7 +98,6 @@ known-first-party = ["haystack_integrations"] [tool.ruff] -target-version = "py39" line-length = 120 exclude = ["example", "tests"] @@ -146,10 +144,6 @@ ignore = [ # Asserts "S101", ] -unfixable = [ - # Don't touch unused imports - "F401", -] [tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "parents" diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py index 2dadfe75e3..2670683103 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py @@ -6,7 +6,7 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Any, Optional, Union, overload +from typing import Any, overload import numpy as np import torch @@ -32,18 +32,18 @@ @dataclass class _EmbedderParams: model: str - token: Optional[Secret] + token: Secret | None prefix: str suffix: str normalize_embeddings: bool onnx_execution_provider: str batch_size: int progress_bar: bool - pooling_mode: Optional[Union[str, OptimumEmbedderPooling]] - model_kwargs: Optional[dict[str, Any]] - working_dir: Optional[str] - optimizer_settings: Optional[OptimumEmbedderOptimizationConfig] - quantizer_settings: Optional[OptimumEmbedderQuantizationConfig] + pooling_mode: str | OptimumEmbedderPooling | None + model_kwargs: dict[str, Any] | None + working_dir: str | None + optimizer_settings: OptimumEmbedderOptimizationConfig | None + quantizer_settings: OptimumEmbedderQuantizationConfig | None def serialize(self) -> dict[str, Any]: out = {} @@ -115,7 +115,7 @@ def __init__(self, params: _EmbedderParams): self.params = params self.model = None self.tokenizer = None - self.pooling_layer: Optional[SentenceTransformerPoolingLayer] = None + self.pooling_layer: SentenceTransformerPoolingLayer | None = None def warm_up(self): assert self.params.model_kwargs @@ -200,8 +200,8 @@ def embed_texts(self, texts_to_embed: list[str]) -> list[list[float]]: ... def embed_texts( self, - texts_to_embed: Union[str, list[str]], - ) -> Union[list[list[float]], list[float]]: + texts_to_embed: str | list[str], + ) -> list[list[float]] | list[float]: assert self.model is not None assert self.tokenizer is not None @@ -236,7 +236,7 @@ def embed_texts( # Reorder embeddings according to original order reordered_embeddings: list[list[float]] = [None] * len(texts) # type: ignore - for embedding, idx in zip(embeddings, length_sorted_idx): + for embedding, idx in zip(embeddings, length_sorted_idx, strict=True): reordered_embeddings[idx] = embedding if isinstance(texts_to_embed, str): @@ -245,7 +245,7 @@ def embed_texts( return reordered_embeddings -def _pooling_from_model_config(model: str, token: Optional[str] = None) -> Optional[OptimumEmbedderPooling]: +def _pooling_from_model_config(model: str, token: str | None = None) -> OptimumEmbedderPooling | None: try: pooling_config_path = hf_hub_download(repo_id=model, token=token, filename="1_Pooling/config.json") except Exception as e: diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py index 727f0e7f56..a25b9f0c8a 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from dataclasses import replace -from typing import Any, Optional, Union +from typing import Any from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret @@ -43,19 +43,19 @@ class OptimumDocumentEmbedder: def __init__( self, model: str = "sentence-transformers/all-mpnet-base-v2", - token: Optional[Secret] = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 + token: Secret | None = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 prefix: str = "", suffix: str = "", normalize_embeddings: bool = True, onnx_execution_provider: str = "CPUExecutionProvider", - pooling_mode: Optional[Union[str, OptimumEmbedderPooling]] = None, - model_kwargs: Optional[dict[str, Any]] = None, - working_dir: Optional[str] = None, - optimizer_settings: Optional[OptimumEmbedderOptimizationConfig] = None, - quantizer_settings: Optional[OptimumEmbedderQuantizationConfig] = None, + pooling_mode: str | OptimumEmbedderPooling | None = None, + model_kwargs: dict[str, Any] | None = None, + working_dir: str | None = None, + optimizer_settings: OptimumEmbedderOptimizationConfig | None = None, + quantizer_settings: OptimumEmbedderQuantizationConfig | None = None, batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[list[str]] = None, + meta_fields_to_embed: list[str] | None = None, embedding_separator: str = "\n", ) -> None: """ @@ -226,7 +226,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: embeddings = self._backend.embed_texts(texts_to_embed) new_documents = [] - for doc, emb in zip(documents, embeddings): + for doc, emb in zip(documents, embeddings, strict=True): new_documents.append(replace(doc, embedding=emb)) return {"documents": new_documents} diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py index 2b325c89ef..72ab8a73b6 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional, Union +from typing import Any from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret @@ -38,16 +38,16 @@ class OptimumTextEmbedder: def __init__( self, model: str = "sentence-transformers/all-mpnet-base-v2", - token: Optional[Secret] = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 + token: Secret | None = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 prefix: str = "", suffix: str = "", normalize_embeddings: bool = True, onnx_execution_provider: str = "CPUExecutionProvider", - pooling_mode: Optional[Union[str, OptimumEmbedderPooling]] = None, - model_kwargs: Optional[dict[str, Any]] = None, - working_dir: Optional[str] = None, - optimizer_settings: Optional[OptimumEmbedderOptimizationConfig] = None, - quantizer_settings: Optional[OptimumEmbedderQuantizationConfig] = None, + pooling_mode: str | OptimumEmbedderPooling | None = None, + model_kwargs: dict[str, Any] | None = None, + working_dir: str | None = None, + optimizer_settings: OptimumEmbedderOptimizationConfig | None = None, + quantizer_settings: OptimumEmbedderQuantizationConfig | None = None, ): """ Create a OptimumTextEmbedder component.