diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml index c89f84112e..1c8e24dcd9 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -7,7 +7,7 @@ name = "jina-haystack" dynamic = ["version"] description = '' readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = "Apache-2.0" keywords = [] authors = [ @@ -17,7 +17,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -25,7 +24,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["requests>=2.25.0", "haystack-ai>=2.16.1"] +dependencies = ["requests>=2.25.0", "haystack-ai>=2.22.0"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/jina#readme" @@ -79,7 +78,6 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py39" line-length = 120 [tool.ruff.lint] @@ -123,10 +121,6 @@ ignore = [ "PLR0913", "PLR0915", ] -unfixable = [ - # Don't touch unused imports - "F401", -] [tool.ruff.lint.isort] known-first-party = ["haystack_integrations"] diff --git a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py index 4eda3e27ba..c0f5de4d1a 100644 --- a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py +++ b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import json -from typing import Any, Optional, Union +from typing import Any from urllib.parse import quote import requests @@ -42,7 +42,7 @@ class JinaReaderConnector: def __init__( self, - mode: Union[JinaReaderMode, str], + mode: JinaReaderMode | str, api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008 json_response: bool = True, ): @@ -104,7 +104,7 @@ def _json_to_document(self, data: dict) -> Document: return document @component.output_types(documents=list[Document]) - def run(self, query: str, headers: Optional[dict[str, str]] = None) -> dict[str, list[Document]]: + def run(self, query: str, headers: dict[str, str] | None = None) -> dict[str, list[Document]]: """ Process the query/URL using the Jina AI reader service. diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py index d775833629..0cd0f1d21e 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional +from typing import Any import requests from haystack import Document, component, default_from_dict, default_to_dict @@ -43,11 +43,11 @@ def __init__( suffix: str = "", batch_size: int = 32, progress_bar: bool = True, - meta_fields_to_embed: Optional[list[str]] = None, + meta_fields_to_embed: list[str] | None = None, embedding_separator: str = "\n", - task: Optional[str] = None, - dimensions: Optional[int] = None, - late_chunking: Optional[bool] = None, + task: str | None = None, + dimensions: int | None = None, + late_chunking: bool | None = None, ): """ Create a JinaDocumentEmbedder component. @@ -156,7 +156,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: return texts_to_embed def _embed_batch( - self, texts_to_embed: list[str], batch_size: int, parameters: Optional[dict] = None + self, texts_to_embed: list[str], batch_size: int, parameters: dict | None = None ) -> tuple[list[list[float]], dict[str, Any]]: """ Embed a list of texts in batches. @@ -219,7 +219,7 @@ def run(self, documents: list[Document]) -> dict[str, Any]: texts_to_embed=texts_to_embed, batch_size=self.batch_size, parameters=parameters ) - for doc, emb in zip(documents, embeddings): + for doc, emb in zip(documents, embeddings, strict=True): doc.embedding = emb return {"documents": documents, "meta": metadata} diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py index 852dca138d..f3ac40f199 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 from dataclasses import replace -from typing import Any, Optional +from typing import Any import requests from haystack import Document, component, default_from_dict, default_to_dict, logging @@ -58,9 +58,9 @@ def __init__( api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008 model: str = "jina-clip-v2", file_path_meta_field: str = "file_path", - root_path: Optional[str] = None, - embedding_dimension: Optional[int] = None, - image_size: Optional[tuple[int, int]] = None, + root_path: str | None = None, + embedding_dimension: int | None = None, + image_size: tuple[int, int] | None = None, batch_size: int = 5, ): """ @@ -166,7 +166,7 @@ def _extract_images_to_embed(self, documents: list[Document]) -> list[str]: documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path ) - images_to_embed: list[Optional[str]] = [None] * len(documents) + images_to_embed: list[str | None] = [None] * len(documents) pdf_page_infos: list[_PDFPageInfo] = [] for doc_idx, image_source_info in enumerate(images_source_info): @@ -256,7 +256,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]: embeddings.extend(batch_embeddings) docs_with_embeddings = [] - for doc, emb in zip(documents, embeddings): + for doc, emb in zip(documents, embeddings, strict=True): # we store this information for later inspection new_meta = { **doc.meta, diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py index b27a8cfc47..3361c12ad7 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional +from typing import Any import requests from haystack import component, default_from_dict, default_to_dict @@ -39,9 +39,9 @@ def __init__( model: str = "jina-embeddings-v3", prefix: str = "", suffix: str = "", - task: Optional[str] = None, - dimensions: Optional[int] = None, - late_chunking: Optional[bool] = None, + task: str | None = None, + dimensions: int | None = None, + late_chunking: bool | None = None, ): """ Create a JinaTextEmbedder component. diff --git a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py index 0bd06a7815..bfb0ff16f4 100644 --- a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py +++ b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional +from typing import Any import requests from haystack import Document, component, default_from_dict, default_to_dict @@ -33,8 +33,8 @@ def __init__( self, model: str = "jina-reranker-v1-base-en", api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008, - top_k: Optional[int] = None, - score_threshold: Optional[float] = None, + top_k: int | None = None, + score_threshold: float | None = None, ): """ Creates an instance of JinaRanker. @@ -107,8 +107,8 @@ def run( self, query: str, documents: list[Document], - top_k: Optional[int] = None, - score_threshold: Optional[float] = None, + top_k: int | None = None, + score_threshold: float | None = None, ): """ Returns a list of Documents ranked by their similarity to the given query.