chore!: jina - drop Python 3.9 and use X|Y typing (#2708)

anakin87 · davidsbatista · commit 0734e97a286f · 2026-01-13T22:37:09.000+01:00
diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml
@@ -7,7 +7,7 @@ name = "jina-haystack"
 dynamic = ["version"]
 description = ''
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = "Apache-2.0"
 keywords = []
 authors = [
@@ -17,15 +17,14 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["requests>=2.25.0", "haystack-ai>=2.16.1"]
+dependencies = ["requests>=2.25.0", "haystack-ai>=2.22.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/jina#readme"
@@ -79,7 +78,6 @@ check_untyped_defs = true
 disallow_incomplete_defs = true
 
 [tool.ruff]
-target-version = "py39"
 line-length = 120
 
 [tool.ruff.lint]
@@ -123,10 +121,6 @@ ignore = [
   "PLR0913",
   "PLR0915",
 ]
-unfixable = [
-  # Don't touch unused imports
-  "F401",
-]
 
 [tool.ruff.lint.isort]
 known-first-party = ["haystack_integrations"]
diff --git a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import json
-from typing import Any, Optional, Union
+from typing import Any
 from urllib.parse import quote
 
 import requests
@@ -42,7 +42,7 @@ class JinaReaderConnector:
 
     def __init__(
         self,
-        mode: Union[JinaReaderMode, str],
+        mode: JinaReaderMode | str,
         api_key: Secret = Secret.from_env_var("JINA_API_KEY"),  # noqa: B008
         json_response: bool = True,
     ):
@@ -104,7 +104,7 @@ def _json_to_document(self, data: dict) -> Document:
         return document
 
     @component.output_types(documents=list[Document])
-    def run(self, query: str, headers: Optional[dict[str, str]] = None) -> dict[str, list[Document]]:
+    def run(self, query: str, headers: dict[str, str] | None = None) -> dict[str, list[Document]]:
         """
         Process the query/URL using the Jina AI reader service.
 
diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Optional
+from typing import Any
 
 import requests
 from haystack import Document, component, default_from_dict, default_to_dict
@@ -43,11 +43,11 @@ def __init__(
         suffix: str = "",
         batch_size: int = 32,
         progress_bar: bool = True,
-        meta_fields_to_embed: Optional[list[str]] = None,
+        meta_fields_to_embed: list[str] | None = None,
         embedding_separator: str = "\n",
-        task: Optional[str] = None,
-        dimensions: Optional[int] = None,
-        late_chunking: Optional[bool] = None,
+        task: str | None = None,
+        dimensions: int | None = None,
+        late_chunking: bool | None = None,
     ):
         """
         Create a JinaDocumentEmbedder component.
@@ -156,7 +156,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
         return texts_to_embed
 
     def _embed_batch(
-        self, texts_to_embed: list[str], batch_size: int, parameters: Optional[dict] = None
+        self, texts_to_embed: list[str], batch_size: int, parameters: dict | None = None
     ) -> tuple[list[list[float]], dict[str, Any]]:
         """
         Embed a list of texts in batches.
@@ -219,7 +219,7 @@ def run(self, documents: list[Document]) -> dict[str, Any]:
             texts_to_embed=texts_to_embed, batch_size=self.batch_size, parameters=parameters
         )
 
-        for doc, emb in zip(documents, embeddings):
+        for doc, emb in zip(documents, embeddings, strict=True):
             doc.embedding = emb
 
         return {"documents": documents, "meta": metadata}
diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_image_embedder.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 from dataclasses import replace
-from typing import Any, Optional
+from typing import Any
 
 import requests
 from haystack import Document, component, default_from_dict, default_to_dict, logging
@@ -58,9 +58,9 @@ def __init__(
         api_key: Secret = Secret.from_env_var("JINA_API_KEY"),  # noqa: B008
         model: str = "jina-clip-v2",
         file_path_meta_field: str = "file_path",
-        root_path: Optional[str] = None,
-        embedding_dimension: Optional[int] = None,
-        image_size: Optional[tuple[int, int]] = None,
+        root_path: str | None = None,
+        embedding_dimension: int | None = None,
+        image_size: tuple[int, int] | None = None,
         batch_size: int = 5,
     ):
         """
@@ -166,7 +166,7 @@ def _extract_images_to_embed(self, documents: list[Document]) -> list[str]:
             documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path
         )
 
-        images_to_embed: list[Optional[str]] = [None] * len(documents)
+        images_to_embed: list[str | None] = [None] * len(documents)
         pdf_page_infos: list[_PDFPageInfo] = []
 
         for doc_idx, image_source_info in enumerate(images_source_info):
@@ -256,7 +256,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
             embeddings.extend(batch_embeddings)
 
         docs_with_embeddings = []
-        for doc, emb in zip(documents, embeddings):
+        for doc, emb in zip(documents, embeddings, strict=True):
             # we store this information for later inspection
             new_meta = {
                 **doc.meta,
diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Optional
+from typing import Any
 
 import requests
 from haystack import component, default_from_dict, default_to_dict
@@ -39,9 +39,9 @@ def __init__(
         model: str = "jina-embeddings-v3",
         prefix: str = "",
         suffix: str = "",
-        task: Optional[str] = None,
-        dimensions: Optional[int] = None,
-        late_chunking: Optional[bool] = None,
+        task: str | None = None,
+        dimensions: int | None = None,
+        late_chunking: bool | None = None,
     ):
         """
         Create a JinaTextEmbedder component.
diff --git a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Optional
+from typing import Any
 
 import requests
 from haystack import Document, component, default_from_dict, default_to_dict
@@ -33,8 +33,8 @@ def __init__(
         self,
         model: str = "jina-reranker-v1-base-en",
         api_key: Secret = Secret.from_env_var("JINA_API_KEY"),  # noqa: B008,
-        top_k: Optional[int] = None,
-        score_threshold: Optional[float] = None,
+        top_k: int | None = None,
+        score_threshold: float | None = None,
     ):
         """
         Creates an instance of JinaRanker.
@@ -107,8 +107,8 @@ def run(
         self,
         query: str,
         documents: list[Document],
-        top_k: Optional[int] = None,
-        score_threshold: Optional[float] = None,
+        top_k: int | None = None,
+        score_threshold: float | None = None,
     ):
         """
         Returns a list of Documents ranked by their similarity to the given query.