Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions integrations/jina/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "jina-haystack"
dynamic = ["version"]
description = ''
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = []
authors = [
Expand All @@ -17,15 +17,14 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["requests>=2.25.0", "haystack-ai>=2.16.1"]
dependencies = ["requests>=2.25.0", "haystack-ai>=2.22.0"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/jina#readme"
Expand Down Expand Up @@ -79,7 +78,6 @@ check_untyped_defs = true
disallow_incomplete_defs = true

[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
Expand Down Expand Up @@ -123,10 +121,6 @@ ignore = [
"PLR0913",
"PLR0915",
]
unfixable = [
# Don't touch unused imports
"F401",
]

[tool.ruff.lint.isort]
known-first-party = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

import json
from typing import Any, Optional, Union
from typing import Any
from urllib.parse import quote

import requests
Expand Down Expand Up @@ -42,7 +42,7 @@ class JinaReaderConnector:

def __init__(
self,
mode: Union[JinaReaderMode, str],
mode: JinaReaderMode | str,
api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008
json_response: bool = True,
):
Expand Down Expand Up @@ -104,7 +104,7 @@ def _json_to_document(self, data: dict) -> Document:
return document

@component.output_types(documents=list[Document])
def run(self, query: str, headers: Optional[dict[str, str]] = None) -> dict[str, list[Document]]:
def run(self, query: str, headers: dict[str, str] | None = None) -> dict[str, list[Document]]:
"""
Process the query/URL using the Jina AI reader service.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Optional
from typing import Any

import requests
from haystack import Document, component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -43,11 +43,11 @@ def __init__(
suffix: str = "",
batch_size: int = 32,
progress_bar: bool = True,
meta_fields_to_embed: Optional[list[str]] = None,
meta_fields_to_embed: list[str] | None = None,
embedding_separator: str = "\n",
task: Optional[str] = None,
dimensions: Optional[int] = None,
late_chunking: Optional[bool] = None,
task: str | None = None,
dimensions: int | None = None,
late_chunking: bool | None = None,
):
"""
Create a JinaDocumentEmbedder component.
Expand Down Expand Up @@ -156,7 +156,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
return texts_to_embed

def _embed_batch(
self, texts_to_embed: list[str], batch_size: int, parameters: Optional[dict] = None
self, texts_to_embed: list[str], batch_size: int, parameters: dict | None = None
) -> tuple[list[list[float]], dict[str, Any]]:
"""
Embed a list of texts in batches.
Expand Down Expand Up @@ -219,7 +219,7 @@ def run(self, documents: list[Document]) -> dict[str, Any]:
texts_to_embed=texts_to_embed, batch_size=self.batch_size, parameters=parameters
)

for doc, emb in zip(documents, embeddings):
for doc, emb in zip(documents, embeddings, strict=True):
doc.embedding = emb

return {"documents": documents, "meta": metadata}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0
from dataclasses import replace
from typing import Any, Optional
from typing import Any

import requests
from haystack import Document, component, default_from_dict, default_to_dict, logging
Expand Down Expand Up @@ -58,9 +58,9 @@ def __init__(
api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008
model: str = "jina-clip-v2",
file_path_meta_field: str = "file_path",
root_path: Optional[str] = None,
embedding_dimension: Optional[int] = None,
image_size: Optional[tuple[int, int]] = None,
root_path: str | None = None,
embedding_dimension: int | None = None,
image_size: tuple[int, int] | None = None,
batch_size: int = 5,
):
"""
Expand Down Expand Up @@ -166,7 +166,7 @@ def _extract_images_to_embed(self, documents: list[Document]) -> list[str]:
documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path
)

images_to_embed: list[Optional[str]] = [None] * len(documents)
images_to_embed: list[str | None] = [None] * len(documents)
pdf_page_infos: list[_PDFPageInfo] = []

for doc_idx, image_source_info in enumerate(images_source_info):
Expand Down Expand Up @@ -256,7 +256,7 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
embeddings.extend(batch_embeddings)

docs_with_embeddings = []
for doc, emb in zip(documents, embeddings):
for doc, emb in zip(documents, embeddings, strict=True):
# we store this information for later inspection
new_meta = {
**doc.meta,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Optional
from typing import Any

import requests
from haystack import component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -39,9 +39,9 @@ def __init__(
model: str = "jina-embeddings-v3",
prefix: str = "",
suffix: str = "",
task: Optional[str] = None,
dimensions: Optional[int] = None,
late_chunking: Optional[bool] = None,
task: str | None = None,
dimensions: int | None = None,
late_chunking: bool | None = None,
):
"""
Create a JinaTextEmbedder component.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Optional
from typing import Any

import requests
from haystack import Document, component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -33,8 +33,8 @@ def __init__(
self,
model: str = "jina-reranker-v1-base-en",
api_key: Secret = Secret.from_env_var("JINA_API_KEY"), # noqa: B008,
top_k: Optional[int] = None,
score_threshold: Optional[float] = None,
top_k: int | None = None,
score_threshold: float | None = None,
):
"""
Creates an instance of JinaRanker.
Expand Down Expand Up @@ -107,8 +107,8 @@ def run(
self,
query: str,
documents: list[Document],
top_k: Optional[int] = None,
score_threshold: Optional[float] = None,
top_k: int | None = None,
score_threshold: float | None = None,
):
"""
Returns a list of Documents ranked by their similarity to the given query.
Expand Down