Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions integrations/mistral/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,22 @@ name = "mistral-haystack"
dynamic = ["version"]
description = ''
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = []
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.19.0", "mistralai>=1.9.11"]
dependencies = ["haystack-ai>=2.22.0", "mistralai>=1.9.11"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/mistral#readme"
Expand Down Expand Up @@ -78,7 +77,6 @@ check_untyped_defs = true
disallow_incomplete_defs = true

[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
Expand Down Expand Up @@ -125,10 +123,6 @@ ignore = [
"B008",
"S101",
]
unfixable = [
# Don't touch unused imports
"F401",
]

[tool.ruff.lint.isort]
known-first-party = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import re
from pathlib import Path
from typing import Any, Optional, Union
from typing import Any

from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.components.converters.utils import (
Expand Down Expand Up @@ -103,9 +103,9 @@ def __init__(
api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"),
model: str = "mistral-ocr-2505",
include_image_base64: bool = False,
pages: Optional[list[int]] = None,
image_limit: Optional[int] = None,
image_min_size: Optional[int] = None,
pages: list[int] | None = None,
image_limit: int | None = None,
image_min_size: int | None = None,
cleanup_uploaded_files: bool = True,
):
"""
Expand Down Expand Up @@ -175,10 +175,10 @@ def from_dict(cls, data: dict[str, Any]) -> "MistralOCRDocumentConverter":
@component.output_types(documents=list[Document], raw_mistral_response=list[dict[str, Any]])
def run(
self,
sources: list[Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk]],
meta: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = None,
bbox_annotation_schema: Optional[type[BaseModel]] = None,
document_annotation_schema: Optional[type[BaseModel]] = None,
sources: list[str | Path | ByteStream | DocumentURLChunk | FileChunk | ImageURLChunk],
meta: dict[str, Any] | list[dict[str, Any]] | None = None,
bbox_annotation_schema: type[BaseModel] | None = None,
document_annotation_schema: type[BaseModel] | None = None,
) -> dict[str, Any]:
"""
Extract text from documents using Mistral OCR.
Expand Down Expand Up @@ -234,7 +234,7 @@ def run(
raw_responses = []
uploaded_file_ids = []

for source, user_metadata in zip(sources, meta_list):
for source, user_metadata in zip(sources, meta_list, strict=True):
document, raw_response, uploaded_file_id = self._process_single_source(
source,
user_metadata,
Expand All @@ -259,12 +259,12 @@ def run(

def _process_single_source(
self,
source: Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk],
source: str | Path | ByteStream | DocumentURLChunk | FileChunk | ImageURLChunk,
user_metadata: dict[str, Any],
bbox_annotation_format: Optional[Any],
document_annotation_format: Optional[Any],
document_annotation_schema: Optional[type[BaseModel]],
) -> tuple[Optional[Document], Optional[dict[str, Any]], Optional[str]]:
bbox_annotation_format: Any | None,
document_annotation_format: Any | None,
document_annotation_schema: type[BaseModel] | None,
) -> tuple[Document | None, dict[str, Any] | None, str | None]:
"""
Process a single source and return the document, raw response, and file_id if uploaded.

Expand Down Expand Up @@ -334,8 +334,8 @@ def _cleanup_uploaded_files(self, file_ids: list[str]) -> None:

def _convert_source_to_chunk(
self,
source: Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk],
) -> Union[DocumentURLChunk, FileChunk, ImageURLChunk]:
source: str | Path | ByteStream | DocumentURLChunk | FileChunk | ImageURLChunk,
) -> DocumentURLChunk | FileChunk | ImageURLChunk:
"""
Convert various source types to Mistral-compatible chunk format.

Expand Down Expand Up @@ -371,7 +371,7 @@ def _process_ocr_response(
self,
ocr_response: OCRResponse,
user_metadata: dict[str, Any],
document_annotation_schema: Optional[type[BaseModel]],
document_annotation_schema: type[BaseModel] | None,
) -> Document:
"""
Convert an OCR response from Mistral API into a single Haystack Document.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Optional
from typing import Any

from haystack import component, default_to_dict
from haystack.components.embedders import OpenAIDocumentEmbedder
Expand Down Expand Up @@ -34,17 +34,17 @@ def __init__(
self,
api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"),
model: str = "mistral-embed",
api_base_url: Optional[str] = "https://api.mistral.ai/v1",
api_base_url: str | None = "https://api.mistral.ai/v1",
prefix: str = "",
suffix: str = "",
batch_size: int = 32,
progress_bar: bool = True,
meta_fields_to_embed: Optional[list[str]] = None,
meta_fields_to_embed: list[str] | None = None,
embedding_separator: str = "\n",
*,
timeout: Optional[float] = None,
max_retries: Optional[int] = None,
http_client_kwargs: Optional[dict[str, Any]] = None,
timeout: float | None = None,
max_retries: int | None = None,
http_client_kwargs: dict[str, Any] | None = None,
):
"""
Creates a MistralDocumentEmbedder component.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Optional
from typing import Any

from haystack import component, default_to_dict
from haystack.components.embedders import OpenAITextEmbedder
Expand Down Expand Up @@ -32,13 +32,13 @@ def __init__(
self,
api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"),
model: str = "mistral-embed",
api_base_url: Optional[str] = "https://api.mistral.ai/v1",
api_base_url: str | None = "https://api.mistral.ai/v1",
prefix: str = "",
suffix: str = "",
*,
timeout: Optional[float] = None,
max_retries: Optional[int] = None,
http_client_kwargs: Optional[dict[str, Any]] = None,
timeout: float | None = None,
max_retries: int | None = None,
http_client_kwargs: dict[str, Any] | None = None,
):
"""
Creates an MistralTextEmbedder component.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Optional
from typing import Any

from haystack import component, default_to_dict, logging
from haystack.components.generators.chat import OpenAIChatGenerator
Expand Down Expand Up @@ -63,14 +63,14 @@ def __init__(
self,
api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"),
model: str = "mistral-small-latest",
streaming_callback: Optional[StreamingCallbackT] = None,
api_base_url: Optional[str] = "https://api.mistral.ai/v1",
generation_kwargs: Optional[dict[str, Any]] = None,
tools: Optional[ToolsType] = None,
streaming_callback: StreamingCallbackT | None = None,
api_base_url: str | None = "https://api.mistral.ai/v1",
generation_kwargs: dict[str, Any] | None = None,
tools: ToolsType | None = None,
*,
timeout: Optional[float] = None,
max_retries: Optional[int] = None,
http_client_kwargs: Optional[dict[str, Any]] = None,
timeout: float | None = None,
max_retries: int | None = None,
http_client_kwargs: dict[str, Any] | None = None,
):
"""
Creates an instance of MistralChatGenerator. Unless specified otherwise in the `model`, this is for Mistral's
Expand Down Expand Up @@ -137,10 +137,10 @@ def _prepare_api_call(
self,
*,
messages: list[ChatMessage],
streaming_callback: Optional[StreamingCallbackT] = None,
generation_kwargs: Optional[dict[str, Any]] = None,
tools: Optional[ToolsType] = None,
tools_strict: Optional[bool] = None,
streaming_callback: StreamingCallbackT | None = None,
generation_kwargs: dict[str, Any] | None = None,
tools: ToolsType | None = None,
tools_strict: bool | None = None,
) -> dict[str, Any]:
api_args = super(MistralChatGenerator, self)._prepare_api_call( # noqa: UP008
messages=messages,
Expand Down
Loading