diff --git a/integrations/ollama/pyproject.toml b/integrations/ollama/pyproject.toml index 33fd232d8f..312954c76a 100644 --- a/integrations/ollama/pyproject.toml +++ b/integrations/ollama/pyproject.toml @@ -88,7 +88,7 @@ known-first-party = ["haystack_integrations"] [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py index 6a493d986c..99d81a0970 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py @@ -1,5 +1,5 @@ import asyncio -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import Document, component from tqdm import tqdm @@ -30,13 +30,13 @@ def __init__( self, model: str = "nomic-embed-text", url: str = "http://localhost:11434", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: int = 120, keep_alive: Optional[Union[float, str]] = None, prefix: str = "", suffix: str = "", progress_bar: bool = True, - meta_fields_to_embed: Optional[List[str]] = None, + meta_fields_to_embed: Optional[list[str]] = None, embedding_separator: str = "\n", batch_size: int = 32, ): @@ -87,7 +87,7 @@ def __init__( self._client = Client(host=self.url, timeout=self.timeout) self._async_client = AsyncClient(host=self.url, timeout=self.timeout) - def _prepare_input(self, documents: List[Document]) -> List[Document]: + def _prepare_input(self, documents: list[Document]) -> list[Document]: """ Prepares the list of documents to embed by appropriate validation. """ @@ -100,7 +100,7 @@ def _prepare_input(self, documents: List[Document]) -> List[Document]: return documents - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: """ Prepares the texts to embed by concatenating the Document text with the metadata fields to embed. """ @@ -123,8 +123,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: return texts_to_embed def _embed_batch( - self, texts_to_embed: List[str], batch_size: int, generation_kwargs: Optional[Dict[str, Any]] = None - ) -> List[List[float]]: + self, texts_to_embed: list[str], batch_size: int, generation_kwargs: Optional[dict[str, Any]] = None + ) -> list[list[float]]: """ Internal method to embed a batch of texts. """ @@ -146,8 +146,8 @@ def _embed_batch( return all_embeddings async def _embed_batch_async( - self, texts_to_embed: List[str], batch_size: int, generation_kwargs: Optional[Dict[str, Any]] = None - ) -> List[List[float]]: + self, texts_to_embed: list[str], batch_size: int, generation_kwargs: Optional[dict[str, Any]] = None + ) -> list[list[float]]: """ Internal method to embed a batch of texts asynchronously. 
""" @@ -175,10 +175,10 @@ async def _embed_batch_async( return all_embeddings - @component.output_types(documents=List[Document], meta=Dict[str, Any]) + @component.output_types(documents=list[Document], meta=dict[str, Any]) def run( - self, documents: List[Document], generation_kwargs: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[List[Document], Dict[str, Any]]]: + self, documents: list[Document], generation_kwargs: Optional[dict[str, Any]] = None + ) -> dict[str, Union[list[Document], dict[str, Any]]]: """ Runs an Ollama Model to compute embeddings of the provided documents. @@ -210,10 +210,10 @@ def run( return {"documents": documents, "meta": {"model": self.model}} - @component.output_types(documents=List[Document], meta=Dict[str, Any]) + @component.output_types(documents=list[Document], meta=dict[str, Any]) async def run_async( - self, documents: List[Document], generation_kwargs: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[List[Document], Dict[str, Any]]]: + self, documents: list[Document], generation_kwargs: Optional[dict[str, Any]] = None + ) -> dict[str, Union[list[Document], dict[str, Any]]]: """ Asynchronously run an Ollama Model to compute embeddings of the provided documents. diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py index 23ba6b1872..2f39079da2 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component @@ -25,7 +25,7 @@ def __init__( self, model: str = "nomic-embed-text", url: str = "http://localhost:11434", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: int = 120, keep_alive: Optional[Union[float, str]] = None, ): @@ -58,10 +58,10 @@ def __init__( self._client = Client(host=self.url, timeout=self.timeout) self._async_client = AsyncClient(host=self.url, timeout=self.timeout) - @component.output_types(embedding=List[float], meta=Dict[str, Any]) + @component.output_types(embedding=list[float], meta=dict[str, Any]) def run( - self, text: str, generation_kwargs: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[List[float], Dict[str, Any]]]: + self, text: str, generation_kwargs: Optional[dict[str, Any]] = None + ) -> dict[str, Union[list[float], dict[str, Any]]]: """ Runs an Ollama Model to compute embeddings of the provided text. @@ -85,10 +85,10 @@ def run( return result - @component.output_types(embedding=List[float], meta=Dict[str, Any]) + @component.output_types(embedding=list[float], meta=dict[str, Any]) async def run_async( - self, text: str, generation_kwargs: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[List[float], Dict[str, Any]]]: + self, text: str, generation_kwargs: Optional[dict[str, Any]] = None + ) -> dict[str, Union[list[float], dict[str, Any]]]: """ Asynchronously run an Ollama Model to compute embeddings of the provided text. 
diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py index 257b25728d..879946bdff 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py @@ -1,5 +1,6 @@ import json -from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Literal, Optional, Union +from collections.abc import AsyncIterator, Iterator +from typing import Any, Callable, Literal, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ( @@ -23,14 +24,14 @@ from ollama import AsyncClient, ChatResponse, Client -FINISH_REASON_MAPPING: Dict[str, FinishReason] = { +FINISH_REASON_MAPPING: dict[str, FinishReason] = { "stop": "stop", "tool_calls": "tool_calls", # we skip load and unload reasons } -def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> Dict[str, Any]: +def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> dict[str, Any]: """ Convert a ChatMessage to the format expected by the Ollama Chat API. """ @@ -48,7 +49,7 @@ def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> Dict[str, Any msg = "For Ollama compatibility, a `ChatMessage` can contain at most one `TextContent` or `ToolCallResult`." raise ValueError(msg) - ollama_msg: Dict[str, Any] = {"role": message.role.value} + ollama_msg: dict[str, Any] = {"role": message.role.value} if tool_call_results: # Ollama does not provide a way to communicate errors in tool invocations, so we ignore the error field @@ -70,7 +71,7 @@ def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> Dict[str, Any return ollama_msg -def _convert_ollama_meta_to_openai_format(input_response_dict: Dict) -> Dict[str, Any]: +def _convert_ollama_meta_to_openai_format(input_response_dict: dict) -> dict[str, Any]: """ Map Ollama metadata keys onto the OpenAI-compatible names Haystack expects. All fields that are not part of the OpenAI metadata are left unchanged in the returned dict. @@ -129,7 +130,7 @@ def _convert_ollama_response_to_chatmessage(ollama_response: ChatResponse) -> Ch response_dict = ollama_response.model_dump() ollama_message = response_dict["message"] text = ollama_message["content"] - tool_calls: List[ToolCall] = [] + tool_calls: list[ToolCall] = [] if ollama_tool_calls := ollama_message.get("tool_calls"): for ollama_tc in ollama_tool_calls: @@ -211,7 +212,7 @@ def __init__( self, model: str = "qwen3:0.6b", url: str = "http://localhost:11434", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: int = 120, keep_alive: Optional[Union[float, str]] = None, streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, @@ -274,7 +275,7 @@ def __init__( self._client = Client(host=self.url, timeout=self.timeout) self._async_client = AsyncClient(host=self.url, timeout=self.timeout) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -296,7 +297,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "OllamaChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "OllamaChatGenerator": """ Deserializes the component from a dictionary. 
@@ -315,7 +316,7 @@ def _handle_streaming_response( self, response_iter: Iterator[ChatResponse], callback: Optional[SyncStreamingCallbackT], - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Merge an Ollama streaming response into a single ChatMessage, preserving tool calls. Works even when arguments arrive piecemeal as str fragments @@ -323,12 +324,12 @@ def _handle_streaming_response( """ component_info = ComponentInfo.from_component(self) - chunks: List[StreamingChunk] = [] + chunks: list[StreamingChunk] = [] # Accumulators - arg_by_id: Dict[str, str] = {} - name_by_id: Dict[str, str] = {} - id_order: List[str] = [] + arg_by_id: dict[str, str] = {} + name_by_id: dict[str, str] = {} + id_order: list[str] = [] tool_call_index: int = 0 # Stream @@ -399,18 +400,18 @@ async def _handle_streaming_response_async( self, response_iter: AsyncIterator[ChatResponse], callback: Optional[AsyncStreamingCallbackT], - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Merge an Ollama async streaming response into a single ChatMessage, preserving tool calls. Works even when arguments arrive piecemeal as str fragments or as full JSON dicts.""" component_info = ComponentInfo.from_component(self) - chunks: List[StreamingChunk] = [] + chunks: list[StreamingChunk] = [] # Accumulators - arg_by_id: Dict[str, str] = {} - name_by_id: Dict[str, str] = {} - id_order: List[str] = [] + arg_by_id: dict[str, str] = {} + name_by_id: dict[str, str] = {} + id_order: list[str] = [] tool_call_index: int = 0 # Stream @@ -466,15 +467,15 @@ async def _handle_streaming_response_async( return {"replies": [reply]} - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, + messages: list[ChatMessage], + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, *, streaming_callback: Optional[StreamingCallbackT] = None, - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Runs an Ollama Model on a given chat history. @@ -532,15 +533,15 @@ def run( # non-stream path return {"replies": [_convert_ollama_response_to_chatmessage(ollama_response=response)]} - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) async def run_async( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, + messages: list[ChatMessage], + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, *, streaming_callback: Optional[StreamingCallbackT] = None, - ) -> Dict[str, List[ChatMessage]]: + ) -> dict[str, list[ChatMessage]]: """ Async version of run. Runs an Ollama Model on a given chat history. 
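The `Iterator`/`AsyncIterator` move to `collections.abc` is the same PEP 585 cleanup, since the `typing` aliases are deprecated from 3.9. Separately, the docstrings above describe merging streamed tool calls whose arguments may arrive either as str fragments or as full JSON dicts. A simplified sketch of that accumulator idea, assuming fragments reduced to `(call_id, name, args)` tuples, where the real handlers work on `ChatResponse` chunks:

```python
# Simplified sketch of the arg_by_id / name_by_id / id_order pattern used by
# the streaming handlers: buffer string fragments per call id, replace the
# buffer when a full JSON dict arrives, and parse once at the end.
import json
from collections.abc import Iterator  # typing.Iterator is deprecated since 3.9


def merge_tool_calls(fragments: Iterator[tuple[str, str, object]]) -> dict[str, dict]:
    arg_by_id: dict[str, str] = {}
    name_by_id: dict[str, str] = {}
    id_order: list[str] = []
    for call_id, name, args in fragments:
        if call_id not in id_order:
            id_order.append(call_id)
            name_by_id[call_id] = name
        if isinstance(args, dict):  # full JSON payload: serialize and replace
            arg_by_id[call_id] = json.dumps(args)
        else:  # str fragment: append to the running buffer
            arg_by_id[call_id] = arg_by_id.get(call_id, "") + str(args)
    return {
        cid: {"name": name_by_id[cid], "arguments": json.loads(arg_by_id[cid])}
        for cid in id_order
    }


print(merge_tool_calls(iter([("1", "add", '{"a": 1'), ("1", "add", ', "b": 2}')])))
# {'1': {'name': 'add', 'arguments': {'a': 1, 'b': 2}}}
```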
diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py index bcc3f323b2..50d5d3ba4a 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import StreamingChunk @@ -7,7 +7,7 @@ from ollama import Client, GenerateResponse -def _convert_ollama_meta_to_openai_format(intput_response_dict: Dict) -> Dict: +def _convert_ollama_meta_to_openai_format(intput_response_dict: dict) -> dict: """ Converts Ollama metadata format to OpenAI-metadata format for standardization. @@ -98,7 +98,7 @@ def __init__( self, model: str = "orca-mini", url: str = "http://localhost:11434", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, system_prompt: Optional[str] = None, template: Optional[str] = None, raw: bool = False, @@ -148,7 +148,7 @@ def __init__( self._client = Client(host=self.url, timeout=self.timeout) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -170,7 +170,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "OllamaGenerator": + def from_dict(cls, data: dict[str, Any]) -> "OllamaGenerator": """ Deserializes the component from a dictionary. @@ -185,7 +185,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "OllamaGenerator": data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler) return default_from_dict(cls, data) - def _convert_to_response(self, ollama_response: GenerateResponse) -> Dict[str, List[Any]]: + def _convert_to_response(self, ollama_response: GenerateResponse) -> dict[str, list[Any]]: """ Converts a response from the Ollama API to the required Haystack format. """ @@ -195,7 +195,7 @@ def _convert_to_response(self, ollama_response: GenerateResponse) -> Dict[str, L return {"replies": [reply], "meta": [meta]} - def _convert_to_streaming_response(self, chunks: List[StreamingChunk]) -> Dict[str, List[Any]]: + def _convert_to_streaming_response(self, chunks: list[StreamingChunk]) -> dict[str, list[Any]]: """ Converts a list of chunks response required Haystack format. 
""" @@ -209,11 +209,11 @@ def _convert_to_streaming_response(self, chunks: List[StreamingChunk]) -> Dict[s def _handle_streaming_response( self, response: Any, streaming_callback: Optional[Callable[[StreamingChunk], None]] - ) -> List[StreamingChunk]: + ) -> list[StreamingChunk]: """ Handles Streaming response cases """ - chunks: List[StreamingChunk] = [] + chunks: list[StreamingChunk] = [] for chunk in response: chunk_delta: StreamingChunk = self._build_chunk(chunk) chunks.append(chunk_delta) @@ -232,14 +232,14 @@ def _build_chunk(self, chunk_response: Any) -> StreamingChunk: chunk_message = StreamingChunk(content, meta) return chunk_message - @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) + @component.output_types(replies=list[str], meta=list[dict[str, Any]]) def run( self, prompt: str, - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, *, streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, - ) -> Dict[str, List[Any]]: + ) -> dict[str, list[Any]]: """ Runs an Ollama Model on the given prompt. @@ -269,7 +269,7 @@ def run( ) if stream: - chunks: List[StreamingChunk] = self._handle_streaming_response(response, resolved_streaming_callback) + chunks: list[StreamingChunk] = self._handle_streaming_response(response, resolved_streaming_callback) return self._convert_to_streaming_response(chunks) return self._convert_to_response(response) diff --git a/integrations/openrouter/pyproject.toml b/integrations/openrouter/pyproject.toml index 8360962e02..5b01daaad0 100644 --- a/integrations/openrouter/pyproject.toml +++ b/integrations/openrouter/pyproject.toml @@ -76,7 +76,7 @@ disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py index af423cac06..e36fe3dd4f 100644 --- a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py +++ b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -63,12 +63,12 @@ def __init__( model: str = "openai/gpt-4o-mini", streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://openrouter.ai/api/v1", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, timeout: Optional[float] = None, - extra_headers: Optional[Dict[str, Any]] = None, + extra_headers: Optional[dict[str, Any]] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of OpenRouterChatGenerator. Unless specified otherwise, @@ -136,7 +136,7 @@ def __init__( ) self.extra_headers = extra_headers - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. 
diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml index 5625b99ea6..dc3a3d90db 100644 --- a/integrations/opensearch/pyproject.toml +++ b/integrations/opensearch/pyproject.toml @@ -84,7 +84,7 @@ allow-direct-references = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py index 176f1aa1b4..708b18d280 100644 --- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py @@ -4,7 +4,7 @@ # ruff: noqa: FBT001 Boolean-typed positional argument in function definition -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import Document @@ -28,13 +28,13 @@ def __init__( self, *, document_store: OpenSearchDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: Union[int, str] = "AUTO", top_k: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, raise_on_failure: bool = True, ): """ @@ -114,7 +114,7 @@ def __init__( self._custom_query = custom_query self._raise_on_failure = raise_on_failure - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -134,7 +134,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchBM25Retriever": + def from_dict(cls, data: dict[str, Any]) -> "OpenSearchBM25Retriever": """ Deserializes the component from a dictionary. @@ -158,13 +158,13 @@ def _prepare_bm25_args( self, *, query: str, - filters: Optional[Dict[str, Any]], + filters: Optional[dict[str, Any]], all_terms_must_match: Optional[bool], top_k: Optional[int], fuzziness: Optional[Union[str, int]], scale_score: Optional[bool], - custom_query: Optional[Dict[str, Any]], - ) -> Dict[str, Any]: + custom_query: Optional[dict[str, Any]], + ) -> dict[str, Any]: filters = apply_filter_policy(self._filter_policy, self._filters, filters) if filters is None: @@ -190,18 +190,18 @@ def _prepare_bm25_args( "custom_query": custom_query, } - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, query: str, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, all_terms_must_match: Optional[bool] = None, top_k: Optional[int] = None, fuzziness: Optional[Union[int, str]] = None, scale_score: Optional[bool] = None, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, document_store: Optional[OpenSearchDocumentStore] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieve documents using BM25 retrieval. 
@@ -256,7 +256,7 @@ def run( """ - docs: List[Document] = [] + docs: list[Document] = [] bm25_args = self._prepare_bm25_args( query=query, @@ -290,18 +290,18 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( # pylint: disable=too-many-positional-arguments self, query: str, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, all_terms_must_match: Optional[bool] = None, top_k: Optional[int] = None, fuzziness: Optional[Union[int, str]] = None, scale_score: Optional[bool] = None, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, document_store: Optional[OpenSearchDocumentStore] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents using BM25 retrieval. @@ -324,7 +324,7 @@ async def run_async( # pylint: disable=too-many-positional-arguments - documents: List of retrieved Documents. """ - docs: List[Document] = [] + docs: list[Document] = [] bm25_args = self._prepare_bm25_args( query=query, filters=filters, diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py index 86052cd67c..601dbe1ba4 100644 --- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py @@ -4,7 +4,7 @@ # ruff: noqa: FBT001 Boolean-typed positional argument in function definition -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses import Document @@ -28,10 +28,10 @@ def __init__( self, *, document_store: OpenSearchDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, raise_on_failure: bool = True, efficient_filtering: bool = False, ): @@ -107,7 +107,7 @@ def __init__( self._raise_on_failure = raise_on_failure self._efficient_filtering = efficient_filtering - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -126,7 +126,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchEmbeddingRetriever": + def from_dict(cls, data: dict[str, Any]) -> "OpenSearchEmbeddingRetriever": """ Deserializes the component from a dictionary. 
@@ -146,16 +146,16 @@ def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchEmbeddingRetriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, efficient_filtering: Optional[bool] = None, document_store: Optional[OpenSearchDocumentStore] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieve documents using a vector similarity metric. @@ -224,7 +224,7 @@ def run( if efficient_filtering is None: efficient_filtering = self._efficient_filtering - docs: List[Document] = [] + docs: list[Document] = [] if document_store is not None: if not isinstance(document_store, OpenSearchDocumentStore): @@ -255,16 +255,16 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, efficient_filtering: Optional[bool] = None, document_store: Optional[OpenSearchDocumentStore] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents using a vector similarity metric. 
@@ -333,7 +333,7 @@ async def run_async( if efficient_filtering is None: efficient_filtering = self._efficient_filtering - docs: List[Document] = [] + docs: list[Document] = [] if document_store is not None: if not isinstance(document_store, OpenSearchDocumentStore): diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py index 898f800a0d..eb7c9e94c5 100644 --- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from haystack import Document, Pipeline, default_from_dict, default_to_dict, logging, super_component from haystack.components.embedders.types import TextEmbedder @@ -92,21 +92,21 @@ def __init__( *, embedder: TextEmbedder, # OpenSearchBM25Retriever - filters_bm25: Optional[Dict[str, Any]] = None, + filters_bm25: Optional[dict[str, Any]] = None, fuzziness: Union[int, str] = "AUTO", top_k_bm25: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, filter_policy_bm25: Union[str, FilterPolicy] = FilterPolicy.REPLACE, - custom_query_bm25: Optional[Dict[str, Any]] = None, + custom_query_bm25: Optional[dict[str, Any]] = None, # OpenSearchEmbeddingRetriever - filters_embedding: Optional[Dict[str, Any]] = None, + filters_embedding: Optional[dict[str, Any]] = None, top_k_embedding: int = 10, filter_policy_embedding: Union[str, FilterPolicy] = FilterPolicy.REPLACE, - custom_query_embedding: Optional[Dict[str, Any]] = None, + custom_query_embedding: Optional[dict[str, Any]] = None, # DocumentJoiner join_mode: Union[str, JoinMode] = JoinMode.RECIPROCAL_RANK_FUSION, - weights: Optional[List[float]] = None, + weights: Optional[list[float]] = None, top_k: Optional[int] = None, sort_by_score: bool = True, # extra kwargs @@ -192,7 +192,7 @@ def __init__( self.top_k = top_k self.sort_by_score = sort_by_score - init_args: Dict[str, Any] = { + init_args: dict[str, Any] = { "bm25_retriever": { "document_store": self.document_store, "filters": self.filters_bm25, @@ -242,11 +242,11 @@ def warm_up(self) -> None: ... def run( self, query: str, - filters_bm25: Optional[Dict[str, Any]] = None, - filters_embedding: Optional[Dict[str, Any]] = None, + filters_bm25: Optional[dict[str, Any]] = None, + filters_embedding: Optional[dict[str, Any]] = None, top_k_bm25: Optional[int] = None, top_k_embedding: Optional[int] = None, - ) -> Dict[str, List[Document]]: ... + ) -> dict[str, list[Document]]: ... 
def _create_pipeline(self, data: dict[str, Any]) -> Pipeline: """ diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py index 0c50e95466..f20ec1dd87 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field, fields -from typing import Any, Dict, Optional, Type, TypeVar +from typing import Any, Optional, TypeVar from haystack import default_from_dict, default_to_dict from haystack.document_stores.errors import DocumentStoreError @@ -99,7 +99,7 @@ def __post_init__(self) -> None: """ self._urllib3_aws_v4_signer_auth = self._get_aws_v4_signer_auth(Urllib3AWSV4SignerAuth) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Converts the object to a dictionary representation for serialization. """ @@ -114,7 +114,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, **_fields) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> Optional["AWSAuth"]: + def from_dict(cls, data: dict[str, Any]) -> Optional["AWSAuth"]: """ Converts a dictionary representation to an AWSAuth object. """ @@ -125,7 +125,7 @@ def from_dict(cls, data: Dict[str, Any]) -> Optional["AWSAuth"]: ) return default_from_dict(cls, data) - def __call__(self, method: str, url: str, body: Any) -> Dict[str, str]: + def __call__(self, method: str, url: str, body: Any) -> dict[str, str]: """ Signs the request and returns headers. @@ -139,7 +139,7 @@ def __call__(self, method: str, url: str, body: Any) -> Dict[str, str]: """ return self._urllib3_aws_v4_signer_auth(method, url, body) - def _get_aws_v4_signer_auth(self, signer_auth_class: Type[TSignerAuth]) -> TSignerAuth: + def _get_aws_v4_signer_auth(self, signer_auth_class: type[TSignerAuth]) -> TSignerAuth: try: region_name = _resolve_secret(self.aws_region_name) session = _get_aws_session( @@ -184,7 +184,7 @@ def __init__(self, aws_auth: AWSAuth) -> None: self.aws_auth = aws_auth self._async_aws_v4_signer_auth = self.aws_auth._get_aws_v4_signer_auth(AWSV4SignerAsyncAuth) - def __call__(self, method: str, url: str, query_string: str, body: Any) -> Dict[str, str]: + def __call__(self, method: str, url: str, query_string: str, body: Any) -> dict[str, str]: """ Signs the request and returns headers. 
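`AWSAuth.__call__` keeps the callable-auth shape the OpenSearch client invokes per request, returning headers to merge into the outgoing request. A toy stand-in under that assumption (the real class delegates to `Urllib3AWSV4SignerAuth`, which performs SigV4 signing rather than attaching a static header):

```python
# Toy callable-auth sketch: not the real signer, just the same __call__ shape
# as AWSAuth above, where (method, url, body) go in and headers come out.
class HeaderAuth:
    def __init__(self, token: str) -> None:
        self._token = token

    def __call__(self, method: str, url: str, body: object) -> dict[str, str]:
        # A real SigV4 signer would hash the body and sign method + url + headers.
        _ = (method, url, body)
        return {"Authorization": f"Bearer {self._token}"}


auth = HeaderAuth("secret-token")
print(auth("GET", "https://example.com/_search", None))
# {'Authorization': 'Bearer secret-token'}
```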
diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py index 5861251a1a..53b7518253 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py @@ -2,8 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +from collections.abc import Mapping from math import exp -from typing import Any, Dict, List, Mapping, Optional, Tuple, Union +from typing import Any, Optional, Union from haystack import default_from_dict, default_to_dict, logging from haystack.dataclasses import Document @@ -18,7 +19,7 @@ logger = logging.getLogger(__name__) -Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]]]]] +Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]]]]] # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to # True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor @@ -70,9 +71,9 @@ def __init__( max_chunk_bytes: int = DEFAULT_MAX_CHUNK_BYTES, embedding_dim: int = 768, return_embedding: bool = False, - method: Optional[Dict[str, Any]] = None, - mappings: Optional[Dict[str, Any]] = None, - settings: Optional[Dict[str, Any]] = DEFAULT_SETTINGS, + method: Optional[dict[str, Any]] = None, + mappings: Optional[dict[str, Any]] = None, + settings: Optional[dict[str, Any]] = DEFAULT_SETTINGS, create_index: bool = True, http_auth: Any = ( Secret.from_env_var("OPENSEARCH_USERNAME", strict=False), # noqa: B008 @@ -153,8 +154,8 @@ def __init__( self._async_client: Optional[AsyncOpenSearch] = None self._initialized = False - def _get_default_mappings(self) -> Dict[str, Any]: - default_mappings: Dict[str, Any] = { + def _get_default_mappings(self) -> dict[str, Any]: + default_mappings: dict[str, Any] = { "properties": { "embedding": {"type": "knn_vector", "index": True, "dimension": self._embedding_dim}, "content": {"type": "text"}, @@ -175,8 +176,8 @@ def _get_default_mappings(self) -> Dict[str, Any]: def create_index( self, index: Optional[str] = None, - mappings: Optional[Dict[str, Any]] = None, - settings: Optional[Dict[str, Any]] = None, + mappings: Optional[dict[str, Any]] = None, + settings: Optional[dict[str, Any]] = None, ) -> None: """ Creates an index in OpenSearch. @@ -202,7 +203,7 @@ def create_index( if not self._client.indices.exists(index=index): self._client.indices.create(index=index, body={"mappings": mappings, "settings": settings}) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -210,7 +211,7 @@ def to_dict(self) -> Dict[str, Any]: Dictionary with serialized data. """ # Handle http_auth serialization - http_auth: Union[List[Dict[str, Any]], Dict[str, Any], Tuple[str, str], List[str], str] = "" + http_auth: Union[list[dict[str, Any]], dict[str, Any], tuple[str, str], list[str], str] = "" if isinstance(self._http_auth, list) and self._http_auth_are_secrets: # Recreate the Secret objects for serialization http_auth = [ @@ -241,7 +242,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchDocumentStore": + def from_dict(cls, data: dict[str, Any]) -> "OpenSearchDocumentStore": """ Deserializes the component from a dictionary. 
@@ -320,7 +321,7 @@ async def count_documents_async(self) -> int: return (await self._async_client.count(index=self._index))["count"] @staticmethod - def _deserialize_search_hits(hits: List[Dict[str, Any]]) -> List[Document]: + def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]: out = [] for hit in hits: data = hit["_source"] @@ -331,8 +332,8 @@ def _deserialize_search_hits(hits: List[Dict[str, Any]]) -> List[Document]: return out - def _prepare_filter_search_request(self, filters: Optional[Dict[str, Any]]) -> Dict[str, Any]: - search_kwargs: Dict[str, Any] = {"size": 10_000} + def _prepare_filter_search_request(self, filters: Optional[dict[str, Any]]) -> dict[str, Any]: + search_kwargs: dict[str, Any] = {"size": 10_000} if filters: search_kwargs["query"] = {"bool": {"filter": normalize_filters(filters)}} @@ -342,17 +343,17 @@ def _prepare_filter_search_request(self, filters: Optional[Dict[str, Any]]) -> D search_kwargs["_source"] = {"excludes": ["embedding"]} return search_kwargs - def _search_documents(self, request_body: Dict[str, Any]) -> List[Document]: + def _search_documents(self, request_body: dict[str, Any]) -> list[Document]: assert self._client is not None search_results = self._client.search(index=self._index, body=request_body) return OpenSearchDocumentStore._deserialize_search_hits(search_results["hits"]["hits"]) - async def _search_documents_async(self, request_body: Dict[str, Any]) -> List[Document]: + async def _search_documents_async(self, request_body: dict[str, Any]) -> list[Document]: assert self._async_client is not None search_results = await self._async_client.search(index=self._index, body=request_body) return OpenSearchDocumentStore._deserialize_search_hits(search_results["hits"]["hits"]) - def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Returns the documents that match the filters provided. @@ -365,7 +366,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc self._ensure_initialized() return self._search_documents(self._prepare_filter_search_request(filters)) - async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Asynchronously returns the documents that match the filters provided. @@ -379,8 +380,8 @@ async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) return await self._search_documents_async(self._prepare_filter_search_request(filters)) def _prepare_bulk_write_request( - self, *, documents: List[Document], policy: DuplicatePolicy, is_async: bool - ) -> Dict[str, Any]: + self, *, documents: list[Document], policy: DuplicatePolicy, is_async: bool + ) -> dict[str, Any]: if len(documents) > 0 and not isinstance(documents[0], Document): msg = "param 'documents' must contain a list of objects of type Document" raise ValueError(msg) @@ -420,7 +421,7 @@ def _prepare_bulk_write_request( } @staticmethod - def _process_bulk_write_errors(errors: List[Dict[str, Any]], policy: DuplicatePolicy) -> None: + def _process_bulk_write_errors(errors: list[dict[str, Any]], policy: DuplicatePolicy) -> None: if len(errors) == 0: return @@ -449,7 +450,7 @@ def _process_bulk_write_errors(errors: List[Dict[str, Any]], policy: DuplicatePo msg = f"Failed to write documents to OpenSearch. 
Errors:\n{other_errors}" raise DocumentStoreError(msg) - def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: + def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: """ Writes documents to the document store. @@ -467,7 +468,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D return documents_written async def write_documents_async( - self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE + self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE ) -> int: """ Asynchronously writes documents to the document store. @@ -484,7 +485,7 @@ async def write_documents_async( return documents_written @staticmethod - def _deserialize_document(hit: Dict[str, Any]) -> Document: + def _deserialize_document(hit: dict[str, Any]) -> Document: """ Creates a Document from the search hit provided. This is mostly useful in self.filter_documents(). @@ -497,7 +498,7 @@ def _deserialize_document(hit: Dict[str, Any]) -> Document: return Document.from_dict(data) - def _prepare_bulk_delete_request(self, *, document_ids: List[str], is_async: bool) -> Dict[str, Any]: + def _prepare_bulk_delete_request(self, *, document_ids: list[str], is_async: bool) -> dict[str, Any]: return { "client": self._client if not is_async else self._async_client, "actions": ({"_op_type": "delete", "_id": id_} for id_ in document_ids), @@ -507,7 +508,7 @@ def _prepare_bulk_delete_request(self, *, document_ids: List[str], is_async: boo "max_chunk_bytes": self._max_chunk_bytes, } - def delete_documents(self, document_ids: List[str]) -> None: + def delete_documents(self, document_ids: list[str]) -> None: """ Deletes documents that match the provided `document_ids` from the document store. @@ -518,7 +519,7 @@ def delete_documents(self, document_ids: List[str]) -> None: bulk(**self._prepare_bulk_delete_request(document_ids=document_ids, is_async=False)) - async def delete_documents_async(self, document_ids: List[str]) -> None: + async def delete_documents_async(self, document_ids: list[str]) -> None: """ Asynchronously deletes documents that match the provided `document_ids` from the document store. @@ -528,7 +529,7 @@ async def delete_documents_async(self, document_ids: List[str]) -> None: await async_bulk(**self._prepare_bulk_delete_request(document_ids=document_ids, is_async=True)) - def _prepare_delete_all_request(self, *, is_async: bool) -> Dict[str, Any]: + def _prepare_delete_all_request(self, *, is_async: bool) -> dict[str, Any]: return { "index": self._index, "body": {"query": {"match_all": {}}}, # Delete all documents @@ -608,7 +609,7 @@ async def delete_all_documents_async(self, recreate_index: bool = False) -> None msg = f"Failed to delete all documents from OpenSearch: {e!s}" raise DocumentStoreError(msg) from e - def delete_by_filter(self, filters: Dict[str, Any]) -> int: + def delete_by_filter(self, filters: dict[str, Any]) -> int: """ Deletes all documents that match the provided filters. @@ -634,7 +635,7 @@ def delete_by_filter(self, filters: Dict[str, Any]) -> int: msg = f"Failed to delete documents by filter from OpenSearch: {e!s}" raise DocumentStoreError(msg) from e - async def delete_by_filter_async(self, filters: Dict[str, Any]) -> int: + async def delete_by_filter_async(self, filters: dict[str, Any]) -> int: """ Asynchronously deletes all documents that match the provided filters. 
@@ -660,7 +661,7 @@ async def delete_by_filter_async(self, filters: Dict[str, Any]) -> int: msg = f"Failed to delete documents by filter from OpenSearch: {e!s}" raise DocumentStoreError(msg) from e - def update_by_filter(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int: + def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int: """ Updates the metadata of all documents that match the provided filters. @@ -697,7 +698,7 @@ def update_by_filter(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int msg = f"Failed to update documents by filter in OpenSearch: {e!s}" raise DocumentStoreError(msg) from e - async def update_by_filter_async(self, filters: Dict[str, Any], meta: Dict[str, Any]) -> int: + async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int: """ Asynchronously updates the metadata of all documents that match the provided filters. @@ -738,14 +739,14 @@ def _prepare_bm25_search_request( self, *, query: str, - filters: Optional[Dict[str, Any]], + filters: Optional[dict[str, Any]], fuzziness: Union[int, str], top_k: int, all_terms_must_match: bool, - custom_query: Optional[Dict[str, Any]], - ) -> Dict[str, Any]: + custom_query: Optional[dict[str, Any]], + ) -> dict[str, Any]: if not query: - body: Dict[str, Any] = {"query": {"bool": {"must": {"match_all": {}}}}} + body: dict[str, Any] = {"query": {"bool": {"must": {"match_all": {}}}}} if filters: body["query"]["bool"]["filter"] = normalize_filters(filters) @@ -790,7 +791,7 @@ def _prepare_bm25_search_request( return body @staticmethod - def _postprocess_bm25_search_results(*, results: List[Document], scale_score: bool) -> None: + def _postprocess_bm25_search_results(*, results: list[Document], scale_score: bool) -> None: if not scale_score: return @@ -803,13 +804,13 @@ def _bm25_retrieval( self, query: str, *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: Union[int, str] = "AUTO", top_k: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, - custom_query: Optional[Dict[str, Any]] = None, - ) -> List[Document]: + custom_query: Optional[dict[str, Any]] = None, + ) -> list[Document]: """ Retrieves documents that match the provided `query` using the BM25 search algorithm. @@ -841,13 +842,13 @@ async def _bm25_retrieval_async( self, query: str, *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, fuzziness: str = "AUTO", top_k: int = 10, scale_score: bool = False, all_terms_must_match: bool = False, - custom_query: Optional[Dict[str, Any]] = None, - ) -> List[Document]: + custom_query: Optional[dict[str, Any]] = None, + ) -> list[Document]: """ Asynchronously retrieves documents that match the provided `query` using the BM25 search algorithm. 
@@ -879,17 +880,17 @@ async def _bm25_retrieval_async( def _prepare_embedding_search_request( self, *, - query_embedding: List[float], - filters: Optional[Dict[str, Any]], + query_embedding: list[float], + filters: Optional[dict[str, Any]], top_k: int, - custom_query: Optional[Dict[str, Any]], + custom_query: Optional[dict[str, Any]], efficient_filtering: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: if not query_embedding: msg = "query_embedding must be a non-empty list of floats" raise ValueError(msg) - body: Dict[str, Any] + body: dict[str, Any] if isinstance(custom_query, dict): body = self._render_custom_query( custom_query, @@ -934,13 +935,13 @@ def _prepare_embedding_search_request( def _embedding_retrieval( self, - query_embedding: List[float], + query_embedding: list[float], *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, efficient_filtering: bool = False, - ) -> List[Document]: + ) -> list[Document]: """ Retrieves documents that are most similar to the query embedding using a vector similarity metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm. @@ -964,13 +965,13 @@ def _embedding_retrieval( async def _embedding_retrieval_async( self, - query_embedding: List[float], + query_embedding: list[float], *, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, - custom_query: Optional[Dict[str, Any]] = None, + custom_query: Optional[dict[str, Any]] = None, efficient_filtering: bool = False, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm. @@ -992,7 +993,7 @@ async def _embedding_retrieval_async( ) return await self._search_documents_async(search_params) - def _render_custom_query(self, custom_query: Any, substitutions: Dict[str, Any]) -> Any: + def _render_custom_query(self, custom_query: Any, substitutions: dict[str, Any]) -> Any: """ Recursively replaces the placeholders in the custom_query with the actual values. diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py index be273e1397..30f0790420 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py @@ -2,12 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 from datetime import datetime -from typing import Any, Dict, List +from typing import Any from haystack.errors import FilterError -def normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: +def normalize_filters(filters: dict[str, Any]) -> dict[str, Any]: """ Converts Haystack filters in OpenSearch compatible filters. 
""" @@ -20,7 +20,7 @@ def normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: return _parse_logical_condition(filters) -def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]: if "operator" not in condition: msg = f"'operator' key missing in {condition}" raise FilterError(msg) @@ -43,7 +43,7 @@ def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]: raise FilterError(msg) -def _equal(field: str, value: Any) -> Dict[str, Any]: +def _equal(field: str, value: Any) -> dict[str, Any]: if value is None: return {"bool": {"must_not": {"exists": {"field": field}}}} @@ -62,7 +62,7 @@ def _equal(field: str, value: Any) -> Dict[str, Any]: return {"term": {field: value}} -def _not_equal(field: str, value: Any) -> Dict[str, Any]: +def _not_equal(field: str, value: Any) -> dict[str, Any]: if value is None: return {"exists": {"field": field}} @@ -75,7 +75,7 @@ def _not_equal(field: str, value: Any) -> Dict[str, Any]: return {"bool": {"must_not": {"term": {field: value}}}} -def _greater_than(field: str, value: Any) -> Dict[str, Any]: +def _greater_than(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '>' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -97,7 +97,7 @@ def _greater_than(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"gt": value}}} -def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _greater_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '>=' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -119,7 +119,7 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"gte": value}}} -def _less_than(field: str, value: Any) -> Dict[str, Any]: +def _less_than(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '<' is used we create a filter that would return a Document # if it has a field set and not set at the same time. @@ -141,7 +141,7 @@ def _less_than(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"lt": value}}} -def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: +def _less_than_equal(field: str, value: Any) -> dict[str, Any]: if value is None: # When the value is None and '<=' is used we create a filter that would return a Document # if it has a field set and not set at the same time. 
@@ -163,14 +163,14 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]: return {"range": {field: {"lte": value}}} -def _in(field: str, value: Any) -> Dict[str, Any]: +def _in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" raise FilterError(msg) return {"terms": {field: value}} -def _not_in(field: str, value: Any) -> Dict[str, Any]: +def _not_in(field: str, value: Any) -> dict[str, Any]: if not isinstance(value, list): msg = f"{field}'s value must be a list when using 'in' or 'not in' comparators" raise FilterError(msg) @@ -189,7 +189,7 @@ def _not_in(field: str, value: Any) -> Dict[str, Any]: } -def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: +def _parse_comparison_condition(condition: dict[str, Any]) -> dict[str, Any]: if "field" not in condition: # 'field' key is only found in comparison dictionaries. # We assume this is a logic dictionary since it's not present. @@ -215,7 +215,7 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]: return COMPARISON_OPERATORS[operator](field, value) -def _normalize_ranges(conditions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def _normalize_ranges(conditions: list[dict[str, Any]]) -> list[dict[str, Any]]: """ Merges range conditions acting on a same field. @@ -235,7 +235,7 @@ def _normalize_ranges(conditions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: range_conditions = [next(iter(c["range"].items())) for c in conditions if "range" in c] if range_conditions: conditions = [c for c in conditions if "range" not in c] - range_conditions_dict: Dict[str, Any] = {} + range_conditions_dict: dict[str, Any] = {} for field_name, comparison in range_conditions: if field_name not in range_conditions_dict: range_conditions_dict[field_name] = {} diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index fdb8e3f412..d74cdbaa80 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -4,7 +4,6 @@ import random import time -from typing import List from unittest.mock import patch import pytest @@ -119,7 +118,7 @@ class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsT you can add more to this class. """ - def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): + def assert_documents_are_equal(self, received: list[Document], expected: list[Document]): """ The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set. We don't want to compare the score, so we set it to None before comparing the documents. 
@@ -143,7 +142,7 @@ def test_create_index(self, document_store_readonly: OpenSearchDocumentStore):
         document_store_readonly.create_index()
         assert document_store_readonly._client.indices.exists(index=document_store_readonly._index)

-    def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, test_documents: List[Document]):
+    def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, test_documents: list[Document]):
         document_store.write_documents(test_documents)
         res = document_store._bm25_retrieval("functional", top_k=3)
@@ -152,7 +151,7 @@ def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, test_docu
         assert "functional" in res[1].content
         assert "functional" in res[2].content

-    def test_bm25_retrieval_pagination(self, document_store: OpenSearchDocumentStore, test_documents: List[Document]):
+    def test_bm25_retrieval_pagination(self, document_store: OpenSearchDocumentStore, test_documents: list[Document]):
         """
         Test that handling of pagination works as expected, when the matching documents are > 10.
         """
@@ -163,7 +162,7 @@ def test_bm25_retrieval_pagination(self, document_store: OpenSearchDocumentStore
         assert all("programming" in doc.content for doc in res)

     def test_bm25_retrieval_all_terms_must_match(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)
         res = document_store._bm25_retrieval("functional Haskell", top_k=3, all_terms_must_match=True)
@@ -172,7 +171,7 @@ def test_bm25_retrieval_all_terms_must_match(
         assert "Haskell is a functional programming language" in res[0].content

     def test_bm25_retrieval_all_terms_must_match_false(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)
         res = document_store._bm25_retrieval("functional Haskell", top_k=10, all_terms_must_match=False)
@@ -181,7 +180,7 @@ def test_bm25_retrieval_all_terms_must_match_false(
         assert all("functional" in doc.content for doc in res)

     def test_bm25_retrieval_with_fuzziness(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)

@@ -198,7 +197,7 @@ def test_bm25_retrieval_with_fuzziness(
         assert "functional" in res[1].content
         assert "functional" in res[2].content

-    def test_bm25_retrieval_with_filters(self, document_store: OpenSearchDocumentStore, test_documents: List[Document]):
+    def test_bm25_retrieval_with_filters(self, document_store: OpenSearchDocumentStore, test_documents: list[Document]):
         document_store.write_documents(test_documents)
         res = document_store._bm25_retrieval(
             "programming",
@@ -210,7 +209,7 @@ def test_bm25_retrieval_with_filters(self, document_store: OpenSearchDocumentSto
         assert retrieved_ids == ["1", "2", "3", "4", "5"]

     def test_bm25_retrieval_with_custom_query(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)

@@ -235,7 +234,7 @@ def test_bm25_retrieval_with_custom_query(
         assert "3" == res[2].id

     def test_bm25_retrieval_with_custom_query_empty_filters(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)

diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 086bdbdcfd..2a6d494e4d 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0

 import time
-from typing import List

 import pytest
 from haystack.dataclasses import Document
@@ -18,7 +17,7 @@ async def test_write_documents(self, document_store: OpenSearchDocumentStore):
         assert await document_store.write_documents_async([Document(id="1")]) == 1

     @pytest.mark.asyncio
-    async def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, test_documents: List[Document]):
+    async def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, test_documents: list[Document]):
         document_store.write_documents(test_documents)
         res = await document_store._bm25_retrieval_async("functional", top_k=3)
@@ -29,7 +28,7 @@ async def test_bm25_retrieval(self, document_store: OpenSearchDocumentStore, tes

     @pytest.mark.asyncio
     async def test_bm25_retrieval_pagination(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         """
         Test that handling of pagination works as expected, when the matching documents are > 10.
@@ -43,7 +42,7 @@ async def test_bm25_retrieval_pagination(

     @pytest.mark.asyncio
     async def test_bm25_retrieval_all_terms_must_match(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)
         res = await document_store._bm25_retrieval_async("functional Haskell", top_k=3, all_terms_must_match=True)
@@ -53,7 +52,7 @@ async def test_bm25_retrieval_all_terms_must_match(

     @pytest.mark.asyncio
     async def test_bm25_retrieval_all_terms_must_match_false(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)
         res = await document_store._bm25_retrieval_async("functional Haskell", top_k=10, all_terms_must_match=False)
@@ -63,7 +62,7 @@ async def test_bm25_retrieval_all_terms_must_match_false(

     @pytest.mark.asyncio
     async def test_bm25_retrieval_with_filters(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)
         res = await document_store._bm25_retrieval_async(
@@ -78,7 +77,7 @@ async def test_bm25_retrieval_with_filters(

     @pytest.mark.asyncio
     async def test_bm25_retrieval_with_custom_query(
-        self, document_store: OpenSearchDocumentStore, test_documents: List[Document]
+        self, document_store: OpenSearchDocumentStore, test_documents: list[Document]
     ):
         document_store.write_documents(test_documents)

diff --git a/integrations/opensearch/tests/test_filters.py b/integrations/opensearch/tests/test_filters.py
index e35a0dabcc..c69f7f4044 100644
--- a/integrations/opensearch/tests/test_filters.py
+++ b/integrations/opensearch/tests/test_filters.py
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-from typing import List

 import pytest
 from haystack.dataclasses import Document
@@ -228,7 +227,7 @@ def test_normalize_ranges():

 @pytest.mark.integration
 class TestFilters(FilterDocumentsTest):
-    def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
         """
         The OpenSearchDocumentStore.filter_documents() method returns a Documents with their score set.
         We don't want to compare the score, so we set it to None before comparing the documents.
diff --git a/integrations/opensearch/tests/test_open_search_hybrid_retriever.py b/integrations/opensearch/tests/test_open_search_hybrid_retriever.py
index 2763adeac3..bd652f2c2f 100644
--- a/integrations/opensearch/tests/test_open_search_hybrid_retriever.py
+++ b/integrations/opensearch/tests/test_open_search_hybrid_retriever.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0

 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any
 from unittest.mock import Mock

 import pytest
@@ -18,7 +18,7 @@
 @component
 class MockedTextEmbedder:
     @component.output_types(embedding=list[float])
-    def run(self, text: str, param_a: str = "default", param_b: str = "another_default") -> Dict[str, Any]:
+    def run(self, text: str, param_a: str = "default", param_b: str = "another_default") -> dict[str, Any]:
         return {"embedding": [0.1, 0.2, 0.3], "metadata": {"text": text, "param_a": param_a, "param_b": param_b}}

diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml
index 5d5dc5609c..b5c27b72cc 100644
--- a/integrations/optimum/pyproject.toml
+++ b/integrations/optimum/pyproject.toml
@@ -99,7 +99,7 @@ known-first-party = ["haystack_integrations"]


 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120
 exclude = ["example", "tests"]

diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py
index 0634b14207..0896210d86 100644
--- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py
+++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/_backend.py
@@ -2,7 +2,7 @@ import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, overload
+from typing import Any, Optional, Union, overload

 import numpy as np
 import torch
@@ -36,12 +36,12 @@ class _EmbedderParams:
     batch_size: int
     progress_bar: bool
     pooling_mode: Optional[Union[str, OptimumEmbedderPooling]]
-    model_kwargs: Optional[Dict[str, Any]]
+    model_kwargs: Optional[dict[str, Any]]
     working_dir: Optional[str]
     optimizer_settings: Optional[OptimumEmbedderOptimizationConfig]
     quantizer_settings: Optional[OptimumEmbedderQuantizationConfig]

-    def serialize(self) -> Dict[str, Any]:
+    def serialize(self) -> dict[str, Any]:
         out = {}
         for field in self.__dataclass_fields__.keys():
             if field in [
@@ -66,7 +66,7 @@ def serialize(self) -> Dict[str, Any]:
         return out

     @classmethod
-    def deserialize_inplace(cls, data: Dict[str, Any]) -> Dict[str, Any]:
+    def deserialize_inplace(cls, data: dict[str, Any]) -> dict[str, Any]:
         data["pooling_mode"] = OptimumEmbedderPooling.from_str(data["pooling_mode"])
         if data["optimizer_settings"] is not None:
             data["optimizer_settings"] = OptimumEmbedderOptimizationConfig.from_dict(data["optimizer_settings"])
@@ -167,7 +167,7 @@ def warm_up(self):
             pooling_mode_lasttoken=self.params.pooling_mode == OptimumEmbedderPooling.LAST_TOKEN,
         )

-    def _tokenize_and_generate_outputs(self, texts: List[str]) -> Tuple[Dict[str, Any], BaseModelOutput]:
+    def _tokenize_and_generate_outputs(self, texts: list[str]) -> tuple[dict[str, Any], BaseModelOutput]:
         assert self.model is not None
         assert self.tokenizer is not None
@@ -189,15 +189,15 @@ def pool_embeddings(self, model_output: torch.Tensor, attention_mask: torch.Tens
         return pooled_outputs["sentence_embedding"]

     @overload
-    def embed_texts(self, texts_to_embed: str) -> List[float]: ...
+    def embed_texts(self, texts_to_embed: str) -> list[float]: ...

     @overload
-    def embed_texts(self, texts_to_embed: List[str]) -> List[List[float]]: ...
+    def embed_texts(self, texts_to_embed: list[str]) -> list[list[float]]: ...

     def embed_texts(
         self,
-        texts_to_embed: Union[str, List[str]],
-    ) -> Union[List[List[float]], List[float]]:
+        texts_to_embed: Union[str, list[str]],
+    ) -> Union[list[list[float]], list[float]]:
         assert self.model is not None
         assert self.tokenizer is not None
@@ -231,7 +231,7 @@ def embed_texts(
         embeddings = embeddings.tolist()

         # Reorder embeddings according to original order
-        reordered_embeddings: List[List[float]] = [None] * len(texts)  # type: ignore
+        reordered_embeddings: list[list[float]] = [None] * len(texts)  # type: ignore
         for embedding, idx in zip(embeddings, length_sorted_idx):
             reordered_embeddings[idx] = embedding

diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py
index 17e553b83e..a065f796b5 100644
--- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py
+++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict
+from typing import Any

 from optimum.onnxruntime.configuration import AutoOptimizationConfig, OptimizationConfig

@@ -77,7 +77,7 @@ def to_optimum_config(self) -> OptimizationConfig:
             msg = f"Unknown optimization mode '{self.mode}'"
             raise ValueError(msg)

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Convert the configuration to a dictionary.

@@ -90,7 +90,7 @@ def to_dict(self) -> Dict[str, Any]:
         }

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "OptimumEmbedderOptimizationConfig":
+    def from_dict(cls, data: dict[str, Any]) -> "OptimumEmbedderOptimizationConfig":
         """
         Create an optimization configuration from a dictionary.
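Taken together, the hunks above all apply the same mechanical rewrite. A minimal sketch of the pattern, with illustrative names that are not part of the patch:

# Before, Python 3.8-compatible typing aliases:
#     from typing import Any, Dict, Optional
#     def serialize(self) -> Dict[str, Any]: ...
# After, PEP 585 builtin generics. These hard-require Python 3.9+ at runtime,
# because dict[str, Any] is evaluated eagerly when the function is defined
# (unless `from __future__ import annotations` defers evaluation):
from typing import Any, Optional

def serialize_example(params: Optional[dict[str, Any]] = None) -> dict[str, Any]:
    # Optional and Union still come from typing here; the X | None syntax
    # (PEP 604) would raise the floor further, to Python 3.10.
    return dict(params or {})

This is why the ruff `target-version` bump to "py39" accompanies the annotation changes in every package.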
diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py
index cfd5381a50..c399d6be41 100644
--- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py
+++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 from haystack import Document, component, default_from_dict, default_to_dict
 from haystack.utils import Secret
@@ -44,13 +44,13 @@ def __init__(
         normalize_embeddings: bool = True,
         onnx_execution_provider: str = "CPUExecutionProvider",
         pooling_mode: Optional[Union[str, OptimumEmbedderPooling]] = None,
-        model_kwargs: Optional[Dict[str, Any]] = None,
+        model_kwargs: Optional[dict[str, Any]] = None,
         working_dir: Optional[str] = None,
         optimizer_settings: Optional[OptimumEmbedderOptimizationConfig] = None,
         quantizer_settings: Optional[OptimumEmbedderQuantizationConfig] = None,
         batch_size: int = 32,
         progress_bar: bool = True,
-        meta_fields_to_embed: Optional[List[str]] = None,
+        meta_fields_to_embed: Optional[list[str]] = None,
         embedding_separator: str = "\n",
     ):
         """
@@ -146,7 +146,7 @@ def warm_up(self):
         self._backend.warm_up()
         self._initialized = True

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -159,7 +159,7 @@ def to_dict(self) -> Dict[str, Any]:
         return default_to_dict(self, **init_params)

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "OptimumDocumentEmbedder":
+    def from_dict(cls, data: dict[str, Any]) -> "OptimumDocumentEmbedder":
         """
         Deserializes the component from a dictionary.

@@ -171,7 +171,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "OptimumDocumentEmbedder":
         _EmbedderParams.deserialize_inplace(data["init_parameters"])
         return default_from_dict(cls, data)

-    def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
+    def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
         """
         Prepare the texts to embed by concatenating the Document text with the metadata fields to embed.
         """
@@ -190,8 +190,8 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
             texts_to_embed.append(text_to_embed)
         return texts_to_embed

-    @component.output_types(documents=List[Document])
-    def run(self, documents: List[Document]) -> Dict[str, List[Document]]:
+    @component.output_types(documents=list[Document])
+    def run(self, documents: list[Document]) -> dict[str, list[Document]]:
         """
         Embed a list of Documents.
         The embedding of each Document is stored in the `embedding` field of the Document.
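For context, the `_prepare_texts_to_embed` method retyped above concatenates selected metadata with the content before embedding. A rough sketch of the resulting text, with made-up values:

from haystack import Document

doc = Document(content="Berlin is the capital of Germany.", meta={"title": "Berlin", "lang": "en"})
meta_fields_to_embed = ["title"]
embedding_separator = "\n"

# Selected meta fields are joined ahead of the content, mirroring the
# embedder's behavior (document and field names are illustrative only).
text_to_embed = embedding_separator.join(
    [str(doc.meta[key]) for key in meta_fields_to_embed if doc.meta.get(key)] + [doc.content]
)
assert text_to_embed == "Berlin\nBerlin is the capital of Germany."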
diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py
index 8201e3ff8d..2ae77b20a7 100644
--- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py
+++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 from haystack import component, default_from_dict, default_to_dict
 from haystack.utils import Secret
@@ -40,7 +40,7 @@ def __init__(
         normalize_embeddings: bool = True,
         onnx_execution_provider: str = "CPUExecutionProvider",
         pooling_mode: Optional[Union[str, OptimumEmbedderPooling]] = None,
-        model_kwargs: Optional[Dict[str, Any]] = None,
+        model_kwargs: Optional[dict[str, Any]] = None,
         working_dir: Optional[str] = None,
         optimizer_settings: Optional[OptimumEmbedderOptimizationConfig] = None,
         quantizer_settings: Optional[OptimumEmbedderQuantizationConfig] = None,
@@ -127,7 +127,7 @@ def warm_up(self):
         self._backend.warm_up()
         self._initialized = True

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -141,7 +141,7 @@ def to_dict(self) -> Dict[str, Any]:
         return default_to_dict(self, **init_params)

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "OptimumTextEmbedder":
+    def from_dict(cls, data: dict[str, Any]) -> "OptimumTextEmbedder":
         """
         Deserializes the component from a dictionary.

@@ -153,8 +153,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "OptimumTextEmbedder":
         _EmbedderParams.deserialize_inplace(data["init_parameters"])
         return default_from_dict(cls, data)

-    @component.output_types(embedding=List[float])
-    def run(self, text: str) -> Dict[str, List[float]]:
+    @component.output_types(embedding=list[float])
+    def run(self, text: str) -> dict[str, list[float]]:
         """
         Embed a string.
diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py
index d453695442..fd2c484dc3 100644
--- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py
+++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict
+from typing import Any

 from optimum.onnxruntime.configuration import AutoQuantizationConfig, QuantizationConfig

@@ -77,7 +77,7 @@ def to_optimum_config(self) -> QuantizationConfig:
             msg = f"Unknown quantization mode '{self.mode}'"
             raise ValueError(msg)

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Convert the configuration to a dictionary.

@@ -90,7 +90,7 @@ def to_dict(self) -> Dict[str, Any]:
         }

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "OptimumEmbedderQuantizationConfig":
+    def from_dict(cls, data: dict[str, Any]) -> "OptimumEmbedderQuantizationConfig":
         """
         Create a configuration from a dictionary.
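The `to_dict`/`from_dict` pairs retyped above exist so these configs survive pipeline serialization. A self-contained sketch of that contract, using a hypothetical class rather than the library's own:

from dataclasses import dataclass
from typing import Any

@dataclass
class ExampleQuantConfig:
    mode: str
    per_channel: bool = False

    def to_dict(self) -> dict[str, Any]:
        return {"mode": self.mode, "per_channel": self.per_channel}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ExampleQuantConfig":
        return cls(**data)

# Round-tripping must be loss-free so pipelines saved to YAML restore cleanly.
config = ExampleQuantConfig(mode="avx512", per_channel=True)
assert ExampleQuantConfig.from_dict(config.to_dict()) == config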
diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml
index e3d9398545..27fc28256d 100644
--- a/integrations/pgvector/pyproject.toml
+++ b/integrations/pgvector/pyproject.toml
@@ -83,7 +83,7 @@ ignore_missing_imports = true


 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120

 [tool.ruff.lint]
diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py
index d949c3b725..1e179bbce4 100644
--- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py
+++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union

 from haystack import component, default_from_dict, default_to_dict
 from haystack.dataclasses import Document
@@ -62,7 +62,7 @@ def __init__(
         self,
         *,
         document_store: PgvectorDocumentStore,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
         filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
@@ -99,7 +99,7 @@ def __init__(
             filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
         )

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -116,7 +116,7 @@ def to_dict(self) -> Dict[str, Any]:
         )

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "PgvectorEmbeddingRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "PgvectorEmbeddingRetriever":
         """
         Deserializes the component from a dictionary.

@@ -133,14 +133,14 @@ def from_dict(cls, data: Dict[str, Any]) -> "PgvectorEmbeddingRetriever":
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
-        query_embedding: List[float],
-        filters: Optional[Dict[str, Any]] = None,
+        query_embedding: list[float],
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Retrieve documents from the `PgvectorDocumentStore`, based on their embeddings.

@@ -166,14 +166,14 @@ def run(
         )
         return {"documents": docs}

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     async def run_async(
         self,
-        query_embedding: List[float],
-        filters: Optional[Dict[str, Any]] = None,
+        query_embedding: list[float],
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on their embeddings.
diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py
index 74ae553669..3d320dcab4 100644
--- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py
+++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 from haystack import component, default_from_dict, default_to_dict
 from haystack.dataclasses import Document
@@ -52,7 +52,7 @@ def __init__(
         self,
         *,
         document_store: PgvectorDocumentStore,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
         filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
     ):
@@ -74,7 +74,7 @@ def __init__(
             filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
         )

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -90,7 +90,7 @@ def to_dict(self) -> Dict[str, Any]:
         )

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "PgvectorKeywordRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "PgvectorKeywordRetriever":
         """
         Deserializes the component from a dictionary.

@@ -107,13 +107,13 @@ def from_dict(cls, data: Dict[str, Any]) -> "PgvectorKeywordRetriever":
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
         query: str,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Retrieve documents from the `PgvectorDocumentStore`, based on keywords.

@@ -137,13 +137,13 @@ def run(
         )
         return {"documents": docs}

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     async def run_async(
         self,
         query: str,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on keywords.
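The keyword counterpart follows the same shape; a sketch under the same assumptions (running Postgres, placeholder query):

from haystack_integrations.components.retrievers.pgvector import PgvectorKeywordRetriever
from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore

document_store = PgvectorDocumentStore()
retriever = PgvectorKeywordRetriever(document_store=document_store, top_k=3)

# run() returns {"documents": [...]}, matching the dict[str, list[Document]]
# return annotation introduced above.
docs = retriever.run(query="full-text search")["documents"]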
diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py
index 1ba13e1914..2913c60075 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any

 from haystack import logging
 from haystack.dataclasses import ByteStream, Document
@@ -7,7 +7,7 @@
 logger = logging.getLogger(__name__)


-def _from_haystack_to_pg_documents(documents: List[Document]) -> List[Dict[str, Any]]:
+def _from_haystack_to_pg_documents(documents: list[Document]) -> list[dict[str, Any]]:
     """
     Internal method to convert a list of Haystack Documents to a list of dictionaries that can be used to insert
     documents into the PgvectorDocumentStore.
@@ -38,7 +38,7 @@ def _from_haystack_to_pg_documents(documents: List[Document]) -> List[Dict[str,
     return db_documents


-def _from_pg_to_haystack_documents(documents: List[Dict[str, Any]]) -> List[Document]:
+def _from_pg_to_haystack_documents(documents: list[dict[str, Any]]) -> list[Document]:
     """
     Internal method to convert a list of dictionaries from pgvector to a list of Haystack Documents.
     """
diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index 45aa1dd2fe..5094b136a6 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Literal, Optional, Tuple, Union, overload
+from typing import Any, Literal, Optional, Union, overload

 from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses.document import Document
@@ -90,7 +90,7 @@ def __init__(
         recreate_table: bool = False,
         search_strategy: Literal["exact_nearest_neighbor", "hnsw"] = "exact_nearest_neighbor",
         hnsw_recreate_index_if_exists: bool = False,
-        hnsw_index_creation_kwargs: Optional[Dict[str, int]] = None,
+        hnsw_index_creation_kwargs: Optional[dict[str, int]] = None,
         hnsw_index_name: str = "haystack_hnsw_index",
         hnsw_ef_search: Optional[int] = None,
         keyword_index_name: str = "haystack_keyword_index",
@@ -180,7 +180,7 @@ def __init__(
         self._async_dict_cursor: Optional[AsyncCursor[DictRow]] = None
         self._table_initialized = False

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -207,7 +207,7 @@ def to_dict(self) -> Dict[str, Any]:
         )

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "PgvectorDocumentStore":
+    def from_dict(cls, data: dict[str, Any]) -> "PgvectorDocumentStore":
         """
         Deserializes the component from a dictionary.

@@ -740,7 +740,7 @@ async def count_documents_async(self) -> int:
             return result[0]
         return 0

-    def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
         """
         Returns the documents that match the filters provided.
@@ -777,7 +777,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
         docs = _from_pg_to_haystack_documents(records)
         return docs

-    async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
         """
         Asynchronously returns the documents that match the filters provided.

@@ -832,7 +832,7 @@ def _build_insert_statement(self, policy: DuplicatePolicy) -> Composed:

         return sql_insert

-    def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
+    def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
         """
         Writes documents to the document store.

@@ -888,7 +888,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D
         return written_docs

     async def write_documents_async(
-        self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
+        self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
     ) -> int:
         """
         Asynchronously writes documents to the document store.
@@ -939,7 +939,7 @@ async def write_documents_async(

         return written_docs

-    def delete_documents(self, document_ids: List[str]) -> None:
+    def delete_documents(self, document_ids: list[str]) -> None:
         """
         Deletes documents that match the provided `document_ids` from the document store.

@@ -962,7 +962,7 @@ def delete_documents(self, document_ids: List[str]) -> None:
             cursor=self._cursor, sql_query=delete_sql, error_msg="Could not delete documents from PgvectorDocumentStore"
         )

-    async def delete_documents_async(self, document_ids: List[str]) -> None:
+    async def delete_documents_async(self, document_ids: list[str]) -> None:
         """
         Asynchronously deletes documents that match the provided `document_ids` from the document store.

@@ -1020,8 +1020,8 @@ async def delete_all_documents_async(self) -> None:
         )

     def _build_keyword_retrieval_query(
-        self, query: str, top_k: int, filters: Optional[Dict[str, Any]] = None
-    ) -> Tuple[Composed, tuple]:
+        self, query: str, top_k: int, filters: Optional[dict[str, Any]] = None
+    ) -> tuple[Composed, tuple]:
         """
         Builds the SQL query and the where parameters for keyword retrieval.
         """
@@ -1049,9 +1049,9 @@ def _keyword_retrieval(
         self,
         query: str,
         *,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query using a full-text search.

@@ -1084,9 +1084,9 @@ async def _keyword_retrieval_async(
         self,
         query: str,
         *,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query using a full-text search asynchronously.
         """
@@ -1111,11 +1111,11 @@ async def _keyword_retrieval_async(

     def _check_and_build_embedding_retrieval_query(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]],
         top_k: int,
-        filters: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[Composed, tuple]:
+        filters: Optional[dict[str, Any]] = None,
+    ) -> tuple[Composed, tuple]:
         """
         Performs checks and builds the SQL query and the where parameters for embedding retrieval.
         """
@@ -1174,12 +1174,12 @@ def _check_and_build_embedding_retrieval_query(

     def _embedding_retrieval(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         *,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.

@@ -1208,12 +1208,12 @@ def _embedding_retrieval(

     async def _embedding_retrieval_async(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         *,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
         vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
         metric.
diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py
index e15a3f523e..d06e75e115 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 from datetime import datetime
 from itertools import chain
-from typing import Any, Dict, List, Literal, Optional, Tuple
+from typing import Any, Literal, Optional

 from haystack.errors import FilterError
 from psycopg.sql import SQL, Composed
@@ -21,7 +21,7 @@
 NO_VALUE = "no_value"


-def _validate_filters(filters: Optional[Dict[str, Any]] = None) -> None:
+def _validate_filters(filters: Optional[dict[str, Any]] = None) -> None:
     """
     Validates the filters provided.
     """
@@ -35,8 +35,8 @@ def _validate_filters(filters: Optional[Dict[str, Any]] = None) -> None:


 def _convert_filters_to_where_clause_and_params(
-    filters: Dict[str, Any], operator: Literal["WHERE", "AND"] = "WHERE"
-) -> Tuple[Composed, Tuple]:
+    filters: dict[str, Any], operator: Literal["WHERE", "AND"] = "WHERE"
+) -> tuple[Composed, tuple]:
     """
     Convert Haystack filters to a WHERE clause and a tuple of params to query PostgreSQL.
     """
@@ -51,7 +51,7 @@ def _convert_filters_to_where_clause_and_params(
     return where_clause, params


-def _parse_logical_condition(condition: Dict[str, Any]) -> Tuple[str, List[Any]]:
+def _parse_logical_condition(condition: dict[str, Any]) -> tuple[str, list[Any]]:
     if "operator" not in condition:
         msg = f"'operator' key missing in {condition}"
         raise FilterError(msg)
@@ -91,7 +91,7 @@ def _parse_logical_condition(condition: Dict[str, Any]) -> Tuple[str, List[Any]]
     return sql_query, values


-def _parse_comparison_condition(condition: Dict[str, Any]) -> Tuple[str, List[Any]]:
+def _parse_comparison_condition(condition: dict[str, Any]) -> tuple[str, list[Any]]:
     field: str = condition["field"]
     if "operator" not in condition:
         msg = f"'operator' key missing in {condition}"
@@ -142,20 +142,20 @@ def _treat_meta_field(field: str, value: Any) -> str:
     return field


-def _equal(field: str, value: Any) -> Tuple[str, Any]:
+def _equal(field: str, value: Any) -> tuple[str, Any]:
     if value is None:
         # NO_VALUE is a placeholder that will be removed in _convert_filters_to_where_clause_and_params
         return f"{field} IS NULL", NO_VALUE
     return f"{field} = %s", value


-def _not_equal(field: str, value: Any) -> Tuple[str, Any]:
+def _not_equal(field: str, value: Any) -> tuple[str, Any]:
     # we use IS DISTINCT FROM to correctly handle NULL values
     # (not handled by !=)
     return f"{field} IS DISTINCT FROM %s", value


-def _greater_than(field: str, value: Any) -> Tuple[str, Any]:
+def _greater_than(field: str, value: Any) -> tuple[str, Any]:
     if isinstance(value, str):
         try:
             datetime.fromisoformat(value)
@@ -172,7 +172,7 @@ def _greater_than(field: str, value: Any) -> Tuple[str, Any]:
     return f"{field} > %s", value


-def _greater_than_equal(field: str, value: Any) -> Tuple[str, Any]:
+def _greater_than_equal(field: str, value: Any) -> tuple[str, Any]:
     if isinstance(value, str):
         try:
             datetime.fromisoformat(value)
@@ -189,7 +189,7 @@ def _greater_than_equal(field: str, value: Any) -> Tuple[str, Any]:
     return f"{field} >= %s", value


-def _less_than(field: str, value: Any) -> Tuple[str, Any]:
+def _less_than(field: str, value: Any) -> tuple[str, Any]:
     if isinstance(value, str):
         try:
             datetime.fromisoformat(value)
@@ -206,7 +206,7 @@ def _less_than(field: str, value: Any) -> Tuple[str, Any]:
     return f"{field} < %s", value


-def _less_than_equal(field: str, value: Any) -> Tuple[str, Any]:
+def _less_than_equal(field: str, value: Any) -> tuple[str, Any]:
     if isinstance(value, str):
         try:
             datetime.fromisoformat(value)
@@ -223,7 +223,7 @@ def _less_than_equal(field: str, value: Any) -> Tuple[str, Any]:
     return f"{field} <= %s", value


-def _not_in(field: str, value: Any) -> Tuple[str, List]:
+def _not_in(field: str, value: Any) -> tuple[str, list]:
     if not isinstance(value, list):
         msg = f"{field}'s value must be a list when using 'not in' comparator in Pinecone"
         raise FilterError(msg)
@@ -231,7 +231,7 @@ def _not_in(field: str, value: Any) -> Tuple[str, List]:
     return f"{field} IS NULL OR {field} != ALL(%s)", [value]


-def _in(field: str, value: Any) -> Tuple[str, List]:
+def _in(field: str, value: Any) -> tuple[str, list]:
     if not isinstance(value, list):
         msg = f"{field}'s value must be a list when using 'in' comparator in Pinecone"
         raise FilterError(msg)
@@ -240,14 +240,14 @@ def _in(field: str, value: Any) -> Tuple[str, List]:
     return f"{field} = ANY(%s)", [value]


-def _like(field: str, value: Any) -> Tuple[str, Any]:
+def _like(field: str, value: Any) -> tuple[str, Any]:
     if not isinstance(value, str):
         msg = f"{field}'s value must be a str when using 'LIKE' "
         raise FilterError(msg)
     return f"{field} LIKE %s", value


-def _not_like(field: str, value: Any) -> Tuple[str, Any]:
+def _not_like(field: str, value: Any) -> tuple[str, Any]:
     if not isinstance(value, str):
         msg = f"{field}'s value must be a str when using 'LIKE' "
         raise FilterError(msg)
diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py
index e989fc8c55..d6db33e411 100644
--- a/integrations/pgvector/tests/conftest.py
+++ b/integrations/pgvector/tests/conftest.py
@@ -80,11 +80,15 @@ def document_store_w_halfvec_hnsw_index(request, monkeypatch):

 @pytest.fixture
 def patches_for_unit_tests():
-    with patch("haystack_integrations.document_stores.pgvector.document_store.register_vector") as mock_register, patch(
-        "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.delete_table"
-    ) as mock_delete, patch(
-        "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._handle_hnsw"
-    ) as mock_hnsw:
+    with (
+        patch("haystack_integrations.document_stores.pgvector.document_store.register_vector") as mock_register,
+        patch(
+            "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.delete_table"
+        ) as mock_delete,
+        patch(
+            "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._handle_hnsw"
+        ) as mock_hnsw,
+    ):
         yield mock_register, mock_delete, mock_hnsw

diff --git a/integrations/pgvector/tests/test_filters.py b/integrations/pgvector/tests/test_filters.py
index 97ceb6a57d..ba750b5a6d 100644
--- a/integrations/pgvector/tests/test_filters.py
+++ b/integrations/pgvector/tests/test_filters.py
@@ -1,5 +1,3 @@
-from typing import List
-
 import pytest
 from haystack.dataclasses.document import Document
 from haystack.testing.document_store import FilterDocumentsTest
@@ -17,7 +15,7 @@

 @pytest.mark.integration
 class TestFilters(FilterDocumentsTest):
-    def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
         """
         This overrides the default assert_documents_are_equal from FilterDocumentsTest.
         It is needed because the embeddings are not exactly the same when they are retrieved from Postgres.
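The conftest.py hunk above is the one place this patch goes beyond retyping: it adopts parenthesized context managers, which became official grammar in Python 3.10 (CPython 3.9's PEG parser already accepts the syntax). A standalone sketch of the same pattern, with hypothetical patch targets:

from unittest.mock import patch

with (
    patch("os.getcwd") as mock_cwd,
    patch("os.listdir") as mock_listdir,
):
    # Each context manager sits on its own line, so long dotted mock paths
    # no longer force the awkward trailing "patch(" continuation style that
    # the old code used.
    mock_cwd.return_value = "/tmp"
    mock_listdir.return_value = []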
diff --git a/integrations/pgvector/tests/test_retrieval.py b/integrations/pgvector/tests/test_retrieval.py
index 997126929f..42e53d7705 100644
--- a/integrations/pgvector/tests/test_retrieval.py
+++ b/integrations/pgvector/tests/test_retrieval.py
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-from typing import List
-
 import pytest
 from haystack.dataclasses.document import Document
 from numpy.random import rand
@@ -101,7 +99,7 @@ def test_embedding_retrieval_with_filters(self, document_store: PgvectorDocument
         assert results[0].score > results[1].score > results[2].score

     def test_empty_query_embedding(self, document_store: PgvectorDocumentStore):
-        query_embedding: List[float] = []
+        query_embedding: list[float] = []
         with pytest.raises(ValueError):
             document_store._embedding_retrieval(query_embedding=query_embedding)

diff --git a/integrations/pgvector/tests/test_retrieval_async.py b/integrations/pgvector/tests/test_retrieval_async.py
index 4ee5eba4e9..3d3c7d7ee2 100644
--- a/integrations/pgvector/tests/test_retrieval_async.py
+++ b/integrations/pgvector/tests/test_retrieval_async.py
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-from typing import List

 import pytest
 from haystack.dataclasses.document import Document
@@ -104,7 +103,7 @@ async def test_embedding_retrieval_with_filters_async(self, document_store: Pgve
         assert results[0].score > results[1].score > results[2].score

     async def test_empty_query_embedding_async(self, document_store: PgvectorDocumentStore):
-        query_embedding: List[float] = []
+        query_embedding: list[float] = []
         with pytest.raises(ValueError):
             await document_store._embedding_retrieval_async(query_embedding=query_embedding)

diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml
index 449df3430f..64b56dfc7a 100644
--- a/integrations/pinecone/pyproject.toml
+++ b/integrations/pinecone/pyproject.toml
@@ -88,7 +88,7 @@ allow-direct-references = true


 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120

 [tool.ruff.lint]
diff --git a/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py
index 2885657c65..15e50dee42 100644
--- a/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py
+++ b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 from haystack import component, default_from_dict, default_to_dict
 from haystack.dataclasses import Document
@@ -55,7 +55,7 @@ def __init__(
         self,
         *,
         document_store: PineconeDocumentStore,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
         filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
     ):
@@ -78,7 +78,7 @@ def __init__(
             filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
         )

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

         :returns:
@@ -93,7 +93,7 @@ def to_dict(self) -> Dict[str, Any]:
         )

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "PineconeEmbeddingRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "PineconeEmbeddingRetriever":
         """
         Deserializes the component from a dictionary.

         :param data:
@@ -110,13 +110,13 @@ def from_dict(cls, data: Dict[str, Any]) -> "PineconeEmbeddingRetriever":
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
-        query_embedding: List[float],
-        filters: Optional[Dict[str, Any]] = None,
+        query_embedding: list[float],
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Retrieve documents from the `PineconeDocumentStore`, based on their dense embeddings.

@@ -139,13 +139,13 @@ def run(
         )
         return {"documents": docs}

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     async def run_async(
         self,
-        query_embedding: List[float],
-        filters: Optional[Dict[str, Any]] = None,
+        query_embedding: list[float],
+        filters: Optional[dict[str, Any]] = None,
         top_k: Optional[int] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Asynchronously retrieve documents from the `PineconeDocumentStore`, based on their dense embeddings.
diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py
index 7b7d81b093..0672689664 100644
--- a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py
+++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 from copy import copy
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union

 from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
@@ -40,7 +40,7 @@ def __init__(
         namespace: str = "default",
         batch_size: int = 100,
         dimension: int = 768,
-        spec: Optional[Dict[str, Any]] = None,
+        spec: Optional[dict[str, Any]] = None,
         metric: Literal["cosine", "euclidean", "dotproduct"] = "cosine",
     ):
         """
@@ -155,7 +155,7 @@ async def close_async(self):
         self._async_index = None

     @staticmethod
-    def _convert_dict_spec_to_pinecone_object(spec: Dict[str, Any]) -> Union[ServerlessSpec, PodSpec]:
+    def _convert_dict_spec_to_pinecone_object(spec: dict[str, Any]) -> Union[ServerlessSpec, PodSpec]:
         """Convert the spec dictionary to a Pinecone spec object"""

         if "serverless" in spec:
@@ -172,7 +172,7 @@ def _convert_dict_spec_to_pinecone_object(spec: Dict[str, Any]) -> Union[Serverl
         raise ValueError(msg)

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "PineconeDocumentStore":
+    def from_dict(cls, data: dict[str, Any]) -> "PineconeDocumentStore":
         """
         Deserializes the component from a dictionary.

         :param data:
@@ -183,7 +183,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "PineconeDocumentStore":
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         return default_from_dict(cls, data)

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

         :returns:
@@ -227,7 +227,7 @@ async def count_documents_async(self) -> int:
             count = 0
         return count

-    def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
+    def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
         """
         Writes Documents to Pinecone.

@@ -250,7 +250,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D
         return written_docs

     async def write_documents_async(
-        self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
+        self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
     ) -> int:
         """
         Asynchronously writes Documents to Pinecone.

@@ -274,7 +274,7 @@ async def write_documents_async(

         return written_docs

-    def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
         """
         Returns the documents that match the filters provided.

@@ -306,7 +306,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
             )
         return documents

-    async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
         """
         Asynchronously returns the documents that match the filters provided.

@@ -333,7 +333,7 @@ async def filter_documents_async(self, filters: Optional[Dict[str, Any]] = None)

         return documents

-    def delete_documents(self, document_ids: List[str]) -> None:
+    def delete_documents(self, document_ids: list[str]) -> None:
         """
         Deletes documents that match the provided `document_ids` from the document store.

@@ -343,7 +343,7 @@ def delete_documents(self, document_ids: List[str]) -> None:
         assert self._index is not None, "Index is not initialized"
         self._index.delete(ids=document_ids, namespace=self.namespace)

-    async def delete_documents_async(self, document_ids: List[str]) -> None:
+    async def delete_documents_async(self, document_ids: list[str]) -> None:
         """
         Asynchronously deletes documents that match the provided `document_ids` from the document store.

@@ -379,12 +379,12 @@ async def delete_all_documents_async(self) -> None:

     def _embedding_retrieval(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         *,
         namespace: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.

@@ -422,12 +422,12 @@ def _embedding_retrieval(

     async def _embedding_retrieval_async(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         *,
         namespace: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[dict[str, Any]] = None,
         top_k: int = 10,
-    ) -> List[Document]:
+    ) -> list[Document]:
         """
         Asynchronously retrieves documents that are similar to the query embedding using a vector similarity metric.

@@ -460,7 +460,7 @@ async def _embedding_retrieval_async(
         return self._convert_query_result_to_documents(result)

     @staticmethod
-    def _convert_meta_to_int(metadata: Dict[str, Any]) -> Dict[str, Any]:
+    def _convert_meta_to_int(metadata: dict[str, Any]) -> dict[str, Any]:
         """
         Pinecone store numeric metadata values as `float`. Some specific metadata are used in Retrievers components and
         are expected to be `int`. This method converts them back to integers.
@@ -473,7 +473,7 @@ def _convert_meta_to_int(metadata: Dict[str, Any]) -> Dict[str, Any]:

         return metadata

-    def _convert_query_result_to_documents(self, query_result: Dict[str, Any]) -> List[Document]:
+    def _convert_query_result_to_documents(self, query_result: dict[str, Any]) -> list[Document]:
         pinecone_docs = query_result["matches"]
         documents = []
         for pinecone_doc in pinecone_docs:
@@ -524,7 +524,7 @@ def valid_type(value: Any) -> bool:

         document.meta = new_meta

-    def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> List[Dict[str, Any]]:
+    def _convert_documents_to_pinecone_format(self, documents: list[Document]) -> list[dict[str, Any]]:
         documents_for_pinecone = []
         for document in documents:
             embedding = copy(document.embedding)
@@ -538,7 +538,7 @@ def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> Li
             if document.meta:
                 self._discard_invalid_meta(document)

-            doc_for_pinecone: Dict[str, Any] = {"id": document.id, "values": embedding, "metadata": dict(document.meta)}
+            doc_for_pinecone: dict[str, Any] = {"id": document.id, "values": embedding, "metadata": dict(document.meta)}

             # we save content as metadata
             if document.content is not None:
@@ -563,8 +563,8 @@ def _convert_documents_to_pinecone_format(self, documents: List[Document]) -> Li
         return documents_for_pinecone

     def _prepare_documents_for_writing(
-        self, documents: List[Document], policy: DuplicatePolicy
-    ) -> List[Dict[str, Any]]:
+        self, documents: list[Document], policy: DuplicatePolicy
+    ) -> list[dict[str, Any]]:
         """
         Helper method to prepare documents for writing to Pinecone.
         """
diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py
index f7f52d434c..0835fd5fa4 100644
--- a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py
+++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py
@@ -1,12 +1,12 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, Optional
+from typing import Any, Optional

 from haystack.errors import FilterError


-def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]:
+def _normalize_filters(filters: dict[str, Any]) -> dict[str, Any]:
     """
     Converts Haystack filters in Pinecone compatible filters.
     Reference: https://docs.pinecone.io/docs/metadata-filtering
@@ -20,7 +20,7 @@ def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]:
     return _parse_logical_condition(filters)


-def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
+def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]:
     if "operator" not in condition:
         msg = f"'operator' key missing in {condition}"
         raise FilterError(msg)
@@ -38,7 +38,7 @@ def _parse_logical_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
     raise FilterError(msg)


-def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
+def _parse_comparison_condition(condition: dict[str, Any]) -> dict[str, Any]:
     if "field" not in condition:
         # 'field' key is only found in comparison dictionaries.
         # We assume this is a logic dictionary since it's not present.
@@ -64,7 +64,7 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
     return COMPARISON_OPERATORS[operator](field, value)


-def _equal(field: str, value: Any) -> Dict[str, Any]:
+def _equal(field: str, value: Any) -> dict[str, Any]:
     supported_types = (str, int, float, bool)
     if not isinstance(value, supported_types):
         msg = (
@@ -76,7 +76,7 @@ def _equal(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$eq": value}}


-def _not_equal(field: str, value: Any) -> Dict[str, Any]:
+def _not_equal(field: str, value: Any) -> dict[str, Any]:
     supported_types = (str, int, float, bool)
     if not isinstance(value, supported_types):
         msg = (
@@ -88,7 +88,7 @@ def _not_equal(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$ne": value}}


-def _greater_than(field: str, value: Any) -> Dict[str, Any]:
+def _greater_than(field: str, value: Any) -> dict[str, Any]:
     supported_types = (int, float)
     if not isinstance(value, supported_types):
         msg = (
@@ -100,7 +100,7 @@ def _greater_than(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$gt": value}}


-def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]:
+def _greater_than_equal(field: str, value: Any) -> dict[str, Any]:
     supported_types = (int, float)
     if not isinstance(value, supported_types):
         msg = (
@@ -112,7 +112,7 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$gte": value}}


-def _less_than(field: str, value: Any) -> Dict[str, Any]:
+def _less_than(field: str, value: Any) -> dict[str, Any]:
     supported_types = (int, float)
     if not isinstance(value, supported_types):
         msg = (
@@ -124,7 +124,7 @@ def _less_than(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$lt": value}}


-def _less_than_equal(field: str, value: Any) -> Dict[str, Any]:
+def _less_than_equal(field: str, value: Any) -> dict[str, Any]:
     supported_types = (int, float)
     if not isinstance(value, supported_types):
         msg = (
@@ -136,7 +136,7 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$lte": value}}


-def _not_in(field: str, value: Any) -> Dict[str, Any]:
+def _not_in(field: str, value: Any) -> dict[str, Any]:
     if not isinstance(value, list):
         msg = f"{field}'s value must be a list when using 'not in' comparator in Pinecone"
         raise FilterError(msg)
@@ -153,7 +153,7 @@ def _not_in(field: str, value: Any) -> Dict[str, Any]:
     return {field: {"$nin": value}}


-def _in(field: str, value: Any) -> Dict[str, Any]:
+def _in(field: str, value: Any) -> dict[str, Any]:
     if not isinstance(value, list):
         msg = f"{field}'s value must be a list when using 'in' comparator in Pinecone"
         raise FilterError(msg)
@@ -181,7 +181,7 @@ def _in(field: str, value: Any) -> Dict[str, Any]:
 LOGICAL_OPERATORS = {"AND": "$and", "OR": "$or"}


-def _validate_filters(filters: Optional[Dict[str, Any]]) -> None:
+def _validate_filters(filters: Optional[dict[str, Any]]) -> None:
     """
     Helper method to validate filter syntax.
     """
diff --git a/integrations/pinecone/tests/test_filters.py b/integrations/pinecone/tests/test_filters.py
index a3661eb11f..63da94d03f 100644
--- a/integrations/pinecone/tests/test_filters.py
+++ b/integrations/pinecone/tests/test_filters.py
@@ -1,5 +1,4 @@
 import os
-from typing import List

 import pytest
 from haystack.dataclasses.document import Document
@@ -11,7 +10,7 @@
 @pytest.mark.integration
 @pytest.mark.skipif(not os.environ.get("PINECONE_API_KEY"), reason="PINECONE_API_KEY not set")
 class TestFilters(FilterDocumentsTest):
-    def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
         for doc in received:
             # Pinecone seems to convert integers to floats (undocumented behavior)
             # We convert them back to integers to compare them
diff --git a/integrations/qdrant/pyproject.toml b/integrations/qdrant/pyproject.toml
index 983a93ac36..db61161008 100644
--- a/integrations/qdrant/pyproject.toml
+++ b/integrations/qdrant/pyproject.toml
@@ -80,7 +80,7 @@ disallow_incomplete_defs = true


 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120

 [tool.ruff.lint]
diff --git a/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py b/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py
index bee9b5ae5c..bd8e9a901e 100644
--- a/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py
+++ b/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 from haystack import Document, component, default_from_dict, default_to_dict
 from haystack.dataclasses.sparse_embedding import SparseEmbedding
@@ -43,7 +43,7 @@ class QdrantEmbeddingRetriever:
     def __init__(
         self,
         document_store: QdrantDocumentStore,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
+        filters: Optional[Union[dict[str, Any], models.Filter]] = None,
         top_k: int = 10,
         scale_score: bool = False,
         return_embedding: bool = False,
@@ -89,7 +89,7 @@ def __init__(
         self._group_by = group_by
         self._group_size = group_size

-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.

@@ -113,7 +113,7 @@ def to_dict(self) -> Dict[str, Any]:
         return d

     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "QdrantEmbeddingRetriever":
         """
         Deserializes the component from a dictionary.

@@ -130,18 +130,18 @@ def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)

-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
-        query_embedding: List[float],
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
+        query_embedding: list[float],
+        filters: Optional[Union[dict[str, Any], models.Filter]] = None,
         top_k: Optional[int] = None,
         scale_score: Optional[bool] = None,
         return_embedding: Optional[bool] = None,
         score_threshold: Optional[float] = None,
         group_by: Optional[str] = None,
         group_size: Optional[int] = None,
-    ) -> Dict[str, List[Document]]:
+    ) -> dict[str, list[Document]]:
         """
         Run the Embedding Retriever on the given input data.
@@ -185,18 +185,18 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, - query_embedding: List[float], - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + query_embedding: list[float], + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: Optional[int] = None, scale_score: Optional[bool] = None, return_embedding: Optional[bool] = None, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously run the Embedding Retriever on the given input data. @@ -271,7 +271,7 @@ class QdrantSparseEmbeddingRetriever: def __init__( self, document_store: QdrantDocumentStore, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: int = 10, scale_score: bool = False, return_embedding: bool = False, @@ -317,7 +317,7 @@ def __init__( self._group_by = group_by self._group_size = group_size - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -341,7 +341,7 @@ def to_dict(self) -> Dict[str, Any]: return d @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever": + def from_dict(cls, data: dict[str, Any]) -> "QdrantSparseEmbeddingRetriever": """ Deserializes the component from a dictionary. @@ -358,18 +358,18 @@ def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: Optional[int] = None, scale_score: Optional[bool] = None, return_embedding: Optional[bool] = None, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Run the Sparse Embedding Retriever on the given input data. @@ -418,18 +418,18 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: Optional[int] = None, scale_score: Optional[bool] = None, return_embedding: Optional[bool] = None, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously run the Sparse Embedding Retriever on the given input data. 
@@ -515,7 +515,7 @@ class QdrantHybridRetriever: def __init__( self, document_store: QdrantDocumentStore, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: int = 10, return_embedding: bool = False, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, @@ -558,7 +558,7 @@ def __init__( self._group_by = group_by self._group_size = group_size - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -578,7 +578,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever": + def from_dict(cls, data: dict[str, Any]) -> "QdrantHybridRetriever": """ Deserializes the component from a dictionary. @@ -595,18 +595,18 @@ def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever": data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - query_embedding: List[float], + query_embedding: list[float], query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: Optional[int] = None, return_embedding: Optional[bool] = None, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Run the Sparse Embedding Retriever on the given input data. @@ -655,18 +655,18 @@ def run( return {"documents": docs} - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) async def run_async( self, - query_embedding: List[float], + query_embedding: list[float], query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], models.Filter]] = None, + filters: Optional[Union[dict[str, Any], models.Filter]] = None, top_k: Optional[int] = None, return_embedding: Optional[bool] = None, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Asynchronously run the Sparse Embedding Retriever on the given input data. 
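For orientation, a hedged usage sketch of the hybrid retriever whose signatures change above. The store and retriever wiring mirrors the tests later in this patch; the 768-dimensional dummy vectors and the `top_k` value are placeholders, not taken from the source:

```python
from haystack.dataclasses.sparse_embedding import SparseEmbedding
from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
retriever = QdrantHybridRetriever(document_store=store, top_k=5)

# run() takes both a dense and a sparse query embedding and fuses the results.
docs = retriever.run(
    query_embedding=[0.1] * 768,
    query_sparse_embedding=SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33]),
)["documents"]
print(len(docs))  # 0 on this empty in-memory store
```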
diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py index 9c227d8d47..fcc8ff312b 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py @@ -1,5 +1,5 @@ import uuid -from typing import List, Union +from typing import Union from haystack import logging from haystack.dataclasses import Document @@ -15,10 +15,10 @@ def convert_haystack_documents_to_qdrant_points( - documents: List[Document], + documents: list[Document], *, use_sparse_embeddings: bool, -) -> List[rest.PointStruct]: +) -> list[rest.PointStruct]: points = [] for document in documents: payload = document.to_dict(flatten=False) diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py index 83adf29b91..bea3211423 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py @@ -1,6 +1,7 @@ import inspect +from collections.abc import AsyncGenerator, Generator from itertools import islice -from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Tuple, Union +from typing import Any, ClassVar, Optional, Union import qdrant_client from haystack import default_from_dict, default_to_dict, logging @@ -37,10 +38,10 @@ class QdrantStoreError(DocumentStoreError): pass -FilterType = Dict[str, Union[Dict[str, Any], List[Any], str, int, float, bool]] +FilterType = dict[str, Union[dict[str, Any], list[Any], str, int, float, bool]] -def get_batches_from_generator(iterable: List, n: int) -> Generator: +def get_batches_from_generator(iterable: list, n: int) -> Generator: """ Batch elements of an iterable into fixed-length chunks or blocks. """ @@ -90,7 +91,7 @@ class QdrantDocumentStore: ``` """ - SIMILARITY: ClassVar[Dict[str, rest.Distance]] = { + SIMILARITY: ClassVar[dict[str, rest.Distance]] = { "cosine": rest.Distance.COSINE, "dot_product": rest.Distance.DOT, "l2": rest.Distance.EUCLID, @@ -132,7 +133,7 @@ def __init__( metadata: Optional[dict] = None, write_batch_size: int = 100, scroll_size: int = 10_000, - payload_fields_to_index: Optional[List[dict]] = None, + payload_fields_to_index: Optional[list[dict]] = None, ) -> None: """ Initializes a QdrantDocumentStore. @@ -338,8 +339,8 @@ async def count_documents_async(self) -> int: def filter_documents( self, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, - ) -> List[Document]: + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, + ) -> list[Document]: """ Returns the documents that match the provided filters. @@ -361,8 +362,8 @@ def filter_documents( async def filter_documents_async( self, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, - ) -> List[Document]: + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, + ) -> list[Document]: """ Asynchronously returns the documents that match the provided filters. 
""" @@ -374,7 +375,7 @@ async def filter_documents_async( def write_documents( self, - documents: List[Document], + documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL, ) -> int: """ @@ -427,7 +428,7 @@ def write_documents( async def write_documents_async( self, - documents: List[Document], + documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.FAIL, ) -> int: """ @@ -479,7 +480,7 @@ async def write_documents_async( progress_bar.update(self.write_batch_size) return len(document_objects) - def delete_documents(self, document_ids: List[str]) -> None: + def delete_documents(self, document_ids: list[str]) -> None: """ Deletes documents that match the provided `document_ids` from the document store. @@ -501,7 +502,7 @@ def delete_documents(self, document_ids: List[str]) -> None: "Called QdrantDocumentStore.delete_documents() on a non-existing ID", ) - async def delete_documents_async(self, document_ids: List[str]) -> None: + async def delete_documents_async(self, document_ids: list[str]) -> None: """ Asynchronously deletes documents that match the provided `document_ids` from the document store. @@ -626,7 +627,7 @@ async def delete_all_documents_async(self, recreate_index: bool = False) -> None ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore": + def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore": """ Deserializes the component from a dictionary. @@ -638,7 +639,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -657,7 +658,7 @@ def to_dict(self) -> Dict[str, Any]: def _get_documents_generator( self, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, ) -> Generator[Document, None, None]: """ Returns a generator that yields documents from Qdrant based on the provided filters. @@ -694,7 +695,7 @@ def _get_documents_generator( async def _get_documents_generator_async( self, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, ) -> AsyncGenerator[Document, None]: """ Returns an asynchronous generator that yields documents from Qdrant based on the provided filters. @@ -731,8 +732,8 @@ async def _get_documents_generator_async( def get_documents_by_id( self, - ids: List[str], - ) -> List[Document]: + ids: list[str], + ) -> list[Document]: """ Retrieves documents from Qdrant by their IDs. @@ -741,7 +742,7 @@ def get_documents_by_id( :returns: A list of documents. """ - documents: List[Document] = [] + documents: list[Document] = [] self._initialize_client() assert self._client is not None @@ -762,8 +763,8 @@ def get_documents_by_id( async def get_documents_by_id_async( self, - ids: List[str], - ) -> List[Document]: + ids: list[str], + ) -> list[Document]: """ Retrieves documents from Qdrant by their IDs. @@ -772,7 +773,7 @@ async def get_documents_by_id_async( :returns: A list of documents. 
""" - documents: List[Document] = [] + documents: list[Document] = [] await self._initialize_async_client() assert self._async_client is not None @@ -794,14 +795,14 @@ async def get_documents_by_id_async( def _query_by_sparse( self, query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, scale_score: bool = False, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Queries Qdrant using a sparse embedding and returns the most relevant documents. @@ -870,15 +871,15 @@ def _query_by_sparse( def _query_by_embedding( self, - query_embedding: List[float], - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + query_embedding: list[float], + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, scale_score: bool = False, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Queries Qdrant using a dense embedding and returns the most relevant documents. @@ -930,15 +931,15 @@ def _query_by_embedding( def _query_hybrid( self, - query_embedding: List[float], + query_embedding: list[float], query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion. @@ -1044,14 +1045,14 @@ def _query_hybrid( async def _query_by_sparse_async( self, query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, scale_score: bool = False, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously queries Qdrant using a sparse embedding and returns the most relevant documents. @@ -1123,15 +1124,15 @@ async def _query_by_sparse_async( async def _query_by_embedding_async( self, - query_embedding: List[float], - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + query_embedding: list[float], + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, scale_score: bool = False, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously queries Qdrant using a dense embedding and returns the most relevant documents. 
@@ -1184,15 +1185,15 @@ async def _query_by_embedding_async( async def _query_hybrid_async( self, - query_embedding: List[float], + query_embedding: list[float], query_sparse_embedding: SparseEmbedding, - filters: Optional[Union[Dict[str, Any], rest.Filter]] = None, + filters: Optional[Union[dict[str, Any], rest.Filter]] = None, top_k: int = 10, return_embedding: bool = False, score_threshold: Optional[float] = None, group_by: Optional[str] = None, group_size: Optional[int] = None, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion. @@ -1316,7 +1317,7 @@ def get_distance(self, similarity: str) -> rest.Distance: ) raise QdrantStoreError(msg) from ke - def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None) -> None: + def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[list[dict]] = None) -> None: """ Create payload index for the collection if payload_fields_to_index is provided. @@ -1335,7 +1336,7 @@ def _create_payload_index(self, collection_name: str, payload_fields_to_index: O ) async def _create_payload_index_async( - self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None + self, collection_name: str, payload_fields_to_index: Optional[list[dict]] = None ) -> None: """ Asynchronously create payload index for the collection if payload_fields_to_index is provided. @@ -1363,7 +1364,7 @@ def _set_up_collection( use_sparse_embeddings: bool, sparse_idf: bool, on_disk: bool = False, - payload_fields_to_index: Optional[List[dict]] = None, + payload_fields_to_index: Optional[list[dict]] = None, ) -> None: """ Sets up the Qdrant collection with the specified parameters. @@ -1420,7 +1421,7 @@ async def _set_up_collection_async( use_sparse_embeddings: bool, sparse_idf: bool, on_disk: bool = False, - payload_fields_to_index: Optional[List[dict]] = None, + payload_fields_to_index: Optional[list[dict]] = None, ) -> None: """ Asynchronously sets up the Qdrant collection with the specified parameters. @@ -1556,9 +1557,9 @@ async def recreate_collection_async( def _handle_duplicate_documents( self, - documents: List[Document], + documents: list[Document], policy: Optional[DuplicatePolicy] = None, - ) -> List[Document]: + ) -> list[Document]: """ Checks whether any of the passed documents is already existing in the chosen index and returns a list of documents that are not in the index yet. @@ -1571,7 +1572,7 @@ def _handle_duplicate_documents( if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL): documents = self._drop_duplicate_documents(documents) documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents]) - ids_exist_in_db: List[str] = [doc.id for doc in documents_found] + ids_exist_in_db: list[str] = [doc.id for doc in documents_found] if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL: msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'." 
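The duplicate-handling hunks here and below only touch annotations; the behavior they annotate — dropping in-batch duplicates by `Document.id` before a write under `DuplicatePolicy.SKIP`/`FAIL` — reduces to a sketch like this (assumed from the `_drop_duplicate_documents` hunk that follows):

```python
from haystack.dataclasses import Document


def drop_duplicate_documents(documents: list[Document]) -> list[Document]:
    # Keep only the first occurrence of each Document.id.
    seen: set = set()
    unique: list[Document] = []
    for document in documents:
        if document.id not in seen:
            seen.add(document.id)
            unique.append(document)
    return unique


docs = [Document(content="a"), Document(content="a"), Document(content="b")]
print(len(drop_duplicate_documents(docs)))  # 2: identical content hashes to the same id
```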
@@ -1583,9 +1584,9 @@ def _handle_duplicate_documents( async def _handle_duplicate_documents_async( self, - documents: List[Document], + documents: list[Document], policy: Optional[DuplicatePolicy] = None, - ) -> List[Document]: + ) -> list[Document]: """ Asynchronously checks whether any of the passed documents is already existing in the chosen index and returns a list of @@ -1599,7 +1600,7 @@ async def _handle_duplicate_documents_async( if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL): documents = self._drop_duplicate_documents(documents) documents_found = await self.get_documents_by_id_async(ids=[doc.id for doc in documents]) - ids_exist_in_db: List[str] = [doc.id for doc in documents_found] + ids_exist_in_db: list[str] = [doc.id for doc in documents_found] if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL: msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'." @@ -1609,13 +1610,13 @@ async def _handle_duplicate_documents_async( return documents - def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]: + def _drop_duplicate_documents(self, documents: list[Document]) -> list[Document]: """ Drop duplicate documents based on same hash ID. """ - _hash_ids: Set = set() - _documents: List[Document] = [] + _hash_ids: set = set() + _documents: list[Document] = [] for document in documents: if document.id in _hash_ids: @@ -1630,7 +1631,7 @@ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document] return _documents - def _prepare_collection_params(self) -> Dict[str, Any]: + def _prepare_collection_params(self) -> dict[str, Any]: """ Prepares the common parameters for collection creation. """ @@ -1646,7 +1647,7 @@ def _prepare_collection_params(self) -> Dict[str, Any]: "init_from": self.init_from, } - def _prepare_client_params(self) -> Dict[str, Any]: + def _prepare_client_params(self) -> dict[str, Any]: """ Prepares the common parameters for client initialization. @@ -1677,7 +1678,7 @@ def _prepare_collection_config( on_disk: Optional[bool] = None, use_sparse_embeddings: Optional[bool] = None, sparse_idf: bool = False, - ) -> Tuple[Union[Dict[str, rest.VectorParams], rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]: + ) -> tuple[Union[dict[str, rest.VectorParams], rest.VectorParams], Optional[dict[str, rest.SparseVectorParams]]]: """ Prepares the configuration for creating or recreating a Qdrant collection. @@ -1690,9 +1691,9 @@ def _prepare_collection_config( # dense vectors configuration base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance) - vectors_config: Union[rest.VectorParams, Dict[str, rest.VectorParams]] = base_vectors_config + vectors_config: Union[rest.VectorParams, dict[str, rest.VectorParams]] = base_vectors_config - sparse_vectors_config: Optional[Dict[str, rest.SparseVectorParams]] = None + sparse_vectors_config: Optional[dict[str, rest.SparseVectorParams]] = None if use_sparse_embeddings: # in this case, we need to define named vectors @@ -1710,7 +1711,7 @@ def _prepare_collection_config( return vectors_config, sparse_vectors_config @staticmethod - def _validate_filters(filters: Optional[Union[Dict[str, Any], rest.Filter]] = None) -> None: + def _validate_filters(filters: Optional[Union[dict[str, Any], rest.Filter]] = None) -> None: """ Validates the filters provided for querying. 
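The qdrant test hunks below also rewrite stacked `with ..., patch.object(` continuations into the parenthesized multi-context-manager form; CPython 3.9's PEG parser already accepts this syntax, and Python 3.10 made it official grammar. A minimal sketch of the resulting shape:

```python
from unittest.mock import MagicMock, patch

store = MagicMock()

# Each context manager sits on its own line inside the parentheses,
# matching the rewritten test hunks below.
with (
    patch.object(store, "collection_exists", return_value=True),
    patch.object(store, "get_collection", return_value=MagicMock()),
):
    assert store.collection_exists() is True
```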
@@ -1726,8 +1727,8 @@ def _validate_filters(filters: Optional[Union[Dict[str, Any], rest.Filter]] = No
             raise ValueError(msg)
 
     def _process_query_point_results(
-        self, results: List[rest.ScoredPoint], scale_score: bool = False
-    ) -> List[Document]:
+        self, results: list[rest.ScoredPoint], scale_score: bool = False
+    ) -> list[Document]:
         """
         Processes query results from Qdrant.
         """
@@ -1749,7 +1750,7 @@ def _process_query_point_results(
 
         return documents
 
-    def _process_group_results(self, groups: List[rest.PointGroup]) -> List[Document]:
+    def _process_group_results(self, groups: list[rest.PointGroup]) -> list[Document]:
         """
         Processes grouped query results from Qdrant.
 
diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py
index 7b2ab062eb..345911aab0 100644
--- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py
+++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py
@@ -1,12 +1,12 @@
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Optional, Union
 
 from haystack.utils.filters import COMPARISON_OPERATORS, LOGICAL_OPERATORS, FilterError
 from qdrant_client.http import models
 
 
 def convert_filters_to_qdrant(
-    filter_term: Optional[Union[List[Dict[str, Any]], Dict[str, Any], models.Filter]] = None,
+    filter_term: Optional[Union[list[dict[str, Any]], dict[str, Any], models.Filter]] = None,
 ) -> Optional[models.Filter]:
     """Converts Haystack filters to the format used by Qdrant.
 
@@ -27,9 +27,9 @@ def convert_filters_to_qdrant(
     return _build_final_filter(conditions)
 
 
-def _process_filter_items(filter_items: List[Dict[str, Any]]) -> List[models.Condition]:
+def _process_filter_items(filter_items: list[dict[str, Any]]) -> list[models.Condition]:
     """Process a list of filter items and return all conditions."""
-    all_conditions: List[models.Condition] = []
+    all_conditions: list[models.Condition] = []
 
     for item in filter_items:
         operator = item.get("operator")
@@ -52,7 +52,7 @@ def _process_filter_items(filter_items: List[Dict[str, Any]]) -> List[models.Con
     return all_conditions
 
 
-def _process_logical_operator(item: Dict[str, Any]) -> Optional[models.Condition]:
+def _process_logical_operator(item: dict[str, Any]) -> Optional[models.Condition]:
     """Process a logical operator (AND, OR, NOT) and return the corresponding condition."""
     operator = item["operator"]
     conditions = item.get("conditions")
@@ -78,7 +78,7 @@ def _process_logical_operator(item: Dict[str, Any]) -> Optional[models.Condition
     return None
 
 
-def _process_comparison_operator(item: Dict[str, Any]) -> Optional[models.Condition]:
+def _process_comparison_operator(item: dict[str, Any]) -> Optional[models.Condition]:
     """Process a comparison operator and return the corresponding condition."""
     operator = item["operator"]
     field = item.get("field")
@@ -91,7 +91,7 @@ def _process_comparison_operator(item: Dict[str, Any]) -> Optional[models.Condit
     return _build_comparison_condition(operator, field, value)
 
 
-def _build_final_filter(conditions: List[models.Condition]) -> Optional[models.Filter]:
+def _build_final_filter(conditions: list[models.Condition]) -> Optional[models.Filter]:
     """Build the final filter from a list of conditions."""
     if not conditions:
         return None
@@ -109,7 +109,7 @@ def _build_final_filter(conditions: List[models.Condition]) -> Optional[models.F
 
 def _build_comparison_condition(operator: str, key: str, value: Any) -> models.Condition:
     """Build a comparison condition based on operator, key, and value."""
-    condition_builders: Dict[str, Callable[[str, Any], models.Condition]] = {
+    condition_builders: dict[str, Callable[[str, Any], models.Condition]] = {
         "==": _build_eq_condition,
         "in": _build_in_condition,
         "!=": _build_ne_condition,
@@ -134,7 +134,7 @@ def _build_eq_condition(key: str, value: models.ValueVariants) -> models.Conditi
     return models.FieldCondition(key=key, match=models.MatchValue(value=value))
 
 
-def _build_in_condition(key: str, value: List[models.ValueVariants]) -> models.Condition:
+def _build_in_condition(key: str, value: list[models.ValueVariants]) -> models.Condition:
     if not isinstance(value, list):
         msg = f"Value {value} is not a list"
         raise FilterError(msg)
@@ -162,7 +162,7 @@ def _build_ne_condition(key: str, value: models.ValueVariants) -> models.Conditi
     )
 
 
-def _build_nin_condition(key: str, value: List[models.ValueVariants]) -> models.Condition:
+def _build_nin_condition(key: str, value: list[models.ValueVariants]) -> models.Condition:
     if not isinstance(value, list):
         msg = f"Value {value} is not a list"
         raise FilterError(msg)
diff --git a/integrations/qdrant/tests/test_document_store.py b/integrations/qdrant/tests/test_document_store.py
index 004c815e79..214a99d148 100644
--- a/integrations/qdrant/tests/test_document_store.py
+++ b/integrations/qdrant/tests/test_document_store.py
@@ -1,4 +1,3 @@
-from typing import List
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -112,7 +111,7 @@ def test_to_dict(self, monkeypatch):
         }
         assert doc_store.to_dict() == expected_dict
 
-    def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
         """
         Assert that two lists of Documents are equal.
         This is used in every test.
@@ -163,7 +162,7 @@ def test_query_hybrid(self, generate_sparse_embedding):
         sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
         embedding = [0.1] * 768
 
-        results: List[Document] = document_store._query_hybrid(
+        results: list[Document] = document_store._query_hybrid(
             query_sparse_embedding=sparse_embedding, query_embedding=embedding, top_k=10, return_embedding=True
         )
         assert len(results) == 10
@@ -191,7 +190,7 @@ def test_query_hybrid_with_group_by(self, generate_sparse_embedding):
         sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
         embedding = [0.1] * 768
 
-        results: List[Document] = document_store._query_hybrid(
+        results: list[Document] = document_store._query_hybrid(
             query_sparse_embedding=sparse_embedding,
             query_embedding=embedding,
             top_k=3,
@@ -232,8 +231,9 @@ def test_set_up_collection_with_existing_incompatible_collection(self):
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()}
 
-        with patch.object(document_store._client, "collection_exists", return_value=True), patch.object(
-            document_store._client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._client, "collection_exists", return_value=True),
+            patch.object(document_store._client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="created outside of Haystack"):
                 document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
@@ -247,8 +247,9 @@ def test_set_up_collection_use_sparse_embeddings_true_without_named_vectors(self
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig)
 
-        with patch.object(document_store._client, "collection_exists", return_value=True), patch.object(
-            document_store._client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._client, "collection_exists", return_value=True),
+            patch.object(document_store._client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"):
                 document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
@@ -261,8 +262,9 @@ def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors(self):
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()}
 
-        with patch.object(document_store._client, "collection_exists", return_value=True), patch.object(
-            document_store._client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._client, "collection_exists", return_value=True),
+            patch.object(document_store._client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"):
                 document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
@@ -277,8 +279,9 @@ def test_set_up_collection_with_distance_mismatch(self):
         mock_collection_info.config.params.vectors.distance = rest.Distance.DOT
         mock_collection_info.config.params.vectors.size = 768
 
-        with patch.object(document_store._client, "collection_exists", return_value=True), patch.object(
-            document_store._client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._client, "collection_exists", return_value=True),
+            patch.object(document_store._client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(ValueError, match="different similarity"):
                 document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
@@ -292,8 +295,9 @@ def test_set_up_collection_with_dimension_mismatch(self):
         mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE
         mock_collection_info.config.params.vectors.size = 512
 
-        with patch.object(document_store._client, "collection_exists", return_value=True), patch.object(
-            document_store._client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._client, "collection_exists", return_value=True),
+            patch.object(document_store._client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(ValueError, match="different vector size"):
                 document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
diff --git a/integrations/qdrant/tests/test_document_store_async.py b/integrations/qdrant/tests/test_document_store_async.py
index 5fbdd8b304..8235a66239 100644
--- a/integrations/qdrant/tests/test_document_store_async.py
+++ b/integrations/qdrant/tests/test_document_store_async.py
@@ -1,4 +1,3 @@
-from typing import List
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -74,7 +73,7 @@ async def test_query_hybrid_async(self, generate_sparse_embedding):
         sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
         embedding = [0.1] * 768
 
-        results: List[Document] = await document_store._query_hybrid_async(
+        results: list[Document] = await document_store._query_hybrid_async(
             query_sparse_embedding=sparse_embedding, query_embedding=embedding, top_k=10, return_embedding=True
         )
         assert len(results) == 10
@@ -103,7 +102,7 @@ async def test_query_hybrid_with_group_by_async(self, generate_sparse_embedding)
         sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
         embedding = [0.1] * 768
 
-        results: List[Document] = await document_store._query_hybrid_async(
+        results: list[Document] = await document_store._query_hybrid_async(
             query_sparse_embedding=sparse_embedding,
             query_embedding=embedding,
             top_k=3,
@@ -151,8 +150,9 @@ async def test_set_up_collection_with_dimension_mismatch_async(self):
         mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE
         mock_collection_info.config.params.vectors.size = 512
 
-        with patch.object(document_store._async_client, "collection_exists", return_value=True), patch.object(
-            document_store._async_client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._async_client, "collection_exists", return_value=True),
+            patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(ValueError, match="different vector size"):
                 await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False)
@@ -165,8 +165,9 @@ async def test_set_up_collection_with_existing_incompatible_collection_async(sel
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()}
 
-        with patch.object(document_store._async_client, "collection_exists", return_value=True), patch.object(
-            document_store._async_client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._async_client, "collection_exists", return_value=True),
+            patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="created outside of Haystack"):
                 await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False)
@@ -181,8 +182,9 @@ async def test_set_up_collection_use_sparse_embeddings_true_without_named_vector
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig)
 
-        with patch.object(document_store._async_client, "collection_exists", return_value=True), patch.object(
-            document_store._async_client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._async_client, "collection_exists", return_value=True),
+            patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"):
                 await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False)
@@ -196,8 +198,9 @@ async def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors_
         mock_collection_info = MagicMock()
         mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()}
 
-        with patch.object(document_store._async_client, "collection_exists", return_value=True), patch.object(
-            document_store._async_client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._async_client, "collection_exists", return_value=True),
+            patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"):
                 await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False)
@@ -213,8 +216,9 @@ async def test_set_up_collection_with_distance_mismatch_async(self):
         mock_collection_info.config.params.vectors.distance = rest.Distance.DOT
         mock_collection_info.config.params.vectors.size = 768
 
-        with patch.object(document_store._async_client, "collection_exists", return_value=True), patch.object(
-            document_store._async_client, "get_collection", return_value=mock_collection_info
+        with (
+            patch.object(document_store._async_client, "collection_exists", return_value=True),
+            patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info),
         ):
             with pytest.raises(ValueError, match="different similarity"):
                 await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False)
diff --git a/integrations/qdrant/tests/test_embedding_retriever.py b/integrations/qdrant/tests/test_embedding_retriever.py
index 35b07a0c7a..092e1b3a52 100644
--- a/integrations/qdrant/tests/test_embedding_retriever.py
+++ b/integrations/qdrant/tests/test_embedding_retriever.py
@@ -1,5 +1,3 @@
-from typing import List
-
 import pytest
 from haystack.dataclasses import Document
 from haystack.document_stores.types import FilterPolicy
@@ -121,14 +119,14 @@ def test_from_dict(self):
         assert retriever._group_by is None
         assert retriever._group_size is None
 
-    def test_run(self, filterable_docs: List[Document]):
+    def test_run(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
         document_store.write_documents(filterable_docs)
 
         retriever = QdrantEmbeddingRetriever(document_store=document_store)
 
-        results: List[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
+        results: list[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
 
         assert len(results) == 10
 
         results = retriever.run(query_embedding=_random_embeddings(768), top_k=5, return_embedding=False)["documents"]
@@ -137,7 +135,7 @@ def test_run(self, filterable_docs: List[Document]):
         for document in results:
             assert document.embedding is None
 
-    def test_run_filters(self, filterable_docs: List[Document]):
+    def test_run_filters(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
 
         document_store.write_documents(filterable_docs)
@@ -148,7 +146,7 @@ def test_run_filters(self, filterable_docs: List[Document]):
             filter_policy=FilterPolicy.MERGE,
         )
 
-        results: List[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
+        results: list[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
 
         assert len(results) == 3
        results = retriever.run(
@@ -187,14 +185,14 @@ def test_run_with_score_threshold(self):
         )["documents"]
         assert len(results) == 2
 
-    def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
+    def test_run_with_sparse_activated(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         document_store.write_documents(filterable_docs)
 
         retriever = QdrantEmbeddingRetriever(document_store=document_store)
 
-        results: List[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
+        results: list[Document] = retriever.run(query_embedding=_random_embeddings(768))["documents"]
 
         assert len(results) == 10
 
@@ -205,7 +203,7 @@ def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
         for document in results:
             assert document.embedding is None
 
-    def test_run_with_group_by(self, filterable_docs: List[Document]):
+    def test_run_with_group_by(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
         # Add group_field metadata to documents
         for index, doc in enumerate(filterable_docs):
@@ -226,7 +224,7 @@ def test_run_with_group_by(self, filterable_docs: List[Document]):
             assert document.embedding is None
 
     @pytest.mark.asyncio
-    async def test_run_async(self, filterable_docs: List[Document]):
+    async def test_run_async(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
 
         await document_store.write_documents_async(filterable_docs)
@@ -242,7 +240,7 @@ async def test_run_async(self, filterable_docs: List[Document]):
             assert document.embedding is None
 
     @pytest.mark.asyncio
-    async def test_run_filters_async(self, filterable_docs: List[Document]):
+    async def test_run_filters_async(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
 
         await document_store.write_documents_async(filterable_docs)
@@ -293,7 +291,7 @@ async def test_run_with_score_threshold_async(self):
         assert len(result["documents"]) == 2
 
     @pytest.mark.asyncio
-    async def test_run_with_sparse_activated_async(self, filterable_docs: List[Document]):
+    async def test_run_with_sparse_activated_async(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         await document_store.write_documents_async(filterable_docs)
@@ -312,7 +310,7 @@ async def test_run_with_sparse_activated_async(self, filterable_docs: List[Docum
             assert document.embedding is None
 
     @pytest.mark.asyncio
-    async def test_run_with_group_by_async(self, filterable_docs: List[Document]):
+    async def test_run_with_group_by_async(self, filterable_docs: list[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
         # Add group_field metadata to documents
         for index, doc in enumerate(filterable_docs):
diff --git a/integrations/qdrant/tests/test_filters.py b/integrations/qdrant/tests/test_filters.py
index 68abac5d05..fca8755296 100644
--- a/integrations/qdrant/tests/test_filters.py
+++ b/integrations/qdrant/tests/test_filters.py
@@ -1,5 +1,3 @@
-from typing import List
-
 import pytest
 from haystack import Document
 from haystack.testing.document_store import FilterDocumentsTest
@@ -34,7 +32,7 @@ def test_filter_documents_with_qdrant_filters(self, document_store, filterable_d
             [d for d in filterable_docs if (d.meta.get("number") != 100 and d.meta.get("name") != "name_0")],
         )
 
-    def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
         """
         Assert that two lists of Documents are equal.
         This is used in every test.
diff --git a/integrations/qdrant/tests/test_sparse_embedding_retriever.py b/integrations/qdrant/tests/test_sparse_embedding_retriever.py
index 58c1c0e633..162da994ca 100644
--- a/integrations/qdrant/tests/test_sparse_embedding_retriever.py
+++ b/integrations/qdrant/tests/test_sparse_embedding_retriever.py
@@ -1,5 +1,3 @@
-from typing import List
-
 import pytest
 from haystack.dataclasses import Document, SparseEmbedding
 from haystack.document_stores.types import FilterPolicy
@@ -149,7 +147,7 @@ def test_from_dict_no_filter_policy(self):
         assert retriever._group_by is None
         assert retriever._group_size is None
 
-    def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
+    def test_run(self, filterable_docs: list[Document], generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         # Add fake sparse embedding to documents
@@ -160,7 +158,7 @@ def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
         retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
         sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
 
-        results: List[Document] = retriever.run(query_sparse_embedding=sparse_embedding)["documents"]
+        results: list[Document] = retriever.run(query_sparse_embedding=sparse_embedding)["documents"]
 
         assert len(results) == 10
         results = retriever.run(query_sparse_embedding=sparse_embedding, top_k=5, return_embedding=True)["documents"]
@@ -169,7 +167,7 @@ def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
         for document in results:
             assert document.sparse_embedding
 
-    def test_run_with_group_by(self, filterable_docs: List[Document], generate_sparse_embedding):
+    def test_run_with_group_by(self, filterable_docs: list[Document], generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         # Add fake sparse embedding to documents
@@ -193,7 +191,7 @@ def test_run_with_group_by(self, filterable_docs: List[Document], generate_spars
             assert document.sparse_embedding
 
     @pytest.mark.asyncio
-    async def test_run_async(self, filterable_docs: List[Document], generate_sparse_embedding):
+    async def test_run_async(self, filterable_docs: list[Document], generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         # Add fake sparse embedding to documents
@@ -214,7 +212,7 @@ async def test_run_async(self, filterable_docs: List[Document], generate_sparse_
         assert document.sparse_embedding
 
     @pytest.mark.asyncio
-    async def test_run_with_group_by_async(self, filterable_docs: List[Document], generate_sparse_embedding):
+    async def test_run_with_group_by_async(self, filterable_docs: list[Document], generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
 
         # Add fake sparse embedding to documents
diff --git a/integrations/ragas/pyproject.toml b/integrations/ragas/pyproject.toml
index 2779868bae..e6a1a32742 100644
--- a/integrations/ragas/pyproject.toml
+++ b/integrations/ragas/pyproject.toml
@@ -80,7 +80,7 @@ module = ["ragas.*"]
 ignore_missing_imports = true
 
 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120
 exclude = ["example", "tests"]
 
diff --git a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
index 7b477cef45..b653e12860 100644
--- a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
+++ b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
@@ -1,5 +1,5 @@
 import re
-from typing import Any, Dict, List, Optional, Union, cast, get_args, get_origin
+from typing import Any, Optional, Union, cast, get_args, get_origin
 
 from haystack import Document, component
 from haystack.dataclasses import ChatMessage
@@ -54,7 +54,7 @@ class RagasEvaluator:
 
     def __init__(
         self,
-        ragas_metrics: List[Metric],
+        ragas_metrics: list[Metric],
         evaluator_llm: Optional[BaseRagasLLM] = None,
         evaluator_embedding: Optional[BaseRagasEmbeddings] = None,
     ):
@@ -72,7 +72,7 @@ def __init__(
 
     def _validate_inputs(
         self,
-        metrics: List[Metric],
+        metrics: list[Metric],
         llm: Optional[BaseRagasLLM],
         embedding: Optional[BaseRagasEmbeddings],
     ) -> None:
@@ -100,13 +100,13 @@ def _validate_inputs(
     def run(
         self,
         query: Optional[str] = None,
-        response: Optional[Union[List[ChatMessage], str]] = None,
-        documents: Optional[List[Union[Document, str]]] = None,
-        reference_contexts: Optional[List[str]] = None,
-        multi_responses: Optional[List[str]] = None,
+        response: Optional[Union[list[ChatMessage], str]] = None,
+        documents: Optional[list[Union[Document, str]]] = None,
+        reference_contexts: Optional[list[str]] = None,
+        multi_responses: Optional[list[str]] = None,
         reference: Optional[str] = None,
-        rubrics: Optional[Dict[str, str]] = None,
-    ) -> Dict[str, Any]:
+        rubrics: Optional[dict[str, str]] = None,
+    ) -> dict[str, Any]:
         """
         Evaluates the provided query against the documents and returns the evaluation result.
 
@@ -151,7 +151,7 @@ def run(
 
         return {"result": result}
 
-    def _process_documents(self, documents: Union[List[Union[Document, str]], None]) -> Union[List[str], None]:
+    def _process_documents(self, documents: Union[list[Union[Document, str]], None]) -> Union[list[str], None]:
         """Process and validate input documents.
:param documents: List of Documents or strings to process @@ -171,7 +171,7 @@ def _process_documents(self, documents: Union[List[Union[Document, str]], None]) error_message = "'documents' must be a list of either Documents or strings." raise ValueError(error_message) - def _process_response(self, response: Optional[Union[List[ChatMessage], str]]) -> Union[str, None]: + def _process_response(self, response: Optional[Union[list[ChatMessage], str]]) -> Union[str, None]: """Process response into expected format. :param response: Response to process diff --git a/integrations/snowflake/pyproject.toml b/integrations/snowflake/pyproject.toml index a99604a2b8..b883f269c4 100644 --- a/integrations/snowflake/pyproject.toml +++ b/integrations/snowflake/pyproject.toml @@ -82,7 +82,7 @@ check_untyped_defs = true disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py b/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py index 610b452173..6ccd124a86 100644 --- a/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py +++ b/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Literal, Optional +from typing import Any, Literal, Optional from urllib.parse import quote_plus import polars as pl @@ -180,14 +180,14 @@ def warm_up(self) -> None: self._warmed_up = True - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. :returns: Dictionary with serialized data. """ - data: Dict[str, Any] = { + data: dict[str, Any] = { "user": self.user, "account": self.account, "database": self.database, @@ -207,7 +207,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, **data) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "SnowflakeTableRetriever": + def from_dict(cls, data: dict[str, Any]) -> "SnowflakeTableRetriever": """ Deserializes the component from a dictionary. @@ -346,7 +346,7 @@ def _execute_query_with_connector(self, query: str) -> Optional[pl.DataFrame]: """ try: # Build connection parameters - conn_params: Dict[str, Any] = { + conn_params: dict[str, Any] = { "user": self.user, "account": self.account, "authenticator": self.authenticator.lower(), @@ -400,7 +400,7 @@ def _execute_query_with_connector(self, query: str) -> Optional[pl.DataFrame]: return None @staticmethod - def _empty_response() -> Dict[str, Any]: + def _empty_response() -> dict[str, Any]: """Returns a standardized empty response. :returns: @@ -411,7 +411,7 @@ def _empty_response() -> Dict[str, Any]: return {"dataframe": DataFrame(), "table": ""} @component.output_types(dataframe=DataFrame, table=str) - def run(self, query: str, return_markdown: Optional[bool] = None) -> Dict[str, Any]: + def run(self, query: str, return_markdown: Optional[bool] = None) -> dict[str, Any]: """ Executes a SQL query against a Snowflake database using ADBC and Polars. 
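A hedged usage sketch for the Snowflake retriever re-annotated above. The `run`/`warm_up` calls and the `dataframe`/`table` output keys come from the hunks in this patch; the constructor arguments are placeholders inferred from the `to_dict` fields, not a verified signature:

```python
from haystack_integrations.components.retrievers.snowflake import SnowflakeTableRetriever

# <USER>, <ACCOUNT>, <DATABASE> are hypothetical placeholders.
retriever = SnowflakeTableRetriever(user="<USER>", account="<ACCOUNT>", database="<DATABASE>")
retriever.warm_up()

result: dict = retriever.run(query="SELECT 1 AS answer", return_markdown=True)
print(result["table"])      # markdown rendering of the result set
print(result["dataframe"])  # the same data as a dataframe
```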
diff --git a/integrations/snowflake/tests/test_snowflake_table_retriever.py b/integrations/snowflake/tests/test_snowflake_table_retriever.py
index b8b4a3707c..64f3ce3967 100644
--- a/integrations/snowflake/tests/test_snowflake_table_retriever.py
+++ b/integrations/snowflake/tests/test_snowflake_table_retriever.py
@@ -1,6 +1,6 @@
 import os
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 from unittest.mock import Mock
 from urllib.parse import quote_plus
 
@@ -40,7 +40,7 @@ def retriever(mocker: Mock) -> SnowflakeTableRetriever:
 
 
 @pytest.fixture
-def empty_response() -> Dict[str, Any]:
+def empty_response() -> dict[str, Any]:
     return SnowflakeTableRetriever._empty_response()
 
 
@@ -253,7 +253,7 @@ def test_run_sql_error_handling(
         exception: Exception,
         expected_log_level: str,
         expected_log_msg: str,
-        empty_response: Dict[str, Any],
+        empty_response: dict[str, Any],
     ) -> None:
         mocker.patch("polars.read_database_uri", side_effect=exception)
 
@@ -286,7 +286,7 @@ def test_run_pandas_conversion_error(
         mocker: Mock,
         exception: Exception,
         expected_error_msg: str,
-        empty_response: Dict[str, Any],
+        empty_response: dict[str, Any],
         toy_polars_df: pl.DataFrame,
     ) -> None:
         mocker.patch.object(toy_polars_df, "to_pandas", side_effect=exception)
@@ -316,18 +316,18 @@ def test_run_happy_path(
         assert result["dataframe"].equals(toy_pandas_df)
         assert result["table"] == expected_markdown
 
-    def test_empty_query(self, retriever: SnowflakeTableRetriever, empty_response: Dict[str, Any]) -> None:
+    def test_empty_query(self, retriever: SnowflakeTableRetriever, empty_response: dict[str, Any]) -> None:
         result = retriever.run(query="")
         assert result["dataframe"].equals(empty_response["dataframe"])
         assert result["table"] == empty_response["table"]
 
-    def test_non_string_query(self, retriever: SnowflakeTableRetriever, empty_response: Dict[str, Any]) -> None:
+    def test_non_string_query(self, retriever: SnowflakeTableRetriever, empty_response: dict[str, Any]) -> None:
         result = retriever.run(query=123)
         assert result["dataframe"].equals(empty_response["dataframe"])
         assert result["table"] == empty_response["table"]
 
     def test_empty_dataframe_result(
-        self, retriever: SnowflakeTableRetriever, mocker: Mock, empty_response: Dict[str, Any]
+        self, retriever: SnowflakeTableRetriever, mocker: Mock, empty_response: dict[str, Any]
     ) -> None:
         empty_df = pl.DataFrame()
         mocker.patch("polars.read_database_uri", return_value=empty_df)
@@ -338,7 +338,7 @@ def test_empty_dataframe_result(
         assert result["table"] == empty_response["table"]
 
     def test_uri_construction_error(
-        self, retriever: SnowflakeTableRetriever, mocker: Mock, empty_response: Dict[str, Any]
+        self, retriever: SnowflakeTableRetriever, mocker: Mock, empty_response: dict[str, Any]
     ) -> None:
         mocker.patch.object(
             SnowflakeTableRetriever, "_snowflake_uri_constructor", side_effect=RuntimeError("Failed to construct URI")
diff --git a/integrations/stackit/pyproject.toml b/integrations/stackit/pyproject.toml
index afd54d3d1e..470c2f4750 100644
--- a/integrations/stackit/pyproject.toml
+++ b/integrations/stackit/pyproject.toml
@@ -75,7 +75,7 @@ check_untyped_defs = true
 disallow_incomplete_defs = true
 
 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 120
 
 [tool.ruff.lint]
diff --git a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py
index a116d92a1f..0362a7f5cf 100644
--- a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py
+++ b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 from haystack import component, default_to_dict
 from haystack.components.embedders import OpenAIDocumentEmbedder
@@ -39,12 +39,12 @@ def __init__(
         suffix: str = "",
         batch_size: int = 32,
         progress_bar: bool = True,
-        meta_fields_to_embed: Optional[List[str]] = None,
+        meta_fields_to_embed: Optional[list[str]] = None,
         embedding_separator: str = "\n",
         *,
         timeout: Optional[float] = None,
         max_retries: Optional[int] = None,
-        http_client_kwargs: Optional[Dict[str, Any]] = None,
+        http_client_kwargs: Optional[dict[str, Any]] = None,
     ):
         """
         Creates a STACKITDocumentEmbedder component.
@@ -99,7 +99,7 @@ def __init__(
         self.timeout = timeout
         self.max_retries = max_retries
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
         :returns:
diff --git a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py
index 52e9581f16..061933227e 100644
--- a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py
+++ b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 from haystack import component, default_to_dict
 from haystack.components.embedders import OpenAITextEmbedder
@@ -33,7 +33,7 @@ def __init__(
         *,
         timeout: Optional[float] = None,
         max_retries: Optional[int] = None,
-        http_client_kwargs: Optional[Dict[str, Any]] = None,
+        http_client_kwargs: Optional[dict[str, Any]] = None,
     ):
         """
         Creates a STACKITTextEmbedder component.
@@ -75,7 +75,7 @@ def __init__(
         self.timeout = timeout
         self.max_retries = max_retries
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
:returns: diff --git a/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py b/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py index e2ec1bd86d..d937ead733 100644 --- a/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py +++ b/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2025-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict from haystack.components.generators.chat import OpenAIChatGenerator @@ -42,11 +42,11 @@ def __init__( api_key: Secret = Secret.from_env_var("STACKIT_API_KEY"), streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, *, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of the STACKITChatGenerator class. @@ -96,7 +96,7 @@ def __init__( http_client_kwargs=http_client_kwargs, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/integrations/togetherai/pyproject.toml b/integrations/togetherai/pyproject.toml index 9b8bd21069..45dc2c26b5 100644 --- a/integrations/togetherai/pyproject.toml +++ b/integrations/togetherai/pyproject.toml @@ -84,7 +84,7 @@ known-first-party = ["haystack_integrations"] [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/chat/chat_generator.py b/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/chat/chat_generator.py index 0a732f31e7..73018e3d75 100644 --- a/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/chat/chat_generator.py +++ b/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/chat/chat_generator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Optional from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator @@ -63,11 +63,11 @@ def __init__( model: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo", streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://api.together.xyz/v1", - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, tools: Optional[ToolsType] = None, timeout: Optional[float] = None, max_retries: Optional[int] = None, - http_client_kwargs: Optional[Dict[str, Any]] = None, + http_client_kwargs: Optional[dict[str, Any]] = None, ): """ Creates an instance of TogetherAIChatGenerator. Unless specified otherwise, @@ -123,7 +123,7 @@ def __init__( http_client_kwargs=http_client_kwargs, ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary.
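Reviewer note: the same mechanical rewrite repeats across every integration touched by this patch. typing.Dict, typing.List, and typing.Type give way to the builtin generics that PEP 585 made subscriptable at runtime in Python 3.9, which is also why each pyproject.toml bumps ruff's target-version from py38 to py39. A minimal before/after sketch; the function name is hypothetical and not part of this patch:

from typing import Any, Optional

# Before: def serialize_settings(settings: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
# After (requires Python >= 3.9): the builtin dict is generic at runtime.
def serialize_settings(settings: Optional[dict[str, Any]] = None) -> dict[str, Any]:
    # On Python 3.8 the annotation above fails at import time with
    # "TypeError: 'type' object is not subscriptable".
    return dict(settings or {})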
diff --git a/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/generator.py b/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/generator.py index c86110c512..c578a215c6 100644 --- a/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/generator.py +++ b/integrations/togetherai/src/haystack_integrations/components/generators/togetherai/generator.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional, Union, cast +from typing import Any, Optional, Union, cast from haystack import component, default_to_dict, logging from haystack.dataclasses import ChatMessage, StreamingCallbackT @@ -38,7 +38,7 @@ def __init__( api_base_url: Optional[str] = "https://api.together.xyz/v1", streaming_callback: Optional[StreamingCallbackT] = None, system_prompt: Optional[str] = None, - generation_kwargs: Optional[Dict[str, Any]] = None, + generation_kwargs: Optional[dict[str, Any]] = None, timeout: Optional[float] = None, max_retries: Optional[int] = None, ): @@ -99,7 +99,7 @@ def __init__( ) self.system_prompt = system_prompt - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/integrations/unstructured/pyproject.toml b/integrations/unstructured/pyproject.toml index 43cefb893b..8ac34e0f2b 100644 --- a/integrations/unstructured/pyproject.toml +++ b/integrations/unstructured/pyproject.toml @@ -84,7 +84,7 @@ allow-direct-references = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py index 3538cc69e2..8c67cda588 100644 --- a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py +++ b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py @@ -5,7 +5,7 @@ import os from collections import defaultdict from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.components.converters.utils import normalize_metadata @@ -52,7 +52,7 @@ def __init__( "one-doc-per-file", "one-doc-per-page", "one-doc-per-element" ] = "one-doc-per-file", separator: str = "\n\n", - unstructured_kwargs: Optional[Dict[str, Any]] = None, + unstructured_kwargs: Optional[dict[str, Any]] = None, progress_bar: bool = True, # noqa: FBT001, FBT002 ): """ @@ -91,7 +91,7 @@ def __init__( ) raise ValueError(msg) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -110,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "UnstructuredFileConverter": + def from_dict(cls, data: dict[str, Any]) -> "UnstructuredFileConverter": """ Deserializes the component from a dictionary. 
:param data: @@ -121,12 +121,12 @@ def from_dict(cls, data: Dict[str, Any]) -> "UnstructuredFileConverter": deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - paths: Union[List[str], List[os.PathLike]], - meta: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, - ) -> Dict[str, List[Document]]: + paths: Union[list[str], list[os.PathLike]], + meta: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = None, + ) -> dict[str, list[Document]]: """ Convert files to Haystack Documents using the Unstructured API. @@ -178,11 +178,11 @@ def run( @staticmethod def _create_documents( filepath: Path, - elements: List[Element], + elements: list[Element], document_creation_mode: Literal["one-doc-per-file", "one-doc-per-page", "one-doc-per-element"], separator: str, - meta: Dict[str, Any], - ) -> List[Document]: + meta: dict[str, Any], + ) -> list[Document]: """ Create Haystack Documents from the elements returned by Unstructured. """ @@ -222,7 +222,7 @@ def _create_documents( docs.append(doc) return docs - def _partition_file_into_elements(self, filepath: Path) -> List[Element]: + def _partition_file_into_elements(self, filepath: Path) -> list[Element]: """ Partition a file into elements using the Unstructured API. """ diff --git a/integrations/weaviate/pyproject.toml b/integrations/weaviate/pyproject.toml index 6bf86ba9b6..324e5f92ca 100644 --- a/integrations/weaviate/pyproject.toml +++ b/integrations/weaviate/pyproject.toml @@ -81,7 +81,7 @@ disallow_incomplete_defs = true [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 [tool.ruff.lint] diff --git a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py index 8a05f9e5c5..6ee7e8766b 100644 --- a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py +++ b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.document_stores.types import FilterPolicy @@ -31,7 +31,7 @@ def __init__( self, *, document_store: WeaviateDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, ): @@ -53,7 +53,7 @@ def __init__( filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy) ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -69,7 +69,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WeaviateBM25Retriever": + def from_dict(cls, data: dict[str, Any]) -> "WeaviateBM25Retriever": """ Deserializes the component from a dictionary. 
@@ -88,10 +88,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "WeaviateBM25Retriever": return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( - self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> Dict[str, List[Document]]: + self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> dict[str, list[Document]]: """ Retrieves documents from Weaviate using the BM25 algorithm. diff --git a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/embedding_retriever.py b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/embedding_retriever.py index 1868804216..218874c8f3 100644 --- a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/embedding_retriever.py +++ b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/embedding_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.document_stores.types import FilterPolicy @@ -21,7 +21,7 @@ def __init__( self, *, document_store: WeaviateDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, distance: Optional[float] = None, certainty: Optional[float] = None, @@ -60,7 +60,7 @@ def __init__( filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy) ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -78,7 +78,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WeaviateEmbeddingRetriever": + def from_dict(cls, data: dict[str, Any]) -> "WeaviateEmbeddingRetriever": """ Deserializes the component from a dictionary. @@ -98,15 +98,15 @@ def from_dict(cls, data: Dict[str, Any]) -> "WeaviateEmbeddingRetriever": return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, distance: Optional[float] = None, certainty: Optional[float] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieves documents from Weaviate using the vector search. 
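Reviewer note: the retriever hunks above change only annotations, not behavior, so existing call sites keep working on Python 3.9+. A usage sketch of the updated WeaviateEmbeddingRetriever.run signature; the import paths, URL, and toy embedding are assumptions for illustration, not taken from this patch:

from haystack_integrations.components.retrievers.weaviate import WeaviateEmbeddingRetriever
from haystack_integrations.document_stores.weaviate import WeaviateDocumentStore

# Assumes a Weaviate instance reachable at this URL.
store = WeaviateDocumentStore(url="http://localhost:8080")
retriever = WeaviateEmbeddingRetriever(document_store=store, top_k=5)

# run() now advertises query_embedding: list[float] and returns dict[str, list[Document]].
result = retriever.run(query_embedding=[0.1, 0.2, 0.3])
print(result["documents"])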
diff --git a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/hybrid_retriever.py b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/hybrid_retriever.py index 53c509002f..38844a0050 100644 --- a/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/hybrid_retriever.py +++ b/integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/hybrid_retriever.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.document_stores.types import FilterPolicy @@ -21,7 +21,7 @@ def __init__( self, *, document_store: WeaviateDocumentStore, - filters: Optional[Dict[str, Any]] = None, + filters: Optional[dict[str, Any]] = None, top_k: int = 10, alpha: Optional[float] = None, max_vector_distance: Optional[float] = None, @@ -75,7 +75,7 @@ def __init__( filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy) ) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -93,7 +93,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WeaviateHybridRetriever": + def from_dict(cls, data: dict[str, Any]) -> "WeaviateHybridRetriever": """ Deserializes the component from a dictionary. @@ -111,16 +111,16 @@ def from_dict(cls, data: Dict[str, Any]) -> "WeaviateHybridRetriever": return default_from_dict(cls, data) - @component.output_types(documents=List[Document]) + @component.output_types(documents=list[Document]) def run( self, query: str, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, alpha: Optional[float] = None, max_vector_distance: Optional[float] = None, - ) -> Dict[str, List[Document]]: + ) -> dict[str, list[Document]]: """ Retrieves documents from Weaviate using hybrid search. diff --git a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py index 6374218ee9..91e1b905f0 100644 --- a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py +++ b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict +from typing import Any from dateutil import parser from haystack.errors import FilterError @@ -11,7 +11,7 @@ from weaviate.collections.classes.filters import Filter, FilterReturn -def convert_filters(filters: Dict[str, Any]) -> FilterReturn: +def convert_filters(filters: dict[str, Any]) -> FilterReturn: """ Convert filters from Haystack format to Weaviate format. """ @@ -39,7 +39,7 @@ def convert_filters(filters: Dict[str, Any]) -> FilterReturn: } -def _invert_condition(filters: Dict[str, Any]) -> Dict[str, Any]: +def _invert_condition(filters: dict[str, Any]) -> dict[str, Any]: """ Invert condition recursively. Weaviate doesn't support NOT filters so we need to invert them ourselves. 
@@ -63,7 +63,7 @@ def _invert_condition(filters: Dict[str, Any]) -> Dict[str, Any]: } -def _parse_logical_condition(condition: Dict[str, Any]) -> FilterReturn: +def _parse_logical_condition(condition: dict[str, Any]) -> FilterReturn: if "operator" not in condition: msg = f"'operator' key missing in {condition}" raise FilterError(msg) @@ -228,7 +228,7 @@ def _not_in(field: str, value: Any) -> FilterReturn: } -def _parse_comparison_condition(condition: Dict[str, Any]) -> FilterReturn: +def _parse_comparison_condition(condition: dict[str, Any]) -> FilterReturn: field: str = condition["field"] if field.startswith("meta."): diff --git a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/auth.py b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/auth.py index 19461efb85..20880a8c01 100644 --- a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/auth.py +++ b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/auth.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, field, fields from enum import Enum -from typing import Any, Dict, Type +from typing import Any from haystack.core.errors import DeserializationError from haystack.utils.auth import Secret, deserialize_secrets_inplace @@ -30,7 +30,7 @@ def __str__(self): return self.value @staticmethod - def from_class(auth_class: Type["AuthCredentials"]) -> "SupportedAuthTypes": + def from_class(auth_class: type["AuthCredentials"]) -> "SupportedAuthTypes": auth_types = { AuthApiKey: SupportedAuthTypes.API_KEY, AuthBearerToken: SupportedAuthTypes.BEARER, @@ -47,7 +47,7 @@ class AuthCredentials(ABC): Can be used to deserialize from dict any of the supported auth credentials. """ - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Converts the object to a dictionary representation for serialization. """ @@ -61,7 +61,7 @@ def to_dict(self) -> Dict[str, Any]: return {"type": str(SupportedAuthTypes.from_class(self.__class__)), "init_parameters": _fields} @staticmethod - def from_dict(data: Dict[str, Any]) -> "AuthCredentials": + def from_dict(data: dict[str, Any]) -> "AuthCredentials": """ Converts a dictionary representation to an auth credentials object. """ @@ -69,7 +69,7 @@ def from_dict(data: Dict[str, Any]) -> "AuthCredentials": msg = "Missing 'type' in serialization data" raise DeserializationError(msg) - auth_classes: Dict[str, Type[AuthCredentials]] = { + auth_classes: dict[str, type[AuthCredentials]] = { str(SupportedAuthTypes.API_KEY): AuthApiKey, str(SupportedAuthTypes.BEARER): AuthBearerToken, str(SupportedAuthTypes.CLIENT_CREDENTIALS): AuthClientCredentials, @@ -80,7 +80,7 @@ def from_dict(data: Dict[str, Any]) -> "AuthCredentials": @classmethod @abstractmethod - def _from_dict(cls, data: Dict[str, Any]) -> "AuthCredentials": + def _from_dict(cls, data: dict[str, Any]) -> "AuthCredentials": """ Internal method to convert a dictionary representation to an auth credentials object. All subclasses must implement this method. 
@@ -104,7 +104,7 @@ class AuthApiKey(AuthCredentials): api_key: Secret = field(default_factory=lambda: Secret.from_env_var(["WEAVIATE_API_KEY"])) @classmethod - def _from_dict(cls, data: Dict[str, Any]) -> "AuthApiKey": + def _from_dict(cls, data: dict[str, Any]) -> "AuthApiKey": deserialize_secrets_inplace(data["init_parameters"], ["api_key"]) return cls(**data["init_parameters"]) @@ -128,7 +128,7 @@ class AuthBearerToken(AuthCredentials): refresh_token: Secret = field(default_factory=lambda: Secret.from_env_var(["WEAVIATE_REFRESH_TOKEN"], strict=False)) @classmethod - def _from_dict(cls, data: Dict[str, Any]) -> "AuthBearerToken": + def _from_dict(cls, data: dict[str, Any]) -> "AuthBearerToken": deserialize_secrets_inplace(data["init_parameters"], ["access_token", "refresh_token"]) return cls(**data["init_parameters"]) @@ -159,7 +159,7 @@ class AuthClientCredentials(AuthCredentials): scope: Secret = field(default_factory=lambda: Secret.from_env_var(["WEAVIATE_SCOPE"], strict=False)) @classmethod - def _from_dict(cls, data: Dict[str, Any]) -> "AuthClientCredentials": + def _from_dict(cls, data: dict[str, Any]) -> "AuthClientCredentials": deserialize_secrets_inplace(data["init_parameters"], ["client_secret", "scope"]) return cls(**data["init_parameters"]) @@ -188,7 +188,7 @@ class AuthClientPassword(AuthCredentials): scope: Secret = field(default_factory=lambda: Secret.from_env_var(["WEAVIATE_SCOPE"], strict=False)) @classmethod - def _from_dict(cls, data: Dict[str, Any]) -> "AuthClientPassword": + def _from_dict(cls, data: dict[str, Any]) -> "AuthClientPassword": deserialize_secrets_inplace(data["init_parameters"], ["username", "password", "scope"]) return cls(**data["init_parameters"]) diff --git a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py index c81e2bc79f..33bc86e3a6 100644 --- a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py +++ b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py @@ -5,7 +5,7 @@ import datetime import json from dataclasses import asdict -from typing import Any, Dict, List, Optional +from typing import Any, Optional from haystack import logging from haystack.core.serialization import default_from_dict, default_to_dict @@ -86,9 +86,9 @@ def __init__( self, *, url: Optional[str] = None, - collection_settings: Optional[Dict[str, Any]] = None, + collection_settings: Optional[dict[str, Any]] = None, auth_client_secret: Optional[AuthCredentials] = None, - additional_headers: Optional[Dict] = None, + additional_headers: Optional[dict] = None, embedded_options: Optional[EmbeddedOptions] = None, additional_config: Optional[AdditionalConfig] = None, grpc_port: int = 50051, @@ -219,7 +219,7 @@ def collection(self): self._collection = client.collections.get(self._collection_settings["class"]) return self._collection - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. @@ -242,7 +242,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "WeaviateDocumentStore": + def from_dict(cls, data: dict[str, Any]) -> "WeaviateDocumentStore": """ Deserializes the component from a dictionary. 
@@ -269,7 +269,7 @@ def count_documents(self) -> int: total = self.collection.aggregate.over_all(total_count=True).total_count return total if total else 0 - def _to_data_object(self, document: Document) -> Dict[str, Any]: + def _to_data_object(self, document: Document) -> dict[str, Any]: """ Converts a Document to a Weaviate data object ready to be saved. """ @@ -308,15 +308,15 @@ def _to_data_object(self, document: Document) -> Dict[str, Any]: return data - def _to_document(self, data: DataObject[Dict[str, Any], None]) -> Document: + def _to_document(self, data: DataObject[dict[str, Any], None]) -> Document: """ Converts a data object read from Weaviate into a Document. """ document_data = data.properties document_data["id"] = document_data.pop("_original_id") - if isinstance(data.vector, List): + if isinstance(data.vector, list): document_data["embedding"] = data.vector - elif isinstance(data.vector, Dict): + elif isinstance(data.vector, dict): document_data["embedding"] = data.vector.get("default") else: document_data["embedding"] = None @@ -346,7 +346,7 @@ def _to_document(self, data: DataObject[Dict[str, Any], None]) -> Document: return Document.from_dict(document_data) - def _query(self) -> List[DataObject[Dict[str, Any], None]]: + def _query(self) -> list[DataObject[dict[str, Any], None]]: properties = [p.name for p in self.collection.config.get().properties] try: result = self.collection.iterator(include_vector=True, return_properties=properties) @@ -355,7 +355,7 @@ def _query(self) -> List[DataObject[Dict[str, Any], None]]: raise DocumentStoreError(msg) from e return result - def _query_with_filters(self, filters: Dict[str, Any]) -> List[DataObject[Dict[str, Any], None]]: + def _query_with_filters(self, filters: dict[str, Any]) -> list[DataObject[dict[str, Any], None]]: properties = [p.name for p in self.collection.config.get().properties] # When querying with filters we need to paginate using limit and offset as using # a cursor with after is not possible. See the official docs: @@ -386,7 +386,7 @@ def _query_with_filters(self, filters: Dict[str, Any]) -> List[DataObject[Dict[s offset += DEFAULT_QUERY_LIMIT return result - def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]: + def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: """ Returns the documents that match the filters provided. @@ -407,7 +407,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc result = self._query() return [self._to_document(doc) for doc in result] - def _batch_write(self, documents: List[Document]) -> int: + def _batch_write(self, documents: list[Document]) -> int: """ Writes documents to Weaviate in batches. Documents with the same id will be overwritten. @@ -448,7 +448,7 @@ def _batch_write(self, documents: List[Document]) -> int: # So we assume that all Documents were written. return len(documents) - def _write(self, documents: List[Document], policy: DuplicatePolicy) -> int: + def _write(self, documents: list[Document], policy: DuplicatePolicy) -> int: """ Writes documents to Weaviate using the specified policy. This doesn't use the batch API, so it's slower than _batch_write.
@@ -482,7 +482,7 @@ def _write(self, documents: List[Document], policy: DuplicatePolicy) -> int: raise DuplicateDocumentError(msg) return written - def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: + def write_documents(self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int: """ Writes documents to Weaviate using the specified policy. We recommend using an OVERWRITE policy as it's faster than other policies for Weaviate since it uses @@ -496,7 +496,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D return self._write(documents, policy) - def delete_documents(self, document_ids: List[str]) -> None: + def delete_documents(self, document_ids: list[str]) -> None: """ Deletes all documents with matching document_ids from the DocumentStore. @@ -555,8 +555,8 @@ def delete_all_documents(self, *, recreate_index: bool = False, batch_size: int ) def _bm25_retrieval( - self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None - ) -> List[Document]: + self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None + ) -> list[Document]: properties = [p.name for p in self.collection.config.get().properties] result = self.collection.query.bm25( query=query, @@ -572,12 +572,12 @@ def _bm25_retrieval( def _embedding_retrieval( self, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, distance: Optional[float] = None, certainty: Optional[float] = None, - ) -> List[Document]: + ) -> list[Document]: if distance is not None and certainty is not None: msg = "Can't use 'distance' and 'certainty' parameters together" raise ValueError(msg) @@ -599,12 +599,12 @@ def _embedding_retrieval( def _hybrid_retrieval( self, query: str, - query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, + query_embedding: list[float], + filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None, alpha: Optional[float] = None, max_vector_distance: Optional[float] = None, - ) -> List[Document]: + ) -> list[Document]: properties = [p.name for p in self.collection.config.get().properties] result = self.collection.query.hybrid( query=query, diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index bdf9290a92..e410877e84 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -5,7 +5,6 @@ import base64 import logging import os -from typing import List from unittest.mock import MagicMock, patch import pytest @@ -69,7 +68,7 @@ def document_store(self, request) -> WeaviateDocumentStore: store.client.collections.delete(collection_settings["class"]) @pytest.fixture - def filterable_docs(self) -> List[Document]: + def filterable_docs(self) -> list[Document]: """ This fixture has been copied from haystack/testing/document_store.py and modified to use a different date format.
@@ -82,7 +81,7 @@ def filterable_docs(self) -> List[Document]: documents[i].meta["date"] = f"{date}Z" return documents - def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): + def assert_documents_are_equal(self, received: list[Document], expected: list[Document]): assert len(received) == len(expected) received = sorted(received, key=lambda doc: doc.id) expected = sorted(expected, key=lambda doc: doc.id)
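Reviewer note: one place where this migration is more than notation is _to_document in the Weaviate document store, where the isinstance checks now test against the builtin list and dict rather than the typing aliases. Both spellings behave the same for bare aliases (the typing aliases are deprecated since Python 3.9), while parameterized generics stay invalid as isinstance targets either way. A small self-contained sketch:

import typing

vector = [0.1, 0.2, 0.3]
assert isinstance(vector, list)         # the form used after this patch
assert isinstance(vector, typing.List)  # still True, but a deprecated alias
try:
    isinstance(vector, list[float])     # parameterized generics are rejected
except TypeError as err:
    print(err)  # isinstance() argument 2 cannot be a parameterized generic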