diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml index f03f988a06..89cbcbce4b 100644 --- a/integrations/pgvector/pyproject.toml +++ b/integrations/pgvector/pyproject.toml @@ -7,7 +7,7 @@ name = "pgvector-haystack" dynamic = ["version"] description = "An integration of pgvector (vector search extension for Postgres) with Haystack" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = "Apache-2.0" keywords = [] authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] @@ -15,7 +15,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -23,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.11.0", "pgvector>=0.3.0", "psycopg[binary]"] +dependencies = ["haystack-ai>=2.22.0", "pgvector>=0.3.0", "psycopg[binary]"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations" @@ -83,7 +82,6 @@ ignore_missing_imports = true [tool.ruff] -target-version = "py39" line-length = 120 [tool.ruff.lint] @@ -134,10 +132,6 @@ ignore = [ # ignore assertions "S101", ] -unfixable = [ - # Don't touch unused imports - "F401", -] [tool.ruff.lint.isort] known-first-party = ["haystack_integrations"] diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py index 1e179bbce4..b01c57b5a6 100644 --- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py +++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Literal, Optional, Union +from typing import Any, Literal from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -62,10 +62,10 @@ def __init__( self, *, document_store: PgvectorDocumentStore, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None, - filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, + filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ): """ :param document_store: An instance of `PgvectorDocumentStore`. @@ -137,9 +137,9 @@ def from_dict(cls, data: dict[str, Any]) -> "PgvectorEmbeddingRetriever": def run( self, query_embedding: list[float], - filters: Optional[dict[str, Any]] = None, - top_k: Optional[int] = None, - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None, + filters: dict[str, Any] | None = None, + top_k: int | None = None, + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, ) -> dict[str, list[Document]]: """ Retrieve documents from the `PgvectorDocumentStore`, based on their embeddings. @@ -170,9 +170,9 @@ def run( async def run_async( self, query_embedding: list[float], - filters: Optional[dict[str, Any]] = None, - top_k: Optional[int] = None, - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None, + filters: dict[str, Any] | None = None, + top_k: int | None = None, + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on their embeddings. diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py index 3d320dcab4..9f4afcda80 100644 --- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py +++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional, Union +from typing import Any from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -52,9 +52,9 @@ def __init__( self, *, document_store: PgvectorDocumentStore, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, - filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE, + filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ): """ :param document_store: An instance of `PgvectorDocumentStore`. @@ -111,8 +111,8 @@ def from_dict(cls, data: dict[str, Any]) -> "PgvectorKeywordRetriever": def run( self, query: str, - filters: Optional[dict[str, Any]] = None, - top_k: Optional[int] = None, + filters: dict[str, Any] | None = None, + top_k: int | None = None, ) -> dict[str, list[Document]]: """ Retrieve documents from the `PgvectorDocumentStore`, based on keywords. @@ -141,8 +141,8 @@ def run( async def run_async( self, query: str, - filters: Optional[dict[str, Any]] = None, - top_k: Optional[int] = None, + filters: dict[str, Any] | None = None, + top_k: int | None = None, ) -> dict[str, list[Document]]: """ Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on keywords. diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index f843c8eeb9..91bcefb929 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Literal, Optional, Union, overload +from typing import Any, Literal, overload from haystack import default_from_dict, default_to_dict, logging from haystack.dataclasses.document import Document @@ -92,9 +92,9 @@ def __init__( recreate_table: bool = False, search_strategy: Literal["exact_nearest_neighbor", "hnsw"] = "exact_nearest_neighbor", hnsw_recreate_index_if_exists: bool = False, - hnsw_index_creation_kwargs: Optional[dict[str, int]] = None, + hnsw_index_creation_kwargs: dict[str, int] | None = None, hnsw_index_name: str = "haystack_hnsw_index", - hnsw_ef_search: Optional[int] = None, + hnsw_ef_search: int | None = None, keyword_index_name: str = "haystack_keyword_index", ): """ @@ -175,12 +175,12 @@ def __init__( self.keyword_index_name = keyword_index_name self.language = language - self._connection: Optional[Connection] = None - self._async_connection: Optional[AsyncConnection] = None - self._cursor: Optional[Cursor] = None - self._async_cursor: Optional[AsyncCursor] = None - self._dict_cursor: Optional[Cursor[DictRow]] = None - self._async_dict_cursor: Optional[AsyncCursor[DictRow]] = None + self._connection: Connection | None = None + self._async_connection: AsyncConnection | None = None + self._cursor: Cursor | None = None + self._async_cursor: AsyncCursor | None = None + self._dict_cursor: Cursor[DictRow] | None = None + self._async_dict_cursor: AsyncCursor[DictRow] | None = None self._table_initialized = False def to_dict(self) -> dict[str, Any]: @@ -250,21 +250,21 @@ async def _connection_is_valid_async(connection): @overload def _execute_sql( - self, cursor: Cursor, sql_query: Composed, params: Optional[tuple] = None, error_msg: str = "" + self, cursor: Cursor, sql_query: Composed, params: tuple | None = None, error_msg: str = "" ) -> Cursor: ... @overload def _execute_sql( - self, cursor: Cursor[DictRow], sql_query: Composed, params: Optional[tuple] = None, error_msg: str = "" + self, cursor: Cursor[DictRow], sql_query: Composed, params: tuple | None = None, error_msg: str = "" ) -> Cursor[DictRow]: ... def _execute_sql( self, - cursor: Union[Cursor, Cursor[DictRow]], + cursor: Cursor | Cursor[DictRow], sql_query: Composed, - params: Optional[tuple] = None, + params: tuple | None = None, error_msg: str = "", - ) -> Union[Cursor, Cursor[DictRow]]: + ) -> Cursor | Cursor[DictRow]: """ Internal method to execute SQL statements and handle exceptions. @@ -299,21 +299,21 @@ def _execute_sql( @overload async def _execute_sql_async( - self, cursor: AsyncCursor, sql_query: Composed, params: Optional[tuple] = None, error_msg: str = "" + self, cursor: AsyncCursor, sql_query: Composed, params: tuple | None = None, error_msg: str = "" ) -> AsyncCursor: ... @overload async def _execute_sql_async( - self, cursor: AsyncCursor[DictRow], sql_query: Composed, params: Optional[tuple] = None, error_msg: str = "" + self, cursor: AsyncCursor[DictRow], sql_query: Composed, params: tuple | None = None, error_msg: str = "" ) -> AsyncCursor[DictRow]: ... async def _execute_sql_async( self, - cursor: Union[AsyncCursor, AsyncCursor[DictRow]], + cursor: AsyncCursor | AsyncCursor[DictRow], sql_query: Composed, - params: Optional[tuple] = None, + params: tuple | None = None, error_msg: str = "", - ) -> Union[AsyncCursor, AsyncCursor[DictRow]]: + ) -> AsyncCursor | AsyncCursor[DictRow]: """ Internal method to asynchronously execute SQL statements and handle exceptions. @@ -759,7 +759,7 @@ async def count_documents_async(self) -> int: return result[0] return 0 - def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: + def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Document]: """ Returns the documents that match the filters provided. @@ -796,7 +796,7 @@ def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Doc docs = _from_pg_to_haystack_documents(records) return docs - async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]: + async def filter_documents_async(self, filters: dict[str, Any] | None = None) -> list[Document]: """ Asynchronously returns the documents that match the filters provided. @@ -1223,7 +1223,7 @@ async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, raise DocumentStoreError(msg) from e def _build_keyword_retrieval_query( - self, query: str, top_k: int, filters: Optional[dict[str, Any]] = None + self, query: str, top_k: int, filters: dict[str, Any] | None = None ) -> tuple[Composed, tuple]: """ Builds the SQL query and the where parameters for keyword retrieval. @@ -1236,7 +1236,7 @@ def _build_keyword_retrieval_query( ) where_params = () - sql_where_clause: Union[Composed, SQL] = SQL("") + sql_where_clause: Composed | SQL = SQL("") if filters: sql_where_clause, where_params = _convert_filters_to_where_clause_and_params( filters=filters, operator="AND" @@ -1252,7 +1252,7 @@ def _keyword_retrieval( self, query: str, *, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, ) -> list[Document]: """ @@ -1287,7 +1287,7 @@ async def _keyword_retrieval_async( self, query: str, *, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, ) -> list[Document]: """ @@ -1315,9 +1315,9 @@ async def _keyword_retrieval_async( def _check_and_build_embedding_retrieval_query( self, query_embedding: list[float], - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]], + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None, top_k: int, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, ) -> tuple[Composed, tuple]: """ Performs checks and builds the SQL query and the where parameters for embedding retrieval. @@ -1357,7 +1357,7 @@ def _check_and_build_embedding_retrieval_query( score=SQL(score_definition), ) - sql_where_clause: Union[Composed, SQL] = SQL("") + sql_where_clause: Composed | SQL = SQL("") params = () if filters: sql_where_clause, params = _convert_filters_to_where_clause_and_params(filters) @@ -1379,9 +1379,9 @@ def _embedding_retrieval( self, query_embedding: list[float], *, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None, + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, ) -> list[Document]: """ Retrieves documents that are most similar to the query embedding using a vector similarity metric. @@ -1413,9 +1413,9 @@ async def _embedding_retrieval_async( self, query_embedding: list[float], *, - filters: Optional[dict[str, Any]] = None, + filters: dict[str, Any] | None = None, top_k: int = 10, - vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None, + vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, ) -> list[Document]: """ Asynchronously retrieves documents that are most similar to the query embedding using a diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py index d06e75e115..7aa501be96 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from datetime import datetime from itertools import chain -from typing import Any, Literal, Optional +from typing import Any, Literal from haystack.errors import FilterError from psycopg.sql import SQL, Composed @@ -21,7 +21,7 @@ NO_VALUE = "no_value" -def _validate_filters(filters: Optional[dict[str, Any]] = None) -> None: +def _validate_filters(filters: dict[str, Any] | None = None) -> None: """ Validates the filters provided. """ diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index b926083891..9d089eea6f 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -154,7 +154,7 @@ def test_halfvec_hnsw_write_documents(document_store_w_halfvec_hnsw_index: Pgvec retrieved_docs = document_store_w_halfvec_hnsw_index.filter_documents() retrieved_docs.sort(key=lambda x: x.id) - for original_doc, retrieved_doc in zip(documents, retrieved_docs): + for original_doc, retrieved_doc in zip(documents, retrieved_docs, strict=True): assert original_doc.id == retrieved_doc.id assert original_doc.content == retrieved_doc.content assert len(original_doc.embedding) == len(retrieved_doc.embedding) diff --git a/integrations/pgvector/tests/test_filters.py b/integrations/pgvector/tests/test_filters.py index ba750b5a6d..5a4fdc6a7e 100644 --- a/integrations/pgvector/tests/test_filters.py +++ b/integrations/pgvector/tests/test_filters.py @@ -24,7 +24,7 @@ def assert_documents_are_equal(self, received: list[Document], expected: list[Do assert len(received) == len(expected) received.sort(key=lambda x: x.id) expected.sort(key=lambda x: x.id) - for received_doc, expected_doc in zip(received, expected): + for received_doc, expected_doc in zip(received, expected, strict=True): # we first compare the embeddings approximately if received_doc.embedding is None: assert expected_doc.embedding is None