Skip to content

Commit f89a56a

Browse files
authored
chore!: pgvector - drop Python 3.9 and use X|Y typing (#2722)
1 parent e1c71d9 commit f89a56a

7 files changed

Lines changed: 55 additions & 61 deletions

File tree

integrations/pgvector/pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,23 +7,22 @@ name = "pgvector-haystack"
77
dynamic = ["version"]
88
description = "An integration of pgvector (vector search extension for Postgres) with Haystack"
99
readme = "README.md"
10-
requires-python = ">=3.9"
10+
requires-python = ">=3.10"
1111
license = "Apache-2.0"
1212
keywords = []
1313
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
1414
classifiers = [
1515
"License :: OSI Approved :: Apache Software License",
1616
"Development Status :: 4 - Beta",
1717
"Programming Language :: Python",
18-
"Programming Language :: Python :: 3.9",
1918
"Programming Language :: Python :: 3.10",
2019
"Programming Language :: Python :: 3.11",
2120
"Programming Language :: Python :: 3.12",
2221
"Programming Language :: Python :: 3.13",
2322
"Programming Language :: Python :: Implementation :: CPython",
2423
"Programming Language :: Python :: Implementation :: PyPy",
2524
]
26-
dependencies = ["haystack-ai>=2.11.0", "pgvector>=0.3.0", "psycopg[binary]"]
25+
dependencies = ["haystack-ai>=2.22.0", "pgvector>=0.3.0", "psycopg[binary]"]
2726

2827
[project.urls]
2928
Source = "https://github.com/deepset-ai/haystack-core-integrations"
@@ -83,7 +82,6 @@ ignore_missing_imports = true
8382

8483

8584
[tool.ruff]
86-
target-version = "py39"
8785
line-length = 120
8886

8987
[tool.ruff.lint]
@@ -134,10 +132,6 @@ ignore = [
134132
# ignore assertions
135133
"S101",
136134
]
137-
unfixable = [
138-
# Don't touch unused imports
139-
"F401",
140-
]
141135

142136
[tool.ruff.lint.isort]
143137
known-first-party = ["haystack_integrations"]

integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import Any, Literal, Optional, Union
4+
from typing import Any, Literal
55

66
from haystack import component, default_from_dict, default_to_dict
77
from haystack.dataclasses import Document
@@ -62,10 +62,10 @@ def __init__(
6262
self,
6363
*,
6464
document_store: PgvectorDocumentStore,
65-
filters: Optional[dict[str, Any]] = None,
65+
filters: dict[str, Any] | None = None,
6666
top_k: int = 10,
67-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
68-
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
67+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None,
68+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
6969
):
7070
"""
7171
:param document_store: An instance of `PgvectorDocumentStore`.
@@ -137,9 +137,9 @@ def from_dict(cls, data: dict[str, Any]) -> "PgvectorEmbeddingRetriever":
137137
def run(
138138
self,
139139
query_embedding: list[float],
140-
filters: Optional[dict[str, Any]] = None,
141-
top_k: Optional[int] = None,
142-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
140+
filters: dict[str, Any] | None = None,
141+
top_k: int | None = None,
142+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None,
143143
) -> dict[str, list[Document]]:
144144
"""
145145
Retrieve documents from the `PgvectorDocumentStore`, based on their embeddings.
@@ -170,9 +170,9 @@ def run(
170170
async def run_async(
171171
self,
172172
query_embedding: list[float],
173-
filters: Optional[dict[str, Any]] = None,
174-
top_k: Optional[int] = None,
175-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
173+
filters: dict[str, Any] | None = None,
174+
top_k: int | None = None,
175+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None,
176176
) -> dict[str, list[Document]]:
177177
"""
178178
Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on their embeddings.

integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import Any, Optional, Union
4+
from typing import Any
55

66
from haystack import component, default_from_dict, default_to_dict
77
from haystack.dataclasses import Document
@@ -52,9 +52,9 @@ def __init__(
5252
self,
5353
*,
5454
document_store: PgvectorDocumentStore,
55-
filters: Optional[dict[str, Any]] = None,
55+
filters: dict[str, Any] | None = None,
5656
top_k: int = 10,
57-
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
57+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
5858
):
5959
"""
6060
:param document_store: An instance of `PgvectorDocumentStore`.
@@ -111,8 +111,8 @@ def from_dict(cls, data: dict[str, Any]) -> "PgvectorKeywordRetriever":
111111
def run(
112112
self,
113113
query: str,
114-
filters: Optional[dict[str, Any]] = None,
115-
top_k: Optional[int] = None,
114+
filters: dict[str, Any] | None = None,
115+
top_k: int | None = None,
116116
) -> dict[str, list[Document]]:
117117
"""
118118
Retrieve documents from the `PgvectorDocumentStore`, based on keywords.
@@ -141,8 +141,8 @@ def run(
141141
async def run_async(
142142
self,
143143
query: str,
144-
filters: Optional[dict[str, Any]] = None,
145-
top_k: Optional[int] = None,
144+
filters: dict[str, Any] | None = None,
145+
top_k: int | None = None,
146146
) -> dict[str, list[Document]]:
147147
"""
148148
Asynchronously retrieve documents from the `PgvectorDocumentStore`, based on keywords.

integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py

Lines changed: 32 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import Any, Literal, Optional, Union, overload
5+
from typing import Any, Literal, overload
66

77
from haystack import default_from_dict, default_to_dict, logging
88
from haystack.dataclasses.document import Document
@@ -92,9 +92,9 @@ def __init__(
9292
recreate_table: bool = False,
9393
search_strategy: Literal["exact_nearest_neighbor", "hnsw"] = "exact_nearest_neighbor",
9494
hnsw_recreate_index_if_exists: bool = False,
95-
hnsw_index_creation_kwargs: Optional[dict[str, int]] = None,
95+
hnsw_index_creation_kwargs: dict[str, int] | None = None,
9696
hnsw_index_name: str = "haystack_hnsw_index",
97-
hnsw_ef_search: Optional[int] = None,
97+
hnsw_ef_search: int | None = None,
9898
keyword_index_name: str = "haystack_keyword_index",
9999
):
100100
"""
@@ -175,12 +175,12 @@ def __init__(
175175
self.keyword_index_name = keyword_index_name
176176
self.language = language
177177

178-
self._connection: Optional[Connection] = None
179-
self._async_connection: Optional[AsyncConnection] = None
180-
self._cursor: Optional[Cursor] = None
181-
self._async_cursor: Optional[AsyncCursor] = None
182-
self._dict_cursor: Optional[Cursor[DictRow]] = None
183-
self._async_dict_cursor: Optional[AsyncCursor[DictRow]] = None
178+
self._connection: Connection | None = None
179+
self._async_connection: AsyncConnection | None = None
180+
self._cursor: Cursor | None = None
181+
self._async_cursor: AsyncCursor | None = None
182+
self._dict_cursor: Cursor[DictRow] | None = None
183+
self._async_dict_cursor: AsyncCursor[DictRow] | None = None
184184
self._table_initialized = False
185185

186186
def to_dict(self) -> dict[str, Any]:
@@ -250,21 +250,21 @@ async def _connection_is_valid_async(connection):
250250

251251
@overload
252252
def _execute_sql(
253-
self, cursor: Cursor, sql_query: Composed, params: Optional[tuple] = None, error_msg: str = ""
253+
self, cursor: Cursor, sql_query: Composed, params: tuple | None = None, error_msg: str = ""
254254
) -> Cursor: ...
255255

256256
@overload
257257
def _execute_sql(
258-
self, cursor: Cursor[DictRow], sql_query: Composed, params: Optional[tuple] = None, error_msg: str = ""
258+
self, cursor: Cursor[DictRow], sql_query: Composed, params: tuple | None = None, error_msg: str = ""
259259
) -> Cursor[DictRow]: ...
260260

261261
def _execute_sql(
262262
self,
263-
cursor: Union[Cursor, Cursor[DictRow]],
263+
cursor: Cursor | Cursor[DictRow],
264264
sql_query: Composed,
265-
params: Optional[tuple] = None,
265+
params: tuple | None = None,
266266
error_msg: str = "",
267-
) -> Union[Cursor, Cursor[DictRow]]:
267+
) -> Cursor | Cursor[DictRow]:
268268
"""
269269
Internal method to execute SQL statements and handle exceptions.
270270
@@ -299,21 +299,21 @@ def _execute_sql(
299299

300300
@overload
301301
async def _execute_sql_async(
302-
self, cursor: AsyncCursor, sql_query: Composed, params: Optional[tuple] = None, error_msg: str = ""
302+
self, cursor: AsyncCursor, sql_query: Composed, params: tuple | None = None, error_msg: str = ""
303303
) -> AsyncCursor: ...
304304

305305
@overload
306306
async def _execute_sql_async(
307-
self, cursor: AsyncCursor[DictRow], sql_query: Composed, params: Optional[tuple] = None, error_msg: str = ""
307+
self, cursor: AsyncCursor[DictRow], sql_query: Composed, params: tuple | None = None, error_msg: str = ""
308308
) -> AsyncCursor[DictRow]: ...
309309

310310
async def _execute_sql_async(
311311
self,
312-
cursor: Union[AsyncCursor, AsyncCursor[DictRow]],
312+
cursor: AsyncCursor | AsyncCursor[DictRow],
313313
sql_query: Composed,
314-
params: Optional[tuple] = None,
314+
params: tuple | None = None,
315315
error_msg: str = "",
316-
) -> Union[AsyncCursor, AsyncCursor[DictRow]]:
316+
) -> AsyncCursor | AsyncCursor[DictRow]:
317317
"""
318318
Internal method to asynchronously execute SQL statements and handle exceptions.
319319
@@ -759,7 +759,7 @@ async def count_documents_async(self) -> int:
759759
return result[0]
760760
return 0
761761

762-
def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
762+
def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Document]:
763763
"""
764764
Returns the documents that match the filters provided.
765765
@@ -796,7 +796,7 @@ def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Doc
796796
docs = _from_pg_to_haystack_documents(records)
797797
return docs
798798

799-
async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None) -> list[Document]:
799+
async def filter_documents_async(self, filters: dict[str, Any] | None = None) -> list[Document]:
800800
"""
801801
Asynchronously returns the documents that match the filters provided.
802802
@@ -1223,7 +1223,7 @@ async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str,
12231223
raise DocumentStoreError(msg) from e
12241224

12251225
def _build_keyword_retrieval_query(
1226-
self, query: str, top_k: int, filters: Optional[dict[str, Any]] = None
1226+
self, query: str, top_k: int, filters: dict[str, Any] | None = None
12271227
) -> tuple[Composed, tuple]:
12281228
"""
12291229
Builds the SQL query and the where parameters for keyword retrieval.
@@ -1236,7 +1236,7 @@ def _build_keyword_retrieval_query(
12361236
)
12371237

12381238
where_params = ()
1239-
sql_where_clause: Union[Composed, SQL] = SQL("")
1239+
sql_where_clause: Composed | SQL = SQL("")
12401240
if filters:
12411241
sql_where_clause, where_params = _convert_filters_to_where_clause_and_params(
12421242
filters=filters, operator="AND"
@@ -1252,7 +1252,7 @@ def _keyword_retrieval(
12521252
self,
12531253
query: str,
12541254
*,
1255-
filters: Optional[dict[str, Any]] = None,
1255+
filters: dict[str, Any] | None = None,
12561256
top_k: int = 10,
12571257
) -> list[Document]:
12581258
"""
@@ -1287,7 +1287,7 @@ async def _keyword_retrieval_async(
12871287
self,
12881288
query: str,
12891289
*,
1290-
filters: Optional[dict[str, Any]] = None,
1290+
filters: dict[str, Any] | None = None,
12911291
top_k: int = 10,
12921292
) -> list[Document]:
12931293
"""
@@ -1315,9 +1315,9 @@ async def _keyword_retrieval_async(
13151315
def _check_and_build_embedding_retrieval_query(
13161316
self,
13171317
query_embedding: list[float],
1318-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]],
1318+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None,
13191319
top_k: int,
1320-
filters: Optional[dict[str, Any]] = None,
1320+
filters: dict[str, Any] | None = None,
13211321
) -> tuple[Composed, tuple]:
13221322
"""
13231323
Performs checks and builds the SQL query and the where parameters for embedding retrieval.
@@ -1357,7 +1357,7 @@ def _check_and_build_embedding_retrieval_query(
13571357
score=SQL(score_definition),
13581358
)
13591359

1360-
sql_where_clause: Union[Composed, SQL] = SQL("")
1360+
sql_where_clause: Composed | SQL = SQL("")
13611361
params = ()
13621362
if filters:
13631363
sql_where_clause, params = _convert_filters_to_where_clause_and_params(filters)
@@ -1379,9 +1379,9 @@ def _embedding_retrieval(
13791379
self,
13801380
query_embedding: list[float],
13811381
*,
1382-
filters: Optional[dict[str, Any]] = None,
1382+
filters: dict[str, Any] | None = None,
13831383
top_k: int = 10,
1384-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
1384+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None,
13851385
) -> list[Document]:
13861386
"""
13871387
Retrieves documents that are most similar to the query embedding using a vector similarity metric.
@@ -1413,9 +1413,9 @@ async def _embedding_retrieval_async(
14131413
self,
14141414
query_embedding: list[float],
14151415
*,
1416-
filters: Optional[dict[str, Any]] = None,
1416+
filters: dict[str, Any] | None = None,
14171417
top_k: int = 10,
1418-
vector_function: Optional[Literal["cosine_similarity", "inner_product", "l2_distance"]] = None,
1418+
vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None,
14191419
) -> list[Document]:
14201420
"""
14211421
Asynchronously retrieves documents that are most similar to the query embedding using a

integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
from datetime import datetime
55
from itertools import chain
6-
from typing import Any, Literal, Optional
6+
from typing import Any, Literal
77

88
from haystack.errors import FilterError
99
from psycopg.sql import SQL, Composed
@@ -21,7 +21,7 @@
2121
NO_VALUE = "no_value"
2222

2323

24-
def _validate_filters(filters: Optional[dict[str, Any]] = None) -> None:
24+
def _validate_filters(filters: dict[str, Any] | None = None) -> None:
2525
"""
2626
Validates the filters provided.
2727
"""

integrations/pgvector/tests/test_document_store.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ def test_halfvec_hnsw_write_documents(document_store_w_halfvec_hnsw_index: Pgvec
154154
retrieved_docs = document_store_w_halfvec_hnsw_index.filter_documents()
155155
retrieved_docs.sort(key=lambda x: x.id)
156156

157-
for original_doc, retrieved_doc in zip(documents, retrieved_docs):
157+
for original_doc, retrieved_doc in zip(documents, retrieved_docs, strict=True):
158158
assert original_doc.id == retrieved_doc.id
159159
assert original_doc.content == retrieved_doc.content
160160
assert len(original_doc.embedding) == len(retrieved_doc.embedding)

integrations/pgvector/tests/test_filters.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def assert_documents_are_equal(self, received: list[Document], expected: list[Do
2424
assert len(received) == len(expected)
2525
received.sort(key=lambda x: x.id)
2626
expected.sort(key=lambda x: x.id)
27-
for received_doc, expected_doc in zip(received, expected):
27+
for received_doc, expected_doc in zip(received, expected, strict=True):
2828
# we first compare the embeddings approximately
2929
if received_doc.embedding is None:
3030
assert expected_doc.embedding is None

0 commit comments

Comments (0)