11import inspect
22from itertools import islice
3- from typing import Any , AsyncGenerator , ClassVar , Dict , Generator , List , Optional , Set , Union
3+ from typing import Any , AsyncGenerator , ClassVar , Dict , Generator , List , Optional , Set , Tuple , Union
44
55import numpy as np
66import qdrant_client
1818from .converters import (
1919 DENSE_VECTORS_NAME ,
2020 SPARSE_VECTORS_NAME ,
21+ QdrantPoint ,
2122 convert_haystack_documents_to_qdrant_points ,
2223 convert_id ,
2324 convert_qdrant_point_to_haystack_document ,
@@ -34,7 +35,7 @@ class QdrantStoreError(DocumentStoreError):
3435FilterType = Dict [str , Union [Dict [str , Any ], List [Any ], str , int , float , bool ]]
3536
3637
37- def get_batches_from_generator (iterable , n ) :
38+ def get_batches_from_generator (iterable : List , n : int ) -> Generator :
3839 """
3940 Batch elements of an iterable into fixed-length chunks or blocks.
4041 """
@@ -127,10 +128,10 @@ def __init__(
127128 write_batch_size : int = 100 ,
128129 scroll_size : int = 10_000 ,
129130 payload_fields_to_index : Optional [List [dict ]] = None ,
130- ):
131+ ) -> None :
131132 """
132133 :param location:
133- If `memory` - use in-memory Qdrant instance.
134+ If `":memory:"` - use in-memory Qdrant instance.
134135 If `str` - use it as a URL parameter.
135136 If `None` - use default values for host and port.
136137 :param url:
@@ -164,7 +165,7 @@ def __init__(
164165 Dimension of the embeddings.
165166 :param on_disk:
166167 Whether to store the collection on disk.
167- :param use_sparse_embedding :
168+ :param use_sparse_embeddings :
168169 If set to `True`, enables support for sparse embeddings.
169170 :param sparse_idf:
170171 If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
@@ -232,7 +233,6 @@ def __init__(
232233 self .path = path
233234 self .force_disable_check_same_thread = force_disable_check_same_thread
234235 self .metadata = metadata or {}
235- self .api_key = api_key
236236
237237 # Store the Qdrant collection specific attributes
238238 self .shard_number = shard_number
@@ -258,9 +258,10 @@ def __init__(
258258 self .write_batch_size = write_batch_size
259259 self .scroll_size = scroll_size
260260
261- def _initialize_client (self ):
261+ def _initialize_client (self ) -> None :
262262 if self ._client is None :
263263 client_params = self ._prepare_client_params ()
264+ # This step adds the api-key and User-Agent to metadata
264265 self ._client = qdrant_client .QdrantClient (** client_params )
265266 # Make sure the collection is properly set up
266267 self ._set_up_collection (
@@ -274,7 +275,7 @@ def _initialize_client(self):
274275 self .payload_fields_to_index ,
275276 )
276277
277- async def _initialize_async_client (self ):
278+ async def _initialize_async_client (self ) -> None :
278279 """
279280 Initializes the asynchronous Qdrant client if necessary. Returns nothing.
280281 """
@@ -628,8 +629,6 @@ def get_documents_by_id(
628629
629630 :param ids:
630631 A list of document IDs to retrieve.
631- :param index:
632- The name of the index to retrieve documents from.
633632 :returns:
634633 A list of documents.
635634 """
@@ -661,8 +660,6 @@ async def get_documents_by_id_async(
661660
662661 :param ids:
663662 A list of document IDs to retrieve.
664- :param index:
665- The name of the index to retrieve documents from.
666663 :returns:
667664 A list of documents.
668665 """
@@ -1210,7 +1207,7 @@ def get_distance(self, similarity: str) -> rest.Distance:
12101207 )
12111208 raise QdrantStoreError (msg ) from ke
12121209
1213- def _create_payload_index (self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None ):
1210+ def _create_payload_index (self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None ) -> None :
12141211 """
12151212 Create payload index for the collection if payload_fields_to_index is provided
12161213 See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1229,7 +1226,7 @@ def _create_payload_index(self, collection_name: str, payload_fields_to_index: O
12291226
12301227 async def _create_payload_index_async (
12311228 self , collection_name : str , payload_fields_to_index : Optional [List [dict ]] = None
1232- ):
1229+ ) -> None :
12331230 """
12341231 Asynchronously create payload index for the collection if payload_fields_to_index is provided
12351232 See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1257,7 +1254,7 @@ def _set_up_collection(
12571254 sparse_idf : bool ,
12581255 on_disk : bool = False ,
12591256 payload_fields_to_index : Optional [List [dict ]] = None ,
1260- ):
1257+ ) -> None :
12611258 """
12621259 Sets up the Qdrant collection with the specified parameters.
12631260 :param collection_name:
@@ -1313,7 +1310,7 @@ async def _set_up_collection_async(
13131310 sparse_idf : bool ,
13141311 on_disk : bool = False ,
13151312 payload_fields_to_index : Optional [List [dict ]] = None ,
1316- ):
1313+ ) -> None :
13171314 """
13181315 Asynchronously sets up the Qdrant collection with the specified parameters.
13191316 :param collection_name:
@@ -1367,7 +1364,7 @@ def recreate_collection(
13671364 on_disk : Optional [bool ] = None ,
13681365 use_sparse_embeddings : Optional [bool ] = None ,
13691366 sparse_idf : bool = False ,
1370- ):
1367+ ) -> None :
13711368 """
13721369 Recreates the Qdrant collection with the specified parameters.
13731370
@@ -1410,7 +1407,7 @@ async def recreate_collection_async(
14101407 on_disk : Optional [bool ] = None ,
14111408 use_sparse_embeddings : Optional [bool ] = None ,
14121409 sparse_idf : bool = False ,
1413- ):
1410+ ) -> None :
14141411 """
14151412 Asynchronously recreates the Qdrant collection with the specified parameters.
14161413
@@ -1449,7 +1446,7 @@ def _handle_duplicate_documents(
14491446 self ,
14501447 documents : List [Document ],
14511448 policy : DuplicatePolicy = None ,
1452- ):
1449+ ) -> List [ Document ] :
14531450 """
14541451 Checks whether any of the passed documents is already existing in the chosen index and returns a list of
14551452 documents that are not in the index yet.
@@ -1476,7 +1473,7 @@ async def _handle_duplicate_documents_async(
14761473 self ,
14771474 documents : List [Document ],
14781475 policy : DuplicatePolicy = None ,
1479- ):
1476+ ) -> List [ Document ] :
14801477 """
14811478 Asynchronously checks whether any of the passed documents is already existing
14821479 in the chosen index and returns a list of
@@ -1521,7 +1518,7 @@ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]
15211518
15221519 return _documents
15231520
1524- def _prepare_collection_params (self ):
1521+ def _prepare_collection_params (self ) -> Dict [ str , Any ] :
15251522 """
15261523 Prepares the common parameters for collection creation.
15271524 """
@@ -1537,7 +1534,7 @@ def _prepare_collection_params(self):
15371534 "init_from" : self .init_from ,
15381535 }
15391536
1540- def _prepare_client_params (self ):
1537+ def _prepare_client_params (self ) -> Dict [ str , Any ] :
15411538 """
15421539 Prepares the common parameters for client initialization.
15431540
@@ -1554,7 +1551,10 @@ def _prepare_client_params(self):
15541551 "timeout" : self .timeout ,
15551552 "host" : self .host ,
15561553 "path" : self .path ,
1557- "metadata" : self .metadata ,
1554+ # NOTE: We purposefully pass a shallow copy of self.metadata so that the original self.metadata
1555+ # instance attribute is never modified. For example, the QdrantClient class adds the resolved api key to
1556+ # the metadata it receives when using a hosted Qdrant service, so sharing the dict would make to_dict() expose the api key.
1557+ "metadata" : {** self .metadata },
15581558 "force_disable_check_same_thread" : self .force_disable_check_same_thread ,
15591559 }
15601560
@@ -1565,7 +1565,7 @@ def _prepare_collection_config(
15651565 on_disk : Optional [bool ] = None ,
15661566 use_sparse_embeddings : Optional [bool ] = None ,
15671567 sparse_idf : bool = False ,
1568- ):
1568+ ) -> Tuple [ Dict [ str , rest . VectorParams ], Optional [ Dict [ str , rest . SparseVectorParams ]]] :
15691569 """
15701570 Prepares the configuration for creating or recreating a Qdrant collection.
15711571
@@ -1595,9 +1595,12 @@ def _prepare_collection_config(
15951595
15961596 return vectors_config , sparse_vectors_config
15971597
1598- def _validate_filters (self , filters : Optional [Union [Dict [str , Any ], rest .Filter ]] = None ):
1598+ def _validate_filters (self , filters : Optional [Union [Dict [str , Any ], rest .Filter ]] = None ) -> None :
15991599 """
16001600 Validates the filters provided for querying.
1601+
1602+ :param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
1603+ :raises ValueError: If the filters are not in the correct format or syntax.
16011604 """
16021605 if filters and not isinstance (filters , dict ) and not isinstance (filters , rest .Filter ):
16031606 msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
@@ -1607,7 +1610,7 @@ def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]
16071610 msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
16081611 raise ValueError (msg )
16091612
1610- def _process_query_point_results (self , results , scale_score : bool = False ):
1613+ def _process_query_point_results (self , results : List [ QdrantPoint ] , scale_score : bool = False ) -> List [ Document ] :
16111614 """
16121615 Processes query results from Qdrant.
16131616 """
@@ -1627,7 +1630,7 @@ def _process_query_point_results(self, results, scale_score: bool = False):
16271630
16281631 return documents
16291632
1630- def _process_group_results (self , groups ) :
1633+ def _process_group_results (self , groups : List [ rest . PointGroup ]) -> List [ Document ] :
16311634 """
16321635 Processes grouped query results from Qdrant.
16331636
@@ -1647,7 +1650,7 @@ def _validate_collection_compatibility(
16471650 collection_info ,
16481651 distance ,
16491652 embedding_dim : int ,
1650- ):
1653+ ) -> None :
16511654 """
16521655 Validates that an existing collection is compatible with the current configuration.
16531656 """
0 commit comments