Skip to content

Commit ba53192

Browse files
committed
wip
1 parent 1f4f124 commit ba53192

1 file changed

Lines changed: 48 additions & 24 deletions

File tree

  • integrations/qdrant/src/haystack_integrations/document_stores/qdrant

integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ def write_documents(
380380
) -> int:
381381
"""
382382
Writes documents to Qdrant using the specified policy.
383+
383384
The QdrantDocumentStore can handle duplicate documents based on the given policy.
384385
The available policies are:
385386
- `FAIL`: The operation will raise an error if any document already exists.
@@ -389,7 +390,8 @@ def write_documents(
389390
:param documents: A list of Document objects to write to Qdrant.
390391
:param policy: The policy for handling duplicate documents.
391392
392-
:returns: The number of documents written to the document store.
393+
:returns:
394+
The number of documents written to the document store.
393395
"""
394396

395397
self._initialize_client()
@@ -525,6 +527,40 @@ async def delete_documents_async(self, document_ids: List[str]) -> None:
525527
"Called QdrantDocumentStore.delete_documents_async() on a non-existing ID",
526528
)
527529

530+
def delete_all_documents(self, recreate_index: bool = False) -> None:
531+
"""
532+
Deletes all documents in the document store.
533+
534+
:param recreate_index: If `True`, the index will be recreated after deletion.
535+
536+
If `recreate_index` is `True`, the collection is recreated to ensure all documents are removed; otherwise all points are deleted from the existing collection.
537+
"""
538+
539+
if recreate_index:
540+
self.recreate_collection(
541+
self.index,
542+
self.get_distance(self.similarity),
543+
self.embedding_dim,
544+
self.on_disk,
545+
self.use_sparse_embeddings,
546+
self.sparse_idf,
547+
)
548+
else:
549+
self._initialize_client() # _initialize_client ensures the client is initialized
550+
self._client.delete( # type: ignore
551+
collection_name=self.index,
552+
points_selector=rest.PointsSelectorAll(),
553+
wait=self.wait_result_from_api,
554+
)
555+
556+
async def del_all_documents_async(self) -> None:
557+
"""
558+
Asynchronously deletes all documents in the document store.
559+
560+
It deletes the collection and recreates it to ensure all documents are removed.
561+
"""
562+
# TODO: not implemented yet; this method currently does nothing.
563+
528564
@classmethod
529565
def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore":
530566
"""
@@ -563,8 +599,7 @@ def _get_documents_generator(
563599
Returns a generator that yields documents from Qdrant based on the provided filters.
564600
565601
:param filters: Filters applied to the retrieved documents.
566-
:returns:
567-
A generator that yields documents retrieved from Qdrant.
602+
:returns: A generator that yields documents retrieved from Qdrant.
568603
"""
569604

570605
self._initialize_client()
@@ -601,8 +636,7 @@ async def _get_documents_generator_async(
601636
Returns an asynchronous generator that yields documents from Qdrant based on the provided filters.
602637
603638
:param filters: Filters applied to the retrieved documents.
604-
:returns:
605-
An asynchronous generator that yields documents retrieved from Qdrant.
639+
:returns: An asynchronous generator that yields documents retrieved from Qdrant.
606640
"""
607641

608642
await self._initialize_async_client()
@@ -639,8 +673,7 @@ def get_documents_by_id(
639673
Retrieves documents from Qdrant by their IDs.
640674
641675
:param ids: A list of document IDs to retrieve.
642-
:returns:
643-
A list of documents.
676+
:returns: A list of documents.
644677
"""
645678
documents: List[Document] = []
646679

@@ -719,8 +752,7 @@ def _query_by_sparse(
719752
value, all values will be used for grouping. One point can be in multiple groups.
720753
:param group_size: Maximum amount of points to return per group. Default is 3.
721754
722-
:returns:
723-
List of documents that are most similar to `query_sparse_embedding`.
755+
:returns: List of documents that are most similar to `query_sparse_embedding`.
724756
725757
:raises QdrantStoreError:
726758
If the Document Store was initialized with `use_sparse_embeddings=False`.
@@ -797,8 +829,7 @@ def _query_by_embedding(
797829
value, all values will be used for grouping. One point can be in multiple groups.
798830
:param group_size: Maximum amount of points to return per group. Default is 3.
799831
800-
:returns:
801-
List of documents that are most similar to `query_embedding`.
832+
:returns: List of documents that are most similar to `query_embedding`.
802833
"""
803834
self._initialize_client()
804835
assert self._client is not None
@@ -861,8 +892,7 @@ def _query_hybrid(
861892
value, all values will be used for grouping. One point can be in multiple groups.
862893
:param group_size: Maximum amount of points to return per group. Default is 3.
863894
864-
:returns:
865-
A list of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
895+
:returns: A list of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
866896
867897
:raises QdrantStoreError:
868898
If the Document Store was initialized with `use_sparse_embeddings=False`.
@@ -972,8 +1002,7 @@ async def _query_by_sparse_async(
9721002
value, all values will be used for grouping. One point can be in multiple groups.
9731003
:param group_size: Maximum amount of points to return per group. Default is 3.
9741004
975-
:returns:
976-
A list of documents that are most similar to `query_sparse_embedding`.
1005+
:returns: A list of documents that are most similar to `query_sparse_embedding`.
9771006
9781007
:raises QdrantStoreError:
9791008
If the Document Store was initialized with `use_sparse_embeddings=False`.
@@ -1053,8 +1082,7 @@ async def _query_by_embedding_async(
10531082
value, all values will be used for grouping. One point can be in multiple groups.
10541083
:param group_size: Maximum amount of points to return per group. Default is 3.
10551084
1056-
:returns:
1057-
A list of documents that are most similar to `query_embedding`.
1085+
:returns: A list of documents that are most similar to `query_embedding`.
10581086
"""
10591087
await self._initialize_async_client()
10601088
assert self._async_client is not None
@@ -1119,8 +1147,7 @@ async def _query_hybrid_async(
11191147
value, all values will be used for grouping. One point can be in multiple groups.
11201148
:param group_size: Maximum amount of points to return per group. Default is 3.
11211149
1122-
:returns:
1123-
A list of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
1150+
:returns: A list of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
11241151
11251152
:raises QdrantStoreError:
11261153
If the Document Store was initialized with `use_sparse_embeddings=False`.
@@ -1295,7 +1322,6 @@ def _set_up_collection(
12951322
If the collection exists with incompatible settings.
12961323
:raises ValueError:
12971324
If the collection exists with a different similarity measure or embedding dimension.
1298-
12991325
"""
13001326

13011327
self._initialize_client()
@@ -1498,8 +1524,7 @@ async def _handle_duplicate_documents_async(
14981524
14991525
:param documents: A list of Haystack Document objects.
15001526
:param policy: The duplicate policy to use when writing documents.
1501-
:returns:
1502-
A list of Haystack Document objects.
1527+
:returns: A list of Haystack Document objects.
15031528
"""
15041529

15051530
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
@@ -1521,8 +1546,7 @@ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]
15211546
15221547
:param documents: A list of Haystack Document objects.
15231548
1524-
:returns:
1525-
A list of Haystack Document objects with unique IDs.
1549+
:returns: A list of Haystack Document objects with unique IDs.
15261550
"""
15271551
_hash_ids: Set = set()
15281552
_documents: List[Document] = []

0 commit comments

Comments
 (0)