Skip to content

Commit 7e37bf3

Browse files
SaraCalla, davidsbatista, anakin87
authored
feat: add run_async to all Weaviate retrievers (#2607)
* add run_async to all retrievers * add :returns: docstrings * embed user values in error * add check for alpha range * fix tests * add tests for alpha checks * Apply suggestions from code review Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> * fix double backticks and wrong URL syntax --------- Co-authored-by: David S. Batista <dsbatista@gmail.com> Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
1 parent e708b89 commit 7e37bf3

10 files changed

Lines changed: 637 additions & 24 deletions

File tree

integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,34 @@ def run(
102102
details.
103103
:param top_k:
104104
The maximum number of documents to return.
105+
:returns: A dictionary with the following keys:
106+
- `documents`: List of documents returned by the search engine.
105107
"""
106108
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
107109

108110
top_k = top_k or self._top_k
109111
documents = self._document_store._bm25_retrieval(query=query, filters=filters, top_k=top_k)
110112
return {"documents": documents}
113+
114+
@component.output_types(documents=list[Document])
async def run_async(
    self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None
) -> dict[str, list[Document]]:
    """
    Asynchronous counterpart of the BM25 retrieval: fetch documents from Weaviate for a text query.

    :param query:
        The query text.
    :param filters:
        Filters supplied at run time. They are merged with the filters stored on the retriever by
        `apply_filter_policy`, according to the retriever's filter policy.
    :param top_k:
        The maximum number of documents to return. When falsy (`None` or `0`), the `top_k` stored on
        the retriever is used instead.
    :returns: A dictionary with the following keys:
        - `documents`: List of documents returned by the search engine.
    """
    effective_filters = apply_filter_policy(self._filter_policy, self._filters, filters)

    # Same fallback as `top_k or self._top_k`: any falsy runtime value defers to the stored one.
    limit = top_k if top_k else self._top_k

    hits = await self._document_store._bm25_retrieval_async(
        query=query,
        filters=effective_filters,
        top_k=limit,
    )
    return {"documents": hits}

integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/embedding_retriever.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def __init__(
4848
`distance` and `certainty` parameters.
4949
"""
5050
if distance is not None and certainty is not None:
51-
msg = "Can't use 'distance' and 'certainty' parameters together"
51+
msg = f"Can't use 'distance' ({distance}) and 'certainty' ({certainty}) parameters together"
5252
raise ValueError(msg)
5353

5454
self._document_store = document_store
@@ -121,6 +121,8 @@ def run(
121121
The maximum allowed distance between Documents' embeddings.
122122
:param certainty:
123123
Normalized distance between the result item and the search vector.
124+
:returns: A dictionary with the following keys:
125+
- `documents`: List of documents returned by the search engine.
124126
:raises ValueError:
125127
If both `distance` and `certainty` are provided.
126128
See https://weaviate.io/developers/weaviate/api/graphql/search-operators#variables to learn more about
@@ -132,7 +134,7 @@ def run(
132134
distance = distance or self._distance
133135
certainty = certainty or self._certainty
134136
if distance is not None and certainty is not None:
135-
msg = "Can't use 'distance' and 'certainty' parameters together"
137+
msg = f"Can't use 'distance' ({distance}) and 'certainty' ({certainty}) parameters together"
136138
raise ValueError(msg)
137139

138140
documents = self._document_store._embedding_retrieval(
@@ -143,3 +145,51 @@ def run(
143145
certainty=certainty,
144146
)
145147
return {"documents": documents}
148+
149+
@component.output_types(documents=list[Document])
async def run_async(
    self,
    query_embedding: list[float],
    filters: Optional[dict[str, Any]] = None,
    top_k: Optional[int] = None,
    distance: Optional[float] = None,
    certainty: Optional[float] = None,
) -> dict[str, list[Document]]:
    """
    Asynchronously retrieves documents from Weaviate using the vector search.

    :param query_embedding:
        Embedding of the query.
    :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
        the `filter_policy` chosen at retriever initialization. See init method docstring for more
        details.
    :param top_k:
        The maximum number of documents to return. A falsy value falls back to the `top_k` stored on
        the retriever.
    :param distance:
        The maximum allowed distance between Documents' embeddings. If `None`, the value stored on the
        retriever is used; an explicit `0.0` is passed through as given.
    :param certainty:
        Normalized distance between the result item and the search vector. If `None`, the value stored
        on the retriever is used; an explicit `0.0` is passed through as given.
    :returns: A dictionary with the following keys:
        - `documents`: List of documents returned by the search engine.
    :raises ValueError:
        If both `distance` and `certainty` are set once runtime values and the values stored on the
        retriever have been combined.
        See https://weaviate.io/developers/weaviate/api/graphql/search-operators#variables to learn more about
        `distance` and `certainty` parameters.
    """
    filters = apply_filter_policy(self._filter_policy, self._filters, filters)
    top_k = top_k or self._top_k

    # Use explicit `is None` checks instead of `value or default`: 0.0 is falsy, so the `or` form
    # would silently discard a caller-supplied threshold of 0.0 and substitute the stored value.
    distance = self._distance if distance is None else distance
    certainty = self._certainty if certainty is None else certainty
    if distance is not None and certainty is not None:
        msg = f"Can't use 'distance' ({distance}) and 'certainty' ({certainty}) parameters together"
        raise ValueError(msg)

    documents = await self._document_store._embedding_retrieval_async(
        query_embedding=query_embedding,
        filters=filters,
        top_k=top_k,
        distance=distance,
        certainty=certainty,
    )
    return {"documents": documents}

integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/hybrid_retriever.py

Lines changed: 100 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,35 +37,39 @@ def __init__(
3737
:param top_k:
3838
Maximum number of documents to return.
3939
:param alpha:
40-
Blending factor for hybrid retrieval in Weaviate. Must be in the range ``[0.0, 1.0]``.
40+
Blending factor for hybrid retrieval in Weaviate. Must be in the range `[0.0, 1.0]`.
4141
42-
Weaviate hybrid search combines keyword (BM25) and vector scores into a single ranking. ``alpha`` controls
42+
Weaviate hybrid search combines keyword (BM25) and vector scores into a single ranking. `alpha` controls
4343
how much each part contributes to the final score:
4444
45-
- ``alpha = 0.0``: only keyword (BM25) scoring is used.
46-
- ``alpha = 1.0``: only vector similarity scoring is used.
45+
- `alpha = 0.0`: only keyword (BM25) scoring is used.
46+
- `alpha = 1.0`: only vector similarity scoring is used.
4747
- Values in between blend the two; higher values favor the vector score, lower values favor BM25.
4848
49-
If ``None``, the Weaviate server default is used.
49+
If `None`, the Weaviate server default is used.
5050
5151
See the official Weaviate docs on Hybrid Search parameters for more details:
52-
`Hybrid search parameters <https://weaviate.io/developers/weaviate/search/hybrid#parameters>`_
53-
`Hybrid Search <https://docs.weaviate.io/weaviate/concepts/search/hybrid-search>`_
52+
- [Hybrid search parameters](https://weaviate.io/developers/weaviate/search/hybrid#parameters)
53+
- [Hybrid Search](https://docs.weaviate.io/weaviate/concepts/search/hybrid-search)
5454
:param max_vector_distance:
5555
Optional threshold that restricts the vector part of the hybrid search to candidates within a maximum
5656
vector distance. Candidates with a distance larger than this threshold are excluded from the vector portion
5757
before blending.
5858
59-
Use this to prune low-quality vector matches while still benefitting from keyword recall. Leave ``None`` to
59+
Use this to prune low-quality vector matches while still benefitting from keyword recall. Leave `None` to
6060
use Weaviate's default behavior without an explicit cutoff.
6161
6262
See the official Weaviate docs on Hybrid Search parameters for more details:
63-
- `Hybrid search parameters <https://weaviate.io/developers/weaviate/search/hybrid#parameters>`_
64-
- `Hybrid Search <https://docs.weaviate.io/weaviate/concepts/search/hybrid-search>`_
63+
- [Hybrid search parameters](https://weaviate.io/developers/weaviate/search/hybrid#parameters)
64+
- [Hybrid Search](https://docs.weaviate.io/weaviate/concepts/search/hybrid-search)
6565
:param filter_policy:
6666
Policy to determine how filters are applied.
6767
"""
6868

69+
if alpha is not None and not 0.0 <= alpha <= 1.0:
70+
msg = f"alpha ({alpha}) must be in the range [0.0, 1.0]"
71+
raise ValueError(msg)
72+
6973
self._document_store = document_store
7074
self._filters = filters or {}
7175
self._top_k = top_k
@@ -134,37 +138,43 @@ def run(
134138
:param top_k:
135139
The maximum number of documents to return.
136140
:param alpha:
137-
Blending factor for hybrid retrieval in Weaviate. Must be in the range ``[0.0, 1.0]``.
141+
Blending factor for hybrid retrieval in Weaviate. Must be in the range `[0.0, 1.0]`.
138142
139-
Weaviate hybrid search combines keyword (BM25) and vector scores into a single ranking. ``alpha`` controls
143+
Weaviate hybrid search combines keyword (BM25) and vector scores into a single ranking. `alpha` controls
140144
how much each part contributes to the final score:
141145
142-
- ``alpha = 0.0``: only keyword (BM25) scoring is used.
143-
- ``alpha = 1.0``: only vector similarity scoring is used.
146+
- `alpha = 0.0`: only keyword (BM25) scoring is used.
147+
- `alpha = 1.0`: only vector similarity scoring is used.
144148
- Values in between blend the two; higher values favor the vector score, lower values favor BM25.
145149
146-
If ``None``, the Weaviate server default is used.
150+
If `None`, the Weaviate server default is used.
147151
148152
See the official Weaviate docs on Hybrid Search parameters for more details:
149-
`Hybrid search parameters <https://weaviate.io/developers/weaviate/search/hybrid#parameters>`_
150-
`Hybrid Search <https://docs.weaviate.io/weaviate/concepts/search/hybrid-search>`_
153+
- [Hybrid search parameters](https://weaviate.io/developers/weaviate/search/hybrid#parameters)
154+
- [Hybrid Search](https://docs.weaviate.io/weaviate/concepts/search/hybrid-search)
151155
:param max_vector_distance:
152156
Optional threshold that restricts the vector part of the hybrid search to candidates within a maximum
153157
vector distance. Candidates with a distance larger than this threshold are excluded from the vector portion
154158
before blending.
155159
156-
Use this to prune low-quality vector matches while still benefitting from keyword recall. Leave ``None`` to
160+
Use this to prune low-quality vector matches while still benefitting from keyword recall. Leave `None` to
157161
use Weaviate's default behavior without an explicit cutoff.
158162
159163
See the official Weaviate docs on Hybrid Search parameters for more details:
160-
- `Hybrid search parameters <https://weaviate.io/developers/weaviate/search/hybrid#parameters>`_
161-
- `Hybrid Search <https://docs.weaviate.io/weaviate/concepts/search/hybrid-search>`_
164+
- [Hybrid search parameters](https://weaviate.io/developers/weaviate/search/hybrid#parameters)
165+
- [Hybrid Search](https://docs.weaviate.io/weaviate/concepts/search/hybrid-search)
166+
:returns: A dictionary with the following keys:
167+
- `documents`: List of documents returned by the search engine.
162168
"""
163169
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
164170
top_k = self._top_k if top_k is None else top_k
165171
alpha = self._alpha if alpha is None else alpha
166172
max_vector_distance = self._max_vector_distance if max_vector_distance is None else max_vector_distance
167173

174+
if alpha is not None and not 0.0 <= alpha <= 1.0:
175+
msg = f"alpha ({alpha}) must be in the range [0.0, 1.0]"
176+
raise ValueError(msg)
177+
168178
documents = self._document_store._hybrid_retrieval(
169179
query=query,
170180
query_embedding=query_embedding,
@@ -174,3 +184,73 @@ def run(
174184
max_vector_distance=max_vector_distance,
175185
)
176186
return {"documents": documents}
187+
188+
@component.output_types(documents=list[Document])
async def run_async(
    self,
    query: str,
    query_embedding: list[float],
    filters: Optional[dict[str, Any]] = None,
    top_k: Optional[int] = None,
    alpha: Optional[float] = None,
    max_vector_distance: Optional[float] = None,
) -> dict[str, list[Document]]:
    """
    Asynchronous hybrid (keyword + vector) retrieval of documents from Weaviate.

    Any of `top_k`, `alpha` and `max_vector_distance` left as `None` is replaced by the corresponding
    value stored on the retriever before the search is issued.

    :param query:
        The query text.
    :param query_embedding:
        Embedding of the query.
    :param filters:
        Filters supplied at run time. They are merged with the filters stored on the retriever by
        `apply_filter_policy`, according to the retriever's filter policy.
    :param top_k:
        The maximum number of documents to return.
    :param alpha:
        Blending factor for hybrid retrieval in Weaviate. Must be in the range `[0.0, 1.0]`.

        - `alpha = 0.0`: only keyword (BM25) scoring is used.
        - `alpha = 1.0`: only vector similarity scoring is used.
        - Values in between blend the two; higher values favor the vector score, lower values favor BM25.

        If `None`, the Weaviate server default is used.
    :param max_vector_distance:
        Optional threshold that restricts the vector part of the hybrid search to candidates within a
        maximum vector distance. Leave `None` to use Weaviate's default behavior without an explicit
        cutoff.

        See the official Weaviate docs on Hybrid Search parameters for more details:
        - [Hybrid search parameters](https://weaviate.io/developers/weaviate/search/hybrid#parameters)
        - [Hybrid Search](https://docs.weaviate.io/weaviate/concepts/search/hybrid-search)
    :returns: A dictionary with the following keys:
        - `documents`: List of documents returned by the search engine.
    :raises ValueError:
        If the resolved `alpha` is not `None` and lies outside `[0.0, 1.0]`.
    """
    filters = apply_filter_policy(self._filter_policy, self._filters, filters)

    # Only a missing (None) runtime value defers to the stored one; 0 and 0.0 are kept as given.
    if top_k is None:
        top_k = self._top_k
    if alpha is None:
        alpha = self._alpha
    if max_vector_distance is None:
        max_vector_distance = self._max_vector_distance

    # Chained comparison kept on purpose: it also rejects NaN, which fails both bounds.
    alpha_is_acceptable = alpha is None or 0.0 <= alpha <= 1.0
    if not alpha_is_acceptable:
        msg = f"alpha ({alpha}) must be in the range [0.0, 1.0]"
        raise ValueError(msg)

    search_kwargs = {
        "query": query,
        "query_embedding": query_embedding,
        "filters": filters,
        "top_k": top_k,
        "alpha": alpha,
        "max_vector_distance": max_vector_distance,
    }
    documents = await self._document_store._hybrid_retrieval_async(**search_kwargs)
    return {"documents": documents}

0 commit comments

Comments
 (0)