Skip to content

Commit c5cc92c

Browse files
feat!: change default fuzziness from AUTO to 0 in OpenSearch retrievers (#3198)
1 parent 623447e commit c5cc92c

5 files changed

Lines changed: 14 additions & 14 deletions

File tree

integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def __init__(
3131
*,
3232
document_store: OpenSearchDocumentStore,
3333
filters: dict[str, Any] | None = None,
34-
fuzziness: int | str = "AUTO",
34+
fuzziness: int | str = 0,
3535
top_k: int = 10,
3636
scale_score: bool = False,
3737
all_terms_must_match: bool = False,
@@ -49,8 +49,8 @@ def __init__(
4949
required to transform one word into another. For example, the "fuzziness" between the words
5050
"wined" and "wind" is 1 because only one edit is needed to match them.
5151
52-
Use "AUTO" (the default) for automatic adjustment based on term length, which is optimal for
53-
most scenarios. For detailed guidance, refer to the
52+
Defaults to `0` (exact matching). Use `"AUTO"` for automatic adjustment based on term length.
53+
For detailed guidance, refer to the
5454
[OpenSearch fuzzy query documentation](https://opensearch.org/docs/latest/query-dsl/term/fuzzy/).
5555
:param top_k: Maximum number of documents to return.
5656
:param scale_score: If `True`, scales the score of retrieved documents to a range between 0 and 1.

integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def __init__(
9393
embedder: TextEmbedder,
9494
# OpenSearchBM25Retriever
9595
filters_bm25: dict[str, Any] | None = None,
96-
fuzziness: int | str = "AUTO",
96+
fuzziness: int | str = 0,
9797
top_k_bm25: int = 10,
9898
scale_score: bool = False,
9999
all_terms_must_match: bool = False,

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1057,7 +1057,7 @@ def _bm25_retrieval(
10571057
query: str,
10581058
*,
10591059
filters: dict[str, Any] | None = None,
1060-
fuzziness: int | str = "AUTO",
1060+
fuzziness: int | str = 0,
10611061
top_k: int = 10,
10621062
scale_score: bool = False,
10631063
all_terms_must_match: bool = False,
@@ -1115,7 +1115,7 @@ async def _bm25_retrieval_async(
11151115
query: str,
11161116
*,
11171117
filters: dict[str, Any] | None = None,
1118-
fuzziness: str = "AUTO",
1118+
fuzziness: int | str = 0,
11191119
top_k: int = 10,
11201120
scale_score: bool = False,
11211121
all_terms_must_match: bool = False,

integrations/opensearch/tests/test_bm25_retriever.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def test_to_dict(_mock_opensearch_client):
6868
"type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
6969
},
7070
"filters": {},
71-
"fuzziness": "AUTO",
71+
"fuzziness": 0,
7272
"top_k": 10,
7373
"scale_score": False,
7474
"filter_policy": "replace",
@@ -163,7 +163,7 @@ def test_run():
163163
mock_store._bm25_retrieval.assert_called_once_with(
164164
query="some query",
165165
filters={},
166-
fuzziness="AUTO",
166+
fuzziness=0,
167167
top_k=10,
168168
scale_score=False,
169169
all_terms_must_match=False,
@@ -183,7 +183,7 @@ async def test_run_async():
183183
mock_store._bm25_retrieval_async.assert_called_once_with(
184184
query="some query",
185185
filters={},
186-
fuzziness="AUTO",
186+
fuzziness=0,
187187
top_k=10,
188188
scale_score=False,
189189
all_terms_must_match=False,
@@ -359,7 +359,7 @@ def test_run_with_runtime_document_store():
359359
runtime_store._bm25_retrieval.assert_called_once_with(
360360
query="some query",
361361
filters={},
362-
fuzziness="AUTO",
362+
fuzziness=0,
363363
top_k=10,
364364
scale_score=False,
365365
all_terms_must_match=False,
@@ -390,7 +390,7 @@ async def test_run_async_with_runtime_document_store():
390390
runtime_store._bm25_retrieval_async.assert_called_once_with(
391391
query="some query",
392392
filters={},
393-
fuzziness="AUTO",
393+
fuzziness=0,
394394
top_k=10,
395395
scale_score=False,
396396
all_terms_must_match=False,

integrations/opensearch/tests/test_open_search_hybrid_retriever.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class TestOpenSearchHybridRetriever:
7878
},
7979
},
8080
"filters_bm25": None,
81-
"fuzziness": "AUTO",
81+
"fuzziness": 0,
8282
"top_k_bm25": 10,
8383
"scale_score": False,
8484
"all_terms_must_match": False,
@@ -227,7 +227,7 @@ def test_run_with_extra_runtime_params(self, mock_embedder):
227227
filters={"key": "value"},
228228
top_k=1,
229229
all_terms_must_match=False,
230-
fuzziness="AUTO",
230+
fuzziness=0,
231231
scale_score=False,
232232
custom_query=None,
233233
)
@@ -260,7 +260,7 @@ def test_run_in_pipeline(self, mock_embedder):
260260
filters={"param_a": "default"},
261261
top_k=10,
262262
all_terms_must_match=False,
263-
fuzziness="AUTO",
263+
fuzziness=0,
264264
scale_score=False,
265265
custom_query=None,
266266
)

0 commit comments

Comments
 (0)