Skip to content

Commit 5d72bf0

Browse files
feat: change default fuzziness from AUTO to 0 in OpenSearch retrievers
fuzziness="AUTO" was causing severe performance problems on large indexes: switching to fuzziness=0 cut search times from 30s to 6s. This commit changes the default to 0 (exact matching), which avoids the clause explosion that AUTO triggers on big corpora. Users who need fuzzy matching can still pass fuzziness="AUTO" explicitly. Closes #3187.
1 parent 6b0993d commit 5d72bf0

5 files changed

Lines changed: 12 additions & 12 deletions

File tree

integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(
2929
*,
3030
document_store: OpenSearchDocumentStore,
3131
filters: dict[str, Any] | None = None,
32-
fuzziness: int | str = "AUTO",
32+
fuzziness: int | str = 0,
3333
top_k: int = 10,
3434
scale_score: bool = False,
3535
all_terms_must_match: bool = False,

integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(
9393
embedder: TextEmbedder,
9494
# OpenSearchBM25Retriever
9595
filters_bm25: dict[str, Any] | None = None,
96-
fuzziness: int | str = "AUTO",
96+
fuzziness: int | str = 0,
9797
top_k_bm25: int = 10,
9898
scale_score: bool = False,
9999
all_terms_must_match: bool = False,

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,7 @@ def _bm25_retrieval(
10571057
query: str,
10581058
*,
10591059
filters: dict[str, Any] | None = None,
1060-
fuzziness: int | str = "AUTO",
1060+
fuzziness: int | str = 0,
10611061
top_k: int = 10,
10621062
scale_score: bool = False,
10631063
all_terms_must_match: bool = False,
@@ -1115,7 +1115,7 @@ async def _bm25_retrieval_async(
11151115
query: str,
11161116
*,
11171117
filters: dict[str, Any] | None = None,
1118-
fuzziness: str = "AUTO",
1118+
fuzziness: int | str = 0,
11191119
top_k: int = 10,
11201120
scale_score: bool = False,
11211121
all_terms_must_match: bool = False,

integrations/opensearch/tests/test_bm25_retriever.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_to_dict(_mock_opensearch_client):
6868
"type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
6969
},
7070
"filters": {},
71-
"fuzziness": "AUTO",
71+
"fuzziness": 0,
7272
"top_k": 10,
7373
"scale_score": False,
7474
"filter_policy": "replace",
@@ -163,7 +163,7 @@ def test_run():
163163
mock_store._bm25_retrieval.assert_called_once_with(
164164
query="some query",
165165
filters={},
166-
fuzziness="AUTO",
166+
fuzziness=0,
167167
top_k=10,
168168
scale_score=False,
169169
all_terms_must_match=False,
@@ -183,7 +183,7 @@ async def test_run_async():
183183
mock_store._bm25_retrieval_async.assert_called_once_with(
184184
query="some query",
185185
filters={},
186-
fuzziness="AUTO",
186+
fuzziness=0,
187187
top_k=10,
188188
scale_score=False,
189189
all_terms_must_match=False,
@@ -343,7 +343,7 @@ def test_run_with_runtime_document_store():
343343
runtime_store._bm25_retrieval.assert_called_once_with(
344344
query="some query",
345345
filters={},
346-
fuzziness="AUTO",
346+
fuzziness=0,
347347
top_k=10,
348348
scale_score=False,
349349
all_terms_must_match=False,
@@ -374,7 +374,7 @@ async def test_run_async_with_runtime_document_store():
374374
runtime_store._bm25_retrieval_async.assert_called_once_with(
375375
query="some query",
376376
filters={},
377-
fuzziness="AUTO",
377+
fuzziness=0,
378378
top_k=10,
379379
scale_score=False,
380380
all_terms_must_match=False,

integrations/opensearch/tests/test_open_search_hybrid_retriever.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class TestOpenSearchHybridRetriever:
7878
},
7979
},
8080
"filters_bm25": None,
81-
"fuzziness": "AUTO",
81+
"fuzziness": 0,
8282
"top_k_bm25": 10,
8383
"scale_score": False,
8484
"all_terms_must_match": False,
@@ -219,7 +219,7 @@ def test_run_with_extra_runtime_params(self, mock_embedder):
219219
filters={"key": "value"},
220220
top_k=1,
221221
all_terms_must_match=False,
222-
fuzziness="AUTO",
222+
fuzziness=0,
223223
scale_score=False,
224224
custom_query=None,
225225
)
@@ -252,7 +252,7 @@ def test_run_in_pipeline(self, mock_embedder):
252252
filters={"param_a": "default"},
253253
top_k=10,
254254
all_terms_must_match=False,
255-
fuzziness="AUTO",
255+
fuzziness=0,
256256
scale_score=False,
257257
custom_query=None,
258258
)

0 commit comments

Comments
 (0)