Skip to content

Commit b8b0ef3

Browse files
committed
adding missed file
1 parent 0c38dc6 commit b8b0ef3

2 files changed

Lines changed: 154 additions & 0 deletions

File tree

integrations/supabase/src/haystack_integrations/components/retrievers/supabase/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
45
from .embedding_retriever import SupabasePgvectorEmbeddingRetriever
56
from .groonga_bm25_retriever import SupabaseGroongaBM25Retriever
67
from .keyword_retriever import SupabasePgvectorKeywordRetriever
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import copy
6+
from typing import Any
7+
8+
from haystack import component, default_from_dict, default_to_dict
9+
from haystack.dataclasses import Document
10+
from haystack.document_stores.types import FilterPolicy
11+
from haystack.document_stores.types.filter_policy import apply_filter_policy
12+
13+
from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
14+
15+
16+
@component
17+
class SupabaseGroongaBM25Retriever:
18+
"""
19+
Retrieves documents from SupabaseGroongaDocumentStore using PGroonga full-text search.
20+
21+
This retriever works without embeddings — it searches documents using plain text queries.
22+
It can be used alongside SupabasePgvectorEmbeddingRetriever in hybrid search pipelines.
23+
24+
Note: async operations are not supported as the supabase-py sync client does not expose
25+
awaitable query methods. Use the sync run() method instead.
26+
27+
Example usage:
28+
29+
```python
30+
from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
31+
from haystack_integrations.components.retrievers.supabase import SupabaseGroongaBM25Retriever
32+
from haystack.utils import Secret
33+
34+
document_store = SupabaseGroongaDocumentStore(
35+
supabase_url="https://<project>.supabase.co",
36+
supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
37+
table_name="haystack_fts_documents",
38+
)
39+
document_store.warm_up()
40+
41+
retriever = SupabaseGroongaBM25Retriever(document_store=document_store, top_k=10)
42+
result = retriever.run(query="python programming")
43+
print(result["documents"])
44+
```
45+
"""
46+
47+
def __init__(
48+
self,
49+
*,
50+
document_store: SupabaseGroongaDocumentStore,
51+
filters: dict[str, Any] | None = None,
52+
top_k: int = 10,
53+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
54+
) -> None:
55+
"""
56+
Initialize the SupabaseGroongaBM25Retriever.
57+
58+
:param document_store: An instance of SupabaseGroongaDocumentStore.
59+
:param filters: Optional filters applied to retrieved Documents.
60+
:param top_k: Maximum number of Documents to return. Defaults to 10.
61+
:param filter_policy: Policy to determine how filters are applied.
62+
:raises ValueError: If document_store is not an instance of SupabaseGroongaDocumentStore.
63+
"""
64+
if not isinstance(document_store, SupabaseGroongaDocumentStore):
65+
msg = "document_store must be an instance of SupabaseGroongaDocumentStore"
66+
raise ValueError(msg)
67+
68+
self.document_store = document_store
69+
self.filters = filters or {}
70+
self.top_k = top_k
71+
self.filter_policy = (
72+
filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
73+
)
74+
75+
@component.output_types(documents=list[Document])
76+
def run(
77+
self,
78+
query: str,
79+
filters: dict[str, Any] | None = None,
80+
top_k: int | None = None,
81+
) -> dict[str, list[Document]]:
82+
"""
83+
Runs the retriever on the given query.
84+
85+
:param query: The text query to search for.
86+
:param filters: Optional runtime filters. Merged or replaced based on filter_policy.
87+
:param top_k: Optional override for maximum number of documents to return.
88+
:returns: Dictionary with key "documents" containing list of matching Documents.
89+
"""
90+
if not query:
91+
return {"documents": []}
92+
93+
merged_filters = apply_filter_policy(self.filter_policy, self.filters, filters)
94+
effective_top_k = top_k if top_k is not None else self.top_k
95+
96+
documents = self.document_store._groonga_retrieval(
97+
query=query,
98+
top_k=effective_top_k,
99+
filters=merged_filters,
100+
)
101+
102+
return {"documents": documents}
103+
104+
@component.output_types(documents=list[Document])
105+
async def run_async(
106+
self,
107+
query: str,
108+
filters: dict[str, Any] | None = None,
109+
top_k: int | None = None,
110+
) -> dict[str, list[Document]]:
111+
"""
112+
Async version of run().
113+
114+
Note: supabase-py's sync client does not support native async queries.
115+
This method runs the synchronous retrieval and returns the result.
116+
For fully async support, consider using acreate_client() from supabase-py
117+
and refactoring the document store accordingly.
118+
119+
:param query: The text query to search for.
120+
:param filters: Optional runtime filters. Merged or replaced based on filter_policy.
121+
:param top_k: Optional override for maximum number of documents to return.
122+
:returns: Dictionary with key "documents" containing list of matching Documents.
123+
"""
124+
return self.run(query=query, filters=filters, top_k=top_k)
125+
126+
def to_dict(self) -> dict[str, Any]:
127+
"""
128+
Serializes the component to a dictionary.
129+
130+
:returns: Dictionary with serialized data.
131+
"""
132+
return default_to_dict(
133+
self,
134+
filters=self.filters,
135+
top_k=self.top_k,
136+
filter_policy=self.filter_policy.value,
137+
document_store=self.document_store.to_dict(),
138+
)
139+
140+
@classmethod
141+
def from_dict(cls, data: dict[str, Any]) -> "SupabaseGroongaBM25Retriever":
142+
"""
143+
Deserializes the component from a dictionary.
144+
145+
:param data: Dictionary to deserialize from.
146+
:returns: Deserialized component.
147+
"""
148+
data = copy.deepcopy(data)
149+
doc_store_params = data["init_parameters"]["document_store"]
150+
data["init_parameters"]["document_store"] = SupabaseGroongaDocumentStore.from_dict(doc_store_params)
151+
if filter_policy := data["init_parameters"].get("filter_policy"):
152+
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
153+
return default_from_dict(cls, data)

0 commit comments

Comments
 (0)