Skip to content

Commit 494a433

Browse files
feat(supabase): add SupabaseGroongaDocumentStore and SupabaseGroongaRetriever
1 parent c2ac6f6 commit 494a433

5 files changed

Lines changed: 690 additions & 3 deletions

File tree

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
54
from .embedding_retriever import SupabasePgvectorEmbeddingRetriever
65
from .keyword_retriever import SupabasePgvectorKeywordRetriever
6+
from .groonga_retriever import SupabaseGroongaRetriever
77

8-
__all__ = ["SupabasePgvectorEmbeddingRetriever", "SupabasePgvectorKeywordRetriever"]
8+
__all__ = [
9+
"SupabasePgvectorEmbeddingRetriever",
10+
"SupabasePgvectorKeywordRetriever",
11+
"SupabaseGroongaRetriever",
12+
]
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import copy
from typing import Any, Dict, List, Optional

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.types import FilterPolicy
from haystack.document_stores.types.filter_policy import apply_filter_policy

from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
13+
14+
15+
@component
class SupabaseGroongaRetriever:
    """
    Retrieves documents from SupabaseGroongaDocumentStore using PGroonga full-text search.

    This retriever works without embeddings — it searches documents using plain text queries.
    It can be used alongside SupabasePgvectorEmbeddingRetriever in hybrid search pipelines.

    Example usage:

    ```python
    from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
    from haystack_integrations.components.retrievers.supabase import SupabaseGroongaRetriever
    from haystack.utils import Secret

    document_store = SupabaseGroongaDocumentStore(
        supabase_url="https://<project>.supabase.co",
        supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
        table_name="haystack_fts_documents",
    )

    retriever = SupabaseGroongaRetriever(document_store=document_store, top_k=10)
    result = retriever.run(query="python programming")
    print(result["documents"])
    ```
    """

    def __init__(
        self,
        *,
        document_store: SupabaseGroongaDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
    ) -> None:
        """
        Initialize the SupabaseGroongaRetriever.

        :param document_store: An instance of SupabaseGroongaDocumentStore.
        :param filters: Optional filters applied to retrieved Documents.
            ``None`` is stored as an empty dict.
        :param top_k: Maximum number of Documents to return. Defaults to 10.
        :param filter_policy: Policy to determine how init-time and runtime filters
            are combined. Accepts a ``FilterPolicy`` member or its string form.
        :raises ValueError: If document_store is not an instance of SupabaseGroongaDocumentStore.
        """
        if not isinstance(document_store, SupabaseGroongaDocumentStore):
            msg = "document_store must be an instance of SupabaseGroongaDocumentStore"
            raise ValueError(msg)

        self.document_store = document_store
        self.filters = filters or {}
        self.top_k = top_k
        # Normalise to the enum so later comparisons and serialization can rely on it.
        self.filter_policy = (
            filter_policy
            if isinstance(filter_policy, FilterPolicy)
            else FilterPolicy.from_str(filter_policy)
        )

    def _resolve_filters(self, filters: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Combine the init-time filters with the runtime ``filters`` according to ``filter_policy``.

        :param filters: Filters passed to ``run``; ``None`` means "use the init-time filters".
        :returns: The filters to hand to the document store.
        """
        if filters is None:
            return self.filters
        if self.filter_policy != FilterPolicy.MERGE:
            # Any non-MERGE policy lets the runtime filters replace the init-time ones.
            return filters
        if "operator" in self.filters and "operator" in filters:
            # Both sides are structured filters (they carry an "operator" key). A shallow
            # dict merge would let the runtime filter overwrite the init-time one key by
            # key, so combine them with Haystack's own merge helper instead.
            # NOTE(review): assumes the document store accepts the combined logical
            # filter this helper produces — confirm against _groonga_retrieval.
            return apply_filter_policy(FilterPolicy.MERGE, self.filters, filters)
        # At least one side is empty or a flat mapping: a shallow merge is lossless
        # for empty dicts and keeps the previous behaviour for flat ones.
        return {**self.filters, **filters}

    @component.output_types(documents=List[Document])
    def run(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
    ) -> Dict[str, List[Document]]:
        """
        Runs the retriever on the given query.

        :param query: The text query to search for. An empty query returns no documents
            without calling the document store.
        :param filters: Optional runtime filters. Merged or replaced based on filter_policy.
        :param top_k: Optional override for maximum number of documents to return.
        :returns: Dictionary with key "documents" containing list of matching Documents.
        """
        if not query:
            return {"documents": []}

        documents = self.document_store._groonga_retrieval(
            query=query,
            # An explicit runtime top_k wins over the value given at init time.
            top_k=self.top_k if top_k is None else top_k,
            filters=self._resolve_filters(filters),
        )

        return {"documents": documents}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The filter policy is stored by its enum value and the document store is
        serialized through its own ``to_dict``.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            filters=self.filters,
            top_k=self.top_k,
            filter_policy=self.filter_policy.value,
            document_store=self.document_store.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SupabaseGroongaRetriever":
        """
        Deserializes the component from a dictionary.

        :param data: Dictionary to deserialize from. It is deep-copied first, so the
            caller's dictionary is not modified.
        :returns: Deserialized component.
        """
        data = copy.deepcopy(data)
        init_params = data["init_parameters"]
        # Rebuild the nested document store before handing the parameters to __init__.
        init_params["document_store"] = SupabaseGroongaDocumentStore.from_dict(
            init_params["document_store"]
        )
        if filter_policy := init_params.get("filter_policy"):
            init_params["filter_policy"] = FilterPolicy.from_str(filter_policy)
        return default_from_dict(cls, data)

integrations/supabase/src/haystack_integrations/document_stores/supabase/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,9 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
from .document_store import SupabasePgvectorDocumentStore
5+
from .groonga_document_store import SupabaseGroongaDocumentStore
56

6-
__all__ = ["SupabasePgvectorDocumentStore"]
7+
__all__ = [
8+
"SupabasePgvectorDocumentStore",
9+
"SupabaseGroongaDocumentStore",
10+
]

0 commit comments

Comments
 (0)