1+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2+ #
3+ # SPDX-License-Identifier: Apache-2.0
4+
import copy
from typing import Any, Dict, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.types import FilterPolicy
from haystack.document_stores.types.filter_policy import apply_filter_policy

from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
13+
14+
@component
class SupabaseGroongaRetriever:
    """
    Retrieves documents from SupabaseGroongaDocumentStore using PGroonga full-text search.

    This retriever works without embeddings — it searches documents using plain text queries.
    It can be used alongside SupabasePgvectorEmbeddingRetriever in hybrid search pipelines.

    Example usage:

    ```python
    from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
    from haystack_integrations.components.retrievers.supabase import SupabaseGroongaRetriever
    from haystack.utils import Secret

    document_store = SupabaseGroongaDocumentStore(
        supabase_url="https://<project>.supabase.co",
        supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
        table_name="haystack_fts_documents",
    )

    retriever = SupabaseGroongaRetriever(document_store=document_store, top_k=10)
    result = retriever.run(query="python programming")
    print(result["documents"])
    ```
    """

    def __init__(
        self,
        *,
        document_store: SupabaseGroongaDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
    ) -> None:
        """
        Initialize the SupabaseGroongaRetriever.

        :param document_store: An instance of SupabaseGroongaDocumentStore.
        :param filters: Optional filters applied to retrieved Documents.
            Stored as an empty dictionary when omitted.
        :param top_k: Maximum number of Documents to return. Defaults to 10.
        :param filter_policy: Policy to determine how init-time and runtime filters
            are combined. Accepts a `FilterPolicy` member or its string form, which
            is converted with `FilterPolicy.from_str`.
        :raises ValueError: If document_store is not an instance of SupabaseGroongaDocumentStore.
        """
        if not isinstance(document_store, SupabaseGroongaDocumentStore):
            msg = "document_store must be an instance of SupabaseGroongaDocumentStore"
            raise ValueError(msg)

        self.document_store = document_store
        self.filters = filters or {}
        self.top_k = top_k
        # Normalise to the enum so `run` and `to_dict` can rely on a single type.
        self.filter_policy = (
            filter_policy
            if isinstance(filter_policy, FilterPolicy)
            else FilterPolicy.from_str(filter_policy)
        )

    @component.output_types(documents=List[Document])
    def run(
        self,
        query: str,
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
    ) -> Dict[str, List[Document]]:
        """
        Runs the retriever on the given query.

        :param query: The text query to search for. An empty query short-circuits
            and returns no documents without contacting the document store.
        :param filters: Optional runtime filters. Combined with, or used instead of,
            the init-time filters according to `filter_policy`. When omitted, the
            filters given at initialization are used.
        :param top_k: Optional override for maximum number of documents to return.
            When `None`, the value given at initialization is used.
        :returns: Dictionary with key "documents" containing list of matching Documents.
        """
        if not query:
            return {"documents": []}

        # Haystack filters are expression trees, so a key-wise dict merge cannot
        # combine them: two comparison filters would simply overwrite each other and
        # a comparison merged with a logical filter would yield a malformed dict.
        # `apply_filter_policy` builds a proper combined expression for MERGE and
        # falls back to whichever filter set is present otherwise.
        effective_filters = apply_filter_policy(self.filter_policy, self.filters, filters)
        effective_top_k = self.top_k if top_k is None else top_k

        documents = self.document_store._groonga_retrieval(
            query=query,
            top_k=effective_top_k,
            # Keep handing the store a dictionary even if no filters are active.
            filters=effective_filters or {},
        )

        return {"documents": documents}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The filter policy is stored by its enum value and the document store is
        serialized through its own `to_dict`.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            filters=self.filters,
            top_k=self.top_k,
            filter_policy=self.filter_policy.value,
            document_store=self.document_store.to_dict(),
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SupabaseGroongaRetriever":
        """
        Deserializes the component from a dictionary.

        :param data: Dictionary to deserialize from. It is deep-copied first, so the
            caller's dictionary is not modified.
        :returns: Deserialized component.
        """
        data = copy.deepcopy(data)
        init_params = data["init_parameters"]
        init_params["document_store"] = SupabaseGroongaDocumentStore.from_dict(
            init_params["document_store"]
        )
        # `filter_policy` may be absent from the serialized data; in that case the
        # constructor default applies.
        if filter_policy := init_params.get("filter_policy"):
            init_params["filter_policy"] = FilterPolicy.from_str(filter_policy)
        return default_from_dict(cls, data)
0 commit comments