22#
33# SPDX-License-Identifier: Apache-2.0
44
5- from typing import Any , Dict , List , Optional
5+ from typing import Any
66
77from haystack import default_from_dict , default_to_dict , logging
88from haystack .dataclasses import Document
1717
1818class SupabaseGroongaDocumentStore :
1919 """
20- A Document Store for Supabase using PGroonga for full-text search.
20+ A Document Store for Supabase using PGroonga for full-text search.
2121
22- PGroonga is a PostgreSQL extension for fast, multilingual full-text search.
23- Unlike vector search, this store works with plain text queries — no embeddings needed.
22+ PGroonga is a PostgreSQL extension for fast, multilingual full-text search.
23+ Unlike vector search, this store works with plain text queries — no embeddings needed.
2424
25- Prerequisites:
26- - A Supabase project with PGroonga extension enabled.
27- - Enable PGroonga in your Supabase project by running:
28- `CREATE EXTENSION IF NOT EXISTS pgroonga;`
25+ Prerequisites:
26+ - A Supabase project with PGroonga extension enabled.
27+ - Enable PGroonga in your Supabase project by running:
28+ `CREATE EXTENSION IF NOT EXISTS pgroonga;`
2929
30- Example usage:
30+ Example usage:
3131
32- ```python
33- from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
34- from haystack.utils import Secret
32+ ```python
33+ from haystack_integrations.document_stores.supabase import SupabaseGroongaDocumentStore
34+ from haystack.utils import Secret
3535
36- document_store = SupabaseGroongaDocumentStore(
37- supabase_url="https://<project>.supabase.co",
38- supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
39- table_name="haystack_fts_documents",
40- )
41- ```
36+ document_store = SupabaseGroongaDocumentStore(
37+ supabase_url="https://<project>.supabase.co",
38+ supabase_key=Secret.from_env_var("SUPABASE_SERVICE_KEY"),
39+ table_name="haystack_fts_documents",
40+ )
41+ ```
4242 """
4343
4444 def __init__ (
@@ -76,13 +76,11 @@ def __init__(
7676 def _setup_table (self ) -> None :
7777 """
7878 Creates the documents table with PGroonga index if it does not exist.
79+
7980 If recreate_table is True, drops and recreates the table.
8081 """
8182 if self .recreate_table :
82- self ._client .rpc (
83- "exec_sql" ,
84- {"query" : f"DROP TABLE IF EXISTS { self .table_name } ;" }
85- ).execute ()
83+ self ._client .rpc ("exec_sql" , {"query" : f"DROP TABLE IF EXISTS { self .table_name } ;" }).execute ()
8684
8785 # Create table if not exists
8886 create_table_sql = f"""
@@ -112,7 +110,7 @@ def count_documents(self) -> int:
112110 result = self ._client .table (self .table_name ).select ("id" , count = "exact" ).execute ()
113111 return result .count or 0
114112
115- def filter_documents (self , filters : Optional [ Dict [ str , Any ]] = None ) -> List [Document ]:
113+ def filter_documents (self , filters : dict [ str , Any ] | None = None ) -> list [Document ]: # noqa: ARG002
116114 """
117115 Returns documents matching the given filters.
118116
@@ -125,7 +123,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
125123
126124 def write_documents (
127125 self ,
128- documents : List [Document ],
126+ documents : list [Document ],
129127 policy : DuplicatePolicy = DuplicatePolicy .NONE ,
130128 ) -> int :
131129 """
@@ -150,26 +148,15 @@ def write_documents(
150148 self ._client .table (self .table_name ).upsert (row ).execute ()
151149 written += 1
152150 elif policy == DuplicatePolicy .SKIP :
153- existing = (
154- self ._client .table (self .table_name )
155- .select ("id" )
156- .eq ("id" , doc .id )
157- .execute ()
158- )
151+ existing = self ._client .table (self .table_name ).select ("id" ).eq ("id" , doc .id ).execute ()
159152 if not existing .data :
160153 self ._client .table (self .table_name ).insert (row ).execute ()
161154 written += 1
162155 elif policy == DuplicatePolicy .FAIL :
163- existing = (
164- self ._client .table (self .table_name )
165- .select ("id" )
166- .eq ("id" , doc .id )
167- .execute ()
168- )
156+ existing = self ._client .table (self .table_name ).select ("id" ).eq ("id" , doc .id ).execute ()
169157 if existing .data :
170- raise DuplicateDocumentError (
171- f"Document with id { doc .id !r} already exists."
172- )
158+ msg = f"Document with id { doc .id !r} already exists."
159+ raise DuplicateDocumentError (msg )
173160 self ._client .table (self .table_name ).insert (row ).execute ()
174161 written += 1
175162 else :
@@ -178,7 +165,7 @@ def write_documents(
178165
179166 return written
180167
181- def delete_documents (self , document_ids : List [str ]) -> None :
168+ def delete_documents (self , document_ids : list [str ]) -> None :
182169 """
183170 Deletes documents with the given IDs.
184171
def _groonga_retrieval(
    self,
    query: str,
    top_k: int = 10,
    filters: dict[str, Any] | None = None,  # noqa: ARG002
) -> list[Document]:
    """
    Runs a full-text search by calling the `groonga_search` RPC on the Supabase client.

    :param query: Plain-text search query, forwarded to the RPC as `query_text`.
    :param top_k: Maximum number of results, forwarded to the RPC as `top_k`.
    :param filters: Accepted for interface compatibility; this method does not use it.
    :returns: One Document per row in the RPC response, in the order the RPC returned them.
    """
    # The table name is passed as an RPC argument rather than interpolated into SQL here.
    rpc_params = {"query_text": query, "table": self.table_name, "top_k": top_k}
    response = self._client.rpc("groonga_search", rpc_params).execute()

    documents = []
    for row in response.data:
        documents.append(self._to_haystack_document(row))
    return documents
219197
220- def _to_haystack_document (self , row : Dict [str , Any ]) -> Document :
198+ def _to_haystack_document (self , row : dict [str , Any ]) -> Document :
221199 """
222200 Converts a database row dictionary into a Haystack Document.
223201
@@ -231,7 +209,7 @@ def _to_haystack_document(self, row: Dict[str, Any]) -> Document:
231209 score = row .get ("score" ),
232210 )
233211
234- def to_dict (self ) -> Dict [str , Any ]:
212+ def to_dict (self ) -> dict [str , Any ]:
235213 """
236214 Serializes the component to a dictionary.
237215
@@ -246,12 +224,12 @@ def to_dict(self) -> Dict[str, Any]:
246224 )
247225
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "SupabaseGroongaDocumentStore":
    """
    Builds a store instance from its dictionary representation.

    The `init_parameters` entry of `data` is first handed to
    `deserialize_secrets_inplace` for the `supabase_key` field (as the helper's
    name suggests, this presumably modifies `data` in place), and the result is
    then passed to `default_from_dict`.

    :param data: Dictionary to deserialize from; must contain an `init_parameters` key.
    :returns: Deserialized component.
    """
    init_params = data["init_parameters"]
    secret_fields = ["supabase_key"]
    deserialize_secrets_inplace(init_params, secret_fields)
    return default_from_dict(cls, data)
0 commit comments