@@ -123,64 +123,64 @@ async def init_search_index(self):
123123
124124 def _prepare_boolean_query (self , query : str ) -> str :
125125 """Prepare a Boolean query by quoting individual terms while preserving operators.
126-
126+
127127 Args:
128128 query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
129-
129+
130130 Returns:
131131 A properly formatted Boolean query with quoted terms that need quoting
132132 """
133133 # Define Boolean operators and their boundaries
134- boolean_pattern = r' (\bAND\b|\bOR\b|\bNOT\b)'
135-
134+ boolean_pattern = r" (\bAND\b|\bOR\b|\bNOT\b)"
135+
136136 # Split the query by Boolean operators, keeping the operators
137137 parts = re .split (boolean_pattern , query )
138-
138+
139139 processed_parts = []
140140 for part in parts :
141141 part = part .strip ()
142142 if not part :
143143 continue
144-
144+
145145 # If it's a Boolean operator, keep it as is
146- if part in [' AND' , 'OR' , ' NOT' ]:
146+ if part in [" AND" , "OR" , " NOT" ]:
147147 processed_parts .append (part )
148148 else :
149149 # Handle parentheses specially - they should be preserved for grouping
150- if '(' in part or ')' in part :
150+ if "(" in part or ")" in part :
151151 # Parse parenthetical expressions carefully
152152 processed_part = self ._prepare_parenthetical_term (part )
153153 processed_parts .append (processed_part )
154154 else :
155155 # This is a search term - for Boolean queries, don't add prefix wildcards
156156 prepared_term = self ._prepare_single_term (part , is_prefix = False )
157157 processed_parts .append (prepared_term )
158-
158+
159159 return " " .join (processed_parts )
160-
160+
161161 def _prepare_parenthetical_term (self , term : str ) -> str :
162162 """Prepare a term that contains parentheses, preserving the parentheses for grouping.
163-
163+
164164 Args:
165165 term: A term that may contain parentheses like "(hello" or "world)" or "(hello OR world)"
166-
166+
167167 Returns:
168168 A properly formatted term with parentheses preserved
169169 """
170170 # Handle terms that start/end with parentheses but may contain quotable content
171171 result = ""
172172 i = 0
173173 while i < len (term ):
174- if term [i ] in '()' :
174+ if term [i ] in "()" :
175175 # Preserve parentheses as-is
176176 result += term [i ]
177177 i += 1
178178 else :
179179 # Find the next parenthesis or end of string
180180 start = i
181- while i < len (term ) and term [i ] not in '()' :
181+ while i < len (term ) and term [i ] not in "()" :
182182 i += 1
183-
183+
184184 # Extract the content between parentheses
185185 content = term [start :i ].strip ()
186186 if content :
@@ -191,43 +191,71 @@ def _prepare_parenthetical_term(self, term: str) -> str:
191191 result += f'"{ escaped_content } "'
192192 else :
193193 result += content
194-
194+
195195 return result
196-
196+
197197 def _needs_quoting (self , term : str ) -> bool :
198198 """Check if a term needs to be quoted for FTS5 safety.
199-
199+
200200 Args:
201201 term: The term to check
202-
202+
203203 Returns:
204204 True if the term should be quoted
205205 """
206206 if not term or not term .strip ():
207207 return False
208-
208+
209209 # Characters that indicate we should quote (excluding parentheses which are valid syntax)
210- needs_quoting_chars = [" " , "." , ":" , ";" , "," , "<" , ">" , "?" , "/" , "-" , "'" , '"' ,
211- "[" , "]" , "{" , "}" , "+" , "!" , "@" , "#" , "$" , "%" , "^" , "&" ,
212- "=" , "|" , "\\ " , "~" , "`" ]
213-
210+ needs_quoting_chars = [
211+ " " ,
212+ "." ,
213+ ":" ,
214+ ";" ,
215+ "," ,
216+ "<" ,
217+ ">" ,
218+ "?" ,
219+ "/" ,
220+ "-" ,
221+ "'" ,
222+ '"' ,
223+ "[" ,
224+ "]" ,
225+ "{" ,
226+ "}" ,
227+ "+" ,
228+ "!" ,
229+ "@" ,
230+ "#" ,
231+ "$" ,
232+ "%" ,
233+ "^" ,
234+ "&" ,
235+ "=" ,
236+ "|" ,
237+ "\\ " ,
238+ "~" ,
239+ "`" ,
240+ ]
241+
214242 return any (c in term for c in needs_quoting_chars )
215-
243+
216244 def _prepare_single_term (self , term : str , is_prefix : bool = True ) -> str :
217245 """Prepare a single search term (no Boolean operators).
218-
246+
219247 Args:
220248 term: A single search term
221249 is_prefix: Whether to add prefix search capability (* suffix)
222-
250+
223251 Returns:
224252 A properly formatted single term
225253 """
226254 if not term or not term .strip ():
227255 return term
228-
256+
229257 term = term .strip ()
230-
258+
231259 # Check if term is already a proper wildcard pattern (alphanumeric + *)
232260 # e.g., "hello*", "test*world" - these should be left alone
233261 if "*" in term and all (c .isalnum () or c in "*_-" for c in term ):
0 commit comments