Skip to content

Commit 506307b

Browse files
Copilotnkanu17
andcommitted
fix: address review feedback — FULLTEXT consistency, term escaping, Black formatting
Agent-Logs-Url: https://github.com/redis-developer/sql-redis/sessions/a931f501-2012-4aa2-8d18-2bca615d32be Co-authored-by: nkanu17 <47899917+nkanu17@users.noreply.github.com>
1 parent aec9ea3 commit 506307b

3 files changed

Lines changed: 90 additions & 80 deletions

File tree

sql_redis/query_builder.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ class QueryBuilder:
5151
# Characters that need escaping in TAG values
5252
TAG_SPECIAL_CHARS = r".,<>{}[]\"':;!@#$%^&*()-+=~"
5353

54+
# Characters that have special meaning in RediSearch free-text queries
55+
# (outside of double-quoted phrases) and must be escaped with a backslash.
56+
# Only characters likely to appear accidentally in user data are included;
57+
# intentional RediSearch features (~, *, %, ^) are intentionally excluded.
58+
TEXT_QUERY_SPECIAL_CHARS = frozenset({"\\", "-", "@", "|", "(", ")"})
59+
5460
@staticmethod
5561
def _escape_text_value(value: str) -> str:
5662
"""Escape characters that are special inside RediSearch double-quoted phrases.
@@ -62,6 +68,23 @@ def _escape_text_value(value: str) -> str:
6268
# then escape double quotes.
6369
return value.replace("\\", "\\\\").replace('"', '\\"')
6470

71+
@classmethod
72+
def _escape_fulltext_term(cls, term: str) -> str:
73+
"""Escape characters that have special meaning in RediSearch free-text queries.
74+
75+
Applied to individual terms used outside of double-quoted phrases (e.g.,
76+
in parenthesized FULLTEXT expressions) so that user input containing
77+
RediSearch operator characters like |, -, (, ), @ does not alter the
78+
query semantics or produce syntax errors.
79+
"""
80+
result = []
81+
for char in term:
82+
if char in cls.TEXT_QUERY_SPECIAL_CHARS:
83+
result.append(f"\\{char}")
84+
else:
85+
result.append(char)
86+
return "".join(result)
87+
6588
def build_text_condition(
6689
self,
6790
field: str | list[str],
@@ -77,7 +100,11 @@ def build_text_condition(
77100
78101
Args:
79102
field: Field name or list of field names for multi-field search.
80-
operator: One of =, !=, FULLTEXT (or MATCH alias), LIKE, FUZZY.
103+
operator: One of =, !=, FULLTEXT, LIKE, FUZZY.
104+
- = / !=: exact phrase match, value wrapped in double quotes.
105+
- FULLTEXT: tokenized keyword search with stopword filtering.
106+
- LIKE: prefix/suffix/infix pattern (SQL % → RediSearch *).
107+
- FUZZY: Levenshtein fuzzy match.
81108
value: The search term or pattern.
82109
negated: If True, prefix with - for negation.
83110
fuzzy_level: Levenshtein distance for FUZZY (1, 2, or 3). Default 1.
@@ -118,7 +145,9 @@ def build_text_condition(
118145
escaped = self._escape_text_value(value)
119146
search_value = f'"{escaped}"'
120147
elif " " in value and " OR " not in value:
121-
# FULLTEXT/MATCH with multi-word: tokenized search with stopword filtering
148+
# FULLTEXT with multi-word: tokenized search with stopword filtering.
149+
# Each term is escaped to prevent RediSearch operator characters in
150+
# user input from changing query semantics.
122151
words = value.split()
123152
removed_stopwords = [
124153
w for w in words if w.lower() in REDIS_DEFAULT_STOPWORDS
@@ -137,14 +166,24 @@ def build_text_condition(
137166
stacklevel=2,
138167
)
139168

140-
terms = " ".join(filtered_words) if filtered_words else value
141-
search_value = f"({terms})"
169+
if filtered_words:
170+
escaped_terms = " ".join(
171+
self._escape_fulltext_term(w) for w in filtered_words
172+
)
173+
else:
174+
# All words were stopwords; pass them through (escaped) so the
175+
# query doesn't become empty. RediSearch will still skip them at
176+
# query time, but this avoids a syntax error from an empty clause.
177+
escaped_terms = " ".join(self._escape_fulltext_term(w) for w in words)
178+
search_value = f"({escaped_terms})"
142179
elif " OR " in value:
143180
# OR union within text field: split on ' OR ' and join with |
144-
or_terms = [t.strip() for t in value.split(" OR ")]
181+
or_terms = [
182+
self._escape_fulltext_term(t.strip()) for t in value.split(" OR ")
183+
]
145184
search_value = f"({'|'.join(or_terms)})"
146185
else:
147-
search_value = value
186+
search_value = self._escape_fulltext_term(value)
148187

149188
base = f"{prefix}@{field}:{search_value}"
150189

tests/test_query_builder.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def test_text_negation(self):
7171
"""TEXT field with NOT: -@field:term."""
7272
builder = QueryBuilder()
7373
result = builder.build_text_condition(
74-
"title", "MATCH", "refurbished", negated=True
74+
"title", "FULLTEXT", "refurbished", negated=True
7575
)
7676

7777
assert result == "-@title:refurbished"
@@ -83,11 +83,18 @@ def test_text_fuzzy_match(self):
8383

8484
assert result == "@title:%laptap%"
8585

86+
def test_text_fulltext_special_chars_escaped(self):
87+
"""FULLTEXT term with RediSearch operator chars is escaped to avoid injection."""
88+
builder = QueryBuilder()
89+
result = builder.build_text_condition("description", "FULLTEXT", "anti-virus")
90+
91+
assert result == r"@description:anti\-virus"
92+
8693
def test_text_multi_field(self):
8794
"""TEXT multi-field search: (@field1|field2:term)."""
8895
builder = QueryBuilder()
8996
result = builder.build_text_condition(
90-
["title", "description"], "MATCH", "wireless"
97+
["title", "description"], "FULLTEXT", "wireless"
9198
)
9299

93100
assert result == "(@title|description:wireless)"

0 commit comments

Comments
 (0)