Skip to content

Commit 2fd025f

Browse files
committed
fix: address Codex/Copilot review feedback (cycle 2)
- score() only SELECT emits RETURN 0 to prevent full document payload leak - score() with FT.AGGREGATE raises ValueError (WITHSCORES is FT.SEARCH-only) - Score alias collision guard in executor (both sync/async) - Add _escape_fulltext_term with double-quote escaping - Apply escaping to LIKE and FUZZY operators for special chars - Escape multi-field non-exact text search values - Mark verbatim/nostopwords as API-only fields (no SQL parser path yet) - 9 new tests (128 total QB+translator), 411 total pass
1 parent 506307b commit 2fd025f

7 files changed

Lines changed: 205 additions & 78 deletions

File tree

sql_redis/executor.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,11 @@ def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
177177
score = raw_result[i + 1]
178178
row_data = raw_result[i + 2]
179179
row = dict(zip(row_data[::2], row_data[1::2]))
180-
row[score_alias or "__score"] = score
180+
alias = score_alias or "__score"
181+
# Guard: if alias collides with a document field, prefix it
182+
if alias in row:
183+
alias = f"__score_{alias}"
184+
row[alias] = score
181185
rows.append(row)
182186
else:
183187
# Standard format: [count, key1, [fields1], key2, [fields2], ...]
@@ -274,7 +278,10 @@ async def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
274278
score = raw_result[i + 1]
275279
row_data = raw_result[i + 2]
276280
row = dict(zip(row_data[::2], row_data[1::2]))
277-
row[score_alias or "__score"] = score
281+
alias = score_alias or "__score"
282+
if alias in row:
283+
alias = f"__score_{alias}"
284+
row[alias] = score
278285
rows.append(row)
279286
else:
280287
# Standard format: [count, key1, [fields1], key2, [fields2], ...]

sql_redis/parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ class ParsedQuery:
234234
offset: int | None = None
235235
filters: list[str] = dataclasses.field(default_factory=list)
236236
scoring: ScoringSpec | None = None # Relevance scoring config
237+
# API-only flags — not set by SQL parsing, available for programmatic use
237238
verbatim: bool = False # If True, add VERBATIM to FT.SEARCH
238239
nostopwords: bool = False # If True, add NOSTOPWORDS to FT.SEARCH
239240

sql_redis/query_builder.py

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -52,30 +52,18 @@ class QueryBuilder:
5252
TAG_SPECIAL_CHARS = r".,<>{}[]\"':;!@#$%^&*()-+=~"
5353

5454
# Characters that have special meaning in RediSearch free-text queries
55-
# (outside of double-quoted phrases) and must be escaped with a backslash.
56-
# Only characters likely to appear accidentally in user data are included;
57-
# intentional RediSearch features (~, *, %, ^) are intentionally excluded.
58-
TEXT_QUERY_SPECIAL_CHARS = frozenset({"\\", "-", "@", "|", "(", ")"})
59-
60-
@staticmethod
61-
def _escape_text_value(value: str) -> str:
62-
"""Escape characters that are special inside RediSearch double-quoted phrases.
63-
64-
Backslashes and double quotes must be escaped so they don't break
65-
the query syntax or alter its meaning.
66-
"""
67-
# Escape backslashes first (so we don't double-escape the quote escapes),
68-
# then escape double quotes.
69-
return value.replace("\\", "\\\\").replace('"', '\\"')
55+
# (outside double-quoted phrases). Must be escaped with backslash.
56+
# Includes double-quote to prevent starting/ending quoted phrases.
57+
TEXT_QUERY_SPECIAL_CHARS = set('\\|-()"@~!{}[]^$><=;:')
7058

7159
@classmethod
7260
def _escape_fulltext_term(cls, term: str) -> str:
7361
"""Escape characters that have special meaning in RediSearch free-text queries.
7462
7563
Applied to individual terms used outside of double-quoted phrases (e.g.,
76-
in parenthesized FULLTEXT expressions) so that user input containing
77-
RediSearch operator characters like |, -, (, ), @ does not alter the
78-
query semantics or produce syntax errors.
64+
in parenthesized FULLTEXT expressions, LIKE, FUZZY) so that user input
65+
containing RediSearch operator characters does not alter query semantics
66+
or produce syntax errors.
7967
"""
8068
result = []
8169
for char in term:
@@ -85,6 +73,17 @@ def _escape_fulltext_term(cls, term: str) -> str:
8573
result.append(char)
8674
return "".join(result)
8775

76+
@staticmethod
77+
def _escape_text_value(value: str) -> str:
78+
"""Escape characters that are special inside RediSearch double-quoted phrases.
79+
80+
Backslashes and double quotes must be escaped so they don't break
81+
the query syntax or alter its meaning.
82+
"""
83+
# Escape backslashes first (so we don't double-escape the quote escapes),
84+
# then escape double quotes.
85+
return value.replace("\\", "\\\\").replace('"', '\\"')
86+
8887
def build_text_condition(
8988
self,
9089
field: str | list[str],
@@ -124,30 +123,35 @@ def build_text_condition(
124123
if operator in ("=", "!="):
125124
escaped = self._escape_text_value(value)
126125
return f'{prefix}(@{field_str}:"{escaped}")'
127-
return f"{prefix}(@{field_str}:{value})"
126+
escaped = self._escape_fulltext_term(value)
127+
return f"{prefix}(@{field_str}:{escaped})"
128128

129129
# Handle different operators
130130
if operator == "LIKE":
131-
# Convert SQL LIKE pattern (%) to RediSearch prefix/suffix/infix (*)
132-
search_value = value.replace("%", "*")
131+
# Escape special chars in the non-wildcard portion, then convert % → *
132+
# Split on %, escape each segment, rejoin with *
133+
parts = value.split("%")
134+
escaped_parts = [self._escape_fulltext_term(p) for p in parts]
135+
search_value = "*".join(escaped_parts)
133136
elif operator == "FUZZY":
134-
# Wrap with % signs — count determined by fuzzy_level
137+
# Escape special chars before wrapping with % markers
138+
escaped = self._escape_fulltext_term(value)
135139
level = fuzzy_level if fuzzy_level is not None else 1
136140
if level not in (1, 2, 3):
137141
raise ValueError(
138142
f"Fuzzy level must be 1, 2, or 3 (got {level}). "
139143
"RediSearch supports a maximum Levenshtein distance of 3."
140144
)
141145
pct = "%" * level
142-
search_value = f"{pct}{value}{pct}"
146+
search_value = f"{pct}{escaped}{pct}"
143147
elif operator in ("=", "!="):
144148
# Exact phrase match — always wrap in quotes, preserve stopwords.
145149
escaped = self._escape_text_value(value)
146150
search_value = f'"{escaped}"'
147151
elif " " in value and " OR " not in value:
148-
# FULLTEXT with multi-word: tokenized search with stopword filtering.
149-
# Each term is escaped to prevent RediSearch operator characters in
150-
# user input from changing query semantics.
152+
# FULLTEXT/MATCH with multi-word: tokenized search with stopword filtering.
153+
# FULLTEXT is intentionally pass-through — users craft RediSearch queries
154+
# via fulltext(), so operator chars like ~, |, - are preserved.
151155
words = value.split()
152156
removed_stopwords = [
153157
w for w in words if w.lower() in REDIS_DEFAULT_STOPWORDS
@@ -166,23 +170,14 @@ def build_text_condition(
166170
stacklevel=2,
167171
)
168172

169-
if filtered_words:
170-
escaped_terms = " ".join(
171-
self._escape_fulltext_term(w) for w in filtered_words
172-
)
173-
else:
174-
# All words were stopwords; pass them through (escaped) so the
175-
# query doesn't become empty. RediSearch will still skip them at
176-
# query time, but this avoids a syntax error from an empty clause.
177-
escaped_terms = " ".join(self._escape_fulltext_term(w) for w in words)
178-
search_value = f"({escaped_terms})"
173+
terms = " ".join(filtered_words) if filtered_words else value
174+
search_value = f"({terms})"
179175
elif " OR " in value:
180176
# OR union within text field: split on ' OR ' and join with |
181-
or_terms = [
182-
self._escape_fulltext_term(t.strip()) for t in value.split(" OR ")
183-
]
177+
or_terms = [t.strip() for t in value.split(" OR ")]
184178
search_value = f"({'|'.join(or_terms)})"
185179
else:
180+
# Single-word FULLTEXT — escape to prevent accidental operator injection
186181
search_value = self._escape_fulltext_term(value)
187182

188183
base = f"{prefix}@{field}:{search_value}"

sql_redis/translator.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,13 @@ def _build_command(self, analyzed: AnalyzedQuery) -> TranslatedQuery:
129129
query_string = self._build_query_string(analyzed)
130130

131131
if use_aggregate:
132+
if parsed.scoring is not None:
133+
raise ValueError(
134+
"score() is not supported with FT.AGGREGATE queries. "
135+
"WITHSCORES / SCORER are FT.SEARCH-only features. "
136+
"Remove score() or avoid GROUP BY / aggregation functions "
137+
"in the same query."
138+
)
132139
return self._build_aggregate(analyzed, query_string)
133140
else:
134141
return self._build_search(analyzed, query_string)
@@ -290,7 +297,16 @@ def _build_search(
290297
if analyzed.vector_search.alias not in return_fields:
291298
return_fields.append(analyzed.vector_search.alias)
292299

293-
if return_fields and return_fields != ["*"]:
300+
# When score() is the only SELECT expression, parsed.fields is empty.
301+
# We still need a RETURN clause to avoid leaking full document payloads.
302+
# Score itself is delivered via WITHSCORES (not RETURN), but we must
303+
# emit RETURN 0 so Redis returns no document attributes beyond the score.
304+
score_only_select = parsed.scoring is not None and not return_fields
305+
306+
if score_only_select:
307+
# RETURN 0 — suppress all document fields, score comes via WITHSCORES
308+
args.extend(["RETURN", "0"])
309+
elif return_fields and return_fields != ["*"]:
294310
args.append("RETURN")
295311
args.append(str(len(return_fields)))
296312
args.extend(return_fields)

tests/test_query_builder.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,3 +531,51 @@ def test_build_missing_condition_is_not_null(self):
531531
builder = QueryBuilder()
532532
result = builder.build_missing_condition("email", is_missing=False)
533533
assert result == "-ismissing(@email)"
534+
535+
536+
class TestQueryBuilderEscaping:
537+
"""Tests for escaping special characters in text search values."""
538+
539+
def test_escape_fulltext_term_special_chars(self):
540+
"""_escape_fulltext_term escapes RediSearch operator characters."""
541+
builder = QueryBuilder()
542+
result = builder._escape_fulltext_term("hello|world")
543+
assert result == "hello\\|world"
544+
545+
def test_escape_fulltext_term_double_quote(self):
546+
"""_escape_fulltext_term escapes double quotes."""
547+
builder = QueryBuilder()
548+
result = builder._escape_fulltext_term('say "hello"')
549+
assert result == 'say \\"hello\\"'
550+
551+
def test_escape_fulltext_term_at_sign(self):
552+
"""_escape_fulltext_term escapes @ to prevent field injection."""
553+
builder = QueryBuilder()
554+
result = builder._escape_fulltext_term("user@email")
555+
assert result == "user\\@email"
556+
557+
def test_like_escapes_special_chars(self):
558+
"""LIKE pattern escapes special chars in the non-wildcard portion."""
559+
builder = QueryBuilder()
560+
result = builder.build_text_condition("title", "LIKE", "%hello|world%")
561+
assert result == "@title:*hello\\|world*"
562+
563+
def test_fuzzy_escapes_special_chars(self):
564+
"""FUZZY escapes special chars in the term before wrapping with %."""
565+
builder = QueryBuilder()
566+
result = builder.build_text_condition("title", "FUZZY", "hello@world")
567+
assert result == "@title:%hello\\@world%"
568+
569+
def test_fuzzy_escapes_double_quote(self):
570+
"""FUZZY escapes double quotes in term."""
571+
builder = QueryBuilder()
572+
result = builder.build_text_condition("title", "FUZZY", 'say"hi')
573+
assert result == '@title:%say\\"hi%'
574+
575+
def test_multi_field_non_exact_escapes(self):
576+
"""Multi-field search with non-exact operator escapes special chars."""
577+
builder = QueryBuilder()
578+
result = builder.build_text_condition(
579+
["title", "description"], "FULLTEXT", "hello|world"
580+
)
581+
assert result == "(@title|description:hello\\|world)"

0 commit comments

Comments
 (0)