Skip to content

Commit a7820d0

Browse files
committed
fix: TEXT field = operator now uses exact phrase matching
- Changed = operator on TEXT fields to always wrap values in quotes (@field:"value") for exact phrase semantics, preserving stopwords
- Fixes 'bank of america' bug where stopwords like 'of' were stripped
- Added _escape_text_value() to escape quotes/backslashes in values
- Negation now derived from operator (consistent with TAG builder)
- FULLTEXT operator retains tokenized search with stopword filtering
- Fixed import formatting to match project black/isort style
- Updated docstrings and tests to use FULLTEXT (actual parser operator)
- All 333 tests passing (299 unit + 34 integration)
1 parent b0e37cb commit a7820d0

4 files changed

Lines changed: 75 additions & 20 deletions

File tree

sql_redis/query_builder.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ class QueryBuilder:
5151
# Characters that need escaping in TAG values
5252
TAG_SPECIAL_CHARS = r".,<>{}[]\"':;!@#$%^&*()-+=~"
5353

54+
@staticmethod
55+
def _escape_text_value(value: str) -> str:
56+
"""Escape characters that are special inside RediSearch double-quoted phrases.
57+
58+
Backslashes and double quotes must be escaped so they don't break
59+
the query syntax or alter its meaning.
60+
"""
61+
# Escape backslashes first (so we don't double-escape the quote escapes),
62+
# then escape double quotes.
63+
return value.replace("\\", "\\\\").replace('"', '\\"')
64+
5465
def build_text_condition(
5566
self,
5667
field: str | list[str],
@@ -62,14 +73,16 @@ def build_text_condition(
6273
6374
Args:
6475
field: Field name or list of field names for multi-field search.
65-
operator: One of =, MATCH, LIKE, FUZZY.
76+
operator: One of =, !=, FULLTEXT, LIKE, FUZZY.
6677
value: The search term or pattern.
6778
negated: If True, prefix with - for negation (the != operator also negates).
6879
6980
Returns:
70-
RediSearch query syntax like @field:term or @field:"phrase".
81+
RediSearch query syntax like @field:"exact phrase" or @field:(term1 term2).
7182
"""
72-
prefix = "-" if negated else ""
83+
# Derive negation from both the flag and the operator itself,
84+
# consistent with how build_tag_condition handles != via operator.
85+
prefix = "-" if negated or operator == "!=" else ""
7386

7487
# Handle multi-field search
7588
if isinstance(field, list):
@@ -83,8 +96,14 @@ def build_text_condition(
8396
elif operator == "FUZZY":
8497
# Wrap with % for fuzzy matching
8598
search_value = f"%{value}%"
99+
elif operator in ("=", "!="):
100+
# Exact phrase match — always wrap in quotes, preserve stopwords.
101+
# This ensures "bank of america" stays as-is rather than
102+
# being tokenized or having stopwords stripped.
103+
escaped = self._escape_text_value(value)
104+
search_value = f'"{escaped}"'
86105
elif " " in value:
87-
# Phrase search - filter stopwords and wrap in quotes
106+
# FULLTEXT with multi-word: tokenized search with stopword filtering
88107
words = value.split()
89108
removed_stopwords = [
90109
w for w in words if w.lower() in REDIS_DEFAULT_STOPWORDS
@@ -95,16 +114,17 @@ def build_text_condition(
95114

96115
if removed_stopwords:
97116
warnings.warn(
98-
f"Stopwords {removed_stopwords} were removed from phrase search '{value}'. "
117+
f"Stopwords {removed_stopwords} were removed from text search '{value}'. "
99118
"By default, Redis does not index stopwords. "
100-
"To include stopwords in your index, create it with STOPWORDS 0.",
119+
"To include stopwords in your index, create it with STOPWORDS 0. "
120+
"Use = operator for exact phrase matching that preserves stopwords.",
101121
UserWarning,
102122
stacklevel=2,
103123
)
104124

105-
# Use filtered phrase, or original if all words were stopwords
106-
phrase = " ".join(filtered_words) if filtered_words else value
107-
search_value = f'"{phrase}"'
125+
# Use filtered words in parentheses (AND semantics), or original if all were stopwords
126+
terms = " ".join(filtered_words) if filtered_words else value
127+
search_value = f"({terms})"
108128
else:
109129
search_value = value
110130

tests/test_parameter_substitution.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,13 @@ def test_empty_string_value(self, param_executor: Executor, param_test_index: st
211211
Note: Redis Search doesn't handle empty string literals well in TEXT fields.
212212
This is a Redis limitation, not a parameter substitution bug.
213213
"""
214-
# Empty strings cause Redis syntax errors in TEXT field queries
214+
# Empty strings cause Redis errors in TEXT field queries
215215
# This is expected behavior - Redis Search requires non-empty search terms
216-
with pytest.raises(redis.exceptions.ResponseError, match="Syntax error"):
216+
# With exact phrase syntax (@field:""), Redis may return "Syntax error"
217+
# or "INDEXEMPTY" guidance depending on the Redis version
218+
with pytest.raises(
219+
redis.exceptions.ResponseError, match="Syntax error|INDEXEMPTY"
220+
):
217221
param_executor.execute(
218222
f"SELECT * FROM {param_test_index} WHERE name = :name",
219223
params={"name": ""},

tests/test_query_builder.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,58 @@
88
class TestQueryBuilderTextFields:
99
"""Tests for building TEXT field query syntax."""
1010

11-
def test_text_single_term(self):
12-
"""TEXT field with single term: @field:term."""
11+
def test_text_single_term_exact(self):
12+
"""TEXT field with = wraps in quotes for exact phrase: @field:"term"."""
1313
builder = QueryBuilder()
1414
result = builder.build_text_condition("title", "=", "laptop")
1515

16-
assert result == "@title:laptop"
16+
assert result == '@title:"laptop"'
1717

1818
def test_text_exact_phrase(self):
19-
"""TEXT field with phrase: @field:"exact phrase"."""
19+
"""TEXT field with = preserves multi-word phrase: @field:"exact phrase"."""
2020
builder = QueryBuilder()
2121
result = builder.build_text_condition("title", "=", "gaming laptop")
2222

2323
assert result == '@title:"gaming laptop"'
2424

25-
def test_text_match_term(self):
26-
"""TEXT field with MATCH: @field:term."""
25+
def test_text_exact_phrase_preserves_stopwords(self):
26+
"""TEXT field with = preserves stopwords in exact phrase matching."""
27+
builder = QueryBuilder()
28+
result = builder.build_text_condition("name", "=", "bank of america")
29+
30+
# Stopwords like "of" must NOT be stripped for exact phrase matching
31+
assert result == '@name:"bank of america"'
32+
33+
def test_text_exact_phrase_escapes_quotes(self):
34+
"""TEXT field with = escapes double quotes inside the value."""
2735
builder = QueryBuilder()
28-
result = builder.build_text_condition("title", "MATCH", "laptop")
36+
result = builder.build_text_condition("title", "=", 'say "hello"')
37+
38+
assert result == r'@title:"say \"hello\""'
39+
40+
def test_text_exact_phrase_escapes_backslashes(self):
41+
"""TEXT field with = escapes backslashes inside the value."""
42+
builder = QueryBuilder()
43+
result = builder.build_text_condition("path", "=", r"c:\users\docs")
44+
45+
assert result == r'@path:"c:\\users\\docs"'
46+
47+
def test_text_fulltext_term(self):
48+
"""TEXT field with FULLTEXT (tokenized search): @field:term."""
49+
builder = QueryBuilder()
50+
result = builder.build_text_condition("title", "FULLTEXT", "laptop")
2951

3052
assert result == "@title:laptop"
3153

54+
def test_text_fulltext_multi_word(self):
55+
"""TEXT field with FULLTEXT and multi-word: @field:(term1 term2)."""
56+
builder = QueryBuilder()
57+
result = builder.build_text_condition(
58+
"description", "FULLTEXT", "gaming laptop"
59+
)
60+
61+
assert result == "@description:(gaming laptop)"
62+
3263
def test_text_prefix_search(self):
3364
"""TEXT field with prefix: @field:prefix*."""
3465
builder = QueryBuilder()

tests/test_translator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_select_with_text_filter(self, translator: Translator, basic_index: str)
136136
)
137137

138138
assert result.command == "FT.SEARCH"
139-
assert result.query_string == "@title:hello"
139+
assert result.query_string == '@title:"hello"'
140140

141141
def test_select_with_numeric_filter(self, translator: Translator, basic_index: str):
142142
"""SELECT with NUMERIC field condition."""
@@ -202,7 +202,7 @@ def test_and_conditions(self, translator: Translator, basic_index: str):
202202
f"SELECT * FROM {basic_index} WHERE title = 'hello' AND price > 50"
203203
)
204204

205-
assert "@title:hello" in result.query_string
205+
assert '@title:"hello"' in result.query_string
206206
assert "@price:[(50 +inf]" in result.query_string
207207

208208
def test_or_conditions(self, translator: Translator, basic_index: str):

0 commit comments

Comments
 (0)