Skip to content

Commit a29ca6a

Browse files
committed
fix: TEXT field = operator now uses exact phrase matching
- Changed = operator on TEXT fields to always wrap values in quotes (@field:"value") for exact phrase semantics, preserving stopwords - Fixes 'bank of america' bug where stopwords like 'of' were stripped - MATCH operator retains tokenized search with stopword filtering - Updated tests for new behavior - All 331 tests passing (297 unit + 34 integration)
1 parent b0e37cb commit a29ca6a

4 files changed

Lines changed: 39 additions & 16 deletions

File tree

sql_redis/query_builder.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ def build_text_condition(
6262
6363
Args:
6464
field: Field name or list of field names for multi-field search.
65-
operator: One of =, MATCH, LIKE, FUZZY.
65+
operator: One of =, !=, MATCH, LIKE, FUZZY.
6666
value: The search term or pattern.
6767
negated: If True, prefix with - for negation.
6868
6969
Returns:
70-
RediSearch query syntax like @field:term or @field:"phrase".
70+
RediSearch query syntax like @field:"exact phrase" or @field:(term1 term2).
7171
"""
7272
prefix = "-" if negated else ""
7373

@@ -83,8 +83,13 @@ def build_text_condition(
8383
elif operator == "FUZZY":
8484
# Wrap with % for fuzzy matching
8585
search_value = f"%{value}%"
86+
elif operator in ("=", "!="):
87+
# Exact phrase match — always wrap in quotes, preserve stopwords.
88+
# This ensures "bank of america" stays as-is rather than
89+
# being tokenized or having stopwords stripped.
90+
search_value = f'"{value}"'
8691
elif " " in value:
87-
# Phrase search - filter stopwords and wrap in quotes
92+
# MATCH with multi-word: tokenized search with stopword filtering
8893
words = value.split()
8994
removed_stopwords = [
9095
w for w in words if w.lower() in REDIS_DEFAULT_STOPWORDS
@@ -95,16 +100,17 @@ def build_text_condition(
95100

96101
if removed_stopwords:
97102
warnings.warn(
98-
f"Stopwords {removed_stopwords} were removed from phrase search '{value}'. "
103+
f"Stopwords {removed_stopwords} were removed from text search '{value}'. "
99104
"By default, Redis does not index stopwords. "
100-
"To include stopwords in your index, create it with STOPWORDS 0.",
105+
"To include stopwords in your index, create it with STOPWORDS 0. "
106+
"Use = operator for exact phrase matching that preserves stopwords.",
101107
UserWarning,
102108
stacklevel=2,
103109
)
104110

105-
# Use filtered phrase, or original if all words were stopwords
106-
phrase = " ".join(filtered_words) if filtered_words else value
107-
search_value = f'"{phrase}"'
111+
# Use filtered words in parentheses (AND semantics), or original if all were stopwords
112+
terms = " ".join(filtered_words) if filtered_words else value
113+
search_value = f"({terms})"
108114
else:
109115
search_value = value
110116

tests/test_parameter_substitution.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,11 @@ def test_empty_string_value(self, param_executor: Executor, param_test_index: st
211211
Note: Redis Search doesn't handle empty string literals well in TEXT fields.
212212
This is a Redis limitation, not a parameter substitution bug.
213213
"""
214-
# Empty strings cause Redis syntax errors in TEXT field queries
214+
# Empty strings cause Redis errors in TEXT field queries
215215
# This is expected behavior - Redis Search requires non-empty search terms
216-
with pytest.raises(redis.exceptions.ResponseError, match="Syntax error"):
216+
# With exact phrase syntax (@field:""), Redis may return "Syntax error"
217+
# or "INDEXEMPTY" guidance depending on the Redis version
218+
with pytest.raises(redis.exceptions.ResponseError):
217219
param_executor.execute(
218220
f"SELECT * FROM {param_test_index} WHERE name = :name",
219221
params={"name": ""},

tests/test_query_builder.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,42 @@
88
class TestQueryBuilderTextFields:
99
"""Tests for building TEXT field query syntax."""
1010

11-
def test_text_single_term(self):
12-
"""TEXT field with single term: @field:term."""
11+
def test_text_single_term_exact(self):
12+
"""TEXT field with = wraps in quotes for exact phrase: @field:"term"."""
1313
builder = QueryBuilder()
1414
result = builder.build_text_condition("title", "=", "laptop")
1515

16-
assert result == "@title:laptop"
16+
assert result == '@title:"laptop"'
1717

1818
def test_text_exact_phrase(self):
19-
"""TEXT field with phrase: @field:"exact phrase"."""
19+
"""TEXT field with = preserves multi-word phrase: @field:"exact phrase"."""
2020
builder = QueryBuilder()
2121
result = builder.build_text_condition("title", "=", "gaming laptop")
2222

2323
assert result == '@title:"gaming laptop"'
2424

25+
def test_text_exact_phrase_preserves_stopwords(self):
26+
"""TEXT field with = preserves stopwords (Jay's 'bank of america' fix)."""
27+
builder = QueryBuilder()
28+
result = builder.build_text_condition("name", "=", "bank of america")
29+
30+
# Stopwords like "of" must NOT be stripped for exact phrase matching
31+
assert result == '@name:"bank of america"'
32+
2533
def test_text_match_term(self):
2634
"""TEXT field with MATCH: @field:term."""
2735
builder = QueryBuilder()
2836
result = builder.build_text_condition("title", "MATCH", "laptop")
2937

3038
assert result == "@title:laptop"
3139

40+
def test_text_match_multi_word(self):
41+
"""TEXT field with MATCH and multi-word: @field:(term1 term2)."""
42+
builder = QueryBuilder()
43+
result = builder.build_text_condition("description", "MATCH", "gaming laptop")
44+
45+
assert result == "@description:(gaming laptop)"
46+
3247
def test_text_prefix_search(self):
3348
"""TEXT field with prefix: @field:prefix*."""
3449
builder = QueryBuilder()

tests/test_translator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_select_with_text_filter(self, translator: Translator, basic_index: str)
136136
)
137137

138138
assert result.command == "FT.SEARCH"
139-
assert result.query_string == "@title:hello"
139+
assert result.query_string == '@title:"hello"'
140140

141141
def test_select_with_numeric_filter(self, translator: Translator, basic_index: str):
142142
"""SELECT with NUMERIC field condition."""
@@ -202,7 +202,7 @@ def test_and_conditions(self, translator: Translator, basic_index: str):
202202
f"SELECT * FROM {basic_index} WHERE title = 'hello' AND price > 50"
203203
)
204204

205-
assert "@title:hello" in result.query_string
205+
assert '@title:"hello"' in result.query_string
206206
assert "@price:[(50 +inf]" in result.query_string
207207

208208
def test_or_conditions(self, translator: Translator, basic_index: str):

0 commit comments

Comments
 (0)