Skip to content

Commit b94cf66

Browse files
committed
Strip stopwords from exact phrase queries (= operator)
RediSearch does not index stopwords, so exact phrase queries that contain them, such as "diagnosing and treating", fail with a syntax error. The = operator now strips default stopwords before wrapping the phrase in double quotes, matching how the indexer assigns consecutive positions to the remaining terms. A warning is emitted when stopwords are removed. Also removes the outdated 'Use = operator for exact phrase matching that preserves stopwords' hint from the FULLTEXT stopword warning.
1 parent deed7dd commit b94cf66

2 files changed

Lines changed: 56 additions & 9 deletions

File tree

sql_redis/query_builder.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,37 @@ def build_text_condition(
143143
pct = "%" * level
144144
search_value = f"{pct}{escaped}{pct}"
145145
elif operator in ("=", "!="):
146-
# Exact phrase match — always wrap in quotes, preserve stopwords.
147-
escaped = self._escape_text_value(value)
146+
# Exact phrase match — wrap in double quotes.
147+
# Strip default stopwords because RediSearch does not index them;
148+
# keeping them in the quoted phrase causes a query-time error
149+
# (e.g. "diagnosing and treating" fails on "and").
150+
# Since the indexer assigns consecutive positions after dropping
151+
# stopwords, the stripped phrase matches correctly.
152+
words = value.split()
153+
removed = [w for w in words if w.lower() in REDIS_DEFAULT_STOPWORDS]
154+
filtered = [w for w in words if w.lower() not in REDIS_DEFAULT_STOPWORDS]
155+
156+
if removed:
157+
phrase_words = filtered if filtered else words
158+
if filtered:
159+
sw_msg = f"Stopwords {removed} were removed from"
160+
else:
161+
sw_msg = (
162+
f"All tokens in '{value}' are stopwords and may not "
163+
"be indexed in"
164+
)
165+
warnings.warn(
166+
f"{sw_msg} exact phrase '{value}'. "
167+
"By default, Redis does not index stopwords. "
168+
"To include stopwords in your index, create it "
169+
"with STOPWORDS 0.",
170+
UserWarning,
171+
stacklevel=2,
172+
)
173+
else:
174+
phrase_words = words
175+
176+
escaped = self._escape_text_value(" ".join(phrase_words))
148177
search_value = f'"{escaped}"'
149178
elif re.search(r"\s+OR\s+", value):
150179
# OR union within text field: split on uppercase-only OR with
@@ -200,8 +229,8 @@ def build_text_condition(
200229
warnings.warn(
201230
f"{sw_action} text search '{value}'. "
202231
"By default, Redis does not index stopwords. "
203-
"To include stopwords in your index, create it with STOPWORDS 0. "
204-
"Use = operator for exact phrase matching that preserves stopwords.",
232+
"To include stopwords in your index, create it "
233+
"with STOPWORDS 0.",
205234
UserWarning,
206235
stacklevel=2,
207236
)

tests/test_query_builder.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,31 @@ def test_text_exact_phrase(self):
2222

2323
assert result == '@title:"gaming laptop"'
2424

25-
def test_text_exact_phrase_preserves_stopwords(self):
26-
"""TEXT field with = preserves stopwords in exact phrase matching."""
25+
def test_text_exact_phrase_strips_stopwords(self):
26+
"""TEXT field with = strips stopwords and warns (RediSearch doesn't index them)."""
2727
builder = QueryBuilder()
28-
result = builder.build_text_condition("name", "=", "bank of america")
28+
import warnings
2929

30-
# Stopwords like "of" must NOT be stripped for exact phrase matching
31-
assert result == '@name:"bank of america"'
30+
with warnings.catch_warnings(record=True) as w:
31+
warnings.simplefilter("always")
32+
result = builder.build_text_condition("name", "=", "bank of america")
33+
34+
# "of" is a stopword — stripped so the phrase matches indexed positions
35+
assert result == '@name:"bank america"'
36+
assert len(w) == 1
37+
assert "Stopwords ['of']" in str(w[0].message)
38+
39+
def test_text_exact_phrase_no_stopwords_no_warning(self):
40+
"""TEXT field with = on phrase without stopwords produces no warning."""
41+
builder = QueryBuilder()
42+
import warnings
43+
44+
with warnings.catch_warnings(record=True) as w:
45+
warnings.simplefilter("always")
46+
result = builder.build_text_condition("name", "=", "bank america")
47+
48+
assert result == '@name:"bank america"'
49+
assert len(w) == 0
3250

3351
def test_text_exact_phrase_escapes_quotes(self):
3452
"""TEXT field with = escapes double quotes inside the value."""

0 commit comments

Comments
 (0)