fix: 4 review issues — case-sensitive OR, score ORDER BY, per-row alias collision, extra arg validation

nkanu17 · nkanu17 · commit 14a62005ad55 · 2026-04-01T19:01:00.000-04:00
P1 fixes:
- Lowercase 'or' no longer parsed as boolean OR in FULLTEXT; only uppercase
  'OR' triggers union semantics ('bank or america' stays as AND search)
- ORDER BY <score() alias> DESC no longer emits an invalid SORTBY clause
  (RediSearch sorts by relevance by default); ORDER BY <score() alias> ASC
  raises ValueError

P2 fixes:
- Score alias collision detection now checks per-row instead of first-row-only,
  preventing field overwrite when later rows have different field sets
- fulltext() rejects >4 args, fuzzy() rejects >3 args (extra args were silently ignored before)
- Applied to both sync and async executor paths

Add 8 new tests (384 total)
diff --git a/sql_redis/executor.py b/sql_redis/executor.py
@@ -228,18 +228,20 @@ def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
             elif with_scores:
                 # WITHSCORES format: [count, key1, score1, [fields1], key2, score2, [fields2], ...]
                 # Stride of 3: key, score, field_list
-                # Resolve alias using first row's fields for SELECT * (no RETURN)
+                # Resolve alias per-row: FT.SEARCH may omit missing attributes,
+                # so different rows can have different field sets.  We must
+                # check each row individually to avoid overwriting a real field
+                # with the score value.
                 for i in range(1, len(raw_result) - 2, 3):
                     score = raw_result[i + 1]
                     row_data = raw_result[i + 2]
                     row = dict(zip(row_data[::2], row_data[1::2]))
-                    if score_alias is None:
-                        score_alias = self._resolve_score_alias(
-                            translated.score_alias,
-                            translated.args,
-                            first_row_fields=set(row.keys()),
-                        )
-                    row[score_alias] = score
+                    row_score_alias = self._resolve_score_alias(
+                        translated.score_alias,
+                        translated.args,
+                        first_row_fields=set(row.keys()),
+                    )
+                    row[row_score_alias] = score
                     rows.append(row)
             else:
                 # Standard format: [count, key1, [fields1], key2, [fields2], ...]
@@ -343,17 +345,17 @@ async def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
                     rows.append(row)
             elif with_scores:
                 # WITHSCORES format: [count, key1, score1, [fields1], ...]
+                # Resolve alias per-row to avoid field collision on later rows.
                 for i in range(1, len(raw_result) - 2, 3):
                     score = raw_result[i + 1]
                     row_data = raw_result[i + 2]
                     row = dict(zip(row_data[::2], row_data[1::2]))
-                    if score_alias is None:
-                        score_alias = self._resolve_score_alias(
-                            translated.score_alias,
-                            translated.args,
-                            first_row_fields=set(row.keys()),
-                        )
-                    row[score_alias] = score
+                    row_score_alias = self._resolve_score_alias(
+                        translated.score_alias,
+                        translated.args,
+                        first_row_fields=set(row.keys()),
+                    )
+                    row[row_score_alias] = score
                     rows.append(row)
             else:
                 # Standard format: [count, key1, [fields1], key2, [fields2], ...]
diff --git a/sql_redis/parser.py b/sql_redis/parser.py
@@ -986,6 +986,16 @@ def _add_function_condition(
                 f"{func_name.lower()}(field, value), got {len(args)}."
             )
 
+        # Validate max argument counts to catch typos / misuse early.
+        # fulltext(field, value [, slop [, inorder]]) → max 4
+        # fuzzy(field, value [, level])               → max 3
+        _max_args = {"FULLTEXT": 4, "FUZZY": 3}
+        if func_name in _max_args and len(args) > _max_args[func_name]:
+            raise ValueError(
+                f"{func_name.lower()}() accepts at most {_max_args[func_name]} "
+                f"arguments, got {len(args)}."
+            )
+
         if func_name == "FULLTEXT" and len(args) >= 2:
             field_name = args[0].name if isinstance(args[0], exp.Column) else None
             value = self._extract_literal_value(args[1])
diff --git a/sql_redis/query_builder.py b/sql_redis/query_builder.py
@@ -146,13 +146,16 @@ def build_text_condition(
             # Exact phrase match — always wrap in quotes, preserve stopwords.
             escaped = self._escape_text_value(value)
             search_value = f'"{escaped}"'
-        elif re.search(r"\s+[Oo][Rr]\s+", value):
-            # OR union within text field: split on case-insensitive OR with
+        elif re.search(r"\s+OR\s+", value):
+            # OR union within text field: split on uppercase-only OR with
             # flexible whitespace, escape each term, join with |.
+            # Only uppercase OR is treated as a boolean operator; lowercase
+            # "or" is treated as a regular search term (e.g. "bank or america"
+            # stays as a multi-word AND search, not bank|america).
             # Multi-word operands (e.g. "gaming laptop OR tablet") are wrapped
             # in parentheses so each side is an atomic subexpression.
             or_parts: list[str] = []
-            for part in re.split(r"\s+[Oo][Rr]\s+", value):
+            for part in re.split(r"\s+OR\s+", value):
                 words = part.strip().split()
                 if not words:
                     raise ValueError(
diff --git a/sql_redis/translator.py b/sql_redis/translator.py
@@ -328,10 +328,25 @@ def _build_search(
             args.append(str(len(return_fields)))
             args.extend(return_fields)
 
-        # SORTBY
+        # SORTBY — skip if the ORDER BY field is a score() alias, because
+        # WITHSCORES already returns results in relevance order and the alias
+        # is not a sortable indexed field.
+        score_alias_name = parsed.scoring.alias if parsed.scoring else None
         if parsed.orderby_fields:
             field_name, direction = parsed.orderby_fields[0]
-            args.extend(["SORTBY", field_name, direction])
+            if field_name == score_alias_name:
+                # score() alias — not a real field; RediSearch sorts by
+                # relevance by default when no SORTBY is specified.
+                if direction == "ASC":
+                    raise ValueError(
+                        f"ORDER BY {field_name} ASC is not supported: "
+                        "RediSearch returns results in descending relevance "
+                        "order by default and does not support ascending "
+                        "score sorting via FT.SEARCH."
+                    )
+                # DESC is the default — omit SORTBY entirely
+            else:
+                args.extend(["SORTBY", field_name, direction])
 
         # LIMIT
         if parsed.limit is not None:
diff --git a/tests/test_query_builder.py b/tests/test_query_builder.py
@@ -631,17 +631,27 @@ def test_multi_field_fuzzy(self):
         )
         assert result == "(@title|description:%%laptap%%)"
 
-    def test_or_case_insensitive_lowercase(self):
-        """OR parsing is case-insensitive: 'laptop or tablet' works."""
+    def test_lowercase_or_is_not_boolean(self):
+        """Lowercase 'or' is treated as a regular search term, not a boolean operator.
+
+        'bank or america' should NOT become bank|america — it should be a
+        multi-word AND-style search with stopword filtering applied to 'or'.
+        """
         builder = QueryBuilder()
         result = builder.build_text_condition("title", "FULLTEXT", "laptop or tablet")
-        assert result == "@title:(laptop|tablet)"
+        # "or" is a stopword; remaining terms are "laptop" and "tablet"
+        assert result == "@title:(laptop tablet)"
 
-    def test_or_case_insensitive_mixed(self):
-        """OR parsing is case-insensitive: 'laptop Or tablet' works."""
+    def test_mixed_case_or_is_not_boolean(self):
+        """Mixed case 'Or' / 'oR' is treated as a regular term, not boolean OR.
+
+        Only uppercase 'OR' triggers the union operator.
+        'Or' is a stopword (or → stopword list), so it gets removed.
+        """
         builder = QueryBuilder()
         result = builder.build_text_condition("title", "FULLTEXT", "laptop Or tablet")
-        assert result == "@title:(laptop|tablet)"
+        # "Or" lowercases to "or" which is a stopword; remaining: laptop tablet
+        assert result == "@title:(laptop tablet)"
 
     def test_or_extra_whitespace(self):
         """OR parsing tolerates extra whitespace."""
diff --git a/tests/test_sql_parser.py b/tests/test_sql_parser.py
@@ -907,3 +907,17 @@ def test_fuzzy_no_value_raises(self):
         parser = SQLParser()
         with pytest.raises(ValueError, match="requires at least 2 arguments"):
             parser.parse("SELECT * FROM idx WHERE fuzzy(title)")
+
+    def test_fulltext_too_many_args_raises(self):
+        """fulltext() with more than 4 arguments raises ValueError."""
+        parser = SQLParser()
+        with pytest.raises(ValueError, match="at most 4 arguments"):
+            parser.parse(
+                "SELECT * FROM idx WHERE fulltext(title, 'hello world', 2, true, 'extra')"
+            )
+
+    def test_fuzzy_too_many_args_raises(self):
+        """fuzzy() with more than 3 arguments raises ValueError."""
+        parser = SQLParser()
+        with pytest.raises(ValueError, match="at most 3 arguments"):
+            parser.parse("SELECT * FROM idx WHERE fuzzy(title, 'laptap', 2, 'extra')")
diff --git a/tests/test_translator.py b/tests/test_translator.py
@@ -840,3 +840,34 @@ def test_score_too_many_args_raises(self, translator: Translator, basic_index: s
                 f"SELECT score('BM25', 'extra') AS relevance FROM {basic_index} "
                 "WHERE fulltext(title, 'laptop')"
             )
+
+    def test_order_by_score_desc_omits_sortby(
+        self, translator: Translator, basic_index: str
+    ):
+        """ORDER BY score_alias DESC omits SORTBY (RediSearch sorts by relevance by default)."""
+        result = translator.translate(
+            f"SELECT title, score() AS relevance FROM {basic_index} "
+            "WHERE fulltext(title, 'laptop') ORDER BY relevance DESC"
+        )
+        assert "WITHSCORES" in result.args
+        assert "SORTBY" not in result.args
+
+    def test_order_by_score_asc_raises(self, translator: Translator, basic_index: str):
+        """ORDER BY score_alias ASC raises ValueError (not supported by RediSearch)."""
+        with pytest.raises(ValueError, match="ASC is not supported"):
+            translator.translate(
+                f"SELECT title, score() AS relevance FROM {basic_index} "
+                "WHERE fulltext(title, 'laptop') ORDER BY relevance ASC"
+            )
+
+    def test_order_by_real_field_with_score_still_works(
+        self, translator: Translator, basic_index: str
+    ):
+        """ORDER BY a real field (not score alias) still emits SORTBY."""
+        result = translator.translate(
+            f"SELECT title, score() AS relevance FROM {basic_index} "
+            "WHERE fulltext(title, 'laptop') ORDER BY price DESC"
+        )
+        assert "SORTBY" in result.args
+        idx = result.args.index("SORTBY")
+        assert result.args[idx + 1] == "price"