Address review comments: strict parser validation, stable score alias resolution, OR docs fix

nkanu17 · nkanu17 · commit 1f8e261e2f45 · 2026-04-02T10:18:22.000-04:00
- Validate that fulltext()/fuzzy()/score() args are literals (Placeholders are allowed)
- Collect all field names across the result set before resolving the score alias
- Clarify OR operator case-sensitivity in the README
diff --git a/README.md b/README.md
@@ -218,7 +218,7 @@ SELECT * FROM products WHERE fulltext(title, 'laptop') OR fulltext(description,
 - `=` on TEXT fields performs **exact phrase** matching (preserves stopwords)
 - `fulltext()` performs **tokenized** search (stopwords are filtered with a warning)
 - `fuzzy()` and `fulltext()` only work on TEXT fields — using them on TAG or NUMERIC raises `ValueError`
-- OR is case-insensitive: `'laptop OR tablet'`, `'laptop or tablet'`, and `'laptop Or tablet'` all work
+- OR must be **uppercase**: `'laptop OR tablet'` triggers a union search (matches either term); lowercase `'laptop or tablet'` is treated as a regular three-word AND search
 - Special characters (`@`, `|`, `-`, `*`, `+`, etc.) in search terms are automatically escaped
 
 ### IS NULL / IS NOT NULL (ismissing)
diff --git a/sql_redis/executor.py b/sql_redis/executor.py
@@ -122,14 +122,14 @@ def _resolve_score_alias(
         first_row_fields: set[str] | None = None,
     ) -> str:
         """Determine a stable score column name that won't collide with
-        document fields.  The alias is decided once before iterating rows
-        so every row uses the same column name.
+        document fields.  The alias is resolved once and reused for every
+        row so all rows share the same column name.
 
         When a RETURN clause is present, the returned field names are used
         for collision detection.  When RETURN is absent (SELECT *), the
-        caller should pass ``first_row_fields`` — the field names from the
-        first result row — so we can detect collisions even when all
-        document attributes are returned."""
+        caller should pass ``first_row_fields`` — the union of all field
+        names across all result rows — so we can detect collisions even
+        when different documents have different field sets."""
         alias = score_alias or "__score"
         # Extract RETURN field names from args to detect collision
         try:
@@ -228,19 +228,23 @@ def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
             elif with_scores:
                 # WITHSCORES format: [count, key1, score1, [fields1], key2, score2, [fields2], ...]
                 # Stride of 3: key, score, field_list
-                # Resolve alias once from the first row so every row uses the
-                # same column name (consistent output schema).
-                resolved_alias: str | None = None
+                # First pass: collect all field names across all rows so the
+                # alias avoids collisions with any document field, not just
+                # the first row's fields.
+                all_field_names: set[str] = set()
+                parsed_rows: list[tuple[dict, Any]] = []
                 for i in range(1, len(raw_result) - 2, 3):
                     score = raw_result[i + 1]
                     row_data = raw_result[i + 2]
                     row = dict(zip(row_data[::2], row_data[1::2]))
-                    if resolved_alias is None:
-                        resolved_alias = self._resolve_score_alias(
-                            translated.score_alias,
-                            translated.args,
-                            first_row_fields=set(row.keys()),
-                        )
+                    all_field_names.update(row.keys())
+                    parsed_rows.append((row, score))
+                resolved_alias = self._resolve_score_alias(
+                    translated.score_alias,
+                    translated.args,
+                    first_row_fields=all_field_names,
+                )
+                for row, score in parsed_rows:
                     row[resolved_alias] = score
                     rows.append(row)
             else:
@@ -345,19 +349,22 @@ async def execute(self, sql: str, *, params: dict | None = None) -> QueryResult:
                     rows.append(row)
             elif with_scores:
                 # WITHSCORES format: [count, key1, score1, [fields1], ...]
-                # Resolve alias once from the first row so every row uses the
-                # same column name (consistent output schema).
-                resolved_alias: str | None = None
+                # First pass: collect all field names across all rows so the
+                # alias avoids collisions with any document field.
+                all_field_names: set[str] = set()
+                parsed_rows: list[tuple[dict, Any]] = []
                 for i in range(1, len(raw_result) - 2, 3):
                     score = raw_result[i + 1]
                     row_data = raw_result[i + 2]
                     row = dict(zip(row_data[::2], row_data[1::2]))
-                    if resolved_alias is None:
-                        resolved_alias = self._resolve_score_alias(
-                            translated.score_alias,
-                            translated.args,
-                            first_row_fields=set(row.keys()),
-                        )
+                    all_field_names.update(row.keys())
+                    parsed_rows.append((row, score))
+                resolved_alias = self._resolve_score_alias(
+                    translated.score_alias,
+                    translated.args,
+                    first_row_fields=all_field_names,
+                )
+                for row, score in parsed_rows:
                     row[resolved_alias] = score
                     rows.append(row)
             else:
diff --git a/sql_redis/parser.py b/sql_redis/parser.py
@@ -471,7 +471,12 @@ def _process_select_expression_inner(
                             "score() argument must be a literal scorer name "
                             f"(e.g., 'BM25', 'TFIDF'), got {expression.expressions[0]}."
                         )
-                    scorer = str(scorer_val)
+                    if not isinstance(scorer_val, str):
+                        raise ValueError(
+                            "score() argument must be a string scorer name "
+                            f"(e.g., 'BM25', 'TFIDF'), got {scorer_val!r}."
+                        )
+                    scorer = scorer_val
                 if result.scoring is not None:
                     raise ValueError(
                         "Only one score() expression is allowed per query."
@@ -1003,11 +1008,21 @@ def _add_function_condition(
         if func_name == "FULLTEXT" and len(args) >= 2:
             field_name = args[0].name if isinstance(args[0], exp.Column) else None
             value = self._extract_literal_value(args[1])
+            if value is None and not isinstance(args[1], exp.Placeholder):
+                raise ValueError(
+                    "fulltext() second argument must be a literal string, "
+                    f"got {args[1]}. Usage: fulltext(field, 'search terms')"
+                )
 
             # Optional 3rd arg: slop (non-negative int)
             slop = None
             if len(args) >= 3:
                 slop_val = self._extract_literal_value(args[2])
+                if slop_val is None and not isinstance(args[2], exp.Placeholder):
+                    raise ValueError(
+                        "fulltext() slop argument must be a literal integer, "
+                        f"got {args[2]}."
+                    )
                 if slop_val is not None:
                     # Reject booleans and non-integer floats — only real
                     # integers are valid for slop.
@@ -1029,6 +1044,11 @@ def _add_function_condition(
             inorder = False
             if len(args) >= 4:
                 inorder_val = self._extract_literal_value(args[3])
+                if inorder_val is None and not isinstance(args[3], exp.Placeholder):
+                    raise ValueError(
+                        "fulltext() inorder argument must be a literal boolean "
+                        f"(true/false or 1/0), got {args[3]}."
+                    )
                 if inorder_val is not None:
                     if isinstance(inorder_val, bool):
                         inorder = inorder_val
@@ -1059,11 +1079,21 @@ def _add_function_condition(
         elif func_name == "FUZZY" and len(args) >= 2:
             field_name = args[0].name if isinstance(args[0], exp.Column) else None
             value = self._extract_literal_value(args[1])
+            if value is None and not isinstance(args[1], exp.Placeholder):
+                raise ValueError(
+                    "fuzzy() second argument must be a literal string, "
+                    f"got {args[1]}. Usage: fuzzy(field, 'search term')"
+                )
 
             # Optional 3rd arg: fuzzy level (1, 2, or 3)
             fuzzy_level = None
             if len(args) >= 3:
                 level_val = self._extract_literal_value(args[2])
+                if level_val is None and not isinstance(args[2], exp.Placeholder):
+                    raise ValueError(
+                        "fuzzy() level argument must be a literal integer, "
+                        f"got {args[2]}."
+                    )
                 if level_val is not None:
                     if isinstance(level_val, bool):
                         raise ValueError(