Bit-Quill
diff --git a/‎docs/commands/ft.aggregate.md‎
Lines changed: 3 additions & 0 deletions b/‎docs/commands/ft.aggregate.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎integration/compatibility/aggregate-answers.pickle.gz‎
127 KB b/‎integration/compatibility/aggregate-answers.pickle.gz‎
127 KB
diff --git a/‎integration/compatibility/generate.py‎
Lines changed: 121 additions & 0 deletions b/‎integration/compatibility/generate.py‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎integration/test_non_vector.py‎
Lines changed: 151 additions & 0 deletions b/‎integration/test_non_vector.py‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎src/commands/ft.aggregate.json‎
Lines changed: 5 additions & 0 deletions b/‎src/commands/ft.aggregate.json‎
Lines changed: 5 additions & 0 deletions
@@ -92,3 +92,6 @@ The following reducer functions are available. The reducer functions that take a
 | MAX 1 <expression>            | The largest numerical values of the expression.                                                                                    |
 | AVG 1 <expression>            | The numerical average of the values of the expression.                                                                             |
 | STDDEV 1 <expression>         | The standard deviation of the values of the expression.                                                                            |
+| FIRST_VALUE 1 <expression>    | The first value of the expression encountered in the group. Order depends on record retrieval order. Use only when order does not matter. |
+| FIRST_VALUE 3 <expression> BY <expression> | The value of the first expression from the record with the smallest comparison expression (ascending). Ties broken by first-encountered order. |
+| FIRST_VALUE 4 <expression> BY <expression> ASC\|DESC | The value of the first expression from the record with the minimum (ASC) or maximum (DESC) comparison expression. Ties broken by first-encountered order. Invalid keyword arguments (e.g., wrong BY token or unrecognised direction) produce a parse-time error. |
@@ -468,3 +468,124 @@ def test_search_sortby(self, key_type, dialect):
                         for limit in ["LIMIT 0 5", "LIMIT 2 3", ""]:
                             self.check(dialect, f"ft.search {key_type}_idx1 * SORTBY {sort_key} {direction} {return_keys} {limit} {wsk}")
 
+
+    # test_first_value_simple_mode is intentionally omitted.
+    # FIRST_VALUE without a BY clause is non-deterministic: the order of
+    # records within a group depends on retrieval order, which differs between
+    # Redis and Valkey implementations. Compatibility testing requires
+    # deterministic results, so only BY-clause (sorted) mode is tested here.
+
+    def test_first_value_by_clause(self, key_type, dialect):
+        """Test FIRST_VALUE with BY clause - sorted mode.
+
+        Loads the "sortable numbers" data set, then sends one FT.AGGREGATE
+        query per entry of ``cases`` through ``self.check``. Every query
+        groups on a single field and reduces with
+        ``first_value <nargs> <value> BY <by> [ASC|DESC]``. Entries whose
+        order is ``None`` use the 3-argument form, which carries no
+        direction keyword.
+
+        Args:
+            key_type: Passed to ``self.setup_data`` and used as the prefix
+                of the index name (``{key_type}_idx1``).
+            dialect: Forwarded unchanged to ``self.check``.
+        """
+        self.setup_data("sortable numbers", key_type)
+
+        # (value_field, group_field, load_fields, by_field, order)
+        # order=None means default (3-arg form, no explicit ASC/DESC)
+        # NOTE(review): the last case sorts BY the same field it groups on,
+        # so every record inside a group compares equal and the result is
+        # decided purely by tie-breaking - confirm that this is stable enough
+        # for a compatibility comparison.
+        cases = [
+            ("@n1", "@n2", "3 @__key @n1 @n2", "@n1", "ASC"),
+            ("@n1", "@n2", "3 @__key @n1 @n2", "@n1", "DESC"),
+            ("@n1", "@n2", "3 @__key @n1 @n2", "@n1", None),   # default order
+            ("@t1", "@t2", "3 @__key @t1 @t2", "@t1", "ASC"),
+            ("@t1", "@t2", "3 @__key @t1 @t2", "@t1", "DESC"),
+            ("@t1", "@n2", "4 @__key @t1 @n1 @n2", "@n1", "ASC"),  # cross-field
+            ("@n1", "@n2", "3 @__key @n1 @n2", "@n2", "ASC"),  # tie-breaking
+        ]
+        for val, group, load, by, order in cases:
+            # nargs counts the tokens after it: value, BY, by-field and,
+            # when present, the direction keyword.
+            if order is None:
+                nargs, order_clause = "3", ""
+            else:
+                nargs, order_clause = "4", f" {order}"
+            # Drop the leading "@" of each field name so the alias is a plain
+            # identifier that is unique per (value, by, order) combination.
+            alias = f"first_{val[1:]}_{by[1:]}_{order or 'default'}"
+            self.check(dialect,
+                f"ft.aggregate {key_type}_idx1 * "
+                f"load {load} "
+                f"groupby 1 {group} "
+                f"reduce first_value {nargs} {val} BY {by}{order_clause} as {alias}"
+            )
+
+    def test_first_value_keyword_case(self, key_type, dialect):
+        """Test FIRST_VALUE with case-insensitive keywords.
+
+        Loads the "sortable numbers" data set and repeats one grouped query
+        with different spellings of the reducer keywords, sending each
+        variant through ``self.check``:
+
+        * 3-argument form with the BY keyword in lower, upper and mixed case.
+        * 4-argument form with ASC and DESC each in lower, upper and mixed
+          case.
+
+        The spelling under test is embedded in the result alias, so every
+        variant produces a distinct query string.
+
+        Args:
+            key_type: Passed to ``self.setup_data`` and used as the prefix
+                of the index name (``{key_type}_idx1``).
+            dialect: Forwarded unchanged to ``self.check``.
+        """
+        self.setup_data("sortable numbers", key_type)
+
+        # 3-arg form: vary BY keyword case only
+        for by_kw in ["by", "BY", "By"]:
+            self.check(dialect,
+                f"ft.aggregate {key_type}_idx1 * "
+                f"load 3 @__key @n1 @n2 "
+                f"groupby 1 @n2 "
+                f"reduce first_value 3 @n1 {by_kw} @n1 as first_{by_kw}"
+            )
+
+        # 4-arg form: vary order keyword case (ASC and DESC variants)
+        for order_kw in ["asc", "ASC", "Asc", "desc", "DESC", "Desc"]:
+            self.check(dialect,
+                f"ft.aggregate {key_type}_idx1 * "
+                f"load 3 @__key @n1 @n2 "
+                f"groupby 1 @n2 "
+                f"reduce first_value 4 @n1 BY @n1 {order_kw} as first_{order_kw}"
+            )
+
+    def test_first_value_edge_cases(self, key_type, dialect):
+        """Test FIRST_VALUE with edge cases like nil values.
+
+        Runs in two phases, each sending its queries through ``self.check``:
+
+        1. With the "hard numbers" data set loaded, reduce ``@n1 BY @n1`` in
+           both ASC and DESC order.
+        2. After a ``FLUSHALL SYNC`` and a reload of "sortable numbers",
+           reduce ``@n1 BY @n2 ASC`` while grouping on ``@n2``.
+
+        Args:
+            key_type: Passed to ``self.setup_data`` and used as the prefix
+                of the index name (``{key_type}_idx1``).
+            dialect: Forwarded unchanged to ``self.check``.
+        """
+        self.setup_data("hard numbers", key_type)
+
+        # nil values in comparison field, both directions
+        for order in ["ASC", "DESC"]:
+            self.check(dialect,
+                f"ft.aggregate {key_type}_idx1 * "
+                f"load 3 @__key @n1 @n2 "
+                f"groupby 1 @n2 "
+                f"reduce first_value 4 @n1 BY @n1 {order} as first_nil_{order.lower()}"
+            )
+
+        # NOTE: The simple (no BY clause) form is deliberately not exercised.
+        # Without a BY clause the order of values within each group is
+        # undefined, so the result cannot be compared reliably.
+        
+        # Switch to sortable numbers for duplicate comparison values
+        self.client.execute_command("FLUSHALL SYNC")
+        # NOTE(review): fixed pause after the flush - presumably gives the
+        # server time to settle before the reload; confirm it is required.
+        time.sleep(0.5)
+        self.setup_data("sortable numbers", key_type)
+        
+        # Test with duplicate comparison values (tie-breaking)
+        # NOTE(review): the query groups on @n2 and also sorts BY @n2, so all
+        # records in a group tie and the outcome rests entirely on the
+        # tie-breaking rule - confirm this is stable across implementations.
+        self.check(dialect, 
+            f"ft.aggregate {key_type}_idx1 * "
+            f"load 3 @__key @n1 @n2 "
+            f"groupby 1 @n2 "
+            f"reduce first_value 4 @n1 BY @n2 ASC as first_dup_tie"
+        )
+
+    def test_first_value_errors(self, key_type, dialect):
+        """Test FIRST_VALUE error conditions.
+
+        Loads the "sortable numbers" data set and sends three malformed
+        FIRST_VALUE reducers through ``self.check``:
+
+        * ``first_value 2 @n1 @n2`` - two arguments, no BY keyword.
+        * ``first_value 3 @n1 NOTBY @n2`` - wrong token where BY belongs.
+        * ``first_value 4 @n1 BY @n2 INVALID`` - direction is neither ASC
+          nor DESC.
+
+        This method contains no assertions of its own; whatever validation
+        happens is performed inside ``self.check``.
+
+        Args:
+            key_type: Passed to ``self.setup_data`` and used as the prefix
+                of the index name (``{key_type}_idx1``).
+            dialect: Forwarded unchanged to ``self.check``.
+        """
+        self.setup_data("sortable numbers", key_type)
+        
+        # Not exercised here: nargs=0 (too few arguments). It is expected to
+        # be rejected by the parser, which may make it untestable through the
+        # compatibility tests.
+        
+        # Test nargs=2 (incomplete BY clause)
+        self.check(dialect, 
+            f"ft.aggregate {key_type}_idx1 * "
+            f"load 3 @__key @n1 @n2 "
+            f"groupby 1 @n2 "
+            f"reduce first_value 2 @n1 @n2 as first_error_nargs2"
+        )
+        
+        # Not exercised here: nargs=5 (too many arguments). It is expected to
+        # be rejected by the parser, which may make it untestable through the
+        # compatibility tests.
+        
+        # Test invalid BY keyword (e.g., NOTBY)
+        self.check(dialect, 
+            f"ft.aggregate {key_type}_idx1 * "
+            f"load 3 @__key @n1 @n2 "
+            f"groupby 1 @n2 "
+            f"reduce first_value 3 @n1 NOTBY @n2 as first_error_notby"
+        )
+        
+        # Test invalid sort order (not ASC/DESC)
+        self.check(dialect, 
+            f"ft.aggregate {key_type}_idx1 * "
+            f"load 3 @__key @n1 @n2 "
+            f"groupby 1 @n2 "
+            f"reduce first_value 4 @n1 BY @n2 INVALID as first_error_invalid"
+        )
+
@@ -524,6 +524,157 @@ def validate_aggregate_complex_queries(client: Valkey):
     assert result[1][1] == b'406'
     assert result[1][3] == b'4060'
 
+    # 17. FIRST_VALUE reducer - simple mode (no BY clause)
+    # Note: Simple mode is non-deterministic as it depends on retrieval order
+    # We only verify that valid values are returned, not specific values
+    result = client.execute_command(
+        "FT.AGGREGATE", "products", "@price:[1 1000]",
+        "LOAD", "2", "price", "category",
+        "GROUPBY", "1", "@category",
+        "REDUCE", "FIRST_VALUE", "1", "@price", "AS", "first_price"
+    )
+    assert result[0] == 2
+    for i in range(1, len(result)):
+        row = dict(zip(result[i][::2], result[i][1::2]))
+        assert b'category' in row
+        assert b'first_price' in row
+        first_price = float(row[b'first_price'])
+        # Verify it's a valid price from the dataset (1-1000)
+        assert 1.0 <= first_price <= 1000.0
+        # Verify category matches expected values
+        if row[b'category'] == b'electronics':
+            # Electronics has odd prices (1, 3, 5, ..., 999)
+            assert int(first_price) % 2 == 1
+        else:
+            # Books has even prices (2, 4, 6, ..., 1000)
+            assert int(first_price) % 2 == 0
+
+    # 18. FIRST_VALUE reducer - sorted ASC mode (with BY clause)
+    result = client.execute_command(
+        "FT.AGGREGATE", "products", "@price:[1 1000]",
+        "LOAD", "3", "price", "rating", "category",
+        "GROUPBY", "1", "@category",
+        "REDUCE", "FIRST_VALUE", "4", "@price", "BY", "@rating", "ASC", "AS", "price_with_min_rating"
+    )
+    assert result[0] == 2
+    for i in range(1, len(result)):
+        row = dict(zip(result[i][::2], result[i][1::2]))
+        assert b'category' in row
+        assert b'price_with_min_rating' in row
+        price_with_min_rating = float(row[b'price_with_min_rating'])
+        if row[b'category'] == b'electronics':
+            # Multiple electronics have the same minimum rating (1.0), so any of their prices is valid
+            valid_prices_for_min_rating = {1, 101, 201, 301, 401, 501, 601, 701, 801, 901}
+            assert price_with_min_rating in valid_prices_for_min_rating, \
+                f"Electronics price_with_min_rating should be one of {valid_prices_for_min_rating}, got {price_with_min_rating}"
+        else:
+            # Multiple books have the same minimum rating (2.0), so any of their prices is valid
+            valid_prices_for_min_rating = {2, 102, 202, 302, 402, 502, 602, 702, 802, 902}
+            assert price_with_min_rating in valid_prices_for_min_rating, \
+                f"Books price_with_min_rating should be one of {valid_prices_for_min_rating}, got {price_with_min_rating}"
+
+    # 19. FIRST_VALUE reducer - sorted DESC mode (with BY clause)
+    result = client.execute_command(
+        "FT.AGGREGATE", "products", "@price:[1 1000]",
+        "LOAD", "3", "price", "rating", "category",
+        "GROUPBY", "1", "@category",
+        "REDUCE", "FIRST_VALUE", "4", "@price", "BY", "@rating", "DESC", "AS", "price_with_max_rating"
+    )
+    assert result[0] == 2
+    for i in range(1, len(result)):
+        row = dict(zip(result[i][::2], result[i][1::2]))
+        assert b'category' in row
+        assert b'price_with_max_rating' in row
+        price_with_max_rating = float(row[b'price_with_max_rating'])
+        if row[b'category'] == b'electronics':
+            # Multiple electronics have the same maximum rating (99.0), so any of their prices is valid
+            valid_prices_for_max_rating = {99, 199, 299, 399, 499, 599, 699, 799, 899, 999}
+            assert price_with_max_rating in valid_prices_for_max_rating, \
+                f"Electronics price_with_max_rating should be one of {valid_prices_for_max_rating}, got {price_with_max_rating}"
+        else:
+            # Multiple books have the same maximum rating (100.0), so any of their prices is valid
+            valid_prices_for_max_rating = {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000}
+            assert price_with_max_rating in valid_prices_for_max_rating, \
+                f"Books price_with_max_rating should be one of {valid_prices_for_max_rating}, got {price_with_max_rating}"
+
+    # 20. FIRST_VALUE reducer - multiple groups with independent results
+    # Note: Simple mode (first_price) is non-deterministic
+    result = client.execute_command(
+        "FT.AGGREGATE", "products", "@price:[1 1000]",
+        "LOAD", "3", "price", "rating", "category",
+        "GROUPBY", "1", "@category",
+        "REDUCE", "FIRST_VALUE", "1", "@price", "AS", "first_price",
+        "REDUCE", "FIRST_VALUE", "4", "@price", "BY", "@rating", "ASC", "AS", "price_min_rating",
+        "REDUCE", "FIRST_VALUE", "4", "@price", "BY", "@rating", "DESC", "AS", "price_max_rating",
+        "REDUCE", "COUNT", "0", "AS", "count"
+    )
+    assert result[0] == 2
+    
+    electronics_found = False
+    books_found = False
+    
+    for i in range(1, len(result)):
+        row = dict(zip(result[i][::2], result[i][1::2]))
+        assert b'category' in row
+        assert b'first_price' in row
+        assert b'price_min_rating' in row
+        assert b'price_max_rating' in row
+        assert b'count' in row
+        
+        category = row[b'category']
+        first_price = float(row[b'first_price'])
+        price_min_rating = float(row[b'price_min_rating'])
+        price_max_rating = float(row[b'price_max_rating'])
+        count = int(row[b'count'])
+        
+        if category == b'electronics':
+            electronics_found = True
+            # Simple mode is non-deterministic, just verify valid range
+            assert 1.0 <= first_price <= 1000.0, f"Electronics first_price out of range: {first_price}"
+            assert int(first_price) % 2 == 1, f"Electronics should have odd prices, got {first_price}"
+            # Sorted modes - multiple electronics have the same min/max rating, so any of their prices is valid
+            valid_prices_for_min_rating = {1, 101, 201, 301, 401, 501, 601, 701, 801, 901}
+            assert price_min_rating in valid_prices_for_min_rating, \
+                f"Electronics price_min_rating should be one of {valid_prices_for_min_rating}, got {price_min_rating}"
+            valid_prices_for_max_rating = {99, 199, 299, 399, 499, 599, 699, 799, 899, 999}
+            assert price_max_rating in valid_prices_for_max_rating, \
+                f"Electronics price_max_rating should be one of {valid_prices_for_max_rating}, got {price_max_rating}"
+            assert count == 500, f"Electronics count should be 500, got {count}"
+        elif category == b'books':
+            books_found = True
+            # Simple mode is non-deterministic, just verify valid range
+            assert 1.0 <= first_price <= 1000.0, f"Books first_price out of range: {first_price}"
+            assert int(first_price) % 2 == 0, f"Books should have even prices, got {first_price}"
+            # Sorted modes - multiple books have the same min/max rating, so any of their prices is valid
+            valid_prices_for_min_rating = {2, 102, 202, 302, 402, 502, 602, 702, 802, 902}
+            assert price_min_rating in valid_prices_for_min_rating, \
+                f"Books price_min_rating should be one of {valid_prices_for_min_rating}, got {price_min_rating}"
+            valid_prices_for_max_rating = {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000}
+            assert price_max_rating in valid_prices_for_max_rating, \
+                f"Books price_max_rating should be one of {valid_prices_for_max_rating}, got {price_max_rating}"
+            assert count == 500, f"Books count should be 500, got {count}"
+        else:
+            raise AssertionError(f"Unexpected category: {category}")
+    
+    assert electronics_found, "Electronics group not found in results"
+    assert books_found, "Books group not found in results"
+
+    # 21. FIRST_VALUE reducer - numeric field type handling
+    result = client.execute_command(
+        "FT.AGGREGATE", "products", "@price:[1 1000]",
+        "LOAD", "2", "price", "category",
+        "GROUPBY", "1", "@category",
+        "REDUCE", "FIRST_VALUE", "4", "@price", "BY", "@price", "ASC", "AS", "min_price"
+    )
+    assert result[0] == 2
+    for i in range(1, len(result)):
+        row = dict(zip(result[i][::2], result[i][1::2]))
+        min_price = float(row[b'min_price'])
+        if row[b'category'] == b'electronics':
+            assert min_price == 1.0, f"Electronics min_price should be 1.0, got {min_price}"
+        else:
+            assert min_price == 2.0, f"Books min_price should be 2.0, got {min_price}"
+
 class TestNonVector(ValkeySearchTestCaseBase):
 
     def test_basic(self):
 
@@ -263,6 +263,11 @@
                     "name": "STDDEV",
                     "type": "pure-token",
                     "token": "STDDEV"
+                  },
+                  {
+                    "name": "FIRST_VALUE",
+                    "type": "pure-token",
+                    "token": "FIRST_VALUE"
                   }
                 ]
               },
Original file line number	Diff line number	Diff line change
`@@ -263,6 +263,11 @@`
`263`	`263`	`"name": "STDDEV",`
`264`	`264`	`"type": "pure-token",`
`265`	`265`	`"token": "STDDEV"`
	`266`	`+ },`
	`267`	`+ {`
	`268`	`+ "name": "FIRST_VALUE",`
	`269`	`+ "type": "pure-token",`
	`270`	`+ "token": "FIRST_VALUE"`
`266`	`271`	`}`
`267`	`272`	`]`
`268`	`273`	`},`