Skip to content

Commit 70635a1

Browse files
committed
Reject GROUP BY / aggregations on vectorSearch() relations
Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 231d477 commit 70635a1

3 files changed

Lines changed: 87 additions & 2 deletions

File tree

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import org.opensearch.sql.opensearch.client.OpenSearchClient;
1515
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
1616
import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan;
17-
import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScanBuilder;
17+
import org.opensearch.sql.opensearch.storage.scan.VectorSearchIndexScanBuilder;
1818
import org.opensearch.sql.opensearch.storage.scan.VectorSearchQueryBuilder;
1919
import org.opensearch.sql.storage.read.TableScanBuilder;
2020

@@ -94,7 +94,7 @@ public TableScanBuilder createScanBuilder() {
9494
getClient(),
9595
rb.getMaxResponseSize(),
9696
rb.build(getIndexName(), cursorKeepAlive, getClient(), getFieldTypes().isEmpty()));
97-
return new OpenSearchIndexScanBuilder(queryBuilder, createScanOperator);
97+
return new VectorSearchIndexScanBuilder(queryBuilder, createScanOperator);
9898
}
9999

100100
private QueryBuilder buildKnnQuery() {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.opensearch.storage.scan;
7+
8+
import java.util.function.Function;
9+
import org.opensearch.sql.exception.ExpressionEvaluationException;
10+
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
11+
import org.opensearch.sql.planner.logical.LogicalAggregation;
12+
13+
/**
14+
* Scan builder for vector search relations. Rejects aggregation pushdown because the semantics of
15+
* aggregations over knn-scored documents are ambiguous at the SQL surface (e.g., per-bucket _score
16+
* is undefined). Rejecting at push-down time keeps the MVP contract tight; support can be added
17+
* later without breaking existing users.
18+
*/
19+
public class VectorSearchIndexScanBuilder extends OpenSearchIndexScanBuilder {
20+
21+
public VectorSearchIndexScanBuilder(
22+
PushDownQueryBuilder translator,
23+
Function<OpenSearchRequestBuilder, OpenSearchIndexScan> scanFactory) {
24+
super(translator, scanFactory);
25+
}
26+
27+
@Override
28+
public boolean pushDownAggregation(LogicalAggregation aggregation) {
29+
throw new ExpressionEvaluationException(
30+
"GROUP BY / aggregations are not supported on vectorSearch() relations. "
31+
+ "Wrap the vectorSearch() call in a subquery and aggregate over the subquery result.");
32+
}
33+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.opensearch.storage.scan;
7+
8+
import static org.junit.jupiter.api.Assertions.assertThrows;
9+
import static org.junit.jupiter.api.Assertions.assertTrue;
10+
import static org.mockito.Mockito.mock;
11+
12+
import java.util.Collections;
13+
import org.junit.jupiter.api.Test;
14+
import org.opensearch.index.query.WrapperQueryBuilder;
15+
import org.opensearch.sql.common.setting.Settings;
16+
import org.opensearch.sql.exception.ExpressionEvaluationException;
17+
import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory;
18+
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
19+
import org.opensearch.sql.planner.logical.LogicalAggregation;
20+
import org.opensearch.sql.planner.logical.LogicalValues;
21+
22+
class VectorSearchIndexScanBuilderTest {
23+
24+
@Test
25+
void pushDownAggregationIsRejected() {
26+
var requestBuilder =
27+
new OpenSearchRequestBuilder(
28+
mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class));
29+
var queryBuilder =
30+
new VectorSearchQueryBuilder(
31+
requestBuilder, new WrapperQueryBuilder("{\"knn\":{}}"), java.util.Map.of("k", "5"));
32+
var scanBuilder =
33+
new VectorSearchIndexScanBuilder(queryBuilder, rb -> mock(OpenSearchIndexScan.class));
34+
35+
var agg =
36+
new LogicalAggregation(
37+
new LogicalValues(Collections.emptyList()),
38+
Collections.emptyList(),
39+
Collections.emptyList(),
40+
false);
41+
42+
ExpressionEvaluationException ex =
43+
assertThrows(
44+
ExpressionEvaluationException.class, () -> scanBuilder.pushDownAggregation(agg));
45+
assertTrue(
46+
ex.getMessage().contains("GROUP BY"),
47+
"Error should mention GROUP BY; actual: " + ex.getMessage());
48+
assertTrue(
49+
ex.getMessage().contains("vectorSearch"),
50+
"Error should mention vectorSearch; actual: " + ex.getMessage());
51+
}
52+
}

0 commit comments

Comments
 (0)