66package org .opensearch .sql .opensearch .storage .scan ;
77
88import java .util .Map ;
9+ import java .util .Set ;
910import java .util .concurrent .atomic .AtomicBoolean ;
1011import java .util .function .Function ;
1112import org .apache .commons .lang3 .tuple .Pair ;
1213import org .opensearch .index .query .BoolQueryBuilder ;
1314import org .opensearch .index .query .ConstantScoreQueryBuilder ;
15+ import org .opensearch .index .query .ExistsQueryBuilder ;
16+ import org .opensearch .index .query .MatchBoolPrefixQueryBuilder ;
17+ import org .opensearch .index .query .MatchPhrasePrefixQueryBuilder ;
18+ import org .opensearch .index .query .MatchPhraseQueryBuilder ;
19+ import org .opensearch .index .query .MatchQueryBuilder ;
20+ import org .opensearch .index .query .MultiMatchQueryBuilder ;
21+ import org .opensearch .index .query .NestedQueryBuilder ;
1422import org .opensearch .index .query .QueryBuilder ;
1523import org .opensearch .index .query .QueryBuilders ;
24+ import org .opensearch .index .query .QueryStringQueryBuilder ;
25+ import org .opensearch .index .query .RangeQueryBuilder ;
1626import org .opensearch .index .query .ScriptQueryBuilder ;
27+ import org .opensearch .index .query .SimpleQueryStringBuilder ;
28+ import org .opensearch .index .query .TermQueryBuilder ;
29+ import org .opensearch .index .query .WildcardQueryBuilder ;
1730import org .opensearch .sql .ast .tree .Sort ;
1831import org .opensearch .sql .ast .tree .Sort .SortOption ;
1932import org .opensearch .sql .exception .ExpressionEvaluationException ;
@@ -83,11 +96,8 @@ public boolean pushDownFilter(LogicalFilter filter) {
8396 FilterQueryBuilder queryBuilder = new FilterQueryBuilder (new DefaultExpressionSerializer ());
8497 Expression queryCondition = filter .getCondition ();
8598
86- // Reject WHERE predicates that reference the synthetic _score column. The planner surfaces
87- // _score as a projectable/filterable field, but it is not a stored document field in
88- // OpenSearch. If we let this pass through, FilterQueryBuilder produces a range query on a
89- // non-existent field and the cluster silently returns 0 rows. Users who want a score floor
90- // should use option='min_score=...' instead.
99+ // _score is synthetic, not a stored field; a range query on it silently returns 0 rows.
100+ // Users who want a score floor should use option='min_score=...'.
91101 if (containsScoreReference (queryCondition )) {
92102 throw new ExpressionEvaluationException (
93103 "WHERE on _score is not supported on vectorSearch()."
@@ -109,16 +119,9 @@ public boolean pushDownFilter(LogicalFilter filter) {
109119 filterPushed = true ;
110120
111121 if (filterType == FilterType .EFFICIENT ) {
112- // knn.filter on AOSS/serverless vector collections rejects script queries. If any WHERE
113- // subtree compiled to a ScriptQueryBuilder (arithmetic, function calls, CASE, date math),
114- // refuse to embed it under knn.filter instead of shipping a request that will fail at the
115- // cluster with an opaque error.
116- if (containsScriptQuery (whereQuery )) {
117- throw new ExpressionEvaluationException (
118- "filter_type=efficient does not support predicates that compile to script queries"
119- + " (arithmetic, function calls, CASE, date math). Rewrite the WHERE clause to"
120- + " use comparable/term/range predicates, or omit filter_type." );
121- }
122+ // Fail closed: knn.filter on AOSS rejects script queries, and nested predicates are outside
123+ // the preview contract. Validate against an allow-list rather than scanning for known-bad types.
124+ validateEfficientFilterSafe (whereQuery );
122125 QueryBuilder rebuiltKnn = rebuildKnnWithFilter .apply (whereQuery );
123126 requestBuilder .getSourceBuilder ().query (rebuiltKnn );
124127 } else {
@@ -131,10 +134,8 @@ public boolean pushDownFilter(LogicalFilter filter) {
131134
132135 @ Override
133136 public boolean pushDownLimit (LogicalLimit limit ) {
134- // OFFSET on vectorSearch() silently rewrites the search window and drops top results, which
135- // defeats the entire point of a relevance-ranked top-k query. The parent path would push
136- // `from: <offset>` into the OpenSearch request; reject it explicitly so users get a clear
137- // error instead of surprising result shifts.
137+ // OFFSET would shift the search window and silently drop top results; reject with a clear
138+ // error rather than have the parent path push `from: <offset>` into the request.
138139 if (limit .getOffset () != null && limit .getOffset () != 0 ) {
139140 throw new ExpressionEvaluationException (
140141 "OFFSET is not supported on vectorSearch(). Remove OFFSET and use LIMIT only." );
@@ -163,9 +164,8 @@ public boolean pushDownSort(LogicalSort sort) {
163164 "vectorSearch only supports ORDER BY _score DESC; _score ASC is not supported" );
164165 }
165166 }
166- // _score DESC is the natural knn order — no need to push the sort itself to OpenSearch.
167- // Preserve the parent's sort.getCount() → limit pushdown contract: SQL always sets count=0,
168- // but PPL or future callers may set a non-zero count to combine sort+limit in one node.
167+ // _score DESC is knn's natural order, so the sort itself is not pushed. Preserve the
168+ // parent's sort.getCount() → limit contract; SQL sends 0, PPL may combine sort+limit.
169169 if (sort .getCount () != 0 ) {
170170 validateLimitWithinK (sort .getCount ());
171171 limitPushed = true ;
@@ -185,20 +185,14 @@ private void validateLimitWithinK(int limit) {
185185 }
186186 }
187187
188- /**
189- * Returns true if any subexpression is a ReferenceExpression whose attr is "_score". Uses the
190- * standard ExpressionNodeVisitor so compound predicates (AND/OR/NOT, function calls, CASE) are
191- * walked uniformly.
192- */
188+ // True if any ReferenceExpression in the tree names _score (case-insensitive, so quoted/
189+ // backticked variants cannot bypass the guard).
193190 private static boolean containsScoreReference (Expression expr ) {
194191 AtomicBoolean found = new AtomicBoolean (false );
195192 expr .accept (
196193 new ExpressionNodeVisitor <Void , Void >() {
197194 @ Override
198195 public Void visitReference (ReferenceExpression node , Void context ) {
199- // Case-insensitive match so _SCORE, _Score, and any quoted/backticked variant that
200- // preserves original casing cannot bypass the guard and reach the cluster as a range
201- // query on a non-existent field.
202196 if (node .getAttr () != null && "_score" .equalsIgnoreCase (node .getAttr ())) {
203197 found .set (true );
204198 }
@@ -209,48 +203,60 @@ public Void visitReference(ReferenceExpression node, Void context) {
209203 return found .get ();
210204 }
211205
212- /**
213- * Recursively scans a QueryBuilder tree for any ScriptQueryBuilder. Handles the common wrappers
214- * that FilterQueryBuilder produces: BoolQueryBuilder (must/should/mustNot/filter) and
215- * ConstantScoreQueryBuilder. Other QueryBuilder subtypes are leaves for our purposes: if the
216- * top-level builder itself is a ScriptQueryBuilder we catch it, otherwise we treat it as
217- * script-free.
218- */
219- private static boolean containsScriptQuery (QueryBuilder qb ) {
206+ // Allow-list of leaf query types FilterQueryBuilder emits today. Any new wrapper or container
207+ // appearing here must fail closed rather than silently embed under knn.filter.
208+ private static final Set <Class <? extends QueryBuilder >> SAFE_EFFICIENT_FILTER_LEAVES =
209+ Set .of (
210+ TermQueryBuilder .class ,
211+ RangeQueryBuilder .class ,
212+ WildcardQueryBuilder .class ,
213+ MatchQueryBuilder .class ,
214+ MatchPhraseQueryBuilder .class ,
215+ MatchPhrasePrefixQueryBuilder .class ,
216+ MultiMatchQueryBuilder .class ,
217+ QueryStringQueryBuilder .class ,
218+ SimpleQueryStringBuilder .class ,
219+ MatchBoolPrefixQueryBuilder .class ,
220+ ExistsQueryBuilder .class );
221+
222+ // Package-private for direct branch coverage in unit tests. Fail-closed: recurse known
223+ // containers, reject ScriptQueryBuilder/NestedQueryBuilder with targeted messages, allow
224+ // listed leaves, reject everything else as unsupported shape.
225+ static void validateEfficientFilterSafe (QueryBuilder qb ) {
220226 if (qb == null ) {
221- return false ;
227+ return ;
222228 }
223229 if (qb instanceof ScriptQueryBuilder ) {
224- return true ;
230+ throw new ExpressionEvaluationException (
231+ "filter_type=efficient does not support predicates that compile to script queries"
232+ + " (arithmetic, function calls, CASE, date math). Rewrite the WHERE clause to"
233+ + " use comparable/term/range predicates, or omit filter_type." );
225234 }
226235 if (qb instanceof BoolQueryBuilder ) {
227236 BoolQueryBuilder bool = (BoolQueryBuilder ) qb ;
228- for (QueryBuilder child : bool .must ()) {
229- if (containsScriptQuery (child )) {
230- return true ;
231- }
232- }
233- for (QueryBuilder child : bool .filter ()) {
234- if (containsScriptQuery (child )) {
235- return true ;
236- }
237- }
238- for (QueryBuilder child : bool .should ()) {
239- if (containsScriptQuery (child )) {
240- return true ;
241- }
242- }
243- for (QueryBuilder child : bool .mustNot ()) {
244- if (containsScriptQuery (child )) {
245- return true ;
246- }
247- }
248- return false ;
237+ bool .must ().forEach (VectorSearchQueryBuilder ::validateEfficientFilterSafe );
238+ bool .filter ().forEach (VectorSearchQueryBuilder ::validateEfficientFilterSafe );
239+ bool .should ().forEach (VectorSearchQueryBuilder ::validateEfficientFilterSafe );
240+ bool .mustNot ().forEach (VectorSearchQueryBuilder ::validateEfficientFilterSafe );
241+ return ;
249242 }
250243 if (qb instanceof ConstantScoreQueryBuilder ) {
251- return containsScriptQuery (((ConstantScoreQueryBuilder ) qb ).innerQuery ());
244+ validateEfficientFilterSafe (((ConstantScoreQueryBuilder ) qb ).innerQuery ());
245+ return ;
246+ }
247+ if (qb instanceof NestedQueryBuilder ) {
248+ throw new ExpressionEvaluationException (
249+ "filter_type=efficient does not support nested predicates in this preview."
250+ + " Rewrite the WHERE clause using non-nested fields or omit filter_type." );
251+ }
252+ if (SAFE_EFFICIENT_FILTER_LEAVES .contains (qb .getClass ())) {
253+ return ;
252254 }
253- return false ;
255+ throw new ExpressionEvaluationException (
256+ "filter_type=efficient encountered an unsupported filter query shape: "
257+ + qb .getClass ().getSimpleName ()
258+ + ". Rewrite the WHERE clause using simple term/range/bool predicates,"
259+ + " or omit filter_type." );
254260 }
255261
256262 @ Override
0 commit comments