Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion core/src/main/java/org/opensearch/sql/planner/Planner.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer;
import org.opensearch.sql.planner.physical.PhysicalPlan;
import org.opensearch.sql.storage.Table;
import org.opensearch.sql.storage.read.TableScanBuilder;

/** Planner that plans and chooses the optimal physical plan. */
@RequiredArgsConstructor
Expand All @@ -34,7 +35,35 @@ public PhysicalPlan plan(LogicalPlan plan) {
if (table == null) {
return plan.accept(new DefaultImplementor<>(), null);
}
return table.implement(table.optimize(optimize(plan)));
LogicalPlan optimized = table.optimize(optimize(plan));
// Give scan builders a chance to reject shapes that push-down alone cannot express safely
// (e.g. operators that land above the scan but outside its push-down contract).
validateScanBuilders(optimized);
return table.implement(optimized);
}

/**
* Walk the optimized plan and invoke {@link TableScanBuilder#validatePlan(LogicalPlan)} on every
* scan builder, passing the fully optimized root so scan builders can inspect their ancestors.
*/
private void validateScanBuilders(LogicalPlan optimized) {
optimized.accept(
new LogicalPlanNodeVisitor<Void, Object>() {
@Override
public Void visitNode(LogicalPlan node, Object context) {
for (LogicalPlan child : node.getChild()) {
child.accept(this, context);
}
return null;
}

@Override
public Void visitTableScanBuilder(TableScanBuilder node, Object context) {
node.validatePlan(optimized);
return null;
}
},
null);
}

private Table findTable(LogicalPlan plan) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ public boolean pushDownPageSize(LogicalPaginate paginate) {
return false;
}

/**
* Post-optimization validation hook. Called once by the planner after all push-down rules have
* run, with the fully optimized plan root. Subclasses may inspect the ancestors of this scan
* builder to reject planner shapes that push-down alone cannot express safely (for example,
* operators that land above the scan but outside its push-down contract and would be executed
* after the scan has already returned a bounded result set). Default is no-op.
*
* @param root the fully optimized logical plan containing this scan builder
*/
public void validatePlan(LogicalPlan root) {
// no-op by default
}

@Override
public <R, C> R accept(LogicalPlanNodeVisitor<R, C> visitor, C context) {
return visitor.visitTableScanBuilder(this, context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql;

import static org.hamcrest.Matchers.containsString;

import java.io.IOException;
import org.junit.Test;
import org.opensearch.client.ResponseException;
import org.opensearch.sql.legacy.SQLIntegTestCase;
import org.opensearch.sql.legacy.TestsConstants;

/**
* Integration tests for vectorSearch() used inside subqueries. Locks in the rejection of outer
* WHERE on a vectorSearch() subquery, which would otherwise silently yield zero rows because the
* outer predicate is applied only after the k-NN search has already selected top-k documents by
* vector distance.
*
* <p>Uses _explain-only plus error-path queries, so the k-NN plugin is not required — the planner
* validation fires during planning, before any k-NN execution.
*/
public class VectorSearchSubqueryIT extends SQLIntegTestCase {

@Override
protected void init() throws Exception {
loadIndex(Index.ACCOUNT);
}

private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT;

@Test
public void testOuterWhereOnSubqueryRejected() throws IOException {
// Exact failing shape from the P0 tracker: outer WHERE on a subquery that wraps vectorSearch().
// Without the guard the outer predicate is dropped from the pushed DSL and applied only in
// memory after k-NN returned top-k, which can yield silent zero rows.
ResponseException ex =
expectThrows(
ResponseException.class,
() ->
executeQuery(
"SELECT * FROM (SELECT v.firstname, v.state "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t "
+ "WHERE t.state = 'TX'"));

assertThat(
ex.getMessage(),
containsString("Outer WHERE on a vectorSearch() subquery is not supported"));
assertThat(ex.getMessage(), containsString("silently yield zero rows"));
}

@Test
public void testOuterWhereOnSubqueryRejectedWithLimit() throws IOException {
// Same shape with an outer LIMIT — exercises a second planner path (LogicalLimit above
// LogicalFilter above LogicalProject above scan builder).
ResponseException ex =
expectThrows(
ResponseException.class,
() ->
executeQuery(
"SELECT * FROM (SELECT v.firstname, v.state "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t "
+ "WHERE t.state = 'TX' "
+ "LIMIT 3"));

assertThat(
ex.getMessage(),
containsString("Outer WHERE on a vectorSearch() subquery is not supported"));
}

@Test
public void testOuterWhereOnSubqueryRejectedExplain() throws IOException {
// The guard must fire during planning, before any k-NN execution — so _explain must also
// return the validation error rather than a silently dropped predicate in the DSL.
ResponseException ex =
expectThrows(
ResponseException.class,
() ->
explainQuery(
"SELECT * FROM (SELECT v.firstname, v.state "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t "
+ "WHERE t.state = 'TX'"));

assertThat(
ex.getMessage(),
containsString("Outer WHERE on a vectorSearch() subquery is not supported"));
}

@Test
public void testInnerWhereStillWorks() throws IOException {
// Positive control: WHERE directly on vectorSearch() inside the subquery must still plan
// successfully — the rejection is scoped to OUTER filters that cannot reach the push-down
// contract. We use _explain because the default integ-test cluster has no k-NN plugin.
String explain =
explainQuery(
"SELECT * FROM (SELECT v.firstname, v.state "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v "
+ "WHERE v.state = 'TX') t");

assertThat(explain, containsString("wrapper"));
// Inner WHERE should push down, so the state predicate appears in the DSL.
assertThat(explain, containsString("state"));
}

@Test
public void testInnerWhereWithOuterProjectStillWorks() throws IOException {
// Another positive control: the outer layer can still project and limit columns from the
// subquery without the guard firing — only outer WHERE is rejected.
String explain =
explainQuery(
"SELECT t.firstname FROM (SELECT v.firstname, v.state "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v "
+ "WHERE v.state = 'TX') t "
+ "LIMIT 3");

assertThat(explain, containsString("wrapper"));
}

@Test
public void testSubqueryNoWhereStillWorks() throws IOException {
// Baseline: a subquery with no WHERE anywhere must not be rejected — the guard fires only
// when an outer LogicalFilter sits above a subquery project boundary.
String explain =
explainQuery(
"SELECT * FROM (SELECT v.firstname "
+ "FROM vectorSearch(table='"
+ TEST_INDEX
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t "
+ "LIMIT 3");

assertThat(explain, containsString("wrapper"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,28 @@
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
import org.opensearch.sql.planner.logical.LogicalAggregation;
import org.opensearch.sql.planner.logical.LogicalFilter;
import org.opensearch.sql.planner.logical.LogicalPlan;
import org.opensearch.sql.planner.logical.LogicalProject;

/**
* Scan builder for vector search relations. Rejects aggregation pushdown: aggregations over
* vectorSearch() relations are not supported in the SQL surface. Native OpenSearch k-NN does
* support aggregations alongside similarity search; this restriction is specific to the SQL layer.
* Scan builder for vector search relations.
*
* <p>Rejects two planner shapes that the SQL surface cannot express safely:
*
* <ul>
* <li><b>Aggregations</b> — native OpenSearch k-NN supports aggregations alongside similarity
* search, but the SQL layer does not plumb them through, so we fail fast rather than return
* silently unaggregated results.
* <li><b>Outer WHERE over a vectorSearch() subquery</b> — when vectorSearch() is wrapped in a
* subquery (e.g. {@code SELECT * FROM (SELECT v.id FROM vectorSearch(...) AS v) t WHERE
* t.price < 150}), the outer predicate does not reach the push-down contract (the inner
* {@link LogicalProject} sits between the outer {@link LogicalFilter} and this scan builder,
* so the filter never matches the {@code filter(scanBuilder)} push-down pattern). The filter
* is then applied in memory, but only <i>after</i> k-NN has already returned the top-k
* documents ranked by vector distance, so the outer predicate can silently produce zero rows.
* We detect this shape in {@link #validatePlan(LogicalPlan)} and reject with a clear error.
* </ul>
*/
public class VectorSearchIndexScanBuilder extends OpenSearchIndexScanBuilder {

Expand All @@ -28,4 +45,60 @@ public boolean pushDownAggregation(LogicalAggregation aggregation) {
throw new ExpressionEvaluationException(
"Aggregations are not supported on vectorSearch() relations.");
}

/**
* Walk the fully-optimized plan and reject the outer-WHERE-over-subquery shape. We look for a
* {@link LogicalFilter} whose descendant chain reaches this scan builder through one or more
* {@link LogicalProject} nodes (the subquery-boundary marker). A filter directly above this scan
* builder is fine — that WHERE has already gone through the push-down contract.
*/
@Override
public void validatePlan(LogicalPlan root) {
checkForOuterFilter(root, false, false);
}

/**
* Recursive walker with two booleans that together encode the "outer filter separated by a
* subquery boundary" pattern:
*
* <ul>
* <li>{@code insideFilter} — true iff some ancestor on the current walk path is a {@link
* LogicalFilter}. Projects only matter below a filter, so without a filter ancestor a
* project is just the outer SELECT and should not trigger rejection.
* <li>{@code sawProjectSinceFilter} — true iff a {@link LogicalProject} has been seen between
* the <em>nearest enclosing</em> filter and the current position. That project is the
* subquery-boundary marker that separates "outer WHERE on a subquery" (bad) from "WHERE
* directly on vectorSearch()" (fine, handled by push-down).
* </ul>
*
* <p>Entering a new {@link LogicalFilter} resets {@code sawProjectSinceFilter} to false because
* the nearest enclosing filter is now this one, and the contract is evaluated from here down.
*/
private void checkForOuterFilter(
LogicalPlan node, boolean insideFilter, boolean sawProjectSinceFilter) {
if (node == this) {
if (insideFilter && sawProjectSinceFilter) {
throw new ExpressionEvaluationException(
"Outer WHERE on a vectorSearch() subquery is not supported: the predicate does not"
+ " push into the k-NN search and would be applied only after top-k results have"
+ " been selected by vector distance, which can silently yield zero rows."
+ " Move the predicate inside the subquery (WHERE directly on vectorSearch()) so"
+ " it is applied during the k-NN search.");
}
return;
}
boolean nextInsideFilter = insideFilter;
boolean nextSawProject = sawProjectSinceFilter;
if (node instanceof LogicalFilter) {
// Entering a new filter: nearest enclosing filter is now this one, reset project marker.
nextInsideFilter = true;
nextSawProject = false;
} else if (node instanceof LogicalProject && insideFilter) {
// A project between the nearest enclosing filter and a scan builder is the bug marker.
nextSawProject = true;
}
for (LogicalPlan child : node.getChild()) {
checkForOuterFilter(child, nextInsideFilter, nextSawProject);
}
}
}
Loading
Loading