Skip to content

Commit 5fa9c2c

Browse files
committed
Add happy-path execution tests for vectorSearch()
VectorSearchIT covered only rejection paths; this adds positive end-to-end coverage for top-k, POST-filter, EFFICIENT-filter, and both radial modes (max_distance / min_score). The k-NN plugin is not provisioned by the default integ-test cluster, so each test guards init() with Assume.assumeTrue(isKnnPluginInstalled()) — tests skip cleanly when k-NN is absent and run when it is (e.g. locally via scripts/setup-knn-local.sh). Provisioning k-NN in CI is a separate follow-up tracked outside this PR. Test data is a 6-doc 2D knn_vector index with two well-separated clusters so filter correctness is assertable by document id. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent f6ec54f commit 5fa9c2c

1 file changed

Lines changed: 219 additions & 0 deletions

File tree

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.sql;
7+
8+
import static org.opensearch.sql.util.TestUtils.createIndexByRestClient;
import static org.opensearch.sql.util.TestUtils.isIndexExist;
import static org.opensearch.sql.util.TestUtils.performRequest;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Assume;
import org.junit.Test;
import org.opensearch.client.Request;
import org.opensearch.client.Response;
import org.opensearch.sql.legacy.SQLIntegTestCase;
20+
21+
/**
22+
* Happy-path execution tests for the vectorSearch() SQL table function. These tests run an actual
23+
* k-NN query against a small in-memory knn_vector index and assert that results come back ordered
24+
* by score and respect any WHERE filters.
25+
*
26+
* <p>The k-NN plugin is not provisioned by the default integ-test cluster — each test calls {@link
27+
* Assume#assumeTrue} on {@link #isKnnPluginInstalled()} so the class is silently skipped when k-NN
28+
* is absent. Run locally after {@code scripts/setup-knn-local.sh} has wired k-NN into the test
29+
* cluster. Provisioning k-NN in CI is a separate follow-up.
30+
*/
31+
public class VectorSearchExecutionIT extends SQLIntegTestCase {
32+
33+
private static final String TEST_INDEX = "vector_exec_test";
34+
35+
// 6 docs in 2D — two clusters so filter/radial tests have distinguishable results.
36+
// Cluster A near [1, 1]: docs 1-3 (state=TX, ages 25/30/40).
37+
// Cluster B near [9, 9]: docs 4-6 (state=CA, ages 28/35/45).
38+
private static final String MAPPING =
39+
"{"
40+
+ " \"settings\": {\"index\": {\"knn\": true}},"
41+
+ " \"mappings\": {"
42+
+ " \"properties\": {"
43+
+ " \"embedding\": {\"type\": \"knn_vector\", \"dimension\": 2},"
44+
+ " \"state\": {\"type\": \"keyword\"},"
45+
+ " \"age\": {\"type\": \"integer\"}"
46+
+ " }"
47+
+ " }"
48+
+ "}";
49+
50+
private static final String BULK_BODY =
51+
"{\"index\":{\"_id\":\"1\"}}\n"
52+
+ "{\"embedding\":[1.0,1.0],\"state\":\"TX\",\"age\":25}\n"
53+
+ "{\"index\":{\"_id\":\"2\"}}\n"
54+
+ "{\"embedding\":[1.1,0.9],\"state\":\"TX\",\"age\":30}\n"
55+
+ "{\"index\":{\"_id\":\"3\"}}\n"
56+
+ "{\"embedding\":[0.9,1.2],\"state\":\"TX\",\"age\":40}\n"
57+
+ "{\"index\":{\"_id\":\"4\"}}\n"
58+
+ "{\"embedding\":[9.0,9.0],\"state\":\"CA\",\"age\":28}\n"
59+
+ "{\"index\":{\"_id\":\"5\"}}\n"
60+
+ "{\"embedding\":[9.1,8.8],\"state\":\"CA\",\"age\":35}\n"
61+
+ "{\"index\":{\"_id\":\"6\"}}\n"
62+
+ "{\"embedding\":[8.7,9.3],\"state\":\"CA\",\"age\":45}\n";
63+
64+
@Override
65+
protected void init() throws Exception {
66+
Assume.assumeTrue("k-NN plugin not installed on test cluster", isKnnPluginInstalled());
67+
if (!isIndexExist(client(), TEST_INDEX)) {
68+
createIndexByRestClient(client(), TEST_INDEX, MAPPING);
69+
Request bulk = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true");
70+
bulk.setJsonEntity(BULK_BODY);
71+
performRequest(client(), bulk);
72+
}
73+
}
74+
75+
private static boolean isKnnPluginInstalled() {
76+
try {
77+
Response response = client().performRequest(new Request("GET", "/_cat/plugins?h=component"));
78+
String body = new String(response.getEntity().getContent().readAllBytes());
79+
return body.contains("opensearch-knn");
80+
} catch (IOException e) {
81+
return false;
82+
}
83+
}
84+
85+
// ── Top-k happy path ────────────────────────────────────────────────
86+
87+
@Test
88+
public void testTopKReturnsNearestSortedByScore() throws IOException {
89+
JSONObject result =
90+
executeJdbcRequest(
91+
"SELECT v._id, v._score "
92+
+ "FROM vectorSearch(table='"
93+
+ TEST_INDEX
94+
+ "', field='embedding', "
95+
+ "vector='[1.0, 1.0]', option='k=3') AS v "
96+
+ "LIMIT 3");
97+
98+
// All 3 returned docs should be from cluster A (ids 1-3), ordered by score desc.
99+
JSONArray rows = result.getJSONArray("datarows");
100+
assertEquals("Expected 3 rows:\n" + result, 3, rows.length());
101+
for (int i = 0; i < rows.length(); i++) {
102+
String id = rows.getJSONArray(i).getString(0);
103+
assertTrue(
104+
"Row " + i + " id=" + id + " should be from cluster A (1,2,3):\n" + result,
105+
id.equals("1") || id.equals("2") || id.equals("3"));
106+
}
107+
// Scores must be non-increasing.
108+
double prev = Double.POSITIVE_INFINITY;
109+
for (int i = 0; i < rows.length(); i++) {
110+
double score = rows.getJSONArray(i).getDouble(1);
111+
assertTrue(
112+
"Scores must be sorted desc, got " + score + " after " + prev + ":\n" + result,
113+
score <= prev);
114+
prev = score;
115+
}
116+
}
117+
118+
// ── POST filter happy path ──────────────────────────────────────────
119+
120+
@Test
121+
public void testPostFilterReturnsOnlyMatchingDocs() throws IOException {
122+
// Query from cluster B with WHERE state='TX' should force the scan to find TX docs
123+
// (cluster A) even though the vector is closer to cluster B. Proves filter is applied.
124+
JSONObject result =
125+
executeJdbcRequest(
126+
"SELECT v._id, v._score "
127+
+ "FROM vectorSearch(table='"
128+
+ TEST_INDEX
129+
+ "', field='embedding', "
130+
+ "vector='[9.0, 9.0]', option='k=10') AS v "
131+
+ "WHERE v.state = 'TX' "
132+
+ "LIMIT 10");
133+
134+
JSONArray rows = result.getJSONArray("datarows");
135+
assertTrue("Expected at least one row:\n" + result, rows.length() > 0);
136+
for (int i = 0; i < rows.length(); i++) {
137+
String id = rows.getJSONArray(i).getString(0);
138+
assertTrue(
139+
"Row " + i + " id=" + id + " should be from TX cluster (1,2,3):\n" + result,
140+
id.equals("1") || id.equals("2") || id.equals("3"));
141+
}
142+
}
143+
144+
// ── EFFICIENT filter happy path ─────────────────────────────────────
145+
146+
@Test
147+
public void testEfficientFilterReturnsOnlyMatchingDocs() throws IOException {
148+
JSONObject result =
149+
executeJdbcRequest(
150+
"SELECT v._id, v._score "
151+
+ "FROM vectorSearch(table='"
152+
+ TEST_INDEX
153+
+ "', field='embedding', "
154+
+ "vector='[1.0, 1.0]', option='k=5,filter_type=efficient') AS v "
155+
+ "WHERE v.state = 'CA' "
156+
+ "LIMIT 5");
157+
158+
JSONArray rows = result.getJSONArray("datarows");
159+
assertTrue("Expected at least one row:\n" + result, rows.length() > 0);
160+
for (int i = 0; i < rows.length(); i++) {
161+
String id = rows.getJSONArray(i).getString(0);
162+
assertTrue(
163+
"Row " + i + " id=" + id + " should be from CA cluster (4,5,6):\n" + result,
164+
id.equals("4") || id.equals("5") || id.equals("6"));
165+
}
166+
}
167+
168+
// ── Radial happy paths ──────────────────────────────────────────────
169+
170+
@Test
171+
public void testRadialMaxDistanceReturnsOnlyNearDocs() throws IOException {
172+
// max_distance=1.0 (L2) centered on [1,1] should pick up cluster A docs and exclude
173+
// cluster B which is ~11 units away.
174+
JSONObject result =
175+
executeJdbcRequest(
176+
"SELECT v._id "
177+
+ "FROM vectorSearch(table='"
178+
+ TEST_INDEX
179+
+ "', field='embedding', "
180+
+ "vector='[1.0, 1.0]', option='max_distance=1.0') AS v "
181+
+ "LIMIT 10");
182+
183+
JSONArray rows = result.getJSONArray("datarows");
184+
assertTrue("Expected at least one row:\n" + result, rows.length() > 0);
185+
for (int i = 0; i < rows.length(); i++) {
186+
String id = rows.getJSONArray(i).getString(0);
187+
assertTrue(
188+
"Row " + i + " id=" + id + " should be within max_distance of cluster A:\n" + result,
189+
id.equals("1") || id.equals("2") || id.equals("3"));
190+
}
191+
}
192+
193+
@Test
194+
public void testRadialMinScoreReturnsOnlyHighScoreDocs() throws IOException {
195+
// For L2 space, OpenSearch score = 1/(1+distance). Centered on [1,1], cluster A docs
196+
// score ~0.8-1.0 and cluster B scores ~0.08. min_score=0.5 should exclude cluster B.
197+
JSONObject result =
198+
executeJdbcRequest(
199+
"SELECT v._id, v._score "
200+
+ "FROM vectorSearch(table='"
201+
+ TEST_INDEX
202+
+ "', field='embedding', "
203+
+ "vector='[1.0, 1.0]', option='min_score=0.5') AS v "
204+
+ "LIMIT 10");
205+
206+
JSONArray rows = result.getJSONArray("datarows");
207+
assertTrue("Expected at least one row:\n" + result, rows.length() > 0);
208+
for (int i = 0; i < rows.length(); i++) {
209+
String id = rows.getJSONArray(i).getString(0);
210+
double score = rows.getJSONArray(i).getDouble(1);
211+
assertTrue(
212+
"Row " + i + " id=" + id + " score=" + score + " should be >= 0.5:\n" + result,
213+
score >= 0.5);
214+
assertTrue(
215+
"Row " + i + " id=" + id + " should be from cluster A:\n" + result,
216+
id.equals("1") || id.equals("2") || id.equals("3"));
217+
}
218+
}
219+
}

0 commit comments

Comments
 (0)