Skip to content

Commit 98d10f0

Browse files
committed
[BugFix] Reject vectorSearch on index with user _score field
vectorSearch() exposes a synthetic v._score column backed by the document metadata _score. If a user index also defines a stored field named _score, the response tuple builder collides the two values into the same key and fails with an opaque duplicate-key error from ImmutableMap.Builder. Probe result on OpenSearch 3.6: - _id: mapping creation is rejected by OpenSearch at index time, so no collision is possible. A lock-in IT guards that assumption. - _score: mapping creation is accepted. Without a guard, vectorSearch() against such an index fails opaquely at response time. Fix: reject the mapping in VectorSearchIndex.createScanBuilder() with a clear, user-facing SQL error before any request is sent. The guard runs during planning, so _explain also surfaces the problem without needing the k-NN plugin. The check lives in VectorSearchIndex to keep the AstBuilder and TableFunction resolver paths untouched. Tests: - VectorSearchIndexTest.createScanBuilderRejectsIndexWithScoreField covers the guard via a mocked mapping. - VectorSearchIT.testUserMappingWithIdFieldIsRejectedByOpenSearch locks in OpenSearch's _id-rejection behavior. - VectorSearchIT.testVectorSearchAgainstIndexWithScoreFieldRejects exercises the guard end-to-end via _explain (no k-NN plugin needed). Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 24bf6e0 commit 98d10f0

3 files changed

Lines changed: 120 additions & 0 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import java.io.IOException;
1111
import org.junit.Test;
12+
import org.opensearch.client.Request;
1213
import org.opensearch.client.ResponseException;
1314
import org.opensearch.sql.legacy.SQLIntegTestCase;
1415
import org.opensearch.sql.legacy.TestsConstants;
@@ -434,4 +435,70 @@ public void testMissingRequiredArgRejected() throws IOException {
434435

435436
assertThat(ex.getMessage(), containsString("requires 4 arguments"));
436437
}
438+
439+
// Synthetic column collision (metadata vs. user field).
440+
//
441+
// v._score and v._id on a vectorSearch() relation are synthetic columns that expose the k-NN
442+
// similarity score and the document metadata _id. A user-defined stored field of the same name
443+
// would collide with these synthetic columns in the response tuple. Probe results on
444+
// OpenSearch 3.6:
445+
// - _id: OpenSearch's mapper rejects a property named _id at mapping time, so no collision
446+
// is possible. The testUserMappingWithIdFieldIsRejectedByOpenSearch test locks this in.
447+
// - _score: OpenSearch accepts a property named _score. Left unchecked the SQL response
448+
// layer would fail with an opaque duplicate-key error when the hit's _source contains the
449+
// user field and the metadata _score both write to the same tuple key. VectorSearchIndex
450+
// therefore rejects the mapping at scan-build time with a clear SQL error; the
451+
// testVectorSearchAgainstIndexWithScoreFieldRejects test exercises that guard (works via
452+
// _explain so no k-NN plugin is needed).
453+
454+
@Test
455+
public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOException {
456+
String indexName = "vs_collision_id";
457+
deleteIndexIfExists(indexName);
458+
459+
// OpenSearch rejects _id as a user mapping property; this guarantees v._id on a
460+
// vectorSearch() relation always resolves to the document metadata _id. The exact error
461+
// message belongs to OpenSearch; this test only locks in that the mapping creation fails.
462+
Request createIndex = new Request("PUT", "/" + indexName);
463+
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_id\":{\"type\":\"keyword\"}}}}");
464+
465+
expectThrows(ResponseException.class, () -> client().performRequest(createIndex));
466+
}
467+
468+
@Test
469+
public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOException {
470+
String indexName = "vs_collision_score";
471+
deleteIndexIfExists(indexName);
472+
473+
// Unlike _id, OpenSearch accepts _score as a user-defined mapping property. Without the
474+
// VectorSearchIndex guard, a vectorSearch() query against such an index would fail at
475+
// response time with an opaque duplicate-key error because the stored _score field and the
476+
// metadata _score both map to the same tuple key. The guard raises a clear SQL error
477+
// up-front. Using _explain here exercises planning (which runs the guard) without needing
478+
// the k-NN plugin.
479+
Request createIndex = new Request("PUT", "/" + indexName);
480+
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_score\":{\"type\":\"float\"}}}}");
481+
client().performRequest(createIndex);
482+
483+
ResponseException ex =
484+
expectThrows(
485+
ResponseException.class,
486+
() ->
487+
explainQuery(
488+
"SELECT v._score FROM vectorSearch(table='"
489+
+ indexName
490+
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v "
491+
+ "LIMIT 5"));
492+
493+
assertThat(ex.getMessage(), containsString("_score"));
494+
assertThat(ex.getMessage(), containsString("collides"));
495+
}
496+
497+
private void deleteIndexIfExists(String indexName) {
498+
try {
499+
client().performRequest(new Request("DELETE", "/" + indexName));
500+
} catch (IOException ignored) {
501+
// Index does not exist, which is fine.
502+
}
503+
}
437504
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,22 @@ public VectorSearchIndex(
7575

7676
@Override
7777
public TableScanBuilder createScanBuilder() {
78+
// Reject mappings that declare a user field named _score. OpenSearch rejects _id (and most
79+
// other metadata names) at mapping time, but _score is not blocked there, so a user index
80+
// can legitimately contain a stored _score field. If it did, the response path would collide
81+
// with the synthetic v._score column (both get written to the same tuple key) and the query
82+
// would fail with an opaque duplicate-key error. We check here so the user sees a clear,
83+
// actionable SQL error instead, and so _explain surfaces the problem without sending a
84+
// request to k-NN. Running in createScanBuilder keeps this out of the AstBuilder /
85+
// TableFunction resolver paths which are off-limits for this change.
86+
if (getFieldTypes().containsKey("_score")) {
87+
throw new IllegalArgumentException(
88+
String.format(
89+
"Index '%s' defines a user field named '_score' that collides with the synthetic"
90+
+ " _score column exposed by vectorSearch(). Rename the stored field or query"
91+
+ " the index without vectorSearch().",
92+
getIndexName()));
93+
}
7894
final TimeValue cursorKeepAlive =
7995
getSettings().getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE);
8096
var requestBuilder = createRequestBuilder();

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,24 @@
77

88
import static org.junit.jupiter.api.Assertions.assertEquals;
99
import static org.junit.jupiter.api.Assertions.assertFalse;
10+
import static org.junit.jupiter.api.Assertions.assertThrows;
1011
import static org.junit.jupiter.api.Assertions.assertTrue;
12+
import static org.mockito.Mockito.lenient;
13+
import static org.mockito.Mockito.when;
1114

15+
import com.google.common.collect.ImmutableMap;
1216
import java.util.LinkedHashMap;
1317
import java.util.Map;
1418
import org.junit.jupiter.api.Test;
1519
import org.junit.jupiter.api.extension.ExtendWith;
1620
import org.mockito.Mock;
1721
import org.mockito.junit.jupiter.MockitoExtension;
22+
import org.opensearch.common.unit.TimeValue;
1823
import org.opensearch.sql.common.setting.Settings;
1924
import org.opensearch.sql.opensearch.client.OpenSearchClient;
25+
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
26+
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType;
27+
import org.opensearch.sql.opensearch.mapping.IndexMapping;
2028

2129
@ExtendWith(MockitoExtension.class)
2230
class VectorSearchIndexTest {
@@ -25,6 +33,8 @@ class VectorSearchIndexTest {
2533

2634
@Mock private Settings settings;
2735

36+
@Mock private IndexMapping indexMapping;
37+
2838
@Test
2939
void buildKnnQueryJsonTopK() {
3040
VectorSearchIndex index =
@@ -226,4 +236,31 @@ void isInstanceOfOpenSearchIndex() {
226236
client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5"));
227237
assertTrue(index instanceof OpenSearchIndex);
228238
}
239+
240+
@Test
241+
void createScanBuilderRejectsIndexWithScoreField() {
242+
// A mapping that declares a user field named _score cannot coexist with the synthetic
243+
// v._score column exposed by vectorSearch(); the guard in createScanBuilder should reject
244+
// it with a clear, user-facing error.
245+
lenient()
246+
.when(settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE))
247+
.thenReturn(TimeValue.timeValueMinutes(1));
248+
when(indexMapping.getFieldMappings())
249+
.thenReturn(Map.of("_score", OpenSearchDataType.of(MappingType.Float)));
250+
when(client.getIndexMappings("test-index"))
251+
.thenReturn(ImmutableMap.of("test-index", indexMapping));
252+
253+
VectorSearchIndex index =
254+
new VectorSearchIndex(
255+
client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5"));
256+
257+
IllegalArgumentException ex =
258+
assertThrows(IllegalArgumentException.class, index::createScanBuilder);
259+
assertTrue(
260+
ex.getMessage().contains("_score"),
261+
"error message should mention the colliding _score field");
262+
assertTrue(
263+
ex.getMessage().contains("collides"),
264+
"error message should describe the collision, got: " + ex.getMessage());
265+
}
229266
}

0 commit comments

Comments
 (0)