Skip to content

Commit ba39e46

Browse files
committed
Address review nits on synthetic _score collision guard
- Use OpenSearchIndex.METADATA_FIELD_SCORE constant in place of the bare "_score" literal for the mapping-field check. - Drop "stored field" wording in the error message and code comment; the mapping does not use "store": true, so "user field" is what we actually mean. - Scope the block comment to the _id / _score cases this PR actually locks in; drop the broader "(and most other metadata names)" claim. - Shorten comments on the guard and both ITs to minimal WHY-only form. - Assert HTTP 400 in testVectorSearchAgainstIndexWithScoreFieldRejects so the IT surfaces an accidental status-code regression explicitly. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 0a67af8 commit ba39e46

2 files changed

Lines changed: 14 additions & 33 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -303,28 +303,17 @@ public void testExplainWithoutKnnPluginStillWorks() throws IOException {
303303
}
304304

305305
// Synthetic column collision (metadata vs. user field).
306-
//
307-
// v._score and v._id on a vectorSearch() relation are synthetic columns that expose the k-NN
308-
// similarity score and the document metadata _id. A user-defined stored field of the same name
309-
// would collide with these synthetic columns in the response tuple. Probe results on
310-
// OpenSearch 3.6:
311-
// - _id: OpenSearch's mapper rejects a property named _id at mapping time, so no collision
312-
// is possible. The testUserMappingWithIdFieldIsRejectedByOpenSearch test locks this in.
313-
// - _score: OpenSearch accepts a property named _score. Left unchecked the SQL response
314-
// layer would fail with an opaque duplicate-key error when the hit's _source contains the
315-
// user field and the metadata _score both write to the same tuple key. VectorSearchIndex
316-
// therefore rejects the mapping at scan-build time with a clear SQL error; the
317-
// testVectorSearchAgainstIndexWithScoreFieldRejects test exercises that guard (works via
318-
// _explain so no k-NN plugin is needed).
306+
// vectorSearch() exposes synthetic v._id and v._score columns. A user mapping property of the
307+
// same name would collide on the response tuple key. OpenSearch blocks _id at mapping time;
308+
// _score is not blocked, so VectorSearchIndex rejects it at scan-build time.
319309

320310
@Test
321311
public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOException {
312+
// Locks in OpenSearch's rejection of a user property named _id: without it, v._id could
313+
// collide with a user field at response time. The exact error message belongs to OpenSearch.
322314
String indexName = "vs_collision_id";
323315
deleteIndexIfExists(indexName);
324316

325-
// OpenSearch rejects _id as a user mapping property; this guarantees v._id on a
326-
// vectorSearch() relation always resolves to the document metadata _id. The exact error
327-
// message belongs to OpenSearch; this test only locks in that the mapping creation fails.
328317
Request createIndex = new Request("PUT", "/" + indexName);
329318
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_id\":{\"type\":\"keyword\"}}}}");
330319

@@ -333,15 +322,10 @@ public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOExceptio
333322

334323
@Test
335324
public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOException {
325+
// _explain exercises planning (where the guard runs) without needing the k-NN plugin.
336326
String indexName = "vs_collision_score";
337327
deleteIndexIfExists(indexName);
338328

339-
// Unlike _id, OpenSearch accepts _score as a user-defined mapping property. Without the
340-
// VectorSearchIndex guard, a vectorSearch() query against such an index would fail at
341-
// response time with an opaque duplicate-key error because the stored _score field and the
342-
// metadata _score both map to the same tuple key. The guard raises a clear SQL error
343-
// up-front. Using _explain here exercises planning (which runs the guard) without needing
344-
// the k-NN plugin.
345329
Request createIndex = new Request("PUT", "/" + indexName);
346330
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_score\":{\"type\":\"float\"}}}}");
347331
client().performRequest(createIndex);
@@ -356,6 +340,7 @@ public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOExcepti
356340
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v "
357341
+ "LIMIT 5"));
358342

343+
assertEquals(400, ex.getResponse().getStatusLine().getStatusCode());
359344
assertThat(ex.getMessage(), containsString("_score"));
360345
assertThat(ex.getMessage(), containsString("collides"));
361346
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,16 @@ public VectorSearchIndex(
7575

7676
@Override
7777
public TableScanBuilder createScanBuilder() {
78-
// Reject mappings that declare a user field named _score. OpenSearch rejects _id (and most
79-
// other metadata names) at mapping time, but _score is not blocked there, so a user index
80-
// can legitimately contain a stored _score field. If it did, the response path would collide
81-
// with the synthetic v._score column (both get written to the same tuple key) and the query
82-
// would fail with an opaque duplicate-key error. We check here so the user sees a clear,
83-
// actionable SQL error instead, and so _explain surfaces the problem without sending a
84-
// request to k-NN. Running in createScanBuilder keeps this out of the AstBuilder /
85-
// TableFunction resolver paths which are off-limits for this change.
86-
if (getFieldTypes().containsKey("_score")) {
78+
// _score is not blocked at mapping time, so a user field named _score would collide with the
79+
// synthetic v._score column on the response tuple and fail with an opaque duplicate-key error.
80+
// Reject here so the user sees a clear SQL error (and _explain surfaces the problem without a
81+
// k-NN request).
82+
if (getFieldTypes().containsKey(METADATA_FIELD_SCORE)) {
8783
throw new IllegalArgumentException(
8884
String.format(
8985
"Index '%s' defines a user field named '_score' that collides with the synthetic"
90-
+ " _score column exposed by vectorSearch(). Rename the stored field or query"
91-
+ " the index without vectorSearch().",
86+
+ " _score column exposed by vectorSearch(). Rename the field or query the index"
87+
+ " without vectorSearch().",
9288
getIndexName()));
9389
}
9490
final TimeValue cursorKeepAlive =

0 commit comments

Comments
 (0)