Skip to content

Commit bf39a24

Browse files
committed
Address review nits on synthetic _score collision guard
- Use OpenSearchIndex.METADATA_FIELD_SCORE constant in place of the bare "_score" literal for the mapping-field check. - Drop "stored field" wording in the error message and code comment; the mapping does not use "store": true, so "user field" is what we actually mean. - Scope the block comment to the _id / _score cases this PR actually locks in; drop the broader "(and most other metadata names)" claim. - Shorten comments on the guard and both ITs to minimal WHY-only form. - Assert HTTP 400 in testVectorSearchAgainstIndexWithScoreFieldRejects so the IT surfaces an accidental status-code regression explicitly. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 98d10f0 commit bf39a24

2 files changed

Lines changed: 14 additions & 33 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -437,28 +437,17 @@ public void testMissingRequiredArgRejected() throws IOException {
437437
}
438438

439439
// Synthetic column collision (metadata vs. user field).
440-
//
441-
// v._score and v._id on a vectorSearch() relation are synthetic columns that expose the k-NN
442-
// similarity score and the document metadata _id. A user-defined stored field of the same name
443-
// would collide with these synthetic columns in the response tuple. Probe results on
444-
// OpenSearch 3.6:
445-
// - _id: OpenSearch's mapper rejects a property named _id at mapping time, so no collision
446-
// is possible. The testUserMappingWithIdFieldIsRejectedByOpenSearch test locks this in.
447-
// - _score: OpenSearch accepts a property named _score. Left unchecked the SQL response
448-
// layer would fail with an opaque duplicate-key error when the hit's _source contains the
449-
// user field and the metadata _score both write to the same tuple key. VectorSearchIndex
450-
// therefore rejects the mapping at scan-build time with a clear SQL error; the
451-
// testVectorSearchAgainstIndexWithScoreFieldRejects test exercises that guard (works via
452-
// _explain so no k-NN plugin is needed).
440+
// vectorSearch() exposes synthetic v._id and v._score columns. A user mapping property of the
441+
// same name would collide on the response tuple key. OpenSearch blocks _id at mapping time;
442+
// _score is not blocked, so VectorSearchIndex rejects it at scan-build time.
453443

454444
@Test
455445
public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOException {
446+
// Locks in OpenSearch's rejection of a user property named _id: without it, v._id could
447+
// collide with a user field at response time. The exact error message belongs to OpenSearch.
456448
String indexName = "vs_collision_id";
457449
deleteIndexIfExists(indexName);
458450

459-
// OpenSearch rejects _id as a user mapping property; this guarantees v._id on a
460-
// vectorSearch() relation always resolves to the document metadata _id. The exact error
461-
// message belongs to OpenSearch; this test only locks in that the mapping creation fails.
462451
Request createIndex = new Request("PUT", "/" + indexName);
463452
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_id\":{\"type\":\"keyword\"}}}}");
464453

@@ -467,15 +456,10 @@ public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOExceptio
467456

468457
@Test
469458
public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOException {
459+
// _explain exercises planning (where the guard runs) without needing the k-NN plugin.
470460
String indexName = "vs_collision_score";
471461
deleteIndexIfExists(indexName);
472462

473-
// Unlike _id, OpenSearch accepts _score as a user-defined mapping property. Without the
474-
// VectorSearchIndex guard, a vectorSearch() query against such an index would fail at
475-
// response time with an opaque duplicate-key error because the stored _score field and the
476-
// metadata _score both map to the same tuple key. The guard raises a clear SQL error
477-
// up-front. Using _explain here exercises planning (which runs the guard) without needing
478-
// the k-NN plugin.
479463
Request createIndex = new Request("PUT", "/" + indexName);
480464
createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_score\":{\"type\":\"float\"}}}}");
481465
client().performRequest(createIndex);
@@ -490,6 +474,7 @@ public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOExcepti
490474
+ "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v "
491475
+ "LIMIT 5"));
492476

477+
assertEquals(400, ex.getResponse().getStatusLine().getStatusCode());
493478
assertThat(ex.getMessage(), containsString("_score"));
494479
assertThat(ex.getMessage(), containsString("collides"));
495480
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,16 @@ public VectorSearchIndex(
7575

7676
@Override
7777
public TableScanBuilder createScanBuilder() {
78-
// Reject mappings that declare a user field named _score. OpenSearch rejects _id (and most
79-
// other metadata names) at mapping time, but _score is not blocked there, so a user index
80-
// can legitimately contain a stored _score field. If it did, the response path would collide
81-
// with the synthetic v._score column (both get written to the same tuple key) and the query
82-
// would fail with an opaque duplicate-key error. We check here so the user sees a clear,
83-
// actionable SQL error instead, and so _explain surfaces the problem without sending a
84-
// request to k-NN. Running in createScanBuilder keeps this out of the AstBuilder /
85-
// TableFunction resolver paths which are off-limits for this change.
86-
if (getFieldTypes().containsKey("_score")) {
78+
// _score is not blocked at mapping time, so a user field named _score would collide with the
79+
// synthetic v._score column on the response tuple and fail with an opaque duplicate-key error.
80+
// Reject here so the user sees a clear SQL error (and _explain surfaces the problem without a
81+
// k-NN request).
82+
if (getFieldTypes().containsKey(METADATA_FIELD_SCORE)) {
8783
throw new IllegalArgumentException(
8884
String.format(
8985
"Index '%s' defines a user field named '_score' that collides with the synthetic"
90-
+ " _score column exposed by vectorSearch(). Rename the stored field or query"
91-
+ " the index without vectorSearch().",
86+
+ " _score column exposed by vectorSearch(). Rename the field or query the index"
87+
+ " without vectorSearch().",
9288
getIndexName()));
9389
}
9490
final TimeValue cursorKeepAlive =

0 commit comments

Comments
 (0)