Skip to content

Commit 12c5553

Browse files
committed
Address review feedback: reject empty vector / option segments, stabilize key order
- parseVector now uses split(",", -1) and rejects empty components so malformed literals like "[1.0,]" or "[1.0,,2.0]" surface an explicit error instead of silently shrinking the vector to a lower dimension. - parseOptions now rejects empty option segments with an explicit "Malformed option segment" error; a wholly empty option string continues to flow to the "Missing required option" gate for the existing, clearer message. - ALLOWED_OPTION_KEYS is now a List so the unknown-key error renders the supported keys in a stable, user-friendly order (k, max_distance, min_score, filter_type) rather than Set iteration order. Adds unit coverage for trailing, leading, and consecutive empty segments in both vector and option parsing, and for stable key order in the unknown-option-key error message. Adds an IT for the trailing comma vector case. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 91d6d8e commit 12c5553

3 files changed

Lines changed: 124 additions & 11 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,20 @@ public void testNegativeMaxDistanceRejected() throws IOException {
622622
assertThat(ex.getMessage(), containsString("non-negative"));
623623
}
624624

625+
@Test
626+
public void testTrailingCommaInVectorRejected() throws IOException {
627+
ResponseException ex =
628+
expectThrows(
629+
ResponseException.class,
630+
() ->
631+
executeQuery(
632+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
633+
+ "vector='[1.0,2.0,]', option='k=5') AS v"));
634+
635+
assertThat(ex.getMessage(), containsString("Invalid vector component"));
636+
assertThat(ex.getMessage(), containsString("trailing or consecutive commas"));
637+
}
638+
625639
private void deleteIndexIfExists(String indexName) {
626640
try {
627641
client().performRequest(new Request("DELETE", "/" + indexName));

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import java.util.LinkedHashMap;
1515
import java.util.List;
1616
import java.util.Map;
17-
import java.util.Set;
1817
import java.util.regex.Pattern;
1918
import java.util.stream.Collectors;
2019
import org.opensearch.sql.common.setting.Settings;
@@ -35,9 +34,13 @@
3534
public class VectorSearchTableFunctionImplementation extends FunctionExpression
3635
implements TableFunctionImplementation {
3736

38-
/** P0 allowed option keys. Rejects unknown/future keys to prevent unvalidated DSL injection. */
39-
static final Set<String> ALLOWED_OPTION_KEYS =
40-
Set.of("k", "max_distance", "min_score", "filter_type");
37+
/**
38+
* P0 allowed option keys. Rejects unknown/future keys to prevent unvalidated DSL injection. A
39+
* {@link List} (rather than a {@link Set}) so the unknown-key error message renders the supported
40+
* keys in a stable, user-friendly order.
41+
*/
42+
static final List<String> ALLOWED_OPTION_KEYS =
43+
List.of("k", "max_distance", "min_score", "filter_type");
4144

4245
/**
4346
* Field names must be safe for JSON interpolation: alphanumeric, dots (nested), underscores,
@@ -135,30 +138,49 @@ private float[] parseVector(String vectorLiteral) {
135138
+ " e.g., vector='[1.0,2.0,3.0]'",
136139
vectorLiteral));
137140
}
138-
String[] parts = cleaned.split(",");
141+
// Preserve trailing empties (split(",", -1)) so malformed literals like "[1.0,]" or
142+
// "[1.0,,2.0]" surface an explicit error instead of silently shrinking the vector.
143+
String[] parts = cleaned.split(",", -1);
139144
float[] vector = new float[parts.length];
140145
for (int i = 0; i < parts.length; i++) {
146+
String component = parts[i].trim();
147+
if (component.isEmpty()) {
148+
throw new ExpressionEvaluationException(
149+
String.format(
150+
"Invalid vector component at position %d: must be a number (check for"
151+
+ " trailing or consecutive commas in '%s')",
152+
i, vectorLiteral));
153+
}
141154
try {
142-
vector[i] = Float.parseFloat(parts[i].trim());
155+
vector[i] = Float.parseFloat(component);
143156
} catch (NumberFormatException e) {
144157
throw new ExpressionEvaluationException(
145-
String.format("Invalid vector component '%s': must be a number", parts[i].trim()));
158+
String.format("Invalid vector component '%s': must be a number", component));
146159
}
147160
if (!Float.isFinite(vector[i])) {
148161
throw new ExpressionEvaluationException(
149-
String.format(
150-
"Invalid vector component '%s': must be a finite number", parts[i].trim()));
162+
String.format("Invalid vector component '%s': must be a finite number", component));
151163
}
152164
}
153165
return vector;
154166
}
155167

156168
static Map<String, String> parseOptions(String optionStr) {
157169
Map<String, String> options = new LinkedHashMap<>();
158-
for (String pair : optionStr.split(",")) {
170+
// A wholly empty option string is handled downstream with a clearer "missing required option"
171+
// message than a generic malformed-segment error.
172+
if (optionStr.trim().isEmpty()) {
173+
return options;
174+
}
175+
// split(",", -1) preserves trailing empties so malformed inputs like "k=5," or "k=5,,k2=v"
176+
// surface an explicit error instead of being silently dropped.
177+
String[] pairs = optionStr.split(",", -1);
178+
for (String pair : pairs) {
159179
String trimmed = pair.trim();
160180
if (trimmed.isEmpty()) {
161-
continue;
181+
throw new ExpressionEvaluationException(
182+
"Malformed option segment '': expected key=value (check for trailing or"
183+
+ " consecutive commas)");
162184
}
163185
String[] kv = trimmed.split("=", 2);
164186
if (kv.length != 2 || kv[0].trim().isEmpty() || kv[1].trim().isEmpty()) {

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,83 @@ void applyArguments_acceptsZeroMaxDistance() {
634634
assertTrue(table instanceof VectorSearchIndex);
635635
}
636636

637+
// ── Vector parsing: trailing / empty components (PR #5381 review) ─────
638+
639+
@Test
640+
void applyArguments_rejectsTrailingCommaInVector() {
641+
VectorSearchTableFunctionImplementation impl =
642+
createImplWithArgs("my-index", "embedding", "[1.0,2.0,]", "k=5");
643+
ExpressionEvaluationException ex =
644+
assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments());
645+
assertTrue(ex.getMessage().contains("Invalid vector component"));
646+
assertTrue(ex.getMessage().contains("trailing or consecutive commas"));
647+
}
648+
649+
@Test
650+
void applyArguments_rejectsConsecutiveCommasInVector() {
651+
VectorSearchTableFunctionImplementation impl =
652+
createImplWithArgs("my-index", "embedding", "[1.0,,2.0]", "k=5");
653+
ExpressionEvaluationException ex =
654+
assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments());
655+
assertTrue(ex.getMessage().contains("Invalid vector component"));
656+
assertTrue(ex.getMessage().contains("trailing or consecutive commas"));
657+
}
658+
659+
@Test
660+
void applyArguments_rejectsLeadingCommaInVector() {
661+
VectorSearchTableFunctionImplementation impl =
662+
createImplWithArgs("my-index", "embedding", "[,1.0,2.0]", "k=5");
663+
ExpressionEvaluationException ex =
664+
assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments());
665+
assertTrue(ex.getMessage().contains("Invalid vector component"));
666+
}
667+
668+
// ── Option parsing: empty segments (PR #5381 review) ─────────────────
669+
670+
@Test
671+
void parseOptions_rejectsTrailingEmptySegment() {
672+
ExpressionEvaluationException ex =
673+
assertThrows(
674+
ExpressionEvaluationException.class,
675+
() -> VectorSearchTableFunctionImplementation.parseOptions("k=5,"));
676+
assertTrue(ex.getMessage().contains("Malformed option segment"));
677+
assertTrue(ex.getMessage().contains("trailing or consecutive commas"));
678+
}
679+
680+
@Test
681+
void parseOptions_rejectsLeadingEmptySegment() {
682+
ExpressionEvaluationException ex =
683+
assertThrows(
684+
ExpressionEvaluationException.class,
685+
() -> VectorSearchTableFunctionImplementation.parseOptions(",k=5"));
686+
assertTrue(ex.getMessage().contains("Malformed option segment"));
687+
}
688+
689+
@Test
690+
void parseOptions_rejectsConsecutiveCommas() {
691+
ExpressionEvaluationException ex =
692+
assertThrows(
693+
ExpressionEvaluationException.class,
694+
() -> VectorSearchTableFunctionImplementation.parseOptions("k=5,,filter_type=post"));
695+
assertTrue(ex.getMessage().contains("Malformed option segment"));
696+
}
697+
698+
// ── Unknown-key error lists supported keys in stable order (PR #5381 review) ──
699+
700+
@Test
701+
void applyArguments_unknownOptionKeyErrorListsSupportedKeysInStableOrder() {
702+
VectorSearchTableFunctionImplementation impl =
703+
createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,bogus=1");
704+
ExpressionEvaluationException ex =
705+
assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments());
706+
String msg = ex.getMessage();
707+
int kIdx = msg.indexOf("k");
708+
int maxIdx = msg.indexOf("max_distance");
709+
int minIdx = msg.indexOf("min_score");
710+
int filterIdx = msg.indexOf("filter_type");
711+
assertTrue(kIdx >= 0 && maxIdx > kIdx && minIdx > maxIdx && filterIdx > minIdx);
712+
}
713+
637714
private VectorSearchTableFunctionImplementation createImpl() {
638715
return createImplWithArgs("my-index", "embedding", "[1.0, 2.0, 3.0]", "k=5");
639716
}

0 commit comments

Comments
 (0)