Skip to content

Commit e11163e

Browse files
authored
Relevance Functions Optional Param Support, Refactor and support new functions (opensearch-project#21661)
* Creating modularized classes for each serializer. For each relevance function, creating a modularized structure where each serializer is self-contained, avoiding a lot of the logic being present in ConversionUtils. Also, implemented serialization of all the suboptions for each of the relevance functions. Signed-off-by: Suresh N S <nssuresh@amazon.com> * Adding support for wildcard, query, matchall Signed-off-by: Suresh N S <nssuresh@amazon.com> * Fixing spotless issues Signed-off-by: Suresh N S <nssuresh@amazon.com> * Fixing unit test issue Signed-off-by: Suresh N S <nssuresh@amazon.com> * Added missing package-info file Signed-off-by: Suresh N S <nssuresh@amazon.com> --------- Signed-off-by: Suresh N S <nssuresh@amazon.com>
1 parent 40a1461 commit e11163e

21 files changed

Lines changed: 1879 additions & 108 deletions

File tree

sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ public enum ScalarFunction {
6161
WILDCARD(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
6262
REGEXP(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
6363
REGEXP_CONTAINS(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
64+
WILDCARD_QUERY(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
65+
QUERY(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
66+
MATCHALL(Category.FULL_TEXT, SqlKind.OTHER_FUNCTION),
6467

6568
// ── String ───────────────────────────────────────────────────────
6669
UPPER(Category.STRING, SqlKind.OTHER_FUNCTION),

sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/TimestampFunctionAdapter.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ TimestampString parseTimestamp(String input) {
9494
return toTimestampString(ldt);
9595
} catch (DateTimeParseException ignored) {}
9696

97+
// Handle space-separated format: "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd HH:mm:ss.SSS"
98+
if (input.contains(" ") && !input.contains("T")) {
99+
try {
100+
LocalDateTime ldt = LocalDateTime.parse(input.replace(' ', 'T'));
101+
return toTimestampString(ldt);
102+
} catch (DateTimeParseException ignored) {}
103+
}
104+
97105
return new TimestampString(input);
98106
}
99107

sandbox/plugins/analytics-backend-datafusion/src/test/java/org/opensearch/be/datafusion/TimestampFunctionAdapterTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,9 @@ public void testSpaceSeparatorPassthrough() {
5959
TimestampString ts = transformer.parseTimestamp("2024-01-01 10:30:00");
6060
assertEquals("2024-01-01 10:30:00", ts.toString());
6161
}
62+
63+
public void testSpaceSeparatorWithMilliseconds() {
64+
TimestampString ts = transformer.parseTimestamp("2024-01-01 10:30:00.123");
65+
assertEquals("2024-01-01 10:30:00.123", ts.toString());
66+
}
6267
}

sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/ConversionUtils.java

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
import java.io.IOException;
2121
import java.util.ArrayList;
22+
import java.util.LinkedHashMap;
2223
import java.util.List;
24+
import java.util.Map;
2325

2426
/**
2527
* Reusable utilities for extracting fields and values from PPL relevance function
@@ -29,7 +31,7 @@
2931
* {@code func(MAP('field', $ref), MAP('query', literal), [MAP('param', literal)]...)}
3032
* Each MAP has exactly 2 operands: key at index 0, value at index 1.
3133
*/
32-
final class ConversionUtils {
34+
public final class ConversionUtils {
3335

3436
/** MAP key for single-field relevance operands. */
3537
static final String KEY_FIELD = "field";
@@ -77,7 +79,7 @@ static String extractStringFromRelevanceMap(RexCall call, int operandIndex) {
7779
/**
7880
* Serializes a QueryBuilder into bytes using NamedWriteable protocol.
7981
*/
80-
static byte[] serializeQueryBuilder(QueryBuilder queryBuilder) {
82+
public static byte[] serializeQueryBuilder(QueryBuilder queryBuilder) {
8183
try (BytesStreamOutput output = new BytesStreamOutput()) {
8284
output.writeNamedWriteable(queryBuilder);
8385
return BytesReference.toBytes(output.bytes());
@@ -101,13 +103,53 @@ static String extractMapKey(RexCall call, int operandIndex) {
101103
return null;
102104
}
103105

106+
/**
107+
* Extracts optional key-value parameters from MAP_VALUE_CONSTRUCTOR operands
108+
* starting at the given index.
109+
*
110+
* <p>Each operand at index {@code startIndex} and beyond is expected to be a
111+
* MAP_VALUE_CONSTRUCTOR with exactly 2 children: a string key literal at index 0
112+
* and a string value literal at index 1.
113+
*
114+
* <p>Example: for {@code match(field, 'query', operator='AND', analyzer='standard')},
115+
* operands 2 and 3 are MAP('operator','AND') and MAP('analyzer','standard').
116+
* Calling {@code extractOptionalParams(call, 2)} returns
117+
* a map equal to {@code Map.of("operator", "AND", "analyzer", "standard")}, iterated in operand order.
118+
*
119+
* @param call the relevance function RexCall
120+
* @param startIndex the first operand index to inspect (typically 2)
121+
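* @return map of parameter key → string value; empty if no optional params present. Operands that are not a two-operand call with a literal key and a literal value, or whose key or value is null, are silently skipped.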
122+
*/
123+
public static Map<String, String> extractOptionalParams(RexCall call, int startIndex) {
124+
List<RexNode> operands = call.getOperands();
125+
if (startIndex >= operands.size()) {
126+
return Map.of();
127+
}
128+
Map<String, String> params = new LinkedHashMap<>();
129+
for (int i = startIndex; i < operands.size(); i++) {
130+
RexNode operand = operands.get(i);
131+
if (operand instanceof RexCall mapCall && mapCall.getOperands().size() == 2) {
132+
RexNode keyNode = mapCall.getOperands().get(0);
133+
RexNode valueNode = mapCall.getOperands().get(1);
134+
if (keyNode instanceof RexLiteral keyLit && valueNode instanceof RexLiteral valueLit) {
135+
String key = keyLit.getValueAs(String.class);
136+
String value = valueLit.getValueAs(String.class);
137+
if (key != null && value != null) {
138+
params.put(key, value);
139+
}
140+
}
141+
}
142+
}
143+
return params;
144+
}
145+
104146
/**
105147
* Extracted operands from a relevance function RexCall.
106148
* @param fieldName single field name (null if not present or multi-field)
107149
* @param fields multiple field names (null if not present)
108150
* @param query the query string (null if not found)
109151
*/
110-
record RelevanceOperands(String fieldName, List<String> fields, String query) {
152+
public record RelevanceOperands(String fieldName, List<String> fields, String query) {
111153
}
112154

113155
/**
@@ -118,7 +160,7 @@ record RelevanceOperands(String fieldName, List<String> fields, String query) {
118160
* @param fieldStorage per-column storage metadata for resolving field names
119161
* @return extracted operands
120162
*/
121-
static RelevanceOperands extractRelevanceOperands(RexCall call, List<FieldStorageInfo> fieldStorage) {
163+
public static RelevanceOperands extractRelevanceOperands(RexCall call, List<FieldStorageInfo> fieldStorage) {
122164
String fieldName = null;
123165
List<String> fields = null;
124166
String query = null;

sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneAnalyticsBackendPlugin.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ public class LuceneAnalyticsBackendPlugin implements AnalyticsSearchBackendPlugi
7171
ScalarFunction.SIMPLE_QUERY_STRING,
7272
ScalarFunction.FUZZY,
7373
ScalarFunction.WILDCARD,
74-
ScalarFunction.REGEXP
74+
ScalarFunction.REGEXP,
75+
ScalarFunction.WILDCARD_QUERY,
76+
ScalarFunction.QUERY,
77+
ScalarFunction.MATCHALL
7578
);
7679

7780
private static final Set<FieldType> STANDARD_TYPES = new HashSet<>();

sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/QuerySerializerRegistry.java

Lines changed: 20 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@
88

99
package org.opensearch.be.lucene;
1010

11-
import org.apache.calcite.rex.RexCall;
1211
import org.opensearch.analytics.spi.DelegatedPredicateSerializer;
13-
import org.opensearch.analytics.spi.FieldStorageInfo;
1412
import org.opensearch.analytics.spi.ScalarFunction;
15-
import org.opensearch.index.query.MatchBoolPrefixQueryBuilder;
16-
import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder;
17-
import org.opensearch.index.query.MatchPhraseQueryBuilder;
18-
import org.opensearch.index.query.MatchQueryBuilder;
19-
import org.opensearch.index.query.MultiMatchQueryBuilder;
20-
import org.opensearch.index.query.QueryStringQueryBuilder;
21-
import org.opensearch.index.query.SimpleQueryStringBuilder;
13+
import org.opensearch.be.lucene.serializers.MatchAllSerializer;
14+
import org.opensearch.be.lucene.serializers.MatchBoolPrefixSerializer;
15+
import org.opensearch.be.lucene.serializers.MatchPhrasePrefixSerializer;
16+
import org.opensearch.be.lucene.serializers.MatchPhraseSerializer;
17+
import org.opensearch.be.lucene.serializers.MatchSerializer;
18+
import org.opensearch.be.lucene.serializers.MultiMatchSerializer;
19+
import org.opensearch.be.lucene.serializers.QuerySerializer;
20+
import org.opensearch.be.lucene.serializers.QueryStringSerializer;
21+
import org.opensearch.be.lucene.serializers.SimpleQueryStringSerializer;
22+
import org.opensearch.be.lucene.serializers.WildcardQuerySerializer;
2223

23-
import java.util.List;
2424
import java.util.Map;
2525

2626
/**
@@ -31,104 +31,21 @@
3131
final class QuerySerializerRegistry {
3232

3333
private static final Map<ScalarFunction, DelegatedPredicateSerializer> SERIALIZERS = Map.ofEntries(
34-
Map.entry(ScalarFunction.MATCH, QuerySerializerRegistry::serializeMatch),
35-
Map.entry(ScalarFunction.MATCH_PHRASE, QuerySerializerRegistry::serializeMatchPhrase),
36-
Map.entry(ScalarFunction.MATCH_BOOL_PREFIX, QuerySerializerRegistry::serializeMatchBoolPrefix),
37-
Map.entry(ScalarFunction.MATCH_PHRASE_PREFIX, QuerySerializerRegistry::serializeMatchPhrasePrefix),
38-
Map.entry(ScalarFunction.MULTI_MATCH, QuerySerializerRegistry::serializeMultiMatch),
39-
Map.entry(ScalarFunction.QUERY_STRING, QuerySerializerRegistry::serializeQueryString),
40-
Map.entry(ScalarFunction.SIMPLE_QUERY_STRING, QuerySerializerRegistry::serializeSimpleQueryString)
34+
Map.entry(ScalarFunction.MATCH, new MatchSerializer()),
35+
Map.entry(ScalarFunction.MATCH_PHRASE, new MatchPhraseSerializer()),
36+
Map.entry(ScalarFunction.MATCH_BOOL_PREFIX, new MatchBoolPrefixSerializer()),
37+
Map.entry(ScalarFunction.MATCH_PHRASE_PREFIX, new MatchPhrasePrefixSerializer()),
38+
Map.entry(ScalarFunction.MULTI_MATCH, new MultiMatchSerializer()),
39+
Map.entry(ScalarFunction.QUERY_STRING, new QueryStringSerializer()),
40+
Map.entry(ScalarFunction.SIMPLE_QUERY_STRING, new SimpleQueryStringSerializer()),
41+
Map.entry(ScalarFunction.WILDCARD_QUERY, new WildcardQuerySerializer()),
42+
Map.entry(ScalarFunction.QUERY, new QuerySerializer()),
43+
Map.entry(ScalarFunction.MATCHALL, new MatchAllSerializer())
4144
);
4245

4346
private QuerySerializerRegistry() {}
4447

4548
static Map<ScalarFunction, DelegatedPredicateSerializer> getSerializers() {
4649
return SERIALIZERS;
4750
}
48-
49-
// TODO: Extract each serialize* method into its own dedicated class once we handle more parameters.
50-
// These methods are expected to grow significantly as optional parameters are added.
51-
52-
private static byte[] serializeMatch(RexCall call, List<FieldStorageInfo> fieldStorage) {
53-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
54-
if (operands.fieldName() == null || operands.query() == null) {
55-
throw new IllegalArgumentException("match requires 'field' and 'query' parameters, got: " + call);
56-
}
57-
// TODO: extract optional params (operator, analyzer, fuzziness, boost)
58-
MatchQueryBuilder queryBuilder = new MatchQueryBuilder(operands.fieldName(), operands.query());
59-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
60-
}
61-
62-
private static byte[] serializeMatchPhrase(RexCall call, List<FieldStorageInfo> fieldStorage) {
63-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
64-
if (operands.fieldName() == null || operands.query() == null) {
65-
throw new IllegalArgumentException("match_phrase requires 'field' and 'query' parameters, got: " + call);
66-
}
67-
// TODO: extract optional params (slop, analyzer, zero_terms_query)
68-
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder(operands.fieldName(), operands.query());
69-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
70-
}
71-
72-
private static byte[] serializeMatchBoolPrefix(RexCall call, List<FieldStorageInfo> fieldStorage) {
73-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
74-
if (operands.fieldName() == null || operands.query() == null) {
75-
throw new IllegalArgumentException("match_bool_prefix requires 'field' and 'query' parameters, got: " + call);
76-
}
77-
// TODO: extract optional params (analyzer, fuzziness, operator, minimum_should_match)
78-
MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(operands.fieldName(), operands.query());
79-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
80-
}
81-
82-
private static byte[] serializeMatchPhrasePrefix(RexCall call, List<FieldStorageInfo> fieldStorage) {
83-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
84-
if (operands.fieldName() == null || operands.query() == null) {
85-
throw new IllegalArgumentException("match_phrase_prefix requires 'field' and 'query' parameters, got: " + call);
86-
}
87-
// TODO: extract optional params (slop, analyzer, max_expansions, zero_terms_query)
88-
MatchPhrasePrefixQueryBuilder queryBuilder = new MatchPhrasePrefixQueryBuilder(operands.fieldName(), operands.query());
89-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
90-
}
91-
92-
private static byte[] serializeMultiMatch(RexCall call, List<FieldStorageInfo> fieldStorage) {
93-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
94-
if (operands.query() == null) {
95-
throw new IllegalArgumentException("multi_match requires a 'query' parameter, got: " + call);
96-
}
97-
// TODO: extract per-field boost values and optional params (type, operator, analyzer, fuzziness)
98-
List<String> fields = operands.fields();
99-
MultiMatchQueryBuilder queryBuilder = fields != null
100-
? new MultiMatchQueryBuilder(operands.query(), fields.toArray(String[]::new))
101-
: new MultiMatchQueryBuilder(operands.query());
102-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
103-
}
104-
105-
private static byte[] serializeQueryString(RexCall call, List<FieldStorageInfo> fieldStorage) {
106-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
107-
if (operands.query() == null) {
108-
throw new IllegalArgumentException("query_string requires a 'query' parameter, got: " + call);
109-
}
110-
// TODO: extract optional params (default_operator, analyzer, allow_leading_wildcard)
111-
QueryStringQueryBuilder queryBuilder = new QueryStringQueryBuilder(operands.query());
112-
if (operands.fields() != null) {
113-
for (String field : operands.fields()) {
114-
queryBuilder.field(field);
115-
}
116-
}
117-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
118-
}
119-
120-
private static byte[] serializeSimpleQueryString(RexCall call, List<FieldStorageInfo> fieldStorage) {
121-
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
122-
if (operands.query() == null) {
123-
throw new IllegalArgumentException("simple_query_string requires a 'query' parameter, got: " + call);
124-
}
125-
// TODO: extract optional params (default_operator, analyzer, flags, minimum_should_match)
126-
SimpleQueryStringBuilder queryBuilder = new SimpleQueryStringBuilder(operands.query());
127-
if (operands.fields() != null) {
128-
for (String field : operands.fields()) {
129-
queryBuilder.field(field);
130-
}
131-
}
132-
return ConversionUtils.serializeQueryBuilder(queryBuilder);
133-
}
13451
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.be.lucene.serializers;
10+
11+
import org.apache.calcite.rex.RexCall;
12+
import org.opensearch.analytics.spi.DelegatedPredicateSerializer;
13+
import org.opensearch.analytics.spi.FieldStorageInfo;
14+
import org.opensearch.be.lucene.ConversionUtils;
15+
import org.opensearch.index.query.QueryBuilder;
16+
17+
import java.util.List;
18+
19+
/**
20+
* Base class for query serializers. Implements the {@link DelegatedPredicateSerializer}
21+
* contract by delegating to a template method that builds the {@link QueryBuilder}.
22+
*/
23+
public abstract class AbstractQuerySerializer implements DelegatedPredicateSerializer {
24+
25+
@Override
26+
public final byte[] serialize(RexCall call, List<FieldStorageInfo> fieldStorage) {
27+
QueryBuilder queryBuilder = buildQueryBuilder(call, fieldStorage);
28+
return ConversionUtils.serializeQueryBuilder(queryBuilder);
29+
}
30+
31+
protected abstract QueryBuilder buildQueryBuilder(RexCall call, List<FieldStorageInfo> fieldStorage);
32+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.be.lucene.serializers;
10+
11+
import org.apache.calcite.rex.RexCall;
12+
import org.opensearch.analytics.spi.FieldStorageInfo;
13+
import org.opensearch.be.lucene.ConversionUtils;
14+
import org.opensearch.index.query.QueryBuilder;
15+
16+
import java.util.List;
17+
import java.util.Map;
18+
19+
/**
20+
* Base class for relevance function serializers. Handles the common pattern of
21+
* extracting operands, validating, creating the query builder, and applying
22+
* optional parameters.
23+
*/
24+
public abstract class AbstractRelevanceSerializer extends AbstractQuerySerializer {
25+
26+
@Override
27+
protected final QueryBuilder buildQueryBuilder(RexCall call, List<FieldStorageInfo> fieldStorage) {
28+
ConversionUtils.RelevanceOperands operands = ConversionUtils.extractRelevanceOperands(call, fieldStorage);
29+
validate(operands);
30+
QueryBuilder qb = createQueryBuilder(operands);
31+
Map<String, String> params = ConversionUtils.extractOptionalParams(call, optionalParamsStartIndex());
32+
applyParams(qb, params);
33+
return qb;
34+
}
35+
36+
protected abstract QueryBuilder createQueryBuilder(ConversionUtils.RelevanceOperands operands);
37+
38+
protected abstract String functionName();
39+
40+
protected void applyParams(QueryBuilder qb, Map<String, String> params) {}
41+
42+
protected void validate(ConversionUtils.RelevanceOperands operands) {
43+
if (operands.fieldName() == null || operands.query() == null) {
44+
throw new IllegalArgumentException(functionName() + " requires 'field' and 'query' parameters");
45+
}
46+
}
47+
48+
protected int optionalParamsStartIndex() {
49+
return 2;
50+
}
51+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.be.lucene.serializers;
10+
11+
import org.apache.calcite.rex.RexCall;
12+
import org.opensearch.analytics.spi.FieldStorageInfo;
13+
import org.opensearch.index.query.MatchAllQueryBuilder;
14+
import org.opensearch.index.query.QueryBuilder;
15+
16+
import java.util.List;
17+
18+
/**
19+
* Serializer for the MATCHALL function.
20+
* Extends AbstractQuerySerializer directly (not AbstractRelevanceSerializer)
21+
* because MATCHALL is a zero-argument function with no MAP_VALUE_CONSTRUCTOR operands.
22+
*/
23+
public class MatchAllSerializer extends AbstractQuerySerializer {
24+
25+
@Override
26+
protected QueryBuilder buildQueryBuilder(RexCall call, List<FieldStorageInfo> fieldStorage) {
27+
return new MatchAllQueryBuilder();
28+
}
29+
}

0 commit comments

Comments
 (0)