Skip to content

Commit f0ca094

Browse files
committed
Merge remote-tracking branch 'origin/main' into issues/4576
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
2 parents bc7631e + 164c3f0 commit f0ca094

91 files changed

Lines changed: 990 additions & 154 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,15 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) {
418418
}
419419
}
420420

421+
if ("LIKE".equalsIgnoreCase(node.getFuncName()) && arguments.size() == 2) {
422+
RexNode defaultCaseSensitive =
423+
CalcitePlanContext.isLegacyPreferred()
424+
? context.rexBuilder.makeLiteral(false)
425+
: context.rexBuilder.makeLiteral(true);
426+
arguments = new ArrayList<>(arguments);
427+
arguments.add(defaultCaseSensitive);
428+
}
429+
421430
RexNode resolvedNode =
422431
PPLFuncImpTable.INSTANCE.resolve(
423432
context.rexBuilder, node.getFuncName(), arguments.toArray(new RexNode[0]));

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public enum BuiltinFunctionName {
7575
MVAPPEND(FunctionName.of("mvappend")),
7676
MVJOIN(FunctionName.of("mvjoin")),
7777
MVINDEX(FunctionName.of("mvindex")),
78+
MVDEDUP(FunctionName.of("mvdedup")),
7879
FORALL(FunctionName.of("forall")),
7980
EXISTS(FunctionName.of("exists")),
8081
FILTER(FunctionName.of("filter")),
@@ -192,6 +193,7 @@ public enum BuiltinFunctionName {
192193
GREATER(FunctionName.of(">")),
193194
GTE(FunctionName.of(">=")),
194195
LIKE(FunctionName.of("like")),
196+
ILIKE(FunctionName.of("ilike")),
195197
NOT_LIKE(FunctionName.of("not like")),
196198

197199
/** Aggregation Function. */

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY;
8080
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF;
8181
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IFNULL;
82+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ILIKE;
8283
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_GROK;
8384
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM;
8485
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE;
@@ -150,6 +151,7 @@
150151
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION;
151152
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH;
152153
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND;
154+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVDEDUP;
153155
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX;
154156
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN;
155157
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT;
@@ -326,6 +328,18 @@ default RexNode resolve(RexBuilder builder, RexNode... args) {
326328
}
327329
}
328330

331+
public interface FunctionImp3 extends FunctionImp {
332+
RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2, RexNode arg3);
333+
334+
@Override
335+
default RexNode resolve(RexBuilder builder, RexNode... args) {
336+
if (args.length != 3) {
337+
throw new IllegalArgumentException("This function requires exactly 3 arguments");
338+
}
339+
return resolve(builder, args[0], args[1], args[2]);
340+
}
341+
}
342+
329343
/** The singleton instance. */
330344
public static final PPLFuncImpTable INSTANCE;
331345

@@ -989,6 +1003,7 @@ void populate() {
9891003

9901004
registerOperator(ARRAY, PPLBuiltinOperators.ARRAY);
9911005
registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND);
1006+
registerOperator(MVDEDUP, SqlLibraryOperators.ARRAY_DISTINCT);
9921007
registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND);
9931008
registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT);
9941009
registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE);
@@ -1214,17 +1229,22 @@ void populate() {
12141229
arg))),
12151230
PPLTypeChecker.family(SqlTypeFamily.ANY));
12161231
register(
1217-
LIKE,
1232+
ILIKE,
12181233
(FunctionImp2)
12191234
(builder, arg1, arg2) ->
12201235
builder.makeCall(
1221-
SqlLibraryOperators.ILIKE,
1222-
arg1,
1223-
arg2,
1224-
// TODO: Figure out escaping solution. '\\' is used for JSON input but is not
1225-
// necessary for SQL function input
1226-
builder.makeLiteral("\\")),
1236+
SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")),
12271237
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING));
1238+
register(
1239+
LIKE,
1240+
(FunctionImp3)
1241+
(builder, arg1, arg2, arg3) ->
1242+
((RexLiteral) arg3).getValueAs(Boolean.class)
1243+
? builder.makeCall(
1244+
SqlStdOperatorTable.LIKE, arg1, arg2, builder.makeLiteral("\\"))
1245+
: builder.makeCall(
1246+
SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")),
1247+
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.BOOLEAN));
12281248
}
12291249
}
12301250

core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,8 @@ private static DefaultFunctionResolver gte() {
387387
private static DefaultFunctionResolver like() {
388388
return define(
389389
BuiltinFunctionName.LIKE.getName(),
390-
impl(nullMissingHandling(OperatorUtils::matches), BOOLEAN, STRING, STRING));
390+
impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING),
391+
impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN));
391392
}
392393

393394
private static DefaultFunctionResolver regexp() {
@@ -401,10 +402,17 @@ private static DefaultFunctionResolver notLike() {
401402
BuiltinFunctionName.NOT_LIKE.getName(),
402403
impl(
403404
nullMissingHandling(
404-
(v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches(v1, v2))),
405+
(v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches2(v1, v2))),
405406
BOOLEAN,
406407
STRING,
407-
STRING));
408+
STRING),
409+
impl(
410+
nullMissingHandling(
411+
(v1, v2, v3) -> UnaryPredicateOperators.not(OperatorUtils.matches3(v1, v2, v3))),
412+
BOOLEAN,
413+
STRING,
414+
STRING,
415+
BOOLEAN));
408416
}
409417

410418
private static ExprValue lookupTableFunction(

core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,31 @@ public class OperatorUtils {
2121
* @param pattern string pattern to match.
2222
* @return if text matches pattern returns true; else return false.
2323
*/
24-
public static ExprBooleanValue matches(ExprValue text, ExprValue pattern) {
24+
public static ExprBooleanValue matches2(ExprValue text, ExprValue pattern) {
2525
return ExprBooleanValue.of(
2626
Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE)
2727
.matcher(text.stringValue())
2828
.matches());
2929
}
3030

31+
/**
32+
* Wildcard pattern matcher util.<br>
33+
* Percent (%) character for wildcard,<br>
34+
* Underscore (_) character for a single character match.
35+
*
36+
* @param pattern string pattern to match.
37+
* @param caseSensitive true to match the pattern case-sensitively, false to ignore case.
38+
* @return if text matches pattern returns true; else return false.
39+
*/
40+
public static ExprBooleanValue matches3(
41+
ExprValue text, ExprValue pattern, ExprValue caseSensitive) {
42+
Pattern p =
43+
caseSensitive.booleanValue()
44+
? Pattern.compile(patternToRegex(pattern.stringValue()))
45+
: Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE);
46+
return ExprBooleanValue.of(p.matcher(text.stringValue()).matches());
47+
}
48+
3149
/**
3250
* Checks if text matches regular expression pattern.
3351
*

core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
2323
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
2424
import static org.opensearch.sql.utils.ComparisonUtil.compare;
25-
import static org.opensearch.sql.utils.OperatorUtils.matches;
25+
import static org.opensearch.sql.utils.OperatorUtils.matches2;
2626

2727
import com.google.common.collect.ImmutableList;
2828
import com.google.common.collect.ImmutableMap;
@@ -554,7 +554,7 @@ public void test_gte(ExprValue v1, ExprValue v2) {
554554
public void test_like(ExprValue v1, ExprValue v2) {
555555
FunctionExpression like = DSL.like(DSL.literal(v1), DSL.literal(v2));
556556
assertEquals(BOOLEAN, like.type());
557-
assertEquals(matches(v1, v2), like.valueOf(valueEnv()));
557+
assertEquals(matches2(v1, v2), like.valueOf(valueEnv()));
558558
assertEquals(String.format("like(%s, %s)", v1.toString(), v2.toString()), like.toString());
559559
}
560560

docs/user/ppl/functions/collection.rst

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,44 @@ Example::
302302
| [1,text,2.5] |
303303
+--------------+
304304

305+
MVDEDUP
306+
-------
307+
308+
Description
309+
>>>>>>>>>>>
310+
311+
Usage: mvdedup(array) removes duplicate values from a multivalue array while preserving the order of first occurrence. NULL elements are filtered out. Returns an array with duplicates removed, or null if the input is null.
312+
313+
Argument type: array: ARRAY
314+
315+
Return type: ARRAY
316+
317+
Example::
318+
319+
os> source=people | eval array = array(1, 2, 2, 3, 1, 4), result = mvdedup(array) | fields result | head 1
320+
fetched rows / total rows = 1/1
321+
+-----------+
322+
| result |
323+
|-----------|
324+
| [1,2,3,4] |
325+
+-----------+
326+
327+
os> source=people | eval array = array('z', 'a', 'z', 'b', 'a', 'c'), result = mvdedup(array) | fields result | head 1
328+
fetched rows / total rows = 1/1
329+
+-----------+
330+
| result |
331+
|-----------|
332+
| [z,a,b,c] |
333+
+-----------+
334+
335+
os> source=people | eval array = array(), result = mvdedup(array) | fields result | head 1
336+
fetched rows / total rows = 1/1
337+
+--------+
338+
| result |
339+
|--------|
340+
| [] |
341+
+--------+
342+
305343
MVINDEX
306344
-------
307345

docs/user/ppl/functions/string.rst

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,33 +80,69 @@ Example::
8080
| 10 |
8181
+----------------------+
8282

83-
8483
LIKE
8584
----
8685

8786
Description
8887
>>>>>>>>>>>
8988

90-
Usage: like(string, PATTERN) return true if the string match the PATTERN, PATTERN is case insensitive.
89+
Usage: like(string, PATTERN[, case_sensitive]) returns true if the string matches the PATTERN. ``case_sensitive`` is optional. When set to ``true``, PATTERN is **case-sensitive**; when set to ``false``, PATTERN is **case-insensitive**. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``.
90+
91+
* When ``plugins.ppl.syntax.legacy.preferred=true``, ``case_sensitive`` defaults to ``false``
92+
* When ``plugins.ppl.syntax.legacy.preferred=false``, ``case_sensitive`` defaults to ``true``
9193

9294
There are two wildcards often used in conjunction with the LIKE operator:
9395

9496
* ``%`` - The percent sign represents zero, one, or multiple characters
9597
* ``_`` - The underscore represents a single character
9698

99+
Argument type: STRING, STRING [, BOOLEAN]
100+
101+
Return type: BOOLEAN
102+
97103
Example::
98104

99-
os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ELLO%') | fields `LIKE('hello world', '_ello%')`
105+
os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ello%'), `LIKE('hello world', '_ELLo%', true)` = LIKE('hello world', '_ELLo%', true), `LIKE('hello world', '_ELLo%', false)` = LIKE('hello world', '_ELLo%', false) | fields `LIKE('hello world', '_ello%')`, `LIKE('hello world', '_ELLo%', true)`, `LIKE('hello world', '_ELLo%', false)`
100106
fetched rows / total rows = 1/1
101-
+-------------------------------+
102-
| LIKE('hello world', '_ello%') |
103-
|-------------------------------|
104-
| True |
105-
+-------------------------------+
107+
+-------------------------------+-------------------------------------+--------------------------------------+
108+
| LIKE('hello world', '_ello%') | LIKE('hello world', '_ELLo%', true) | LIKE('hello world', '_ELLo%', false) |
109+
|-------------------------------+-------------------------------------+--------------------------------------|
110+
| True | False | True |
111+
+-------------------------------+-------------------------------------+--------------------------------------+
106112

107113

108114
Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields.
109115

116+
ILIKE
117+
-----
118+
119+
Description
120+
>>>>>>>>>>>
121+
122+
Usage: ilike(string, PATTERN) returns true if the string matches the PATTERN. PATTERN is **case-insensitive**.
123+
124+
There are two wildcards often used in conjunction with the ILIKE operator:
125+
126+
* ``%`` - The percent sign represents zero, one, or multiple characters
127+
* ``_`` - The underscore represents a single character
128+
129+
Argument type: STRING, STRING
130+
131+
Return type: BOOLEAN
132+
133+
Example::
134+
135+
os> source=people | eval `ILIKE('hello world', '_ELLo%')` = ILIKE('hello world', '_ELLo%') | fields `ILIKE('hello world', '_ELLo%')`
136+
fetched rows / total rows = 1/1
137+
+--------------------------------+
138+
| ILIKE('hello world', '_ELLo%') |
139+
|--------------------------------|
140+
| True |
141+
+--------------------------------+
142+
143+
144+
Limitation: The pushdown of the ILIKE function to a DSL wildcard query is supported only for keyword fields.
145+
110146
LOCATE
111147
-------
112148

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,4 +489,82 @@ public void testMvindexRangeSingleElement() throws IOException {
489489
verifySchema(actual, schema("result", "array"));
490490
verifyDataRows(actual, rows(List.of(3)));
491491
}
492+
493+
@Test
494+
public void testMvdedupWithDuplicates() throws IOException {
495+
JSONObject actual =
496+
executeQuery(
497+
String.format(
498+
"source=%s | eval arr = array(1, 2, 2, 3, 1, 4), result = mvdedup(arr) | head 1 |"
499+
+ " fields result",
500+
TEST_INDEX_BANK));
501+
502+
verifySchema(actual, schema("result", "array"));
503+
verifyDataRows(actual, rows(List.of(1, 2, 3, 4)));
504+
}
505+
506+
@Test
507+
public void testMvdedupWithNoDuplicates() throws IOException {
508+
JSONObject actual =
509+
executeQuery(
510+
String.format(
511+
"source=%s | eval arr = array(1, 2, 3, 4), result = mvdedup(arr) | head 1 |"
512+
+ " fields result",
513+
TEST_INDEX_BANK));
514+
515+
verifySchema(actual, schema("result", "array"));
516+
verifyDataRows(actual, rows(List.of(1, 2, 3, 4)));
517+
}
518+
519+
@Test
520+
public void testMvdedupWithAllDuplicates() throws IOException {
521+
JSONObject actual =
522+
executeQuery(
523+
String.format(
524+
"source=%s | eval arr = array(5, 5, 5, 5), result = mvdedup(arr) | head 1 |"
525+
+ " fields result",
526+
TEST_INDEX_BANK));
527+
528+
verifySchema(actual, schema("result", "array"));
529+
verifyDataRows(actual, rows(List.of(5)));
530+
}
531+
532+
@Test
533+
public void testMvdedupWithEmptyArray() throws IOException {
534+
JSONObject actual =
535+
executeQuery(
536+
String.format(
537+
"source=%s | eval arr = array(), result = mvdedup(arr) | head 1 | fields result",
538+
TEST_INDEX_BANK));
539+
540+
verifySchema(actual, schema("result", "array"));
541+
verifyDataRows(actual, rows(List.of()));
542+
}
543+
544+
@Test
545+
public void testMvdedupWithStrings() throws IOException {
546+
JSONObject actual =
547+
executeQuery(
548+
String.format(
549+
"source=%s | eval arr = array('apple', 'banana', 'apple', 'cherry', 'banana'),"
550+
+ " result = mvdedup(arr) | head 1 | fields result",
551+
TEST_INDEX_BANK));
552+
553+
verifySchema(actual, schema("result", "array"));
554+
verifyDataRows(actual, rows(List.of("apple", "banana", "cherry")));
555+
}
556+
557+
@Test
558+
public void testMvdedupPreservesOrder() throws IOException {
559+
JSONObject actual =
560+
executeQuery(
561+
String.format(
562+
"source=%s | eval arr = array('z', 'a', 'z', 'b', 'a', 'c'), result ="
563+
+ " mvdedup(arr) | head 1 | fields result",
564+
TEST_INDEX_BANK));
565+
566+
verifySchema(actual, schema("result", "array"));
567+
// Should preserve first occurrence order: z, a, b, c
568+
verifyDataRows(actual, rows(List.of("z", "a", "b", "c")));
569+
}
492570
}

0 commit comments

Comments
 (0)