Skip to content

Commit 164c3f0

Browse files
authored
Update syntax: like(string, PATTERN[, case_sensitive]) (opensearch-project#4837)
* Change like function to case-sensitive and add ilike for case-insensitive Signed-off-by: Lantao Jin <ltjin@amazon.com> * change plan in clickbench Signed-off-by: Lantao Jin <ltjin@amazon.com> * fix ut Signed-off-by: Lantao Jin <ltjin@amazon.com> * fix UT Signed-off-by: Lantao Jin <ltjin@amazon.com> * Update syntax: like(string, PATTERN[, case_sensitive]) Signed-off-by: Lantao Jin <ltjin@amazon.com> * fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> * Fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> * revert import merging Signed-off-by: Lantao Jin <ltjin@amazon.com> * support like(string, PATTERN, bool) in v2 Signed-off-by: Lantao Jin <ltjin@amazon.com> * dedup ut Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 5049a03 commit 164c3f0

54 files changed

Lines changed: 561 additions & 95 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,15 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) {
418418
}
419419
}
420420

421+
if ("LIKE".equalsIgnoreCase(node.getFuncName()) && arguments.size() == 2) {
422+
RexNode defaultCaseSensitive =
423+
CalcitePlanContext.isLegacyPreferred()
424+
? context.rexBuilder.makeLiteral(false)
425+
: context.rexBuilder.makeLiteral(true);
426+
arguments = new ArrayList<>(arguments);
427+
arguments.add(defaultCaseSensitive);
428+
}
429+
421430
RexNode resolvedNode =
422431
PPLFuncImpTable.INSTANCE.resolve(
423432
context.rexBuilder, node.getFuncName(), arguments.toArray(new RexNode[0]));

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ public enum BuiltinFunctionName {
193193
GREATER(FunctionName.of(">")),
194194
GTE(FunctionName.of(">=")),
195195
LIKE(FunctionName.of("like")),
196+
ILIKE(FunctionName.of("ilike")),
196197
NOT_LIKE(FunctionName.of("not like")),
197198

198199
/** Aggregation Function. */

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY;
8080
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF;
8181
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IFNULL;
82+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ILIKE;
8283
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_GROK;
8384
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM;
8485
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE;
@@ -327,6 +328,18 @@ default RexNode resolve(RexBuilder builder, RexNode... args) {
327328
}
328329
}
329330

331+
public interface FunctionImp3 extends FunctionImp {
332+
RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2, RexNode arg3);
333+
334+
@Override
335+
default RexNode resolve(RexBuilder builder, RexNode... args) {
336+
if (args.length != 3) {
337+
throw new IllegalArgumentException("This function requires exactly 3 arguments");
338+
}
339+
return resolve(builder, args[0], args[1], args[2]);
340+
}
341+
}
342+
330343
/** The singleton instance. */
331344
public static final PPLFuncImpTable INSTANCE;
332345

@@ -1216,17 +1229,22 @@ void populate() {
12161229
arg))),
12171230
PPLTypeChecker.family(SqlTypeFamily.ANY));
12181231
register(
1219-
LIKE,
1232+
ILIKE,
12201233
(FunctionImp2)
12211234
(builder, arg1, arg2) ->
12221235
builder.makeCall(
1223-
SqlLibraryOperators.ILIKE,
1224-
arg1,
1225-
arg2,
1226-
// TODO: Figure out escaping solution. '\\' is used for JSON input but is not
1227-
// necessary for SQL function input
1228-
builder.makeLiteral("\\")),
1236+
SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")),
12291237
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING));
1238+
register(
1239+
LIKE,
1240+
(FunctionImp3)
1241+
(builder, arg1, arg2, arg3) ->
1242+
((RexLiteral) arg3).getValueAs(Boolean.class)
1243+
? builder.makeCall(
1244+
SqlStdOperatorTable.LIKE, arg1, arg2, builder.makeLiteral("\\"))
1245+
: builder.makeCall(
1246+
SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")),
1247+
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.BOOLEAN));
12301248
}
12311249
}
12321250

core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,8 @@ private static DefaultFunctionResolver gte() {
387387
private static DefaultFunctionResolver like() {
388388
return define(
389389
BuiltinFunctionName.LIKE.getName(),
390-
impl(nullMissingHandling(OperatorUtils::matches), BOOLEAN, STRING, STRING));
390+
impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING),
391+
impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN));
391392
}
392393

393394
private static DefaultFunctionResolver regexp() {
@@ -401,10 +402,17 @@ private static DefaultFunctionResolver notLike() {
401402
BuiltinFunctionName.NOT_LIKE.getName(),
402403
impl(
403404
nullMissingHandling(
404-
(v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches(v1, v2))),
405+
(v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches2(v1, v2))),
405406
BOOLEAN,
406407
STRING,
407-
STRING));
408+
STRING),
409+
impl(
410+
nullMissingHandling(
411+
(v1, v2, v3) -> UnaryPredicateOperators.not(OperatorUtils.matches3(v1, v2, v3))),
412+
BOOLEAN,
413+
STRING,
414+
STRING,
415+
BOOLEAN));
408416
}
409417

410418
private static ExprValue lookupTableFunction(

core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,31 @@ public class OperatorUtils {
2121
* @param pattern string pattern to match.
2222
* @return if text matches pattern returns true; else return false.
2323
*/
24-
public static ExprBooleanValue matches(ExprValue text, ExprValue pattern) {
24+
public static ExprBooleanValue matches2(ExprValue text, ExprValue pattern) {
2525
return ExprBooleanValue.of(
2626
Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE)
2727
.matcher(text.stringValue())
2828
.matches());
2929
}
3030

31+
/**
32+
* Wildcard pattern matcher util.<br>
33+
* Percent (%) character for wildcard,<br>
34+
* Underscore (_) character for a single character match.
35+
*
36+
* @param pattern string pattern to match.
37+
* @param caseSensitive indicate the case sensitivity of the pattern.
38+
* @return if text matches pattern returns true; else return false.
39+
*/
40+
public static ExprBooleanValue matches3(
41+
ExprValue text, ExprValue pattern, ExprValue caseSensitive) {
42+
Pattern p =
43+
caseSensitive.booleanValue()
44+
? Pattern.compile(patternToRegex(pattern.stringValue()))
45+
: Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE);
46+
return ExprBooleanValue.of(p.matcher(text.stringValue()).matches());
47+
}
48+
3149
/**
3250
* Checks if text matches regular expression pattern.
3351
*

core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
2323
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
2424
import static org.opensearch.sql.utils.ComparisonUtil.compare;
25-
import static org.opensearch.sql.utils.OperatorUtils.matches;
25+
import static org.opensearch.sql.utils.OperatorUtils.matches2;
2626

2727
import com.google.common.collect.ImmutableList;
2828
import com.google.common.collect.ImmutableMap;
@@ -554,7 +554,7 @@ public void test_gte(ExprValue v1, ExprValue v2) {
554554
public void test_like(ExprValue v1, ExprValue v2) {
555555
FunctionExpression like = DSL.like(DSL.literal(v1), DSL.literal(v2));
556556
assertEquals(BOOLEAN, like.type());
557-
assertEquals(matches(v1, v2), like.valueOf(valueEnv()));
557+
assertEquals(matches2(v1, v2), like.valueOf(valueEnv()));
558558
assertEquals(String.format("like(%s, %s)", v1.toString(), v2.toString()), like.toString());
559559
}
560560

docs/user/ppl/functions/string.rst

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,33 +80,69 @@ Example::
8080
| 10 |
8181
+----------------------+
8282

83-
8483
LIKE
8584
----
8685

8786
Description
8887
>>>>>>>>>>>
8988

90-
Usage: like(string, PATTERN) return true if the string match the PATTERN, PATTERN is case insensitive.
89+
Usage: like(string, PATTERN[, case_sensitive]) returns true if the string matches the PATTERN. ``case_sensitive`` is optional. When set to ``true``, PATTERN is **case-sensitive**; when set to ``false``, PATTERN is **case-insensitive**. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``.
90+
91+
* When ``plugins.ppl.syntax.legacy.preferred=true``, ``case_sensitive`` defaults to ``false``
92+
* When ``plugins.ppl.syntax.legacy.preferred=false``, ``case_sensitive`` defaults to ``true``
9193

9294
There are two wildcards often used in conjunction with the LIKE operator:
9395

9496
* ``%`` - The percent sign represents zero, one, or multiple characters
9597
* ``_`` - The underscore represents a single character
9698

99+
Argument type: STRING, STRING [, BOOLEAN]
100+
101+
Return type: BOOLEAN
102+
97103
Example::
98104

99-
os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ELLO%') | fields `LIKE('hello world', '_ello%')`
105+
os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ello%'), `LIKE('hello world', '_ELLo%', true)` = LIKE('hello world', '_ELLo%', true), `LIKE('hello world', '_ELLo%', false)` = LIKE('hello world', '_ELLo%', false) | fields `LIKE('hello world', '_ello%')`, `LIKE('hello world', '_ELLo%', true)`, `LIKE('hello world', '_ELLo%', false)`
100106
fetched rows / total rows = 1/1
101-
+-------------------------------+
102-
| LIKE('hello world', '_ello%') |
103-
|-------------------------------|
104-
| True |
105-
+-------------------------------+
107+
+-------------------------------+-------------------------------------+--------------------------------------+
108+
| LIKE('hello world', '_ello%') | LIKE('hello world', '_ELLo%', true) | LIKE('hello world', '_ELLo%', false) |
109+
|-------------------------------+-------------------------------------+--------------------------------------|
110+
| True | False | True |
111+
+-------------------------------+-------------------------------------+--------------------------------------+
106112

107113

108114
Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields.
109115

116+
ILIKE
117+
-----
118+
119+
Description
120+
>>>>>>>>>>>
121+
122+
Usage: ilike(string, PATTERN) returns true if the string matches the PATTERN. PATTERN is **case-insensitive**.
123+
124+
There are two wildcards often used in conjunction with the ILIKE operator:
125+
126+
* ``%`` - The percent sign represents zero, one, or multiple characters
127+
* ``_`` - The underscore represents a single character
128+
129+
Argument type: STRING, STRING
130+
131+
Return type: BOOLEAN
132+
133+
Example::
134+
135+
os> source=people | eval `ILIKE('hello world', '_ELLo%')` = ILIKE('hello world', '_ELLo%') | fields `ILIKE('hello world', '_ELLo%')`
136+
fetched rows / total rows = 1/1
137+
+--------------------------------+
138+
| ILIKE('hello world', '_ELLo%') |
139+
|--------------------------------|
140+
| True |
141+
+--------------------------------+
142+
143+
144+
Limitation: The pushdown of the ILIKE function to a DSL wildcard query is supported only for keyword fields.
145+
110146
LOCATE
111147
-------
112148

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,26 @@ public void testStreamstatsResetExplain() throws IOException {
667667
assertYamlEqualsIgnoreId(expected, result);
668668
}
669669

670+
@Test
671+
public void testKeywordILikeFunctionExplain() throws IOException {
672+
// ilike is only supported in v3
673+
String expected = loadExpectedPlan("explain_keyword_ilike_function.yaml");
674+
assertYamlEqualsIgnoreId(
675+
expected,
676+
explainQueryYaml(
677+
"source=opensearch-sql_test_index_account | where ilike(firstname, '%mbe%')"));
678+
}
679+
680+
@Test
681+
public void testTextILikeFunctionExplain() throws IOException {
682+
// ilike is only supported in v3
683+
String expected = loadExpectedPlan("explain_text_ilike_function.yaml");
684+
assertYamlEqualsIgnoreId(
685+
expected,
686+
explainQueryYaml(
687+
"source=opensearch-sql_test_index_account | where ilike(address, '%Holmes%')"));
688+
}
689+
670690
// Only for Calcite, as v2 gets unstable serialized string for function
671691
@Test
672692
public void testExplainOnAggregationWithSumEnhancement() throws IOException {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD;
9+
import static org.opensearch.sql.util.MatcherUtils.rows;
10+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows;
12+
813
import java.io.IOException;
14+
import org.json.JSONObject;
915
import org.junit.Test;
16+
import org.opensearch.sql.common.setting.Settings;
1017
import org.opensearch.sql.ppl.LikeQueryIT;
1118

1219
public class CalciteLikeQueryIT extends LikeQueryIT {
@@ -22,4 +29,61 @@ public void test_convert_field_text_to_keyword() throws IOException {
2229
enabledOnlyWhenPushdownIsEnabled();
2330
super.test_convert_field_text_to_keyword();
2431
}
32+
33+
@Test
34+
public void test_ilike_is_case_insensitive() throws IOException {
35+
String query =
36+
"source="
37+
+ TEST_INDEX_WILDCARD
38+
+ " | WHERE ILike(KeywordBody, 'test Wildcard%') | fields KeywordBody";
39+
JSONObject result = executeQuery(query);
40+
verifyDataRows(
41+
result,
42+
rows("test wildcard"),
43+
rows("test wildcard in the end of the text%"),
44+
rows("test wildcard in % the middle of the text"),
45+
rows("test wildcard %% beside each other"),
46+
rows("test wildcard in the end of the text_"),
47+
rows("test wildcard in _ the middle of the text"),
48+
rows("test wildcard __ beside each other"));
49+
}
50+
51+
@Test
52+
public void test_the_default_3rd_option() throws IOException {
53+
// only works in v3
54+
String query =
55+
"source="
56+
+ TEST_INDEX_WILDCARD
57+
+ " | WHERE Like(KeywordBody, 'test Wildcard%') | fields KeywordBody";
58+
withSettings(
59+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
60+
"true",
61+
() -> {
62+
try {
63+
JSONObject result = executeQuery(query);
64+
verifyDataRows(
65+
result,
66+
rows("test wildcard"),
67+
rows("test wildcard in the end of the text%"),
68+
rows("test wildcard in % the middle of the text"),
69+
rows("test wildcard %% beside each other"),
70+
rows("test wildcard in the end of the text_"),
71+
rows("test wildcard in _ the middle of the text"),
72+
rows("test wildcard __ beside each other"));
73+
} catch (IOException e) {
74+
throw new RuntimeException(e);
75+
}
76+
});
77+
withSettings(
78+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
79+
"false",
80+
() -> {
81+
try {
82+
JSONObject result = executeQuery(query);
83+
verifyNumOfRows(result, 0);
84+
} catch (IOException e) {
85+
throw new RuntimeException(e);
86+
}
87+
});
88+
}
2589
}

0 commit comments

Comments
 (0)