Skip to content

Commit b975641

Browse files
authored
[FLINK-39360][table] LIKE clause doesn't support some patterns
1 parent 34246de commit b975641

8 files changed

Lines changed: 294 additions & 58 deletions

File tree

flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/functions/SqlLikeUtils.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.flink.table.functions;
1919

2020
import org.apache.flink.annotation.Internal;
21+
import org.apache.flink.table.api.ValidationException;
2122

2223
import java.util.regex.Matcher;
2324
import java.util.regex.Pattern;
@@ -140,11 +141,11 @@ static String sqlToRegexLike(String sqlPattern, char escapeChar) {
140141
}
141142

142143
public static RuntimeException invalidEscapeCharacter(String s) {
143-
return new RuntimeException("Invalid escape character '" + s + "'");
144+
return new ValidationException("Invalid escape character '" + s + "'");
144145
}
145146

146147
public static RuntimeException invalidEscapeSequence(String s, int i) {
147-
return new RuntimeException("Invalid escape sequence '" + s + "', " + i);
148+
return new ValidationException("Invalid escape sequence '" + s + "', " + i);
148149
}
149150

150151
private static void similarEscapeRuleChecking(String sqlPattern, char escapeChar) {

flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/CodeGeneratorContext.scala

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ import org.apache.flink.table.runtime.util.collections._
3232
import org.apache.flink.table.types.DataType
3333
import org.apache.flink.table.types.logical._
3434
import org.apache.flink.table.types.logical.LogicalTypeRoot._
35-
import org.apache.flink.table.utils.DateTimeUtils
35+
import org.apache.flink.table.utils.{DateTimeUtils, EncodingUtils}
3636
import org.apache.flink.util.InstantiationUtil
3737

3838
import java.time.ZoneId
@@ -988,22 +988,21 @@ class CodeGeneratorContext(
988988
}
989989

990990
/**
991-
* Adds a reusable string constant to the member area of the generated class.
991+
* Adds a pre-escaped string constant to the reusable member area of the generated class.
992992
*
993-
* The string must be already escaped with
994-
* [[org.apache.flink.table.utils.EncodingUtils.escapeJava()]].
993+
* The string must already be escaped with [[EncodingUtils.escapeJava()]].
995994
*/
996-
def addReusableEscapedStringConstant(value: String): String = {
997-
reusableStringConstants.get(value) match {
995+
def addReusablePreEscapedStringConstant(alreadyEscapedValue: String): String = {
996+
reusableStringConstants.get(alreadyEscapedValue) match {
998997
case Some(field) => field
999998
case None =>
1000999
val field = newName(this, "str")
10011000
val stmt =
10021001
s"""
1003-
|private final $BINARY_STRING $field = $BINARY_STRING.fromString("$value");
1002+
|private final $BINARY_STRING $field = $BINARY_STRING.fromString("$alreadyEscapedValue");
10041003
""".stripMargin
10051004
reusableMemberStatements.add(stmt)
1006-
reusableStringConstants(value) = field
1005+
reusableStringConstants(alreadyEscapedValue) = field
10071006
field
10081007
}
10091008
}

flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/GenerateUtils.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,10 +306,12 @@ object GenerateUtils {
306306
// as they're not cheap to construct. For the other types, the return term is directly
307307
// the literal value
308308
case CHAR | VARCHAR =>
309-
val escapedValue =
310-
EncodingUtils.escapeJava(literalValue.asInstanceOf[BinaryStringData].toString)
311-
val field = ctx.addReusableEscapedStringConstant(escapedValue)
312-
generateNonNullLiteral(literalType, field, StringData.fromString(escapedValue))
309+
val str = literalValue.asInstanceOf[BinaryStringData]
310+
val field = ctx.addReusablePreEscapedStringConstant(EncodingUtils.escapeJava(str.toString))
311+
// The original (unescaped) value must be passed as literalValue;
312+
// all required escaping should be done in the corresponding code generation,
313+
// so that the literalValue can also be used directly when needed.
314+
generateNonNullLiteral(literalType, field, str)
313315

314316
case BINARY | VARBINARY =>
315317
val bytesVal = literalValue.asInstanceOf[Array[Byte]]

flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/GeneratedExpression.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@ import org.apache.flink.table.types.logical.LogicalType
3333
* @param resultType
3434
* type of the resultTerm
3535
* @param literalValue
36-
* None if the expression is not literal. Otherwise it represent the original object of the
37-
* literal.
36+
* Contains the literal value (as internal data structure) for deep literal inspection if the
37+
* originating expression was a literal. Literal inspection is useful for performance
38+
* optimizations. For example, figuring out whether a time parsing function ever produces
39+
* sub-second data by inspecting the "format" literal string. NOTE: The literal value is not
40+
* intended to be used in generated code; use `resultTerm` for this purpose. The literal value is
41+
* NOT escaped.
3842
*/
3943
case class GeneratedExpression(
4044
resultTerm: String,

flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/calls/LikeCallGen.scala

Lines changed: 47 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import org.apache.flink.table.planner.codegen.CodeGenUtils.{className, newName,
2323
import org.apache.flink.table.planner.codegen.GenerateUtils.generateCallIfArgsNotNull
2424
import org.apache.flink.table.runtime.functions.SqlLikeChainChecker
2525
import org.apache.flink.table.types.logical.{BooleanType, LogicalType}
26+
import org.apache.flink.table.utils.EncodingUtils
2627

2728
import java.util.regex.Pattern
2829

@@ -57,68 +58,73 @@ class LikeCallGen extends CallGenerator {
5758
!pattern.contains("_")
5859
} else {
5960
val escape = operands(2).literalValue.get.toString
60-
if ((escape.length == 2 && escape.charAt(0) != '\\') || escape.length > 2) {
61-
throw SqlLikeUtils.invalidEscapeCharacter(escape)
62-
}
63-
val escapeChar = escape.charAt(escape.length - 1)
64-
var matched = true
65-
var i = 0
66-
val newBuilder = new StringBuilder
67-
while (i < pattern.length && matched) {
68-
var c = pattern.charAt(i)
69-
if (c == '\\') {
70-
i += 1
71-
c = pattern.charAt(i)
61+
if (escape.isEmpty) {
62+
!pattern.contains("_")
63+
} else {
64+
if (escape.length > 1) {
65+
throw SqlLikeUtils.invalidEscapeCharacter(escape)
7266
}
73-
if (c == escapeChar) {
74-
if (i == (pattern.length - 1)) {
75-
throw SqlLikeUtils.invalidEscapeSequence(pattern, i)
76-
}
77-
val nextChar = pattern.charAt(i + 1)
78-
if (nextChar == '%') {
67+
val escapeChar = escape.charAt(escape.length - 1)
68+
var matched = true
69+
var i = 0
70+
val newBuilder = new StringBuilder
71+
while (i < pattern.length && matched) {
72+
val c = pattern.charAt(i)
73+
if (c == escapeChar) {
74+
if (i == (pattern.length - 1)) {
75+
throw SqlLikeUtils.invalidEscapeSequence(pattern, i)
76+
}
77+
val nextChar = pattern.charAt(i + 1)
78+
if (nextChar == '%') {
79+
matched = false
80+
} else if ((nextChar == '_') || (nextChar == escapeChar)) {
81+
newBuilder.append(nextChar)
82+
i += 1
83+
} else {
84+
throw SqlLikeUtils.invalidEscapeSequence(pattern, i)
85+
}
86+
} else if (c == '_') {
7987
matched = false
80-
} else if ((nextChar == '_') || (nextChar == escapeChar)) {
81-
newBuilder.append(nextChar)
82-
i += 1
8388
} else {
84-
throw SqlLikeUtils.invalidEscapeSequence(pattern, i)
89+
newBuilder.append(c)
8590
}
86-
} else if (c == '_') {
87-
matched = false
88-
} else {
89-
newBuilder.append(c)
91+
i += 1
9092
}
91-
i += 1
92-
}
9393

94-
if (matched) {
95-
newPattern = newBuilder.toString
94+
if (matched) {
95+
newPattern = newBuilder.toString
96+
}
97+
matched
9698
}
97-
matched
9899
}
99100

100101
if (allowQuick) {
101102
val noneMatcher = NONE_PATTERN.matcher(newPattern)
102103
val beginMatcher = BEGIN_PATTERN.matcher(newPattern)
103104
val endMatcher = END_PATTERN.matcher(newPattern)
104105
val middleMatcher = MIDDLE_PATTERN.matcher(newPattern)
106+
val escapedNewPattern = EncodingUtils.escapeJava(newPattern)
105107

106108
if (noneMatcher.matches()) {
107-
val reusePattern = ctx.addReusableEscapedStringConstant(newPattern)
109+
val reusePattern = ctx.addReusablePreEscapedStringConstant(escapedNewPattern)
108110
s"${terms.head}.equals($reusePattern)"
109111
} else if (beginMatcher.matches()) {
110-
val field = ctx.addReusableEscapedStringConstant(beginMatcher.group(1))
112+
val escapedStartValue = EncodingUtils.escapeJava(beginMatcher.group(1))
113+
val field = ctx.addReusablePreEscapedStringConstant(escapedStartValue)
111114
s"${terms.head}.startsWith($field)"
112115
} else if (endMatcher.matches()) {
113-
val field = ctx.addReusableEscapedStringConstant(endMatcher.group(1))
116+
val escapedEndValue = EncodingUtils.escapeJava(endMatcher.group(1))
117+
val field = ctx.addReusablePreEscapedStringConstant(escapedEndValue)
114118
s"${terms.head}.endsWith($field)"
115119
} else if (middleMatcher.matches()) {
116-
val field = ctx.addReusableEscapedStringConstant(middleMatcher.group(1))
120+
val escapedMiddleValue = EncodingUtils.escapeJava(middleMatcher.group(1))
121+
val field = ctx.addReusablePreEscapedStringConstant(escapedMiddleValue)
117122
s"${terms.head}.contains($field)"
118123
} else {
119124
val field = className[SqlLikeChainChecker]
120125
val checker = newName(ctx, "likeChainChecker")
121-
ctx.addReusableMember(s"$field $checker = new $field(${"\""}$newPattern${"\""});")
126+
ctx.addReusableMember(
127+
s"$field $checker = new $field(${"\""}$escapedNewPattern${"\""});")
122128
s"$checker.check(${terms.head})"
123129
}
124130
} else {
@@ -129,15 +135,18 @@ class LikeCallGen extends CallGenerator {
129135
val escape = if (operands.size == 2) {
130136
"null"
131137
} else {
138+
val escapedEscapeLiteral =
139+
EncodingUtils.escapeJava(operands(2).literalValue.get.toString)
132140
s"""
133-
|"${operands(2).literalValue.get}"
141+
|"$escapedEscapeLiteral"
134142
""".stripMargin
135143
}
144+
val escapedPatternLiteral = EncodingUtils.escapeJava(pattern)
136145
ctx.addReusableMember(
137146
s"""
138147
|$patternClass $patternName =
139148
| $patternClass.compile(
140-
| $likeClass.sqlToRegexLike("${operands(1).literalValue.get}", $escape));
149+
| $likeClass.sqlToRegexLike("$escapedPatternLiteral", $escape));
141150
|""".stripMargin)
142151
s"$patternName.matcher(${terms.head}.toString()).matches()"
143152
}

flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/calls/ScalarOperatorGens.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.flink.table.planner.codegen.calls
1919

2020
import org.apache.flink.table.api.ValidationException
2121
import org.apache.flink.table.api.config.ExecutionConfigOptions
22-
import org.apache.flink.table.data.binary.BinaryArrayData
22+
import org.apache.flink.table.data.binary.{BinaryArrayData, BinaryStringData}
2323
import org.apache.flink.table.data.util.MapDataUtil
2424
import org.apache.flink.table.data.utils.CastExecutor
2525
import org.apache.flink.table.data.writer.{BinaryArrayWriter, BinaryRowWriter}
@@ -41,6 +41,7 @@ import org.apache.flink.table.types.logical.utils.LogicalTypeChecks
4141
import org.apache.flink.table.types.logical.utils.LogicalTypeChecks.{getFieldTypes, getPrecision, getScale}
4242
import org.apache.flink.table.types.logical.utils.LogicalTypeMerging.findCommonType
4343
import org.apache.flink.table.utils.DateTimeUtils.MILLIS_PER_DAY
44+
import org.apache.flink.table.utils.EncodingUtils
4445
import org.apache.flink.types.ColumnList
4546
import org.apache.flink.util.Preconditions.checkArgument
4647

@@ -1803,6 +1804,7 @@ object ScalarOperatorGens {
18031804
}
18041805

18051806
try {
1807+
// No escaping here, as it will be done in primitiveLiteralForType according to the type of the literal value.
18061808
val result = castExecutor.cast(literalExpr.literalValue.get)
18071809
val resultTerm = newName(ctx, "stringToTime")
18081810

0 commit comments

Comments
 (0)