Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.starrocks.catalog.Type;
import com.starrocks.catalog.system.SystemId;
import com.starrocks.catalog.system.SystemTable;
import com.starrocks.common.PatternMatcher;
import com.starrocks.qe.ConnectContext;
import com.starrocks.service.InformationSchemaDataSource;
import com.starrocks.sql.analyzer.SemanticException;
Expand Down Expand Up @@ -168,10 +169,10 @@ public List<List<ScalarOperator>> evaluate(ScalarOperator predicate) {
ConstantOperator value = binary.getChild(1).cast();
switch (name.toUpperCase()) {
case "TABLE_NAME":
params.setTable_name(value.getVarchar());
params.setTable_name(PatternMatcher.escapeLikeValue(value.getVarchar()));
break;
case "TABLE_SCHEMA":
authInfo.setPattern(value.getVarchar());
authInfo.setPattern(PatternMatcher.escapeLikeValue(value.getVarchar()));
break;
default:
throw new NotImplementedException("unsupported column: " + name);
Expand Down
104 changes: 43 additions & 61 deletions fe/fe-core/src/main/java/com/starrocks/common/PatternMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,96 +41,66 @@ public boolean match(String candidate) {
/*
* Mysql has only 2 patterns.
* '%' to match any character sequence
* '_' to master any single character.
* '_' to match any single character.
* So we convert '%' to '.*', and '_' to '.'
*
* eg:
* abc% -> abc.*
* ab_c -> ab.c
*
* We also need to handle escape character '\'.
* User use '\' to escape reserved words like '%', '_', or '\' it self
* User use '\' to escape reserved words like '%', '_', or '\' itself
*
* eg:
* ab\%c = ab%c
* ab\_c = ab_c
* ab\\c = ab\c
* abc% -> abc.*
* ab_c -> ab.c
* ab\%c -> matches ab%c
* ab\_c -> matches ab_c
* ab\\c -> matches ab\c
*
* We also have to ignore meaningless '\' like:'ab\c', convert it to 'abc'.
* The following characters are not permitted:
* <([{^=$!|]})?*+>
* Regex metacharacters that appear in literal text (for example '(', ')', '+') are
* backslash-escaped, so table or database names containing them do not break compilation.
*/
// Builds the Java regular-expression text for a MySQL LIKE pattern: '%' becomes ".*",
// '_' becomes ".", and a backslash is examined together with the character that follows it.
// NOTE(review): statements from two versions of this method appear interleaved below
// (for example two loop headers over the pattern and two handlers for '\\'), so this text
// is not a single compilable body; confirm against the real file before relying on it.
private static String convertMysqlPattern(String mysqlPattern) {
String newMysqlPattern = mysqlPattern;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < newMysqlPattern.length(); ++i) {
char ch = newMysqlPattern.charAt(i);
for (int i = 0; i < mysqlPattern.length(); ++i) {
char ch = mysqlPattern.charAt(i);
switch (ch) {
case '%':
// LIKE "any sequence" wildcard
sb.append(".*");
break;
case '.':
sb.append("\\.");
break;
case '_':
// LIKE "any single character" wildcard
sb.append(".");
break;
case '\\': {
if (i == newMysqlPattern.length() - 1) {
// last character of this pattern. leave this '\' as it is
sb.append('\\');
break;
}
// we need to look ahead the next character
// to decide ignore this '\' or treat it as escape character.
char nextChar = newMysqlPattern.charAt(i + 1);
switch (nextChar) {
case '%':
case '_':
case '\\':
// this is a escape character, eat this '\' and get next character.
sb.append(nextChar);
++i;
break;
default:
// ignore this '\' and continue;
break;
}
break;
}
default:
sb.append(ch);
break;
}
}

// Replace all the '\' to '\\' in Java pattern
newMysqlPattern = sb.toString();
sb = new StringBuilder();
for (int i = 0; i < newMysqlPattern.length(); ++i) {
char ch = newMysqlPattern.charAt(i);
switch (ch) {
case '\\':
if (i == newMysqlPattern.length() - 1) {
// last character of this pattern. leave this '\' as it is
sb.append('\\').append('\\');
break;
}
// look ahead
if (newMysqlPattern.charAt(i + 1) == '.') {
// leave '\.' as it is.
sb.append('\\').append('.');
i++;
break;
if (i + 1 < mysqlPattern.length()) {
char next = mysqlPattern.charAt(i + 1);
if (next == '%' || next == '_') {
// \% or \_ → literal char (not special in regex)
sb.append(next);
i++;
} else if (next == '\\') {
// \\ → literal backslash → regex needs "\\\\"
sb.append("\\\\");
i++;
} else {
// meaningless \, ignore
}
} else {
// trailing \, treat as literal backslash
sb.append("\\\\");
}
sb.append('\\').append('\\');
break;
default:
// escape regex-special characters
// (a single backslash is emitted in front of any character found in the list below)
if (".[]{}()*+?^$|".indexOf(ch) >= 0) {
sb.append('\\');
}
sb.append(ch);
break;
}
}

return sb.toString();
}

Expand All @@ -153,6 +123,18 @@ public static PatternMatcher createMysqlPattern(String mysqlPattern, boolean cas
return matcher;
}

/**
 * Turns a literal string into a MySQL LIKE pattern that matches exactly that string.
 * Every occurrence of one of the three LIKE-special characters ({@code \}, {@code %},
 * {@code _}) is preceded by a single backslash; all other characters are copied as-is.
 *
 * @param value the literal text to escape; may be {@code null}
 * @return the escaped pattern, or {@code null} when {@code value} is {@code null}
 */
public static String escapeLikeValue(String value) {
    if (value == null) {
        return null;
    }
    StringBuilder escaped = new StringBuilder(value.length() + 8);
    for (int pos = 0; pos < value.length(); pos++) {
        char current = value.charAt(pos);
        boolean likeSpecial = current == '\\' || current == '%' || current == '_';
        if (likeSpecial) {
            escaped.append('\\');
        }
        escaped.append(current);
    }
    return escaped.toString();
}

public static boolean matchPattern(String pattern, String tableName, PatternMatcher matcher,
boolean caseSensitive) {
if (matcher != null && !matcher.match(tableName)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,7 @@ public static TGetTablesInfoResponse generateTablesInfoResponse(TGetTablesInfoRe
List<BasicTable> tables = new ArrayList<>();
List<String> tableNames = metadataMgr.listTableNames(context, catalogName, dbName);
for (String tableName : tableNames) {
if (request.isSetTable_name() &&
!PatternMatcher.matchPattern(request.getTable_name(), tableName, matcher, caseSensitive)) {
if (matcher != null && !matcher.match(tableName)) {
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,16 @@
import com.starrocks.catalog.system.information.FeMetricsSystemTable;
import com.starrocks.catalog.system.information.LoadTrackingLogsSystemTable;
import com.starrocks.catalog.system.information.LoadsSystemTable;
import com.starrocks.catalog.system.information.RoutineLoadJobsSystemTable;
import com.starrocks.catalog.system.information.StreamLoadsSystemTable;
import com.starrocks.catalog.system.information.TaskRunsSystemTable;
import com.starrocks.common.AnalysisException;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.IdGenerator;
import com.starrocks.common.LocalExchangerType;
import com.starrocks.common.Pair;
import com.starrocks.common.PatternMatcher;
import com.starrocks.common.StarRocksException;
import com.starrocks.connector.BucketProperty;
import com.starrocks.connector.metadata.MetadataTable;
Expand Down Expand Up @@ -251,6 +254,13 @@
public class PlanFragmentBuilder {
private static final Logger LOG = LogManager.getLogger(PlanFragmentBuilder.class);

// Schema tables for which an EQ predicate value on TABLE_SCHEMA / DATABASE_NAME / TABLE_NAME
// is handed to the scan node unchanged; for any table not listed here,
// visitPhysicalSchemaScan first passes the value through PatternMatcher.escapeLikeValue.
// Membership is tested against the lower-cased scan-node table name.
// NOTE(review): the name suggests these tables compare the database name exactly rather
// than as a LIKE pattern, and that each NAME constant is lower case — confirm both.
private static final Set<String> TABLES_USING_EXACT_DB_MATCH = Set.of(
LoadsSystemTable.NAME,
LoadTrackingLogsSystemTable.NAME,
StreamLoadsSystemTable.NAME,
RoutineLoadJobsSystemTable.NAME
);

public static ExecPlan createPhysicalPlan(OptExpression plan, ConnectContext connectContext,
List<ColumnRefOperator> outputColumns, ColumnRefFactory columnRefFactory,
List<String> colNames,
Expand Down Expand Up @@ -1766,13 +1776,19 @@ public PlanFragment visitPhysicalSchemaScan(OptExpression optExpression, ExecPla
if (predicate instanceof BinaryPredicateOperator) {
BinaryPredicateOperator binaryPredicateOperator = (BinaryPredicateOperator) predicate;
if (binaryPredicateOperator.getBinaryType() == BinaryType.EQ) {
boolean escapeLike = !TABLES_USING_EXACT_DB_MATCH.contains(
scanNode.getTableName().toLowerCase());
switch (columnRefOperator.getName()) {
case "TABLE_SCHEMA":
case "DATABASE_NAME":
scanNode.setSchemaDb(constantOperator.getVarchar());
scanNode.setSchemaDb(escapeLike
? PatternMatcher.escapeLikeValue(constantOperator.getVarchar())
: constantOperator.getVarchar());
break;
case "TABLE_NAME":
scanNode.setSchemaTable(constantOperator.getVarchar());
scanNode.setSchemaTable(escapeLike
? PatternMatcher.escapeLikeValue(constantOperator.getVarchar())
: constantOperator.getVarchar());
break;
case "BE_ID":
scanNode.setBeId(constantOperator.getBigint());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,73 @@ public void testNormal() {
Assertions.fail(e.getMessage());
}
}

@Test
public void testBackslashFollowedByUnderscore() {
    // Pattern text a\\_a (what SQL delivers for LIKE 'a\\\\_a'):
    // 'a', one literal backslash, any single character, 'a'.
    PatternMatcher middleWildcard = PatternMatcher.createMysqlPattern("a\\\\_a", true);
    for (String accepted : new String[] {"a\\_a", "a\\1a", "a\\%a"}) {
        Assertions.assertTrue(middleWildcard.match(accepted));
    }
    for (String rejected : new String[] {"a_a", "a\\a", "a\\_ab"}) {
        Assertions.assertFalse(middleWildcard.match(rejected));
    }

    // Pattern text a\\_ : 'a', one literal backslash, any single character.
    PatternMatcher trailingWildcard = PatternMatcher.createMysqlPattern("a\\\\_", true);
    Assertions.assertTrue(trailingWildcard.match("a\\x"));
    Assertions.assertTrue(trailingWildcard.match("a\\_"));
    Assertions.assertFalse(trailingWildcard.match("a_"));

    // Pattern text a\\% : 'a', one literal backslash, then any (possibly empty) sequence.
    PatternMatcher trailingAnySequence = PatternMatcher.createMysqlPattern("a\\\\%", true);
    Assertions.assertTrue(trailingAnySequence.match("a\\"));
    Assertions.assertTrue(trailingAnySequence.match("a\\anything"));
    Assertions.assertFalse(trailingAnySequence.match("a_anything"));
}

@Test
public void testRegexMetacharactersInPattern() {
    // Each row: a pattern containing regex metacharacters (which must match itself
    // literally), followed by a look-alike name that must be rejected.
    String[][] cases = {
        {"a(b)c", "abc"},
        {"a+b", "aab"},
        {"a[0]b", "a0b"},
    };
    for (String[] row : cases) {
        String literalName = row[0];
        String lookAlike = row[1];
        PatternMatcher compiled = PatternMatcher.createMysqlPattern(literalName, true);
        Assertions.assertTrue(compiled.match(literalName));
        Assertions.assertFalse(compiled.match(lookAlike));
    }
}

@Test
public void testEscapeLikeValue() {
    // null passes straight through
    Assertions.assertNull(PatternMatcher.escapeLikeValue(null));

    // Each row: expected escaped text, then the raw input.
    String[][] expectations = {
        {"abc", "abc"},
        {"a\\_a", "a_a"},
        {"a\\%a", "a%a"},
        {"a\\\\a", "a\\a"},
        {"a\\\\\\_a", "a\\_a"},
    };
    for (String[] row : expectations) {
        Assertions.assertEquals(row[0], PatternMatcher.escapeLikeValue(row[1]));
    }
}

@Test
public void testEscapeLikeValueRoundTrip() {
    // A value that has been escaped and compiled as a LIKE pattern must match itself.
    String[] samples = {"a_a", "a%b", "a\\b", "a\\_a", "hello", "test(1)+2"};
    for (int idx = 0; idx < samples.length; idx++) {
        String value = samples[idx];
        PatternMatcher roundTrip =
                PatternMatcher.createMysqlPattern(PatternMatcher.escapeLikeValue(value), true);
        Assertions.assertTrue(roundTrip.match(value),
                "Escaped pattern for '" + value + "' should match itself");
    }

    // After escaping, the underscore in "a_a" is a literal and no longer a wildcard,
    // so three-character names with another middle character must be rejected.
    PatternMatcher literalUnderscore =
            PatternMatcher.createMysqlPattern(PatternMatcher.escapeLikeValue("a_a"), true);
    Assertions.assertFalse(literalUnderscore.match("aba"));
    Assertions.assertFalse(literalUnderscore.match("a1a"));
}
}
7 changes: 5 additions & 2 deletions fe/fe-core/src/test/java/com/starrocks/sql/plan/ScanTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.starrocks.catalog.Partition;
import com.starrocks.catalog.Tablet;
import com.starrocks.common.FeConstants;
import com.starrocks.common.PatternMatcher;
import com.starrocks.planner.ScanNode;
import com.starrocks.planner.SchemaScanNode;
import org.junit.jupiter.api.Assertions;
Expand Down Expand Up @@ -386,8 +387,10 @@ public void testSchemaScanWithWhere() throws Exception {
String sql = "select column_name, table_name from information_schema.columns" +
" where table_schema = 'information_schema' and table_name = 'columns'";
ExecPlan plan = getExecPlan(sql);
Assertions.assertTrue(((SchemaScanNode) plan.getScanNodes().get(0)).getSchemaDb().equals("information_schema"));
Assertions.assertTrue(((SchemaScanNode) plan.getScanNodes().get(0)).getSchemaTable().equals("columns"));
SchemaScanNode scanNode = (SchemaScanNode) plan.getScanNodes().get(0);
// Equality values are escaped for LIKE-style pushdown; '_' in database names must be literal.
Assertions.assertEquals(PatternMatcher.escapeLikeValue("information_schema"), scanNode.getSchemaDb());
Assertions.assertEquals(PatternMatcher.escapeLikeValue("columns"), scanNode.getSchemaTable());
}

@Test
Expand Down
57 changes: 57 additions & 0 deletions test/sql/test_information_schema/R/test_tables_like_escape
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,61 @@ a_a
-- !result
drop database if exists db_${uuid0};
-- result:
-- !result

-- name: test_table_name_escape_underscore_and_backslash
create database db_${uuid0};
-- result:
-- !result
use db_${uuid0};
-- result:
-- !result
CREATE TABLE `a\_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
CREATE TABLE `a_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
CREATE TABLE `aba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
CREATE TABLE `a\ba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a_a';
-- result:
a_a
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\_a';
-- result:
a\_a
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\\_a';
-- result:
a\_a
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a_a' order by table_name;
-- result:
a_a
aba
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\_a';
-- result:
a_a
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\_a';
-- result:
a_a
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\_a' order by table_name;
-- result:
a\_a
a\ba
-- !result
select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\\\_a';
-- result:
a\_a
-- !result
drop database if exists db_${uuid0};
-- result:
-- !result
Loading
Loading