diff --git a/fe/fe-core/src/main/java/com/starrocks/catalog/system/information/TablesSystemTable.java b/fe/fe-core/src/main/java/com/starrocks/catalog/system/information/TablesSystemTable.java index a481041ddfe370..a5ba41f9623c97 100644 --- a/fe/fe-core/src/main/java/com/starrocks/catalog/system/information/TablesSystemTable.java +++ b/fe/fe-core/src/main/java/com/starrocks/catalog/system/information/TablesSystemTable.java @@ -23,6 +23,7 @@ import com.starrocks.catalog.Type; import com.starrocks.catalog.system.SystemId; import com.starrocks.catalog.system.SystemTable; +import com.starrocks.common.PatternMatcher; import com.starrocks.qe.ConnectContext; import com.starrocks.service.InformationSchemaDataSource; import com.starrocks.sql.analyzer.SemanticException; @@ -168,10 +169,10 @@ public List> evaluate(ScalarOperator predicate) { ConstantOperator value = binary.getChild(1).cast(); switch (name.toUpperCase()) { case "TABLE_NAME": - params.setTable_name(value.getVarchar()); + params.setTable_name(PatternMatcher.escapeLikeValue(value.getVarchar())); break; case "TABLE_SCHEMA": - authInfo.setPattern(value.getVarchar()); + authInfo.setPattern(PatternMatcher.escapeLikeValue(value.getVarchar())); break; default: throw new NotImplementedException("unsupported column: " + name); diff --git a/fe/fe-core/src/main/java/com/starrocks/common/PatternMatcher.java b/fe/fe-core/src/main/java/com/starrocks/common/PatternMatcher.java index 8ea8afa8aec965..486e7c8784bfa3 100644 --- a/fe/fe-core/src/main/java/com/starrocks/common/PatternMatcher.java +++ b/fe/fe-core/src/main/java/com/starrocks/common/PatternMatcher.java @@ -41,7 +41,7 @@ public boolean match(String candidate) { /* * Mysql has only 2 patterns. * '%' to match any character sequence - * '_' to master any single character. + * '_' to match any single character. * So we convert '%' to '.*', and '_' to '.' 
* * eg: @@ -49,88 +49,58 @@ public boolean match(String candidate) { * ab_c -> ab.c * * We also need to handle escape character '\'. - * User use '\' to escape reserved words like '%', '_', or '\' it self + * User use '\' to escape reserved words like '%', '_', or '\' itself * * eg: - * ab\%c = ab%c - * ab\_c = ab_c - * ab\\c = ab\c + * abc% -> abc.* + * ab_c -> ab.c + * ab\%c -> matches ab%c + * ab\_c -> matches ab_c + * ab\\c -> matches ab\c * * We also have to ignore meaningless '\' like:'ab\c', convert it to 'abc'. - * The following characters are not permitted: - * <([{^=$!|]})?*+> + * Regex metacharacters in literal text (for example '(', ')', '+') are each escaped with a + * backslash, so table or database names containing them do not break regex compilation. */ private static String convertMysqlPattern(String mysqlPattern) { - String newMysqlPattern = mysqlPattern; StringBuilder sb = new StringBuilder(); - for (int i = 0; i < newMysqlPattern.length(); ++i) { - char ch = newMysqlPattern.charAt(i); + for (int i = 0; i < mysqlPattern.length(); ++i) { + char ch = mysqlPattern.charAt(i); switch (ch) { case '%': sb.append(".*"); break; - case '.': - sb.append("\\."); - break; case '_': sb.append("."); break; - case '\\': { - if (i == newMysqlPattern.length() - 1) { - // last character of this pattern. leave this '\' as it is - sb.append('\\'); - break; - } - // we need to look ahead the next character - // to decide ignore this '\' or treat it as escape character. - char nextChar = newMysqlPattern.charAt(i + 1); - switch (nextChar) { - case '%': - case '_': - case '\\': - // this is a escape character, eat this '\' and get next character. 
- sb.append(nextChar); - ++i; - break; - default: - // ignore this '\' and continue; - break; - } - break; - } - default: - sb.append(ch); - break; - } - } - - // Replace all the '\' to '\\' in Java pattern - newMysqlPattern = sb.toString(); - sb = new StringBuilder(); - for (int i = 0; i < newMysqlPattern.length(); ++i) { - char ch = newMysqlPattern.charAt(i); - switch (ch) { case '\\': - if (i == newMysqlPattern.length() - 1) { - // last character of this pattern. leave this '\' as it is - sb.append('\\').append('\\'); - break; - } - // look ahead - if (newMysqlPattern.charAt(i + 1) == '.') { - // leave '\.' as it is. - sb.append('\\').append('.'); - i++; - break; + if (i + 1 < mysqlPattern.length()) { + char next = mysqlPattern.charAt(i + 1); + if (next == '%' || next == '_') { + // \% or \_ → literal char (not special in regex) + sb.append(next); + i++; + } else if (next == '\\') { + // \\ → literal backslash → regex needs "\\\\" + sb.append("\\\\"); + i++; + } else { + // meaningless \, ignore + } + } else { + // trailing \, treat as literal backslash + sb.append("\\\\"); } - sb.append('\\').append('\\'); break; default: + // escape regex-special characters + if (".[]{}()*+?^$|".indexOf(ch) >= 0) { + sb.append('\\'); + } sb.append(ch); break; } } - return sb.toString(); } @@ -153,6 +123,18 @@ public static PatternMatcher createMysqlPattern(String mysqlPattern, boolean cas return matcher; } + /** + * Escape a literal value so it can be used as a MySQL LIKE pattern that + * matches the value exactly. The three LIKE-special characters {@code \}, + * {@code %} and {@code _} are each prefixed with a backslash. 
+ */ + public static String escapeLikeValue(String value) { + if (value == null) { + return null; + } + return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_"); + } + public static boolean matchPattern(String pattern, String tableName, PatternMatcher matcher, boolean caseSensitive) { if (matcher != null && !matcher.match(tableName)) { diff --git a/fe/fe-core/src/main/java/com/starrocks/service/InformationSchemaDataSource.java b/fe/fe-core/src/main/java/com/starrocks/service/InformationSchemaDataSource.java index 6d7fe57be4e655..3726ae6a45ebc2 100644 --- a/fe/fe-core/src/main/java/com/starrocks/service/InformationSchemaDataSource.java +++ b/fe/fe-core/src/main/java/com/starrocks/service/InformationSchemaDataSource.java @@ -544,8 +544,7 @@ public static TGetTablesInfoResponse generateTablesInfoResponse(TGetTablesInfoRe List tables = new ArrayList<>(); List tableNames = metadataMgr.listTableNames(context, catalogName, dbName); for (String tableName : tableNames) { - if (request.isSetTable_name() && - !PatternMatcher.matchPattern(request.getTable_name(), tableName, matcher, caseSensitive)) { + if (matcher != null && !matcher.match(tableName)) { continue; } diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/plan/PlanFragmentBuilder.java b/fe/fe-core/src/main/java/com/starrocks/sql/plan/PlanFragmentBuilder.java index 64117837b7ce4c..f804f19f413c85 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/plan/PlanFragmentBuilder.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/plan/PlanFragmentBuilder.java @@ -57,6 +57,8 @@ import com.starrocks.catalog.system.information.FeMetricsSystemTable; import com.starrocks.catalog.system.information.LoadTrackingLogsSystemTable; import com.starrocks.catalog.system.information.LoadsSystemTable; +import com.starrocks.catalog.system.information.RoutineLoadJobsSystemTable; +import com.starrocks.catalog.system.information.StreamLoadsSystemTable; import 
com.starrocks.catalog.system.information.TaskRunsSystemTable; import com.starrocks.common.AnalysisException; import com.starrocks.common.Config; @@ -64,6 +66,7 @@ import com.starrocks.common.IdGenerator; import com.starrocks.common.LocalExchangerType; import com.starrocks.common.Pair; +import com.starrocks.common.PatternMatcher; import com.starrocks.common.StarRocksException; import com.starrocks.connector.BucketProperty; import com.starrocks.connector.metadata.MetadataTable; @@ -251,6 +254,13 @@ public class PlanFragmentBuilder { private static final Logger LOG = LogManager.getLogger(PlanFragmentBuilder.class); + private static final Set TABLES_USING_EXACT_DB_MATCH = Set.of( + LoadsSystemTable.NAME, + LoadTrackingLogsSystemTable.NAME, + StreamLoadsSystemTable.NAME, + RoutineLoadJobsSystemTable.NAME + ); + public static ExecPlan createPhysicalPlan(OptExpression plan, ConnectContext connectContext, List outputColumns, ColumnRefFactory columnRefFactory, List colNames, @@ -1766,13 +1776,19 @@ public PlanFragment visitPhysicalSchemaScan(OptExpression optExpression, ExecPla if (predicate instanceof BinaryPredicateOperator) { BinaryPredicateOperator binaryPredicateOperator = (BinaryPredicateOperator) predicate; if (binaryPredicateOperator.getBinaryType() == BinaryType.EQ) { + boolean escapeLike = !TABLES_USING_EXACT_DB_MATCH.contains( + scanNode.getTableName().toLowerCase()); switch (columnRefOperator.getName()) { case "TABLE_SCHEMA": case "DATABASE_NAME": - scanNode.setSchemaDb(constantOperator.getVarchar()); + scanNode.setSchemaDb(escapeLike + ? PatternMatcher.escapeLikeValue(constantOperator.getVarchar()) + : constantOperator.getVarchar()); break; case "TABLE_NAME": - scanNode.setSchemaTable(constantOperator.getVarchar()); + scanNode.setSchemaTable(escapeLike + ? 
PatternMatcher.escapeLikeValue(constantOperator.getVarchar()) + : constantOperator.getVarchar()); break; case "BE_ID": scanNode.setBeId(constantOperator.getBigint()); diff --git a/fe/fe-core/src/test/java/com/starrocks/common/PatternMatcherTest.java b/fe/fe-core/src/test/java/com/starrocks/common/PatternMatcherTest.java index 2c1904a107cbf4..dfc91247f443b6 100644 --- a/fe/fe-core/src/test/java/com/starrocks/common/PatternMatcherTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/common/PatternMatcherTest.java @@ -102,4 +102,73 @@ public void testNormal() { Assertions.fail(e.getMessage()); } } + + @Test + public void testBackslashFollowedByUnderscore() { + // LIKE pattern "a\\_a" means: a, literal \, any single char, a + // This is the pattern that results from SQL: LIKE 'a\\\\_a' + PatternMatcher matcher = PatternMatcher.createMysqlPattern("a\\\\_a", true); + Assertions.assertTrue(matcher.match("a\\_a")); + Assertions.assertTrue(matcher.match("a\\1a")); + Assertions.assertTrue(matcher.match("a\\%a")); + Assertions.assertFalse(matcher.match("a_a")); + Assertions.assertFalse(matcher.match("a\\a")); + Assertions.assertFalse(matcher.match("a\\_ab")); + + // LIKE pattern "a\\_" means: a, literal \, any single char + matcher = PatternMatcher.createMysqlPattern("a\\\\_", true); + Assertions.assertTrue(matcher.match("a\\x")); + Assertions.assertTrue(matcher.match("a\\_")); + Assertions.assertFalse(matcher.match("a_")); + + // LIKE pattern "a\\%" means: a, literal \, followed by any sequence + matcher = PatternMatcher.createMysqlPattern("a\\\\%", true); + Assertions.assertTrue(matcher.match("a\\")); + Assertions.assertTrue(matcher.match("a\\anything")); + Assertions.assertFalse(matcher.match("a_anything")); + } + + @Test + public void testRegexMetacharactersInPattern() { + // Table names with regex metacharacters like (, ), +, *, ? 
should work + PatternMatcher matcher = PatternMatcher.createMysqlPattern("a(b)c", true); + Assertions.assertTrue(matcher.match("a(b)c")); + Assertions.assertFalse(matcher.match("abc")); + + matcher = PatternMatcher.createMysqlPattern("a+b", true); + Assertions.assertTrue(matcher.match("a+b")); + Assertions.assertFalse(matcher.match("aab")); + + matcher = PatternMatcher.createMysqlPattern("a[0]b", true); + Assertions.assertTrue(matcher.match("a[0]b")); + Assertions.assertFalse(matcher.match("a0b")); + } + + @Test + public void testEscapeLikeValue() { + Assertions.assertNull(PatternMatcher.escapeLikeValue(null)); + Assertions.assertEquals("abc", PatternMatcher.escapeLikeValue("abc")); + Assertions.assertEquals("a\\_a", PatternMatcher.escapeLikeValue("a_a")); + Assertions.assertEquals("a\\%a", PatternMatcher.escapeLikeValue("a%a")); + Assertions.assertEquals("a\\\\a", PatternMatcher.escapeLikeValue("a\\a")); + Assertions.assertEquals("a\\\\\\_a", PatternMatcher.escapeLikeValue("a\\_a")); + } + + @Test + public void testEscapeLikeValueRoundTrip() { + // Escaping a value and then using it as a LIKE pattern should match only the original value + String[] testValues = {"a_a", "a%b", "a\\b", "a\\_a", "hello", "test(1)+2"}; + for (String value : testValues) { + String escaped = PatternMatcher.escapeLikeValue(value); + PatternMatcher matcher = PatternMatcher.createMysqlPattern(escaped, true); + Assertions.assertTrue(matcher.match(value), + "Escaped pattern for '" + value + "' should match itself"); + } + + // Escaped "a_a" should NOT match "aba" (the underscore is not a wildcard) + String escaped = PatternMatcher.escapeLikeValue("a_a"); + PatternMatcher matcher = PatternMatcher.createMysqlPattern(escaped, true); + Assertions.assertFalse(matcher.match("aba")); + Assertions.assertFalse(matcher.match("a1a")); + } } \ No newline at end of file diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ScanTest.java 
b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ScanTest.java index f8fe8b1345723e..7e437001a1faa2 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ScanTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ScanTest.java @@ -20,6 +20,7 @@ import com.starrocks.catalog.Partition; import com.starrocks.catalog.Tablet; import com.starrocks.common.FeConstants; +import com.starrocks.common.PatternMatcher; import com.starrocks.planner.ScanNode; import com.starrocks.planner.SchemaScanNode; import org.junit.jupiter.api.Assertions; @@ -386,8 +387,10 @@ public void testSchemaScanWithWhere() throws Exception { String sql = "select column_name, table_name from information_schema.columns" + " where table_schema = 'information_schema' and table_name = 'columns'"; ExecPlan plan = getExecPlan(sql); - Assertions.assertTrue(((SchemaScanNode) plan.getScanNodes().get(0)).getSchemaDb().equals("information_schema")); - Assertions.assertTrue(((SchemaScanNode) plan.getScanNodes().get(0)).getSchemaTable().equals("columns")); + SchemaScanNode scanNode = (SchemaScanNode) plan.getScanNodes().get(0); + // Equality values are escaped for LIKE-style pushdown; '_' in database names must be literal. 
+ Assertions.assertEquals(PatternMatcher.escapeLikeValue("information_schema"), scanNode.getSchemaDb()); + Assertions.assertEquals(PatternMatcher.escapeLikeValue("columns"), scanNode.getSchemaTable()); } @Test diff --git a/test/sql/test_information_schema/R/test_tables_like_escape b/test/sql/test_information_schema/R/test_tables_like_escape index 73475c56076f8f..f6300cfefa137b 100644 --- a/test/sql/test_information_schema/R/test_tables_like_escape +++ b/test/sql/test_information_schema/R/test_tables_like_escape @@ -18,4 +18,61 @@ a_a -- !result drop database if exists db_${uuid0}; -- result: +-- !result + +-- name: test_table_name_escape_underscore_and_backslash +create database db_${uuid0}; +-- result: +-- !result +use db_${uuid0}; +-- result: +-- !result +CREATE TABLE `a\_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +-- result: +-- !result +CREATE TABLE `a_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +-- result: +-- !result +CREATE TABLE `aba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +-- result: +-- !result +CREATE TABLE `a\ba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +-- result: +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a_a'; +-- result: +a_a +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\_a'; +-- result: +a\_a +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\\_a'; +-- result: +a\_a +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a_a' order by table_name; +-- result: +a_a +aba +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\_a'; +-- result: +a_a +-- 
!result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\_a'; +-- result: +a_a +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\_a' order by table_name; +-- result: +a\_a +a\ba +-- !result +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\\\_a'; +-- result: +a\_a +-- !result +drop database if exists db_${uuid0}; +-- result: -- !result \ No newline at end of file diff --git a/test/sql/test_information_schema/T/test_tables_like_escape b/test/sql/test_information_schema/T/test_tables_like_escape index b1d96991ca1f25..10698324d8142d 100644 --- a/test/sql/test_information_schema/T/test_tables_like_escape +++ b/test/sql/test_information_schema/T/test_tables_like_escape @@ -11,3 +11,30 @@ select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\_a'; drop database if exists db_${uuid0}; + +-- name: test_table_name_escape_underscore_and_backslash +create database db_${uuid0}; +use db_${uuid0}; + +CREATE TABLE `a\_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +CREATE TABLE `a_a` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +CREATE TABLE `aba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); +CREATE TABLE `a\ba` (`c1` int) DISTRIBUTED BY HASH(`c1`) BUCKETS 1 PROPERTIES ("replication_num" = "1"); + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a_a'; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\_a'; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name = 'a\\_a'; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 
'a_a' order by table_name; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\_a'; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\_a'; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\_a' order by table_name; + +select table_name from information_schema.tables where table_schema='db_${uuid0}' and table_name like 'a\\\\\\_a'; + +drop database if exists db_${uuid0};