@@ -35,13 +35,13 @@ public void testPatternsLabelMode_NotShowNumberedToken_ForSimplePatternMethod()
3535 String expectedLogical =
3636 "LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS"
3737 + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
38- + " '<*>':VARCHAR))])\n "
38+ + " '<*>':VARCHAR, 'g':VARCHAR ))])\n "
3939 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
4040 verifyLogical (root , expectedLogical );
4141
4242 String expectedSparkSql =
4343 "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
44- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END `patterns_field`\n "
44+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END `patterns_field`\n "
4545 + "FROM `scott`.`EMP`" ;
4646 verifyPPLToSparkSQL (root , expectedSparkSql );
4747 }
@@ -56,18 +56,18 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() {
5656 String expectedLogical =
5757 "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1,"
5858 + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1,"
59- + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))],"
59+ + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR )), $1), 'pattern'))],"
6060 + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS"
6161 + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
62- + " '<*>':VARCHAR)), $1), 'tokens'))])\n "
62+ + " '<*>':VARCHAR, 'g':VARCHAR )), $1), 'tokens'))])\n "
6363 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
6464 verifyLogical (root , expectedLogical );
6565
6666 String expectedSparkSql =
6767 "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
68- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['pattern'] AS"
68+ + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END, `ENAME`)['pattern'] AS"
6969 + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR"
70- + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END,"
70+ + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END,"
7171 + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n "
7272 + "FROM `scott`.`EMP`" ;
7373 verifyPPLToSparkSQL (root , expectedSparkSql );
@@ -83,18 +83,18 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa
8383 String expectedLogical =
8484 "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1,"
8585 + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1,"
86- + " '[A-H]':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))],"
86+ + " '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR )), $1), 'pattern'))],"
8787 + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS"
88- + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR)),"
88+ + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR )),"
8989 + " $1), 'tokens'))])\n "
9090 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
9191 verifyLogical (root , expectedLogical );
9292
9393 String expectedSparkSql =
9494 "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
95- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['pattern'] AS STRING)"
95+ + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g' ) END, `ENAME`)['pattern'] AS STRING)"
9696 + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` ="
97- + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['tokens'] AS"
97+ + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g' ) END, `ENAME`)['tokens'] AS"
9898 + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n "
9999 + "FROM `scott`.`EMP`" ;
100100 verifyPPLToSparkSQL (root , expectedSparkSql );
@@ -108,13 +108,13 @@ public void testPatternsLabelModeWithCustomField_NotShowNumberedToken_ForSimpleP
108108
109109 String expectedLogical =
110110 "LogicalProject(ENAME=[$1], upper=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR),"
111- + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR))])\n "
111+ + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR ))])\n "
112112 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
113113 verifyLogical (root , expectedLogical );
114114
115115 String expectedSparkSql =
116116 "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
117- + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END `upper`\n "
117+ + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g' ) END `upper`\n "
118118 + "FROM `scott`.`EMP`" ;
119119 verifyPPLToSparkSQL (root , expectedSparkSql );
120120 }
@@ -130,19 +130,19 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern
130130 "LogicalProject(ENAME=[$1], DEPTNO=[$7],"
131131 + " patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL"
132132 + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
133- + " '<*>':VARCHAR)), $1), 'pattern'))],"
133+ + " '<*>':VARCHAR, 'g':VARCHAR )), $1), 'pattern'))],"
134134 + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS"
135135 + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
136- + " '<*>':VARCHAR)), $1), 'tokens'))])\n "
136+ + " '<*>':VARCHAR, 'g':VARCHAR )), $1), 'tokens'))])\n "
137137 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
138138 verifyLogical (root , expectedLogical );
139139
140140 String expectedSparkSql =
141141 "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`"
142- + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END,"
142+ + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END,"
143143 + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE"
144144 + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`,"
145- + " '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)"
145+ + " '[a-zA-Z0-9]+', '<*>', 'g' ) END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)"
146146 + " `tokens`\n "
147147 + "FROM `scott`.`EMP`" ;
148148 verifyPPLToSparkSQL (root , expectedSparkSql );
@@ -248,18 +248,18 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForSimplePatternMet
248248 "LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n "
249249 + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL"
250250 + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
251- + " '<*>':VARCHAR))], $f9=[10])\n "
251+ + " '<*>':VARCHAR, 'g':VARCHAR ))], $f9=[10])\n "
252252 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
253253 verifyLogical (root , expectedLogical );
254254
255255 String expectedSparkSql =
256256 "SELECT CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`,"
257- + " '[a-zA-Z0-9]+', '<*>') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR"
258- + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)"
257+ + " '[a-zA-Z0-9]+', '<*>', 'g' ) END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR"
258+ + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END)"
259259 + " `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n "
260260 + "FROM `scott`.`EMP`\n "
261261 + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
262- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END" ;
262+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END" ;
263263 verifyPPLToSparkSQL (root , expectedSparkSql );
264264 }
265265
@@ -276,21 +276,21 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod
276276 + " $2)])\n "
277277 + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL"
278278 + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR,"
279- + " '<*>':VARCHAR))], $f9=[10])\n "
279+ + " '<*>':VARCHAR, 'g':VARCHAR ))], $f9=[10])\n "
280280 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
281281 verifyLogical (root , expectedLogical );
282282
283283 String expectedSparkSql =
284284 "SELECT TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
285- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['pattern']"
285+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END, `TAKE`(`ENAME`, 10))['pattern']"
286286 + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN"
287- + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END) `pattern_count`,"
287+ + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END) `pattern_count`,"
288288 + " TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
289- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['tokens']"
289+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END, `TAKE`(`ENAME`, 10))['tokens']"
290290 + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n "
291291 + "FROM `scott`.`EMP`\n "
292292 + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
293- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END" ;
293+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END" ;
294294 verifyPPLToSparkSQL (root , expectedSparkSql );
295295 }
296296
@@ -307,22 +307,22 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa
307307 + " $3)])\n "
308308 + " LogicalProject(ENAME=[$1], DEPTNO=[$7], patterns_field=[CASE(SEARCH($1,"
309309 + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1,"
310- + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))], $f9=[10])\n "
310+ + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR ))], $f9=[10])\n "
311311 + " LogicalTableScan(table=[[scott, EMP]])\n " ;
312312 verifyLogical (root , expectedLogical );
313313
314314 String expectedSparkSql =
315315 "SELECT `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''"
316- + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`,"
316+ + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END, `TAKE`(`ENAME`,"
317317 + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR"
318- + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)"
318+ + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END)"
319319 + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''"
320- + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`,"
320+ + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END, `TAKE`(`ENAME`,"
321321 + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10)"
322322 + " `sample_logs`\n "
323323 + "FROM `scott`.`EMP`\n "
324324 + "GROUP BY `DEPTNO`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE"
325- + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END" ;
325+ + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g' ) END" ;
326326 verifyPPLToSparkSQL (root , expectedSparkSql );
327327 }
328328
0 commit comments