|
5 | 5 |
|
6 | 6 | package org.opensearch.sql.ppl.calcite; |
7 | 7 |
|
| 8 | +import static org.mockito.Mockito.doReturn; |
| 9 | + |
8 | 10 | import org.apache.calcite.rel.RelNode; |
9 | 11 | import org.apache.calcite.test.CalciteAssert; |
| 12 | +import org.junit.Before; |
10 | 13 | import org.junit.Test; |
| 14 | +import org.opensearch.sql.common.setting.Settings; |
11 | 15 |
|
12 | 16 | public class CalcitePPLRexTest extends CalcitePPLAbstractTest { |
13 | 17 | public CalcitePPLRexTest() { // no-arg constructor; only forwards a schema choice to the base class
14 | 18 | super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); // presumably the schema the base class exposes to queries (tests below read its EMP table) - confirm in CalcitePPLAbstractTest
15 | 19 | }
16 | 20 |
|
| 21 | + @Before // JUnit 4: runs before each @Test method of this class
| 22 | + public void setUp() {
| 23 | + doReturn(10).when(settings).getSettingValue(Settings.Key.PPL_REX_MAX_MATCH_LIMIT); // stub the rex max_match limit to 10; `settings` is declared outside this view (presumably a Mockito mock on the base class)
| 24 | + }
| 25 | + |
17 | 26 | @Test |
18 | 27 | public void testRexBasicFieldExtraction() { |
19 | 28 | String ppl = "source=EMP | rex field=ENAME '(?<first>[A-Z]).*' | fields ENAME, first"; |
@@ -159,4 +168,62 @@ public void testRexWithSort() { |
159 | 168 | + "LIMIT 5"; |
160 | 169 | verifyPPLToSparkSQL(root, expectedSparkSql); |
161 | 170 | } |
| 171 | + |
| 172 | + @Test
| 173 | + public void testRexWithMaxMatchZero() {
| 174 | + // max_match=0 (unlimited) must be capped to the configured limit: the expected plans carry 10, the value stubbed in setUp().
| 175 | + String ppl =
| 176 | + "source=EMP | rex field=ENAME '(?<letter>[A-Z])' max_match=0 | fields ENAME, letter";
| 177 | + RelNode root = getRelNode(ppl);
| 178 | + String expectedLogical = // last REX_EXTRACT_MULTI argument is 10, not the 0 written in the query
| 179 | + "LogicalProject(ENAME=[$1], letter=[REX_EXTRACT_MULTI($1, '(?<letter>[A-Z])', 1, 10)])\n"
| 180 | + + " LogicalTableScan(table=[[scott, EMP]])\n";
| 181 | + verifyLogical(root, expectedLogical);
| 182 | +
| 183 | + String expectedSparkSql = // the Spark SQL rendering must show the same capped value
| 184 | + "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?<letter>[A-Z])', 1, 10) `letter`\n"
| 185 | + + "FROM `scott`.`EMP`";
| 186 | + verifyPPLToSparkSQL(root, expectedSparkSql);
| 187 | + }
| 188 | + |
| 189 | + @Test(expected = IllegalArgumentException.class) // passes only if the body throws IllegalArgumentException
| 190 | + public void testRexWithMaxMatchExceedsLimit() {
| 191 | + // max_match=100 is above the limit of 10 stubbed in setUp(), so building the plan is expected to throw.
| 192 | + String ppl =
| 193 | + "source=EMP | rex field=ENAME '(?<letter>[A-Z])' max_match=100 | fields ENAME, letter";
| 194 | + getRelNode(ppl); // the only call in the body, so the expected exception must originate here
| 195 | + }
| 196 | + |
| 197 | + @Test
| 198 | + public void testRexWithMaxMatchWithinLimit() { // max_match=5 is below the limit of 10 stubbed in setUp() and must reach the plan unchanged
| 199 | + String ppl =
| 200 | + "source=EMP | rex field=ENAME '(?<letter>[A-Z])' max_match=5 | fields ENAME, letter";
| 201 | + RelNode root = getRelNode(ppl);
| 202 | + String expectedLogical = // last REX_EXTRACT_MULTI argument is the 5 given in the query
| 203 | + "LogicalProject(ENAME=[$1], letter=[REX_EXTRACT_MULTI($1, '(?<letter>[A-Z])', 1, 5)])\n"
| 204 | + + " LogicalTableScan(table=[[scott, EMP]])\n";
| 205 | + verifyLogical(root, expectedLogical);
| 206 | +
| 207 | + String expectedSparkSql = // the Spark SQL rendering must carry the same 5
| 208 | + "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?<letter>[A-Z])', 1, 5) `letter`\n"
| 209 | + + "FROM `scott`.`EMP`";
| 210 | + verifyPPLToSparkSQL(root, expectedSparkSql);
| 211 | + }
| 212 | + |
| 213 | + @Test
| 214 | + public void testRexWithMaxMatchAtLimit() {
| 215 | + // Boundary case: max_match=10 equals the limit stubbed in setUp(); it is accepted and appears unchanged in both plans.
| 216 | + String ppl =
| 217 | + "source=EMP | rex field=ENAME '(?<letter>[A-Z])' max_match=10 | fields ENAME, letter";
| 218 | + RelNode root = getRelNode(ppl);
| 219 | + String expectedLogical = // last REX_EXTRACT_MULTI argument is the 10 given in the query
| 220 | + "LogicalProject(ENAME=[$1], letter=[REX_EXTRACT_MULTI($1, '(?<letter>[A-Z])', 1, 10)])\n"
| 221 | + + " LogicalTableScan(table=[[scott, EMP]])\n";
| 222 | + verifyLogical(root, expectedLogical);
| 223 | +
| 224 | + String expectedSparkSql = // the Spark SQL rendering must carry the same 10
| 225 | + "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?<letter>[A-Z])', 1, 10) `letter`\n"
| 226 | + + "FROM `scott`.`EMP`";
| 227 | + verifyPPLToSparkSQL(root, expectedSparkSql);
| 228 | + }
162 | 229 | } |
0 commit comments