@@ -183,13 +183,6 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
183183 RexNode fieldRex = rexVisitor .analyze (node .getField (), context );
184184 String patternStr = (String ) node .getPattern ().getValue ();
185185
186- if (node .getMode () == Rex .RexMode .SED ) {
187- RexNode sedCall = createOptimizedSedCall (fieldRex , patternStr , context );
188- String fieldName = node .getField ().toString ();
189- projectPlusOverriding (List .of (sedCall ), List .of (fieldName ), context );
190- return context .relBuilder .peek ();
191- }
192-
193186 List <String > namedGroups = RegexExpression .getNamedGroupCandidates (patternStr );
194187
195188 if (namedGroups .isEmpty ()) {
@@ -203,6 +196,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
203196 for (int i = 0 ; i < namedGroups .size (); i ++) {
204197 RexNode extractCall ;
205198 if (node .getMaxMatch ().isPresent () && node .getMaxMatch ().get () != 1 ) {
199+ // Use REX_EXTRACT_MULTI for multiple matches
206200 extractCall =
207201 PPLFuncImpTable .INSTANCE .resolve (
208202 context .rexBuilder ,
@@ -212,6 +206,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
212206 context .relBuilder .literal (i + 1 ),
213207 context .relBuilder .literal (node .getMaxMatch ().get ()));
214208 } else {
209+ // Use REX_EXTRACT for single match (default)
215210 extractCall =
216211 PPLFuncImpTable .INSTANCE .resolve (
217212 context .rexBuilder ,
@@ -224,17 +219,6 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
224219 newFieldNames .add (namedGroups .get (i ));
225220 }
226221
227- if (node .getOffsetField ().isPresent ()) {
228- RexNode offsetCall =
229- PPLFuncImpTable .INSTANCE .resolve (
230- context .rexBuilder ,
231- BuiltinFunctionName .REX_OFFSET ,
232- fieldRex ,
233- context .rexBuilder .makeLiteral (patternStr ));
234- newFields .add (offsetCall );
235- newFieldNames .add (node .getOffsetField ().get ());
236- }
237-
238222 projectPlusOverriding (newFields , newFieldNames , context );
239223 return context .relBuilder .peek ();
240224 }
@@ -1687,118 +1671,4 @@ private void buildExpandRelNode(
16871671 context .relBuilder .rename (names );
16881672 }
16891673 }
1690-
1691- /**
1692- * Creates an optimized sed call using native Calcite functions where possible. Falls back to
1693- * custom REX_SED for complex cases.
1694- */
1695- private RexNode createOptimizedSedCall (
1696- RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
1697- if (sedExpression .startsWith ("s/" )) {
1698- return createOptimizedSubstitution (fieldRex , sedExpression , context );
1699- } else if (sedExpression .startsWith ("y/" )) {
1700- return createOptimizedTransliteration (fieldRex , sedExpression , context );
1701- } else {
1702- throw new RuntimeException ("Unsupported sed pattern: " + sedExpression );
1703- }
1704- }
1705-
1706- /** Creates optimized substitution calls for s/pattern/replacement/flags syntax. */
1707- private RexNode createOptimizedSubstitution (
1708- RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
1709- try {
1710- // Parse sed substitution: s/pattern/replacement/flags
1711- if (!sedExpression .matches ("s/.+/.*/.*" )) {
1712- throw new IllegalArgumentException ("Invalid sed substitution format" );
1713- }
1714-
1715- // Find the delimiters - sed format is s/pattern/replacement/flags
1716- int firstDelimiter = sedExpression .indexOf ('/' , 2 ); // First '/' after 's/'
1717- int secondDelimiter = sedExpression .indexOf ('/' , firstDelimiter + 1 ); // Second '/'
1718- int thirdDelimiter = sedExpression .indexOf ('/' , secondDelimiter + 1 ); // Third '/' (optional)
1719-
1720- if (firstDelimiter == -1 || secondDelimiter == -1 ) {
1721- throw new IllegalArgumentException ("Invalid sed substitution format" );
1722- }
1723-
1724- String pattern = sedExpression .substring (2 , firstDelimiter );
1725- String replacement = sedExpression .substring (firstDelimiter + 1 , secondDelimiter );
1726- String flags =
1727- secondDelimiter + 1 < sedExpression .length ()
1728- ? sedExpression .substring (secondDelimiter + 1 )
1729- : "" ;
1730-
1731- // Convert sed backreferences (\1, \2) to Java style ($1, $2)
1732- String javaReplacement = replacement .replaceAll ("\\ \\ (\\ d+)" , "\\ $$1" );
1733-
1734- if (flags .isEmpty ()) {
1735- // 3-parameter REGEXP_REPLACE
1736- return PPLFuncImpTable .INSTANCE .resolve (
1737- context .rexBuilder ,
1738- BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_3 ,
1739- fieldRex ,
1740- context .rexBuilder .makeLiteral (pattern ),
1741- context .rexBuilder .makeLiteral (javaReplacement ));
1742- } else if (flags .matches ("[gi]+" )) {
1743- // 4-parameter REGEXP_REPLACE with flags
1744- return PPLFuncImpTable .INSTANCE .resolve (
1745- context .rexBuilder ,
1746- BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_PG_4 ,
1747- fieldRex ,
1748- context .rexBuilder .makeLiteral (pattern ),
1749- context .rexBuilder .makeLiteral (javaReplacement ),
1750- context .rexBuilder .makeLiteral (flags ));
1751- } else if (flags .matches ("\\ d+" )) {
1752- // 5-parameter REGEXP_REPLACE with occurrence
1753- int occurrence = Integer .parseInt (flags );
1754- return PPLFuncImpTable .INSTANCE .resolve (
1755- context .rexBuilder ,
1756- BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_5 ,
1757- fieldRex ,
1758- context .rexBuilder .makeLiteral (pattern ),
1759- context .rexBuilder .makeLiteral (javaReplacement ),
1760- context .relBuilder .literal (1 ), // start position
1761- context .relBuilder .literal (occurrence ));
1762- } else {
1763- throw new RuntimeException (
1764- "Unsupported sed flags: " + flags + " in expression: " + sedExpression );
1765- }
1766- } catch (Exception e ) {
1767- throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
1768- }
1769- }
1770-
1771- /** Creates optimized transliteration calls for y/from/to/ syntax. */
1772- private RexNode createOptimizedTransliteration (
1773- RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
1774- try {
1775- // Parse sed transliteration: y/from/to/
1776- if (!sedExpression .matches ("y/.+/.*/.*" )) {
1777- throw new IllegalArgumentException ("Invalid sed transliteration format" );
1778- }
1779-
1780- int firstSlash = sedExpression .indexOf ('/' , 1 );
1781- int secondSlash = sedExpression .indexOf ('/' , firstSlash + 1 );
1782- int thirdSlash = sedExpression .indexOf ('/' , secondSlash + 1 );
1783-
1784- if (firstSlash == -1 || secondSlash == -1 ) {
1785- throw new IllegalArgumentException ("Invalid sed transliteration format" );
1786- }
1787-
1788- String from = sedExpression .substring (firstSlash + 1 , secondSlash );
1789- String to =
1790- sedExpression .substring (
1791- secondSlash + 1 , thirdSlash != -1 ? thirdSlash : sedExpression .length ());
1792-
1793- // Use Calcite's native TRANSLATE3 function
1794- return PPLFuncImpTable .INSTANCE .resolve (
1795- context .rexBuilder ,
1796- BuiltinFunctionName .INTERNAL_TRANSLATE3 ,
1797- fieldRex ,
1798- context .rexBuilder .makeLiteral (from ),
1799- context .rexBuilder .makeLiteral (to ));
1800- } catch (Exception e ) {
1801- throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
1802- }
1803- }
18041674}
0 commit comments