@@ -216,6 +216,13 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
216216 RexNode fieldRex = rexVisitor .analyze (node .getField (), context );
217217 String patternStr = (String ) node .getPattern ().getValue ();
218218
219+ if (node .getMode () == Rex .RexMode .SED ) {
220+ RexNode sedCall = createOptimizedSedCall (fieldRex , patternStr , context );
221+ String fieldName = node .getField ().toString ();
222+ projectPlusOverriding (List .of (sedCall ), List .of (fieldName ), context );
223+ return context .relBuilder .peek ();
224+ }
225+
219226 List <String > namedGroups = RegexCommonUtils .getNamedGroupCandidates (patternStr );
220227
221228 if (namedGroups .isEmpty ()) {
@@ -250,6 +257,17 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
250257 newFieldNames .add (namedGroups .get (i ));
251258 }
252259
260+ if (node .getOffsetField ().isPresent ()) {
261+ RexNode offsetCall =
262+ PPLFuncImpTable .INSTANCE .resolve (
263+ context .rexBuilder ,
264+ BuiltinFunctionName .REX_OFFSET ,
265+ fieldRex ,
266+ context .rexBuilder .makeLiteral (patternStr ));
267+ newFields .add (offsetCall );
268+ newFieldNames .add (node .getOffsetField ().get ());
269+ }
270+
253271 projectPlusOverriding (newFields , newFieldNames , context );
254272 return context .relBuilder .peek ();
255273 }
@@ -2108,4 +2126,118 @@ private void buildExpandRelNode(
21082126 context .relBuilder .rename (names );
21092127 }
21102128 }
2129+
2130+ /**
2131+ * Creates an optimized sed call using native Calcite functions where possible. Falls back to
2132+ * custom REX_SED for complex cases.
2133+ */
2134+ private RexNode createOptimizedSedCall (
2135+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2136+ if (sedExpression .startsWith ("s/" )) {
2137+ return createOptimizedSubstitution (fieldRex , sedExpression , context );
2138+ } else if (sedExpression .startsWith ("y/" )) {
2139+ return createOptimizedTransliteration (fieldRex , sedExpression , context );
2140+ } else {
2141+ throw new RuntimeException ("Unsupported sed pattern: " + sedExpression );
2142+ }
2143+ }
2144+
2145+ /** Creates optimized substitution calls for s/pattern/replacement/flags syntax. */
2146+ private RexNode createOptimizedSubstitution (
2147+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2148+ try {
2149+ // Parse sed substitution: s/pattern/replacement/flags
2150+ if (!sedExpression .matches ("s/.+/.*/.*" )) {
2151+ throw new IllegalArgumentException ("Invalid sed substitution format" );
2152+ }
2153+
2154+ // Find the delimiters - sed format is s/pattern/replacement/flags
2155+ int firstDelimiter = sedExpression .indexOf ('/' , 2 ); // First '/' after 's/'
2156+ int secondDelimiter = sedExpression .indexOf ('/' , firstDelimiter + 1 ); // Second '/'
2157+ int thirdDelimiter = sedExpression .indexOf ('/' , secondDelimiter + 1 ); // Third '/' (optional)
2158+
2159+ if (firstDelimiter == -1 || secondDelimiter == -1 ) {
2160+ throw new IllegalArgumentException ("Invalid sed substitution format" );
2161+ }
2162+
2163+ String pattern = sedExpression .substring (2 , firstDelimiter );
2164+ String replacement = sedExpression .substring (firstDelimiter + 1 , secondDelimiter );
2165+ String flags =
2166+ secondDelimiter + 1 < sedExpression .length ()
2167+ ? sedExpression .substring (secondDelimiter + 1 )
2168+ : "" ;
2169+
2170+ // Convert sed backreferences (\1, \2) to Java style ($1, $2)
2171+ String javaReplacement = replacement .replaceAll ("\\ \\ (\\ d+)" , "\\ $$1" );
2172+
2173+ if (flags .isEmpty ()) {
2174+ // 3-parameter REGEXP_REPLACE
2175+ return PPLFuncImpTable .INSTANCE .resolve (
2176+ context .rexBuilder ,
2177+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_3 ,
2178+ fieldRex ,
2179+ context .rexBuilder .makeLiteral (pattern ),
2180+ context .rexBuilder .makeLiteral (javaReplacement ));
2181+ } else if (flags .matches ("[gi]+" )) {
2182+ // 4-parameter REGEXP_REPLACE with flags
2183+ return PPLFuncImpTable .INSTANCE .resolve (
2184+ context .rexBuilder ,
2185+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_PG_4 ,
2186+ fieldRex ,
2187+ context .rexBuilder .makeLiteral (pattern ),
2188+ context .rexBuilder .makeLiteral (javaReplacement ),
2189+ context .rexBuilder .makeLiteral (flags ));
2190+ } else if (flags .matches ("\\ d+" )) {
2191+ // 5-parameter REGEXP_REPLACE with occurrence
2192+ int occurrence = Integer .parseInt (flags );
2193+ return PPLFuncImpTable .INSTANCE .resolve (
2194+ context .rexBuilder ,
2195+ BuiltinFunctionName .INTERNAL_REGEXP_REPLACE_5 ,
2196+ fieldRex ,
2197+ context .rexBuilder .makeLiteral (pattern ),
2198+ context .rexBuilder .makeLiteral (javaReplacement ),
2199+ context .relBuilder .literal (1 ), // start position
2200+ context .relBuilder .literal (occurrence ));
2201+ } else {
2202+ throw new RuntimeException (
2203+ "Unsupported sed flags: " + flags + " in expression: " + sedExpression );
2204+ }
2205+ } catch (Exception e ) {
2206+ throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
2207+ }
2208+ }
2209+
2210+ /** Creates optimized transliteration calls for y/from/to/ syntax. */
2211+ private RexNode createOptimizedTransliteration (
2212+ RexNode fieldRex , String sedExpression , CalcitePlanContext context ) {
2213+ try {
2214+ // Parse sed transliteration: y/from/to/
2215+ if (!sedExpression .matches ("y/.+/.*/.*" )) {
2216+ throw new IllegalArgumentException ("Invalid sed transliteration format" );
2217+ }
2218+
2219+ int firstSlash = sedExpression .indexOf ('/' , 1 );
2220+ int secondSlash = sedExpression .indexOf ('/' , firstSlash + 1 );
2221+ int thirdSlash = sedExpression .indexOf ('/' , secondSlash + 1 );
2222+
2223+ if (firstSlash == -1 || secondSlash == -1 ) {
2224+ throw new IllegalArgumentException ("Invalid sed transliteration format" );
2225+ }
2226+
2227+ String from = sedExpression .substring (firstSlash + 1 , secondSlash );
2228+ String to =
2229+ sedExpression .substring (
2230+ secondSlash + 1 , thirdSlash != -1 ? thirdSlash : sedExpression .length ());
2231+
2232+ // Use Calcite's native TRANSLATE3 function
2233+ return PPLFuncImpTable .INSTANCE .resolve (
2234+ context .rexBuilder ,
2235+ BuiltinFunctionName .INTERNAL_TRANSLATE3 ,
2236+ fieldRex ,
2237+ context .rexBuilder .makeLiteral (from ),
2238+ context .rexBuilder .makeLiteral (to ));
2239+ } catch (Exception e ) {
2240+ throw new RuntimeException ("Failed to optimize sed expression: " + sedExpression , e );
2241+ }
2242+ }
21112243}
0 commit comments