99import lombok .EqualsAndHashCode ;
1010import lombok .Getter ;
1111import lombok .ToString ;
12- import org .pcre4j .Pcre4j ;
13- import org .pcre4j .jna .Pcre2 ;
14- import org .pcre4j .regex .Pattern ;
15- import org .pcre4j .regex .Matcher ;
16- import org .opensearch .sql .data .model .ExprBooleanValue ;
1712import org .opensearch .sql .data .model .ExprValue ;
1813import org .opensearch .sql .data .model .ExprValueUtils ;
1914import org .opensearch .sql .data .type .ExprCoreType ;
2015import org .opensearch .sql .data .type .ExprType ;
2116import org .opensearch .sql .expression .Expression ;
2217import org .opensearch .sql .expression .ExpressionNodeVisitor ;
2318import org .opensearch .sql .expression .env .Environment ;
24- import org .opensearch .sql .expression .function .FunctionName ;
19+ import org .pcre4j .Pcre4j ;
20+ import org .pcre4j .jna .Pcre2 ;
21+ import org .pcre4j .regex .Matcher ;
22+ import org .pcre4j .regex .Pattern ;
2523
2624/**
27- * Expression for PCRE-compatible regex matching using JPCRE2.
28- * Supports full PCRE features including:
29- * - Named groups (?<name>...)
30- * - Lookahead/lookbehind (including variable-length)
31- * - Backreferences
32- * - Recursion (?R) and named recursion (?&name)
33- * - Conditionals (?(condition)yes|no)
25+ * Expression for PCRE-compatible regex matching using JPCRE2. Supports full PCRE features
26+ * including: - Named groups (?<name>...) - Lookahead/lookbehind (including variable-length) -
27+ * Backreferences - Recursion (?R) and named recursion (?&name) - Conditionals (?(condition)yes|no)
3428 * - Inline flags (?i), (?m), (?s), etc.
3529 */
3630@ ToString
3731@ EqualsAndHashCode
3832public class RegexMatch implements Expression {
39- @ Getter
40- private final Expression field ;
41-
42- @ Getter
43- private final Expression pattern ;
44-
45- @ Getter
46- private final boolean negated ;
47-
48- // Pattern cache to avoid recompiling the same patterns
49- private static final ConcurrentHashMap <String , Pattern > patternCache =
50- new ConcurrentHashMap <>();
51-
52- // Maximum cache size to prevent memory issues
53- private static final int MAX_CACHE_SIZE = 1000 ;
54-
55- // Initialize PCRE4J with JNA backend (done once)
56- static {
57- Pcre4j .setup (new Pcre2 ());
33+ @ Getter private final Expression field ;
34+
35+ @ Getter private final Expression pattern ;
36+
37+ @ Getter private final boolean negated ;
38+
39+ // Pattern cache to avoid recompiling the same patterns
40+ private static final ConcurrentHashMap <String , Pattern > patternCache = new ConcurrentHashMap <>();
41+
42+ // Maximum cache size to prevent memory issues
43+ private static final int MAX_CACHE_SIZE = 1000 ;
44+
45+ // Initialize PCRE4J with JNA backend (done once)
46+ static {
47+ Pcre4j .setup (new Pcre2 ());
48+ }
49+
50+ public RegexMatch (Expression field , Expression pattern , boolean negated ) {
51+ this .field = field ;
52+ this .pattern = pattern ;
53+ this .negated = negated ;
54+ }
55+
56+ @ Override
57+ public ExprValue valueOf (Environment <Expression , ExprValue > valueEnv ) {
58+ ExprValue fieldValue = field .valueOf (valueEnv );
59+ ExprValue patternValue = pattern .valueOf (valueEnv );
60+
61+ // Handle null/missing values
62+ if (fieldValue .isNull ()
63+ || fieldValue .isMissing ()
64+ || patternValue .isNull ()
65+ || patternValue .isMissing ()) {
66+ return ExprValueUtils .booleanValue (false );
5867 }
59-
60- public RegexMatch (Expression field , Expression pattern , boolean negated ) {
61- this .field = field ;
62- this .pattern = pattern ;
63- this .negated = negated ;
68+
69+ String text = fieldValue .stringValue ();
70+ String regex = patternValue .stringValue ();
71+
72+ try {
73+ // Get compiled pattern from cache or compile new one
74+ Pattern compiledPattern = getCompiledPattern (regex );
75+
76+ // Create matcher and check for match
77+ Matcher matcher = compiledPattern .matcher (text );
78+ boolean matches = matcher .find (); // Use find() for partial match like SPL
79+
80+ // Apply negation if needed
81+ return ExprValueUtils .booleanValue (negated ? !matches : matches );
82+
83+ } catch (Exception e ) {
84+ // Return false on pattern compilation/matching errors
85+ // Note: In production, proper logging should be added here
86+ return ExprValueUtils .booleanValue (false );
6487 }
65-
66- @ Override
67- public ExprValue valueOf (Environment <Expression , ExprValue > valueEnv ) {
68- ExprValue fieldValue = field .valueOf (valueEnv );
69- ExprValue patternValue = pattern .valueOf (valueEnv );
70-
71- // Handle null/missing values
72- if (fieldValue .isNull () || fieldValue .isMissing () ||
73- patternValue .isNull () || patternValue .isMissing ()) {
74- return ExprValueUtils .booleanValue (false );
75- }
76-
77- String text = fieldValue .stringValue ();
78- String regex = patternValue .stringValue ();
79-
80- try {
81- // Get compiled pattern from cache or compile new one
82- Pattern compiledPattern = getCompiledPattern (regex );
83-
84- // Create matcher and check for match
85- Matcher matcher = compiledPattern .matcher (text );
86- boolean matches = matcher .find (); // Use find() for partial match like SPL
87-
88- // Apply negation if needed
89- return ExprValueUtils .booleanValue (negated ? !matches : matches );
90-
91- } catch (Exception e ) {
92- // Log error and return false on pattern compilation/matching errors
93- // In production, you'd want proper logging here
94- System .err .println ("Regex error: " + e .getMessage ());
95- return ExprValueUtils .booleanValue (false );
96- }
88+ }
89+
90+ /** Get compiled pattern from cache or compile and cache it. */
91+ private Pattern getCompiledPattern (String regex ) {
92+ // Check cache size and clear if needed (simple LRU-like behavior)
93+ if (patternCache .size () > MAX_CACHE_SIZE ) {
94+ patternCache .clear ();
9795 }
98-
99- /**
100- * Get compiled pattern from cache or compile and cache it.
101- */
102- private Pattern getCompiledPattern (String regex ) {
103- // Check cache size and clear if needed (simple LRU-like behavior)
104- if (patternCache .size () > MAX_CACHE_SIZE ) {
105- patternCache .clear ();
106- }
107-
108- return patternCache .computeIfAbsent (regex , r -> {
109- // Compile with PCRE2 defaults
110- // pcre4j compiles the pattern with full PCRE2 support
111- return Pattern .compile (r );
96+
97+ return patternCache .computeIfAbsent (
98+ regex ,
99+ r -> {
100+ // Compile with PCRE2 defaults
101+ // pcre4j compiles the pattern with full PCRE2 support
102+ return Pattern .compile (r );
112103 });
113- }
114-
115- @ Override
116- public ExprType type () {
117- return ExprCoreType .BOOLEAN ;
118- }
119-
120- @ Override
121- public <T , C > T accept (ExpressionNodeVisitor <T , C > visitor , C context ) {
122- // This will be implemented when we add the visitor pattern for expressions
123- return visitor .visitNode (this , context );
124- }
125- }
104+ }
105+
106+ @ Override
107+ public ExprType type () {
108+ return ExprCoreType .BOOLEAN ;
109+ }
110+
111+ @ Override
112+ public <T , C > T accept (ExpressionNodeVisitor <T , C > visitor , C context ) {
113+ return visitor .visitRegex (this , context );
114+ }
115+ }
0 commit comments