Skip to content

Commit 71b887e

Browse files
committed
tomo - add unit test for regex util class
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent f19431c commit 71b887e

2 files changed

Lines changed: 221 additions & 2 deletions

File tree

core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,9 @@
1919
*/
2020
public class RegexCommonUtils {
2121

22-
// Pattern to identify named capture groups in regex
2322
private static final Pattern NAMED_GROUP_PATTERN =
2423
Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");
2524

26-
// Maximum cache size to prevent memory issues
2725
private static final int MAX_CACHE_SIZE = 1000;
2826

2927
private static final Map<String, Pattern> patternCache =
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.parse;
7+
8+
import static org.junit.jupiter.api.Assertions.*;
9+
10+
import java.util.List;
11+
import java.util.regex.Pattern;
12+
import java.util.regex.PatternSyntaxException;
13+
import org.junit.jupiter.api.Test;
14+
15+
public class RegexCommonUtilsTest {
16+
17+
@Test
18+
public void testGetCompiledPattern() {
19+
String regex = "test.*pattern";
20+
Pattern pattern1 = RegexCommonUtils.getCompiledPattern(regex);
21+
Pattern pattern2 = RegexCommonUtils.getCompiledPattern(regex);
22+
23+
assertNotNull(pattern1);
24+
assertSame(pattern1, pattern2, "Should return cached pattern");
25+
assertEquals(regex, pattern1.pattern());
26+
}
27+
28+
@Test
29+
public void testGetCompiledPatternWithInvalidRegex() {
30+
String invalidRegex = "[invalid";
31+
32+
assertThrows(
33+
PatternSyntaxException.class,
34+
() -> {
35+
RegexCommonUtils.getCompiledPattern(invalidRegex);
36+
});
37+
}
38+
39+
@Test
40+
public void testGetNamedGroupCandidatesSingle() {
41+
String pattern = "(?<name>[a-z]+)";
42+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
43+
44+
assertEquals(1, groups.size());
45+
assertEquals("name", groups.get(0));
46+
}
47+
48+
@Test
49+
public void testGetNamedGroupCandidatesMultiple() {
50+
String pattern = "(?<first>[a-z]+)\\s+(?<second>[0-9]+)\\s+(?<third>.*)";
51+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
52+
53+
assertEquals(3, groups.size());
54+
assertEquals("first", groups.get(0));
55+
assertEquals("second", groups.get(1));
56+
assertEquals("third", groups.get(2));
57+
}
58+
59+
@Test
60+
public void testGetNamedGroupCandidatesWithMixedGroups() {
61+
String pattern = "([a-z]+)\\s+(?<named1>[0-9]+)\\s+(\\d+)\\s+(?<named2>.*)";
62+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
63+
64+
assertEquals(2, groups.size());
65+
assertEquals("named1", groups.get(0));
66+
assertEquals("named2", groups.get(1));
67+
}
68+
69+
@Test
70+
public void testGetNamedGroupCandidatesNoGroups() {
71+
String pattern = "[a-z]+\\s+[0-9]+";
72+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
73+
74+
assertEquals(0, groups.size());
75+
}
76+
77+
@Test
78+
public void testGetNamedGroupCandidatesEmailPattern() {
79+
String pattern = ".+@(?<host>.+)";
80+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
81+
82+
assertEquals(1, groups.size());
83+
assertEquals("host", groups.get(0));
84+
}
85+
86+
@Test
87+
public void testMatchesPartialWithMatch() {
88+
assertTrue(RegexCommonUtils.matchesPartial("test string", "test"));
89+
assertTrue(RegexCommonUtils.matchesPartial("test string", "string"));
90+
assertTrue(RegexCommonUtils.matchesPartial("test string", "st.*ng"));
91+
assertTrue(RegexCommonUtils.matchesPartial("user@domain.com", ".*@domain\\.com"));
92+
}
93+
94+
@Test
95+
public void testMatchesPartialWithoutMatch() {
96+
assertFalse(RegexCommonUtils.matchesPartial("test string", "notfound"));
97+
assertFalse(RegexCommonUtils.matchesPartial("test string", "^string"));
98+
assertFalse(RegexCommonUtils.matchesPartial("user@domain.com", ".*@other\\.com"));
99+
}
100+
101+
@Test
102+
public void testMatchesPartialWithNullInputs() {
103+
assertFalse(RegexCommonUtils.matchesPartial(null, "pattern"));
104+
assertFalse(RegexCommonUtils.matchesPartial("text", null));
105+
assertFalse(RegexCommonUtils.matchesPartial(null, null));
106+
}
107+
108+
@Test
109+
public void testMatchesPartialWithEmptyString() {
110+
assertTrue(RegexCommonUtils.matchesPartial("", ""));
111+
assertTrue(RegexCommonUtils.matchesPartial("text", ""));
112+
assertFalse(RegexCommonUtils.matchesPartial("", "pattern"));
113+
}
114+
115+
@Test
116+
public void testMatchesPartialWithInvalidRegex() {
117+
assertThrows(
118+
PatternSyntaxException.class,
119+
() -> {
120+
RegexCommonUtils.matchesPartial("text", "[invalid");
121+
});
122+
}
123+
124+
@Test
125+
public void testExtractNamedGroupSuccess() {
126+
Pattern pattern = Pattern.compile("(?<user>[^@]+)@(?<domain>.+)");
127+
String text = "john@example.com";
128+
129+
assertEquals("john", RegexCommonUtils.extractNamedGroup(text, pattern, "user"));
130+
assertEquals("example.com", RegexCommonUtils.extractNamedGroup(text, pattern, "domain"));
131+
}
132+
133+
@Test
134+
public void testExtractNamedGroupNoMatch() {
135+
Pattern pattern = Pattern.compile("(?<user>[^@]+)@(?<domain>.+)");
136+
String text = "not_an_email";
137+
138+
assertNull(RegexCommonUtils.extractNamedGroup(text, pattern, "user"));
139+
assertNull(RegexCommonUtils.extractNamedGroup(text, pattern, "domain"));
140+
}
141+
142+
@Test
143+
public void testExtractNamedGroupNonExistentGroup() {
144+
Pattern pattern = Pattern.compile("(?<user>[^@]+)@(?<domain>.+)");
145+
String text = "john@example.com";
146+
147+
assertNull(RegexCommonUtils.extractNamedGroup(text, pattern, "nonexistent"));
148+
}
149+
150+
@Test
151+
public void testExtractNamedGroupWithNullInputs() {
152+
Pattern pattern = Pattern.compile("(?<user>[^@]+)@(?<domain>.+)");
153+
String text = "john@example.com";
154+
155+
assertNull(RegexCommonUtils.extractNamedGroup(null, pattern, "user"));
156+
assertNull(RegexCommonUtils.extractNamedGroup(text, null, "user"));
157+
assertNull(RegexCommonUtils.extractNamedGroup(text, pattern, null));
158+
assertNull(RegexCommonUtils.extractNamedGroup(null, null, null));
159+
}
160+
161+
@Test
162+
public void testExtractNamedGroupComplexPattern() {
163+
Pattern pattern = Pattern.compile("(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})");
164+
String text = "2024-03-15";
165+
166+
assertEquals("2024", RegexCommonUtils.extractNamedGroup(text, pattern, "year"));
167+
assertEquals("03", RegexCommonUtils.extractNamedGroup(text, pattern, "month"));
168+
assertEquals("15", RegexCommonUtils.extractNamedGroup(text, pattern, "day"));
169+
}
170+
171+
@Test
172+
public void testPatternCachingBehavior() {
173+
// Test that patterns are cached and reused
174+
String regex1 = "test1.*";
175+
String regex2 = "test2.*";
176+
177+
Pattern p1a = RegexCommonUtils.getCompiledPattern(regex1);
178+
Pattern p2a = RegexCommonUtils.getCompiledPattern(regex2);
179+
Pattern p1b = RegexCommonUtils.getCompiledPattern(regex1);
180+
Pattern p2b = RegexCommonUtils.getCompiledPattern(regex2);
181+
182+
assertSame(p1a, p1b, "Same regex should return cached pattern");
183+
assertSame(p2a, p2b, "Same regex should return cached pattern");
184+
assertNotSame(p1a, p2a, "Different regex should return different patterns");
185+
}
186+
187+
@Test
188+
public void testGetNamedGroupCandidatesWithNumericNames() {
189+
String pattern = "(?<group1>[a-z]+)\\s+(?<group2>[0-9]+)";
190+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
191+
192+
assertEquals(2, groups.size());
193+
assertEquals("group1", groups.get(0));
194+
assertEquals("group2", groups.get(1));
195+
}
196+
197+
@Test
198+
public void testGetNamedGroupCandidatesSpecialCharacters() {
199+
// Test that group names containing special characters or starting with a digit are not
200+
// captured (a name must start with a letter and contain only alphanumeric characters)
201+
String pattern = "(?<valid_group>[a-z]+)\\s+(?<123invalid>[0-9]+)\\s+(?<also-invalid>.*)";
202+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
203+
204+
// Based on the NAMED_GROUP_PATTERN regex, only names that start with a letter and consist
205+
// solely of alphanumeric characters are matched
206+
// The pattern is: "\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>"
207+
// So "valid_group" won't match because of the underscore, "123invalid" won't match because it
208+
// starts with a digit, and "also-invalid" won't match because of the hyphen
209+
assertEquals(0, groups.size());
210+
}
211+
212+
@Test
213+
public void testGetNamedGroupCandidatesValidAlphanumeric() {
214+
String pattern = "(?<groupA>[a-z]+)\\s+(?<group2B>[0-9]+)";
215+
List<String> groups = RegexCommonUtils.getNamedGroupCandidates(pattern);
216+
217+
assertEquals(2, groups.size());
218+
assertEquals("groupA", groups.get(0));
219+
assertEquals("group2B", groups.get(1));
220+
}
221+
}

0 commit comments

Comments
 (0)