Skip to content

Commit cfdcfbf

Browse files
authored
Add flags option to Regex::exec (#12546)
1 parent 62180fc commit cfdcfbf

3 files changed

Lines changed: 47 additions & 8 deletions

File tree

include/tsutil/Regex.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum REFlags {
3636
RE_CASE_INSENSITIVE = 0x00000008u, ///< Ignore case (default: case sensitive).
3737
RE_UNANCHORED = 0x00000400u, ///< Unanchored (DFA defaults to anchored).
3838
RE_ANCHORED = 0x80000000u, ///< Anchored (Regex defaults to unanchored).
39+
RE_NOTEMPTY = 0x00000004u ///< Not empty (default: may match empty string).
3940
};
4041

4142
/// @brief Wrapper for PCRE2 match data.
@@ -124,7 +125,7 @@ class Regex
124125
*
125126
* It is safe to call this method concurrently on the same instance of @a this.
126127
*/
127-
bool exec(std::string_view subject) const;
128+
bool exec(std::string_view subject, uint32_t flags = 0) const;
128129

129130
/** Execute the regular expression.
130131
*
@@ -137,7 +138,7 @@ class Regex
137138
* Each capture group takes 3 elements of @a ovector, therefore @a ovecsize must
138139
* be a multiple of 3 and at least three times the number of desired capture groups.
139140
*/
140-
int exec(std::string_view subject, RegexMatches &matches) const;
141+
int exec(std::string_view subject, RegexMatches &matches, uint32_t flags = 0) const;
141142

142143
/// @return The number of capture groups in the compiled pattern.
143144
int get_capture_count();

src/tsutil/Regex.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@
3131
#include <vector>
3232
#include <mutex>
3333

34-
static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSERSITIVE for current PCRE2 version.");
35-
static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_MULTILINE for current PCRE2 version.");
34+
static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSENSITIVE for current PCRE2 version.");
35+
static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_UNANCHORED for current PCRE2 version.");
3636
static_assert(RE_ANCHORED == PCRE2_ANCHORED, "Update RE_ANCHORED for current PCRE2 version.");
37+
static_assert(RE_NOTEMPTY == PCRE2_NOTEMPTY, "Update RE_NOTEMPTY for current PCRE2 version.");
3738

3839
//----------------------------------------------------------------------------
3940
namespace
@@ -296,28 +297,28 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u
296297

297298
//----------------------------------------------------------------------------
298299
bool
299-
Regex::exec(std::string_view subject) const
300+
Regex::exec(std::string_view subject, uint32_t flags) const
300301
{
301302
if (_Code::get(_code) == nullptr) {
302303
return false;
303304
}
304305
RegexMatches matches;
305306

306-
int count = this->exec(subject, matches);
307+
int count = this->exec(subject, matches, flags);
307308
return count > 0;
308309
}
309310

310311
//----------------------------------------------------------------------------
311312
int32_t
312-
Regex::exec(std::string_view subject, RegexMatches &matches) const
313+
Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags) const
313314
{
314315
auto code = _Code::get(_code);
315316

316317
// check if there is a compiled regex
317318
if (code == nullptr) {
318319
return 0;
319320
}
320-
int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, 0,
321+
int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, flags,
321322
RegexMatches::_MatchData::get(matches._match_data), RegexContext::get_instance()->get_match_context());
322323

323324
matches._size = count;

src/tsutil/unit_tests/test_Regex.cc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,40 @@ TEST_CASE("Regex", "[libts][Regex]")
190190
}
191191
#endif
192192
}
193+
194+
TEST_CASE("Regex RE_NOTEMPTY flag behavior", "[libts][Regex][flags][RE_NOTEMPTY]")
195+
{
196+
// Pattern whose only possible match is the empty string
197+
Regex r;
198+
REQUIRE(r.compile("^$") == true);
199+
200+
SECTION("default exec matches empty subject")
201+
{
202+
// boolean overload
203+
CHECK(r.exec(std::string_view("")) == true);
204+
205+
// matches overload should return 1 (one match - the whole subject)
206+
RegexMatches matches;
207+
CHECK(r.exec(std::string_view(""), matches) == 1);
208+
CHECK(matches.size() == 1);
209+
CHECK(matches[0] == std::string_view(""));
210+
}
211+
212+
SECTION("RE_NOTEMPTY prevents empty matches")
213+
{
214+
// boolean overload with RE_NOTEMPTY should not match
215+
CHECK(r.exec(std::string_view(""), RE_NOTEMPTY) == false);
216+
217+
// matches overload should return a negative value (PCRE2_ERROR_NOMATCH)
218+
RegexMatches matches;
219+
int rc = r.exec(std::string_view(""), matches, RE_NOTEMPTY);
220+
CHECK(rc < 0);
221+
}
222+
223+
SECTION("non-empty subject unaffected by RE_NOTEMPTY for this pattern")
224+
{
225+
// '^$' should not match 'a', with or without RE_NOTEMPTY
226+
CHECK(r.exec(std::string_view("a")) == false);
227+
CHECK(r.exec(std::string_view("a"), RE_NOTEMPTY) == false);
228+
}
229+
}

0 commit comments

Comments
 (0)