Skip to content

Commit 652ec92

Browse files
perf: replace std::regex with POSIX regex_t in grep and sed
std::regex is known to be 5-10x slower than POSIX regex. Replace with regex_t via the existing scoped_regex RAII wrapper. Also removes the <regex> header dependency, reducing compile time and binary size.
1 parent 244ad4f commit 652ec92

2 files changed

Lines changed: 39 additions & 25 deletions

File tree

src/applets/grep.cpp

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,17 @@
22
// Supported flags: -E (extended regex), -i (ignore case), -v (invert match),
33
// -n (line numbers), -r (recursive), -c (count only),
44
// -l (files with matches), -q (quiet)
5-
// Known differences from GNU grep: uses std::regex (slower on large files),
6-
// no PCRE2, no color, no context lines.
75

86
#include <cstdio>
97
#include <filesystem>
10-
#include <regex>
118
#include <string>
129
#include <string_view>
1310
#include <vector>
1411

1512
#include <cfbox/args.hpp>
1613
#include <cfbox/help.hpp>
1714
#include <cfbox/io.hpp>
15+
#include <cfbox/regex.hpp>
1816

1917
namespace {
2018

@@ -55,15 +53,12 @@ auto grep_file(const std::string& pattern, const GrepOptions& opts,
5553

5654
auto lines = cfbox::io::split_lines(result.value());
5755

58-
auto flags = std::regex::ECMAScript;
59-
if (opts.extended) flags = std::regex::egrep;
60-
if (opts.ignore_case) flags |= std::regex::icase;
56+
int cflags = opts.extended ? REG_EXTENDED : 0;
57+
if (opts.ignore_case) cflags |= REG_ICASE;
6158

62-
std::regex re;
63-
try {
64-
re = std::regex(pattern, flags);
65-
} catch (const std::regex_error& e) {
66-
std::fprintf(stderr, "cfbox grep: invalid regex: %s\n", e.what());
59+
cfbox::util::scoped_regex re;
60+
if (re.compile(pattern.c_str(), cflags) != 0) {
61+
std::fprintf(stderr, "cfbox grep: invalid regex: %s\n", pattern.c_str());
6762
return 2;
6863
}
6964

@@ -72,7 +67,7 @@ auto grep_file(const std::string& pattern, const GrepOptions& opts,
7267

7368
for (std::size_t i = 0; i < lines.size(); ++i) {
7469
const auto& line = lines[i];
75-
bool matched = std::regex_search(line, re);
70+
bool matched = re.exec(line.c_str(), 0, nullptr, 0) == 0;
7671
if (opts.invert) matched = !matched;
7772

7873
if (matched) {

src/applets/sed.cpp

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
// no a/i/c commands, no hold space, no multi-line pattern space.
66

77
#include <cstdio>
8-
#include <regex>
8+
#include <cstring>
99
#include <string>
1010
#include <string_view>
1111
#include <vector>
1212

1313
#include <cfbox/args.hpp>
1414
#include <cfbox/help.hpp>
1515
#include <cfbox/io.hpp>
16+
#include <cfbox/regex.hpp>
1617

1718
namespace {
1819

@@ -225,20 +226,38 @@ auto address_matches(const Address& addr, std::size_t line, std::size_t total_li
225226
}
226227

227228
auto apply_substitute(std::string& line, const SedCommand& cmd) -> bool {
228-
try {
229-
std::regex re(cmd.pattern);
230-
if (!std::regex_search(line, re)) return false;
231-
232-
if (cmd.global) {
233-
line = std::regex_replace(line, re, cmd.replacement);
234-
} else {
235-
line = std::regex_replace(line, re, cmd.replacement,
236-
std::regex_constants::format_first_only);
229+
cfbox::util::scoped_regex re;
230+
if (re.compile(cmd.pattern.c_str(), REG_EXTENDED) != 0) return false;
231+
232+
regmatch_t m;
233+
if (re.exec(line.c_str(), 1, &m, 0) != 0) return false;
234+
235+
if (!cmd.global) {
236+
// Single replacement
237+
std::string result;
238+
auto* p = line.c_str();
239+
result.append(p, static_cast<std::size_t>(m.rm_so));
240+
result.append(cmd.replacement);
241+
result.append(p + m.rm_eo);
242+
line = result;
243+
} else {
244+
// Global replacement
245+
std::string result;
246+
auto* p = line.c_str();
247+
auto offset = p;
248+
while (re.exec(offset, 1, &m, 0) == 0 && m.rm_so >= 0) {
249+
result.append(offset, static_cast<std::size_t>(m.rm_so));
250+
result.append(cmd.replacement);
251+
offset += m.rm_eo;
252+
if (m.rm_so == m.rm_eo) {
253+
if (*offset) result += *offset++;
254+
else break;
255+
}
237256
}
238-
return true;
239-
} catch (const std::regex_error&) {
240-
return false;
257+
result.append(offset);
258+
line = result;
241259
}
260+
return true;
242261
}
243262

244263
auto process_lines(const std::vector<std::string>& lines,

0 commit comments

Comments
 (0)