Skip to content

Commit 2d5e77d

Browse files
eyupcanakman authored and cakebaker committed
expr: avoid panic on regex retry-limit
Treat onig retry-limit-in-match errors as no match. GNU expr returns exit 1 for the same input. Fixes #11612
1 parent a3ac3ba commit 2d5e77d

2 files changed

Lines changed: 33 additions & 29 deletions

File tree

src/uu/expr/src/syntax_tree.rs

Lines changed: 24 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use std::{cell::Cell, collections::BTreeMap};
99

1010
use num_bigint::BigInt;
1111
use num_traits::ToPrimitive;
12-
use onig::{Regex, RegexOptions, Syntax};
12+
use onig::{MatchParam, Regex, RegexOptions, SearchOptions, Syntax};
1313

1414
use crate::{
1515
ExprError, ExprResult,
@@ -366,6 +366,25 @@ fn build_regex(pattern_bytes: Vec<u8>) -> ExprResult<(Regex, String)> {
366366
Ok((re, re_string))
367367
}
368368

369+
/// Run a regex search, treating runtime match errors as no match.
///
/// Thin wrapper around `Regex::search_with_param`: the search is issued over
/// `chars` with start offset `0` and end offset `to`, using
/// `SearchOptions::SEARCH_OPTION_NONE` and a default `MatchParam`. `region`
/// is handed to the search unchanged; callers inspect it afterwards (e.g. via
/// `region.pos(1)`) when the returned value is `Some`.
///
/// Returns whatever `Option<usize>` the search yields on success. If the
/// search returns `Err` instead (the motivating case is onig's
/// retry-limit-in-match error on heavily backtracking patterns), the error is
/// discarded and `None` is returned, so the caller handles it exactly like an
/// ordinary failed match.
370+
fn regex_search<T: onig::EncodedChars>(
371+
regex: &Regex,
372+
chars: T,
373+
to: usize,
374+
region: &mut onig::Region,
375+
) -> Option<usize> {
376+
regex
377+
.search_with_param(
378+
chars,
379+
0,
380+
to,
381+
SearchOptions::SEARCH_OPTION_NONE,
382+
Some(region),
383+
MatchParam::default(),
384+
)
385+
// Collapse `Err(_)` into `None`: every runtime search error is reported
// to the caller as "no match" rather than being propagated.
.unwrap_or(None)
386+
}
387+
369388
/// Find matches in the input using the compiled regex
370389
fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
371390
use onig::EncodedBytes;
@@ -380,13 +399,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
380399
// In UTF-8 locale, check if input is valid UTF-8
381400
if let Ok(left_str) = std::str::from_utf8(&left_bytes) {
382401
// Valid UTF-8, match as UTF-8
383-
let pos = regex.search_with_encoding(
384-
left_str,
385-
0,
386-
left_str.len(),
387-
onig::SearchOptions::SEARCH_OPTION_NONE,
388-
Some(&mut region),
389-
);
402+
let pos = regex_search(&regex, left_str, left_str.len(), &mut region);
390403

391404
if pos.is_some() {
392405
if regex.captures_len() > 0 {
@@ -421,13 +434,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
421434
.ok();
422435

423436
if let Some(re_ascii) = re_ascii {
424-
let pos = re_ascii.search_with_encoding(
425-
left_encoded,
426-
0,
427-
left_bytes.len(),
428-
onig::SearchOptions::SEARCH_OPTION_NONE,
429-
Some(&mut region),
430-
);
437+
let pos = regex_search(&re_ascii, left_encoded, left_bytes.len(), &mut region);
431438

432439
if pos.is_some() {
433440
if re_ascii.captures_len() > 0 {
@@ -469,13 +476,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
469476
UEncoding::Ascii => {
470477
// In ASCII/C locale, work with bytes directly
471478
let left_encoded = EncodedBytes::ascii(&left_bytes);
472-
let pos = regex.search_with_encoding(
473-
left_encoded,
474-
0,
475-
left_bytes.len(),
476-
onig::SearchOptions::SEARCH_OPTION_NONE,
477-
Some(&mut region),
478-
);
479+
let pos = regex_search(&regex, left_encoded, left_bytes.len(), &mut region);
479480

480481
if pos.is_some() {
481482
if regex.captures_len() > 0 {
@@ -515,13 +516,7 @@ fn evaluate_match_expression(left_bytes: Vec<u8>, right_bytes: Vec<u8>) -> ExprR
515516
// Try to find the actual capture bytes for ASCII locale
516517
let mut region = onig::Region::new();
517518
let left_encoded = onig::EncodedBytes::ascii(&left_bytes);
518-
let pos = regex.search_with_encoding(
519-
left_encoded,
520-
0,
521-
left_bytes.len(),
522-
onig::SearchOptions::SEARCH_OPTION_NONE,
523-
Some(&mut region),
524-
);
519+
let pos = regex_search(&regex, left_encoded, left_bytes.len(), &mut region);
525520

526521
if pos.is_some() {
527522
if let Some((start, end)) = region.pos(1) {

tests/by-util/test_expr.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -488,6 +488,15 @@ fn test_regex_newline() {
488488
.stdout_only("18\n");
489489
}
490490

491+
// Regression test for the regex retry-limit case: the input is 30 `a`
// characters followed by `c`, matched against the nested-quantifier pattern
// `\(a\+a\+\)\+b`, which can never match because the input contains no `b`.
// The command is expected to exit with code 1 and print only an empty line
// on stdout (the commit message states GNU expr exits 1 for the same input).
#[test]
492+
fn test_regex_catastrophic_backtracking() {
493+
let input = "a".repeat(30) + "c";
494+
new_ucmd!()
495+
.args(&[input.as_str(), ":", "\\(a\\+a\\+\\)\\+b"])
496+
.fails_with_code(1)
497+
.stdout_only("\n");
498+
}
499+
491500
#[test]
492501
fn test_substr() {
493502
new_ucmd!()

0 commit comments

Comments
 (0)