From 5c044b5feff3e0533538e259862666bc0d80ce57 Mon Sep 17 00:00:00 2001 From: can1357 Date: Wed, 18 Mar 2026 23:26:09 +0100 Subject: [PATCH] tr: reject unknown character classes during sequence parsing --- src/uu/tr/locales/en-US.ftl | 1 + src/uu/tr/locales/fr-FR.ftl | 1 + src/uu/tr/src/operation.rs | 59 +++++++++++++++++++++---------------- tests/by-util/test_tr.rs | 11 ++++++- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/uu/tr/locales/en-US.ftl b/src/uu/tr/locales/en-US.ftl index 087436847a9..f91f2de3033 100644 --- a/src/uu/tr/locales/en-US.ftl +++ b/src/uu/tr/locales/en-US.ftl @@ -27,6 +27,7 @@ tr-warning-invalid-utf8 = invalid utf8 sequence # Sequence parsing error messages tr-error-missing-char-class-name = missing character class name '[::]' +tr-error-invalid-char-class = invalid character class { $class } tr-error-missing-equivalence-class-char = missing equivalence class character '[==]' tr-error-multiple-char-repeat-in-set2 = only one [c*] repeat construct may appear in string2 tr-error-char-repeat-in-set1 = the [c*] repeat construct may not appear in string1 diff --git a/src/uu/tr/locales/fr-FR.ftl b/src/uu/tr/locales/fr-FR.ftl index 075b4a4477a..0b59ea133b0 100644 --- a/src/uu/tr/locales/fr-FR.ftl +++ b/src/uu/tr/locales/fr-FR.ftl @@ -28,6 +28,7 @@ tr-warning-invalid-utf8 = séquence UTF-8 non valide # Messages d'erreur d'analyse de séquence tr-error-missing-char-class-name = nom de classe de caractères manquant '[::]' +tr-error-invalid-char-class = classe de caractères non valide { $class } tr-error-missing-equivalence-class-char = caractère de classe d'équivalence manquant '[==]' tr-error-multiple-char-repeat-in-set2 = seule une construction de répétition [c*] peut apparaître dans string2 tr-error-char-repeat-in-set1 = la construction de répétition [c*] ne peut pas apparaître dans string1 diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 2f45bc9b990..f945bebca7c 100644 --- a/src/uu/tr/src/operation.rs +++ 
b/src/uu/tr/src/operation.rs @@ -34,6 +34,7 @@ pub trait ChunkProcessor { #[derive(Debug, Clone)] pub enum BadSequence { MissingCharClassName, + InvalidCharClass(String), MissingEquivalentClassChar, MultipleCharRepeatInSet2, CharRepeatInSet1, @@ -53,6 +54,13 @@ impl Display for BadSequence { Self::MissingCharClassName => { write!(f, "{}", translate!("tr-error-missing-char-class-name")) } + Self::InvalidCharClass(class) => { + write!( + f, + "{}", + translate!("tr-error-invalid-char-class", "class" => format!("'{}'", class)) + ) + } Self::MissingEquivalentClassChar => { write!( f, @@ -499,31 +507,32 @@ impl Sequence { } fn parse_class(input: &[u8]) -> IResult<&[u8], Result<Self, BadSequence>> { - delimited( - tag("[:"), - alt(( - map( - alt(( - value(Self::Class(Class::Alnum), tag("alnum")), - value(Self::Class(Class::Alpha), tag("alpha")), - value(Self::Class(Class::Blank), tag("blank")), - value(Self::Class(Class::Control), tag("cntrl")), - value(Self::Class(Class::Digit), tag("digit")), - value(Self::Class(Class::Graph), tag("graph")), - value(Self::Class(Class::Lower), tag("lower")), - value(Self::Class(Class::Print), tag("print")), - value(Self::Class(Class::Punct), tag("punct")), - value(Self::Class(Class::Space), tag("space")), - value(Self::Class(Class::Upper), tag("upper")), - value(Self::Class(Class::Xdigit), tag("xdigit")), - )), - Ok, - ), - value(Err(BadSequence::MissingCharClassName), tag("")), - )), - tag(":]"), - ) - .parse(input) + preceded(tag("[:"), terminated(take_until(":]"), tag(":]"))) + .parse(input) + .map(|(l, class_name)| { + ( + l, + match class_name { + b"" => Err(BadSequence::MissingCharClassName), + b"alnum" => Ok(Self::Class(Class::Alnum)), + b"alpha" => Ok(Self::Class(Class::Alpha)), + b"blank" => Ok(Self::Class(Class::Blank)), + b"cntrl" => Ok(Self::Class(Class::Control)), + b"digit" => Ok(Self::Class(Class::Digit)), + b"graph" => Ok(Self::Class(Class::Graph)), + b"lower" => Ok(Self::Class(Class::Lower)), + b"print" => Ok(Self::Class(Class::Print)), + 
b"punct" => Ok(Self::Class(Class::Punct)), + b"space" => Ok(Self::Class(Class::Space)), + b"upper" => Ok(Self::Class(Class::Upper)), + b"xdigit" => Ok(Self::Class(Class::Xdigit)), + _ => Err(BadSequence::InvalidCharClass(format!( + "[:{}:]", + String::from_utf8_lossy(class_name) + ))), + }, + ) + }) } fn parse_char_equal(input: &[u8]) -> IResult<&[u8], Result<Self, BadSequence>> { diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index 4a7c266b92a..fd4fc91a7fd 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer +// spell-checker:ignore aabbaa aabbcc aabc abbb abbbcddd abcc abcdefabcdef abcdefghijk abcdefghijklmn abcdefghijklmnop ABCDEFGHIJKLMNOPQRS abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFZZ abcxyz ABCXYZ abcxyzabcxyz ABCXYZABCXYZ acbdef alnum amzamz AMZXAMZ bbbd cclass cefgm cntrl compl dabcdef dncase fooclass Gzabcdefg PQRST upcase wxyzz xdigit XXXYYY xycde xyyye xyyz xyzzzzxyzzzz ZABCDEF Zamz Cdefghijkl Cdefghijklmn asdfqqwweerr qwerr asdfqwer qwer aassddffqwer asdfqwer use uutests::at_and_ucmd; use uutests::new_ucmd; @@ -1185,6 +1185,15 @@ fn check_against_gnu_tr_tests_empty_cc() { .stderr_is("tr: missing character class name '[::]'\n"); } +#[test] +fn check_against_gnu_tr_tests_invalid_cc() { + new_ucmd!() + .args(&["[:fooclass:]", "x"]) + .pipe_in("") + .fails() + .stderr_is("tr: invalid character class 
'[:fooclass:]'\n"); +} + #[test] fn check_against_gnu_tr_tests_repeat_set1() { new_ucmd!()