@@ -840,6 +840,56 @@ pub enum BitOrder {
840840#[ cfg( feature = "alloc" ) ]
841841use crate :: BitOrder :: * ;
842842
/// Interpretation of a byte for decoding purposes
///
/// For a given encoding, a byte can either be a symbol of that encoding (with a value within the
/// number of symbols of that encoding), a padding character, an ignored character, or an invalid
/// character.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Character {
    /// A symbol
    Symbol {
        /// The value of the symbol
        value: usize,
    },

    /// A padding character
    Padding,

    /// An ignored character
    Ignored,

    /// An invalid character
    Invalid,
}

impl Character {
    /// Returns the value of the symbol, if the character is a symbol
    ///
    /// Returns `Some(value)` for [`Character::Symbol`] and `None` for every other variant.
    pub fn is_symbol(self) -> Option<usize> {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            Character::Symbol { value } => Some(value),
            Character::Padding | Character::Ignored | Character::Invalid => None,
        }
    }

    /// Returns whether the character is padding
    pub fn is_padding(self) -> bool {
        matches!(self, Character::Padding)
    }

    /// Returns whether the character is ignored
    pub fn is_ignored(self) -> bool {
        matches!(self, Character::Ignored)
    }

    /// Returns whether the character is invalid
    pub fn is_invalid(self) -> bool {
        matches!(self, Character::Invalid)
    }
}
892+
// Hidden from the generated documentation and only available with the `alloc` feature.
#[doc(hidden)]
#[cfg(feature = "alloc")]
pub type InternalEncoding = Cow<'static, [u8]>;
@@ -1600,6 +1650,16 @@ impl Encoding {
16001650 self . bit ( )
16011651 }
16021652
1653+ /// Interprets a byte as a character
1654+ pub fn interpret_byte ( & self , byte : u8 ) -> Character {
1655+ match self . val ( ) [ byte as usize ] {
1656+ INVALID => Character :: Invalid ,
1657+ IGNORE => Character :: Ignored ,
1658+ PADDING => Character :: Padding ,
1659+ value => Character :: Symbol { value : value as usize } ,
1660+ }
1661+ }
1662+
16031663 /// Returns whether the encoding is canonical
16041664 ///
16051665 /// An encoding is not canonical if one of the following conditions holds:
0 commit comments