Skip to content

Commit c05c5cd

Browse files
committed
Add interpretation function from byte to character
This is useful for custom processing that cannot be easily provided in a generic way. For example, the `FromBase64` function of ECMA-262 is not a simple instantiation of the existing customizations. It needs additional special processing.
1 parent 73e45be commit c05c5cd

3 files changed

Lines changed: 62 additions & 4 deletions

File tree

cmp/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ fn main() {
66
.compiler(compiler)
77
.define("COMPILER", Some(*compiler))
88
.file("src/ref.c")
9+
.flag_if_supported("-Wno-unterminated-string-initialization")
910
.compile(&format!("libref_{}.a", compiler));
1011
}
1112
}

cmp/tests/lib.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ fn encode_exact() {
3131
fn difference() {
3232
let x = b"AAB=";
3333
assert_eq!(BASE64.decode(x).err().unwrap(), DecodeError { position: 2, kind: Trailing });
34-
assert_eq!(
35-
BASE64_STANDARD.decode(x).err().unwrap(),
36-
InvalidLastSymbol { offset: 2, symbol: b'B', symbol_value: 0x01 },
37-
);
34+
assert_eq!(BASE64_STANDARD.decode(x).err().unwrap(), InvalidLastSymbol(2, b'B'),);
3835
let x = b"AA\nB=";
3936
assert_eq!(BASE64.decode(x).err().unwrap(), DecodeError { position: 4, kind: Length });
4037
assert_eq!(BASE64_STANDARD.decode(x).err().unwrap(), InvalidByte(2, b'\n'));

lib/src/lib.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,56 @@ pub enum BitOrder {
840840
#[cfg(feature = "alloc")]
841841
use crate::BitOrder::*;
842842

843+
/// Interpretation of a byte for decoding purposes
844+
///
845+
/// For a given encoding, a byte is one of: a symbol of that encoding (with a value smaller than
846+
/// the number of symbols of that encoding), a padding character, an ignored character, or an
847+
/// invalid character.
848+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
849+
pub enum Character {
850+
/// A symbol
851+
Symbol {
852+
/// The value of the symbol
853+
value: usize,
854+
},
855+
856+
/// A padding character
857+
Padding,
858+
859+
/// An ignored character
860+
Ignored,
861+
862+
/// An invalid character
863+
Invalid,
864+
}
865+
866+
impl Character {
867+
/// Returns the value of the character if it is a symbol
868+
///
869+
/// If the character is not a symbol, `None` is returned.
870+
pub fn is_symbol(self) -> Option<usize> {
871+
match self {
872+
Character::Symbol { value } => Some(value),
873+
_ => None,
874+
}
875+
}
876+
877+
/// Returns whether the character is padding
878+
pub fn is_padding(self) -> bool {
879+
matches!(self, Character::Padding)
880+
}
881+
882+
/// Returns whether the character is ignored
883+
pub fn is_ignored(self) -> bool {
884+
matches!(self, Character::Ignored)
885+
}
886+
887+
/// Returns whether the character is invalid
888+
pub fn is_invalid(self) -> bool {
889+
matches!(self, Character::Invalid)
890+
}
891+
}
892+
843893
#[doc(hidden)]
844894
#[cfg(feature = "alloc")]
845895
pub type InternalEncoding = Cow<'static, [u8]>;
@@ -1600,6 +1650,16 @@ impl Encoding {
16001650
self.bit()
16011651
}
16021652

1653+
/// Interprets a byte as a character
1654+
pub fn interpret_byte(&self, byte: u8) -> Character {
1655+
match self.val()[byte as usize] {
1656+
INVALID => Character::Invalid,
1657+
IGNORE => Character::Ignored,
1658+
PADDING => Character::Padding,
1659+
value => Character::Symbol { value: value as usize },
1660+
}
1661+
}
1662+
16031663
/// Returns whether the encoding is canonical
16041664
///
16051665
/// An encoding is not canonical if one of the following conditions holds:

0 commit comments

Comments
 (0)