@@ -93,13 +93,18 @@ impl char {
9393 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
9494 /// `char` and `str` methods are based on.
9595 ///
96- /// New versions of Unicode are released regularly and subsequently all methods
97- /// in the standard library depending on Unicode are updated. Therefore the
98- /// behavior of some `char` and `str` methods and the value of this constant
99- /// changes over time. This is *not* considered to be a breaking change.
96+ /// New versions of Unicode are released regularly, and subsequently all methods
97+ /// in the standard library depending on Unicode are updated. Therefore, the
98+ /// behavior of some `char` and `str` methods, and the value of this constant,
99+ /// change over time (within the boundaries of Unicode's [stability policies]).
100+ /// This is *not* considered to be a breaking change.
101+ ///
102+ /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
100103 ///
101104 /// The version numbering scheme is explained in
102- /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
105+ /// [Section 3.1 (Version Numbering)] of the Unicode Standard.
106+ ///
107+ /// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
103108 #[ stable( feature = "assoc_char_consts" , since = "1.52.0" ) ]
104109 pub const UNICODE_VERSION : ( u8 , u8 , u8 ) = crate :: unicode:: UNICODE_VERSION ;
105110
@@ -480,7 +485,7 @@ impl char {
480485 '\\' => EscapeDebug :: backslash ( ascii:: Char :: ReverseSolidus ) ,
481486 '\"' if args. escape_double_quote => EscapeDebug :: backslash ( ascii:: Char :: QuotationMark ) ,
482487 '\'' if args. escape_single_quote => EscapeDebug :: backslash ( ascii:: Char :: Apostrophe ) ,
483- _ if args. escape_grapheme_extended && self . is_grapheme_extended ( ) => {
488+ _ if args. escape_grapheme_extender && self . is_grapheme_extender ( ) => {
484489 EscapeDebug :: unicode ( self )
485490 }
486491 _ if is_printable ( self ) => EscapeDebug :: printable ( self ) ,
@@ -753,11 +758,11 @@ impl char {
753758
754759 /// Returns `true` if this `char` has the `Alphabetic` property.
755760 ///
756- /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [ Unicode Standard] and
757- /// specified in the [ Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
761+ /// `Alphabetic` is [ described] in Chapter 4 (Character Properties) of the Unicode Standard, and
762+ /// [ specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
758763 ///
759- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
760- /// [ucd ]: https://www.unicode.org/reports/tr44/
764+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
765+ /// [specified ]: https://www.unicode.org/reports/tr44/#Alphabetic
761766 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
762767 ///
763768 /// # Examples
@@ -786,11 +791,11 @@ impl char {
786791 /// Returns `true` if this `char` has the `Cased` property.
787792 /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
788793 ///
789- /// `Cased` is described in Chapter 4 (Character Properties) of the [ Unicode Standard] and
790- /// specified in the [ Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
794+ /// `Cased` is [ described] in Chapter 3 (Conformance) of the Unicode Standard, and
795+ /// [ specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
791796 ///
792- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
793- /// [ucd ]: https://www.unicode.org/reports/tr44/
797+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
798+ /// [specified ]: https://www.unicode.org/reports/tr44/#Cased
794799 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
795800 ///
796801 /// # Examples
@@ -849,11 +854,11 @@ impl char {
849854
850855 /// Returns `true` if this `char` has the `Lowercase` property.
851856 ///
852- /// `Lowercase` is described in Chapter 4 (Character Properties) of the [ Unicode Standard] and
853- /// specified in the [ Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
857+ /// `Lowercase` is [ described] in Chapter 4 (Character Properties) of the Unicode Standard, and
858+ /// [ specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
854859 ///
855- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
856- /// [ucd ]: https://www.unicode.org/reports/tr44/
860+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
861+ /// [specified ]: https://www.unicode.org/reports/tr44/#Lowercase
857862 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
858863 ///
859864 /// # Examples
@@ -889,15 +894,15 @@ impl char {
889894 }
890895 }
891896
892- /// Returns `true` if this `char` has the general category for titlecase letters.
897+ /// Returns `true` if this `char` is in the general category for titlecase letters.
893898 /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
894899 ///
895- /// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4
896- /// (Character Properties) of the [ Unicode Standard] and specified in the [ Unicode Character
897- /// Database][ucd] [`UnicodeData.txt`].
900+ /// Titlecase letters (code points with the general category of `Lt`) are [ described] in Chapter 4
901+ /// (Character Properties) of the Unicode Standard, and [ specified] in the Unicode Character
902+ /// Database [`UnicodeData.txt`].
898903 ///
899- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
900- /// [ucd ]: https://www.unicode.org/reports/tr44/
904+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
905+ /// [specified ]: https://www.unicode.org/reports/tr44/#GC_Values_Table
901906 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
902907 ///
903908 /// # Examples
@@ -925,11 +930,11 @@ impl char {
925930
926931 /// Returns `true` if this `char` has the `Uppercase` property.
927932 ///
928- /// `Uppercase` is described in Chapter 4 (Character Properties) of the [ Unicode Standard] and
929- /// specified in the [ Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
933+ /// `Uppercase` is [ described] in Chapter 4 (Character Properties) of the Unicode Standard, and
934+ /// [ specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
930935 ///
931- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
932- /// [ucd ]: https://www.unicode.org/reports/tr44/
936+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
937+ /// [specified ]: https://www.unicode.org/reports/tr44/#Uppercase
933938 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
934939 ///
935940 /// # Examples
@@ -965,11 +970,41 @@ impl char {
965970 }
966971 }
967972
973+ /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
974+ ///
975+ /// [`is_alphabetic()`]: Self::is_alphabetic
976+ /// [`is_numeric()`]: Self::is_numeric
977+ ///
978+ /// # Examples
979+ ///
980+ /// Basic usage:
981+ ///
982+ /// ```
983+ /// assert!('٣'.is_alphanumeric());
984+ /// assert!('7'.is_alphanumeric());
985+ /// assert!('৬'.is_alphanumeric());
986+ /// assert!('¾'.is_alphanumeric());
987+ /// assert!('①'.is_alphanumeric());
988+ /// assert!('K'.is_alphanumeric());
989+ /// assert!('و'.is_alphanumeric());
990+ /// assert!('藏'.is_alphanumeric());
991+ /// ```
992+ #[ must_use]
993+ #[ stable( feature = "rust1" , since = "1.0.0" ) ]
994+ #[ inline]
995+ pub fn is_alphanumeric ( self ) -> bool {
996+ match self {
997+ 'a' ..='z' | 'A' ..='Z' | '0' ..='9' => true ,
998+ '\0' ..='\u{A9}' => false ,
999+ _ => unicode:: Alphabetic ( self ) || unicode:: N ( self ) ,
1000+ }
1001+ }
1002+
9681003 /// Returns `true` if this `char` has the `White_Space` property.
9691004 ///
970- /// `White_Space` is specified in the [ Unicode Character Database][ucd] [`PropList.txt`].
1005+ /// `White_Space` is [ specified] in the Unicode Character Database [`PropList.txt`].
9711006 ///
972- /// [ucd ]: https://www.unicode.org/reports/tr44/
1007+ /// [specified ]: https://www.unicode.org/reports/tr44/#White_Space
9731008 /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
9741009 ///
9751010 /// # Examples
@@ -999,53 +1034,25 @@ impl char {
9991034 }
10001035 }
10011036
1002- /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1003- ///
1004- /// [`is_alphabetic()`]: #method.is_alphabetic
1005- /// [`is_numeric()`]: #method.is_numeric
1006- ///
1007- /// # Examples
1008- ///
1009- /// Basic usage:
1010- ///
1011- /// ```
1012- /// assert!('٣'.is_alphanumeric());
1013- /// assert!('7'.is_alphanumeric());
1014- /// assert!('৬'.is_alphanumeric());
1015- /// assert!('¾'.is_alphanumeric());
1016- /// assert!('①'.is_alphanumeric());
1017- /// assert!('K'.is_alphanumeric());
1018- /// assert!('و'.is_alphanumeric());
1019- /// assert!('藏'.is_alphanumeric());
1020- /// ```
1021- #[ must_use]
1022- #[ stable( feature = "rust1" , since = "1.0.0" ) ]
1023- #[ inline]
1024- pub fn is_alphanumeric ( self ) -> bool {
1025- match self {
1026- 'a' ..='z' | 'A' ..='Z' | '0' ..='9' => true ,
1027- '\0' ..='\u{A9}' => false ,
1028- _ => unicode:: Alphabetic ( self ) || unicode:: N ( self ) ,
1029- }
1030- }
1031-
10321037 /// Returns `true` if this `char` has the general category for control codes.
10331038 ///
1034- /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
1035- /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
1036- /// Database][ucd] [`UnicodeData.txt`].
1039+ /// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1040+ /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1041+ /// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1042+ /// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
10371043 ///
1038- /// [Unicode Standard ]: https://www.unicode.org/versions/latest/
1039- /// [ucd ]: https://www.unicode.org/reports/tr44/
1044+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1045+ /// [specified ]: https://www.unicode.org/reports/tr44/#GC_Values_Table
10401046 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
10411047 ///
10421048 /// # Examples
10431049 ///
10441050 /// Basic usage:
10451051 ///
10461052 /// ```
1047- /// // U+009C, STRING TERMINATOR
1048- /// assert!(''.is_control());
1053+ /// assert!('\t'.is_control());
1054+ /// assert!('\n'.is_control());
1055+ /// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
10491056 /// assert!(!'q'.is_control());
10501057 /// ```
10511058 #[ must_use]
@@ -1061,29 +1068,28 @@ impl char {
10611068
10621069 /// Returns `true` if this `char` has the `Grapheme_Extend` property.
10631070 ///
1064- /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
1065- /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
1066- /// [`DerivedCoreProperties.txt`].
1071+ /// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1072+ /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
10671073 ///
1068- /// [uax29 ]: https://www.unicode.org/reports/tr29/
1069- /// [ucd ]: https://www.unicode.org/reports/tr44/
1074+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1075+ /// [specified ]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
10701076 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
10711077 #[ must_use]
10721078 #[ inline]
1073- pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
1079+ fn is_grapheme_extender ( self ) -> bool {
10741080 self > '\u{02FF}' && unicode:: Grapheme_Extend ( self )
10751081 }
10761082
10771083 /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
10781084 /// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ),
10791085 /// which has two lowercase forms.
10801086 ///
1081- /// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
1082- /// and specified in the [ Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
1087+ /// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1088+ /// and [ specified] in the Unicode Character Database [`DerivedCoreProperties.txt`];
10831089 /// see those resources for more information.
10841090 ///
1085- /// [D136 ]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1086- /// [ucd ]: https://www.unicode.org/reports/tr44/
1091+ /// [described ]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1092+ /// [specified ]: https://www.unicode.org/reports/tr44/#Case_Ignorable
10871093 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
10881094 #[ must_use]
10891095 #[ inline]
@@ -1099,20 +1105,20 @@ impl char {
10991105 /// Returns `true` if this `char` has one of the general categories for numbers.
11001106 ///
11011107 /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
1102- /// characters, and `No` for other numeric characters) are specified in the [ Unicode Character
1103- /// Database][ucd] [`UnicodeData.txt`].
1108+ /// characters, and `No` for other numeric characters) are [ specified] in the Unicode Character
1109+ /// Database [`UnicodeData.txt`].
11041110 ///
11051111 /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1106- /// If you want everything including characters with overlapping purposes then you might want to use
1107- /// a unicode or language-processing library that exposes the appropriate character properties instead
1108- /// of looking at the unicode categories.
1112+ /// If you want everything including characters with overlapping purposes, then you might want to use
1113+ /// a Unicode or language-processing library that exposes the appropriate character properties
1114+ /// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
11091115 ///
11101116 /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
11111117 /// `is_ascii_digit` or `is_digit` instead.
11121118 ///
1113- /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1114- /// [ucd]: https://www.unicode.org/reports/tr44/
1119+ /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
11151120 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1121+ /// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
11161122 ///
11171123 /// # Examples
11181124 ///
@@ -2151,8 +2157,8 @@ impl char {
21512157}
21522158
21532159pub ( crate ) struct EscapeDebugExtArgs {
2154- /// Escape Extended Grapheme codepoints?
2155- pub ( crate ) escape_grapheme_extended : bool ,
2160+ /// Escape Grapheme Extender codepoints?
2161+ pub ( crate ) escape_grapheme_extender : bool ,
21562162
21572163 /// Escape single quotes?
21582164 pub ( crate ) escape_single_quote : bool ,
@@ -2163,7 +2169,7 @@ pub(crate) struct EscapeDebugExtArgs {
21632169
21642170impl EscapeDebugExtArgs {
21652171 pub ( crate ) const ESCAPE_ALL : Self = Self {
2166- escape_grapheme_extended : true ,
2172+ escape_grapheme_extender : true ,
21672173 escape_single_quote : true ,
21682174 escape_double_quote : true ,
21692175 } ;
0 commit comments