Skip to content

Commit ffa8436

Browse files
Improve core::char::methods.rs docs
And rename a struct field.
1 parent 38799a3 commit ffa8436

6 files changed

Lines changed: 105 additions & 94 deletions

File tree

library/core/src/char/methods.rs

Lines changed: 93 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,18 @@ impl char {
9393
/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
9494
/// `char` and `str` methods are based on.
9595
///
96-
/// New versions of Unicode are released regularly and subsequently all methods
97-
/// in the standard library depending on Unicode are updated. Therefore the
98-
/// behavior of some `char` and `str` methods and the value of this constant
99-
/// changes over time. This is *not* considered to be a breaking change.
96+
/// New versions of Unicode are released regularly, and subsequently all methods
97+
/// in the standard library depending on Unicode are updated. Therefore, the
98+
/// behavior of some `char` and `str` methods, and the value of this constant,
99+
/// change over time (within the boundaries of Unicode's [stability policies]).
100+
/// This is *not* considered to be a breaking change.
101+
///
102+
/// [stability policies]: https://www.unicode.org/policies/stability_policy.html
100103
///
101104
/// The version numbering scheme is explained in
102-
/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
105+
/// [Section 3.1 (Version Numbering)] of the Unicode Standard.
106+
///
107+
/// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
103108
#[stable(feature = "assoc_char_consts", since = "1.52.0")]
104109
pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
105110

@@ -480,7 +485,7 @@ impl char {
480485
'\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
481486
'\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
482487
'\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
483-
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
488+
_ if args.escape_grapheme_extender && self.is_grapheme_extender() => {
484489
EscapeDebug::unicode(self)
485490
}
486491
_ if is_printable(self) => EscapeDebug::printable(self),
@@ -753,11 +758,11 @@ impl char {
753758

754759
/// Returns `true` if this `char` has the `Alphabetic` property.
755760
///
756-
/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
757-
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
761+
/// `Alphabetic` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
762+
/// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
758763
///
759-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
760-
/// [ucd]: https://www.unicode.org/reports/tr44/
764+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
765+
/// [specified]: https://www.unicode.org/reports/tr44/#Alphabetic
761766
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
762767
///
763768
/// # Examples
@@ -786,11 +791,11 @@ impl char {
786791
/// Returns `true` if this `char` has the `Cased` property.
787792
/// A character is cased if and only if it is uppercase, lowercase, or titlecase.
788793
///
789-
/// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
790-
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
794+
/// `Cased` is [described] in Chapter 3 (Conformance) of the Unicode Standard, and
795+
/// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
791796
///
792-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
793-
/// [ucd]: https://www.unicode.org/reports/tr44/
797+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
798+
/// [specified]: https://www.unicode.org/reports/tr44/#Cased
794799
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
795800
///
796801
/// # Examples
@@ -849,11 +854,11 @@ impl char {
849854

850855
/// Returns `true` if this `char` has the `Lowercase` property.
851856
///
852-
/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
853-
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
857+
/// `Lowercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
858+
/// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
854859
///
855-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
856-
/// [ucd]: https://www.unicode.org/reports/tr44/
860+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
861+
/// [specified]: https://www.unicode.org/reports/tr44/#Lowercase
857862
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
858863
///
859864
/// # Examples
@@ -889,15 +894,15 @@ impl char {
889894
}
890895
}
891896

892-
/// Returns `true` if this `char` has the general category for titlecase letters.
897+
/// Returns `true` if this `char` is in the general category for titlecase letters.
893898
/// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
894899
///
895-
/// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4
896-
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
897-
/// Database][ucd] [`UnicodeData.txt`].
900+
/// Titlecase letters (code points with the general category of `Lt`) are [described] in Chapter 4
901+
/// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
902+
/// Database [`UnicodeData.txt`].
898903
///
899-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
900-
/// [ucd]: https://www.unicode.org/reports/tr44/
904+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
905+
/// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
901906
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
902907
///
903908
/// # Examples
@@ -925,11 +930,11 @@ impl char {
925930

926931
/// Returns `true` if this `char` has the `Uppercase` property.
927932
///
928-
/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
929-
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
933+
/// `Uppercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
934+
/// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
930935
///
931-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
932-
/// [ucd]: https://www.unicode.org/reports/tr44/
936+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
937+
/// [specified]: https://www.unicode.org/reports/tr44/#Uppercase
933938
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
934939
///
935940
/// # Examples
@@ -965,11 +970,41 @@ impl char {
965970
}
966971
}
967972

973+
/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
974+
///
975+
/// [`is_alphabetic()`]: Self::is_alphabetic
976+
/// [`is_numeric()`]: Self::is_numeric
977+
///
978+
/// # Examples
979+
///
980+
/// Basic usage:
981+
///
982+
/// ```
983+
/// assert!('٣'.is_alphanumeric());
984+
/// assert!('7'.is_alphanumeric());
985+
/// assert!('৬'.is_alphanumeric());
986+
/// assert!('¾'.is_alphanumeric());
987+
/// assert!('①'.is_alphanumeric());
988+
/// assert!('K'.is_alphanumeric());
989+
/// assert!('و'.is_alphanumeric());
990+
/// assert!('藏'.is_alphanumeric());
991+
/// ```
992+
#[must_use]
993+
#[stable(feature = "rust1", since = "1.0.0")]
994+
#[inline]
995+
pub fn is_alphanumeric(self) -> bool {
996+
match self {
997+
'a'..='z' | 'A'..='Z' | '0'..='9' => true,
998+
'\0'..='\u{A9}' => false,
999+
_ => unicode::Alphabetic(self) || unicode::N(self),
1000+
}
1001+
}
1002+
9681003
/// Returns `true` if this `char` has the `White_Space` property.
9691004
///
970-
/// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
1005+
/// `White_Space` is [specified] in the Unicode Character Database [`PropList.txt`].
9711006
///
972-
/// [ucd]: https://www.unicode.org/reports/tr44/
1007+
/// [specified]: https://www.unicode.org/reports/tr44/#White_Space
9731008
/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
9741009
///
9751010
/// # Examples
@@ -999,53 +1034,25 @@ impl char {
9991034
}
10001035
}
10011036

1002-
/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1003-
///
1004-
/// [`is_alphabetic()`]: #method.is_alphabetic
1005-
/// [`is_numeric()`]: #method.is_numeric
1006-
///
1007-
/// # Examples
1008-
///
1009-
/// Basic usage:
1010-
///
1011-
/// ```
1012-
/// assert!('٣'.is_alphanumeric());
1013-
/// assert!('7'.is_alphanumeric());
1014-
/// assert!('৬'.is_alphanumeric());
1015-
/// assert!('¾'.is_alphanumeric());
1016-
/// assert!('①'.is_alphanumeric());
1017-
/// assert!('K'.is_alphanumeric());
1018-
/// assert!('و'.is_alphanumeric());
1019-
/// assert!('藏'.is_alphanumeric());
1020-
/// ```
1021-
#[must_use]
1022-
#[stable(feature = "rust1", since = "1.0.0")]
1023-
#[inline]
1024-
pub fn is_alphanumeric(self) -> bool {
1025-
match self {
1026-
'a'..='z' | 'A'..='Z' | '0'..='9' => true,
1027-
'\0'..='\u{A9}' => false,
1028-
_ => unicode::Alphabetic(self) || unicode::N(self),
1029-
}
1030-
}
1031-
10321037
/// Returns `true` if this `char` has the general category for control codes.
10331038
///
1034-
/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
1035-
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
1036-
/// Database][ucd] [`UnicodeData.txt`].
1039+
/// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1040+
/// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1041+
/// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1042+
/// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
10371043
///
1038-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
1039-
/// [ucd]: https://www.unicode.org/reports/tr44/
1044+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1045+
/// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
10401046
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
10411047
///
10421048
/// # Examples
10431049
///
10441050
/// Basic usage:
10451051
///
10461052
/// ```
1047-
/// // U+009C, STRING TERMINATOR
1048-
/// assert!('œ'.is_control());
1053+
/// assert!('\t'.is_control());
1054+
/// assert!('\n'.is_control());
1055+
/// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
10491056
/// assert!(!'q'.is_control());
10501057
/// ```
10511058
#[must_use]
@@ -1061,29 +1068,28 @@ impl char {
10611068

10621069
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
10631070
///
1064-
/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
1065-
/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
1066-
/// [`DerivedCoreProperties.txt`].
1071+
/// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1072+
/// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
10671073
///
1068-
/// [uax29]: https://www.unicode.org/reports/tr29/
1069-
/// [ucd]: https://www.unicode.org/reports/tr44/
1074+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1075+
/// [specified]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
10701076
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
10711077
#[must_use]
10721078
#[inline]
1073-
pub(crate) fn is_grapheme_extended(self) -> bool {
1079+
fn is_grapheme_extender(self) -> bool {
10741080
self > '\u{02FF}' && unicode::Grapheme_Extend(self)
10751081
}
10761082

10771083
/// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
10781084
/// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ),
10791085
/// which has two lowercase forms.
10801086
///
1081-
/// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
1082-
/// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
1087+
/// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1088+
/// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`];
10831089
/// see those resources for more information.
10841090
///
1085-
/// [D136]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1086-
/// [ucd]: https://www.unicode.org/reports/tr44/
1091+
/// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1092+
/// [specified]: https://www.unicode.org/reports/tr44/#Case_Ignorable
10871093
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
10881094
#[must_use]
10891095
#[inline]
@@ -1099,20 +1105,20 @@ impl char {
10991105
/// Returns `true` if this `char` has one of the general categories for numbers.
11001106
///
11011107
/// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
1102-
/// characters, and `No` for other numeric characters) are specified in the [Unicode Character
1103-
/// Database][ucd] [`UnicodeData.txt`].
1108+
/// characters, and `No` for other numeric characters) are [specified] in the Unicode Character
1109+
/// Database [`UnicodeData.txt`].
11041110
///
11051111
/// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1106-
/// If you want everything including characters with overlapping purposes then you might want to use
1107-
/// a unicode or language-processing library that exposes the appropriate character properties instead
1108-
/// of looking at the unicode categories.
1112+
/// If you want everything including characters with overlapping purposes, then you might want to use
1113+
/// a Unicode or language-processing library that exposes the appropriate character properties
1114+
/// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
11091115
///
11101116
/// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
11111117
/// `is_ascii_digit` or `is_digit` instead.
11121118
///
1113-
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
1114-
/// [ucd]: https://www.unicode.org/reports/tr44/
1119+
/// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
11151120
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1121+
/// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
11161122
///
11171123
/// # Examples
11181124
///
@@ -2151,8 +2157,8 @@ impl char {
21512157
}
21522158

21532159
pub(crate) struct EscapeDebugExtArgs {
2154-
/// Escape Extended Grapheme codepoints?
2155-
pub(crate) escape_grapheme_extended: bool,
2160+
/// Escape Grapheme Extender codepoints?
2161+
pub(crate) escape_grapheme_extender: bool,
21562162

21572163
/// Escape single quotes?
21582164
pub(crate) escape_single_quote: bool,
@@ -2163,7 +2169,7 @@ pub(crate) struct EscapeDebugExtArgs {
21632169

21642170
impl EscapeDebugExtArgs {
21652171
pub(crate) const ESCAPE_ALL: Self = Self {
2166-
escape_grapheme_extended: true,
2172+
escape_grapheme_extender: true,
21672173
escape_single_quote: true,
21682174
escape_double_quote: true,
21692175
};

library/core/src/fmt/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2941,7 +2941,7 @@ impl Debug for str {
29412941
let mut chars = rest.chars();
29422942
if let Some(c) = chars.next() {
29432943
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2944-
escape_grapheme_extended: true,
2944+
escape_grapheme_extender: true,
29452945
escape_single_quote: false,
29462946
escape_double_quote: true,
29472947
});
@@ -2973,7 +2973,7 @@ impl Debug for char {
29732973
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
29742974
f.write_char('\'')?;
29752975
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
2976-
escape_grapheme_extended: true,
2976+
escape_grapheme_extender: true,
29772977
escape_single_quote: true,
29782978
escape_double_quote: false,
29792979
});

library/core/src/str/lossy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ impl fmt::Debug for Debug<'_> {
123123
let mut from = 0;
124124
for (i, c) in valid.char_indices() {
125125
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
126-
escape_grapheme_extended: true,
126+
escape_grapheme_extender: true,
127127
escape_single_quote: false,
128128
escape_double_quote: true,
129129
});

library/core/src/str/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3195,7 +3195,7 @@ impl_fn_for_zst! {
31953195
#[derive(Clone)]
31963196
struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
31973197
c.escape_debug_ext(EscapeDebugExtArgs {
3198-
escape_grapheme_extended: false,
3198+
escape_grapheme_extender: false,
31993199
escape_single_quote: true,
32003200
escape_double_quote: true
32013201
})

library/core/src/unicode/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,13 @@ pub mod unicode_data;
2727
/// New versions of Unicode are released regularly and subsequently all methods
2828
/// in the standard library depending on Unicode are updated. Therefore the
2929
/// behavior of some `char` and `str` methods and the value of this constant
30-
/// changes over time. This is *not* considered to be a breaking change.
30+
/// change over time, within the boundaries of Unicode's [stability policies].
31+
/// This is *not* considered to be a breaking change.
32+
///
33+
/// [stability policies]: https://www.unicode.org/policies/stability_policy.html
3134
///
3235
/// The version numbering scheme is explained in
33-
/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
36+
/// [Section 3.1 (Version Numbering)] of the Unicode Standard.
37+
///
38+
/// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
3439
pub const UNICODE_VERSION: (u8, u8, u8) = unicode_data::UNICODE_VERSION;

library/core/src/wtf8.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ impl fmt::Debug for Wtf8 {
147147
use crate::fmt::Write as _;
148148
for c in s.chars().flat_map(|c| {
149149
c.escape_debug_ext(EscapeDebugExtArgs {
150-
escape_grapheme_extended: true,
150+
escape_grapheme_extender: true,
151151
escape_single_quote: false,
152152
escape_double_quote: true,
153153
})

0 commit comments

Comments
 (0)