@@ -777,12 +777,76 @@ impl char {
777777 #[ inline]
778778 pub fn is_alphabetic ( self ) -> bool {
779779 match self {
780- 'A ' ..='Z ' | 'a ' ..='z ' => true ,
780+ 'a ' ..='z ' | 'A ' ..='Z ' => true ,
781781 '\0' ..='\u{A9}' => false ,
782782 _ => unicode:: Alphabetic ( self ) ,
783783 }
784784 }
785785
786+ /// Returns `true` if this `char` has the `Cased` property.
787+ /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
788+ ///
789+ /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
790+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
791+ ///
792+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
793+ /// [ucd]: https://www.unicode.org/reports/tr44/
794+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
795+ ///
796+ /// # Examples
797+ ///
798+ /// Basic usage:
799+ ///
800+ /// ```
801+ /// #![feature(titlecase)]
802+ /// assert!('A'.is_cased());
803+ /// assert!('a'.is_cased());
804+ /// assert!(!'京'.is_cased());
805+ /// ```
806+ #[ must_use]
807+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
808+ #[ inline]
809+ pub fn is_cased ( self ) -> bool {
810+ match self {
811+ 'a' ..='z' | 'A' ..='Z' => true ,
812+ '\0' ..='\u{A9}' => false ,
813+ _ => unicode:: Cased ( self ) ,
814+ }
815+ }
816+
817+ /// Returns the case of this character:
818+ /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
819+ /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
820+ /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
821+ /// `None` if [`!self.is_cased()`][`char::is_cased`].
822+ ///
823+ /// # Examples
824+ ///
825+ /// ```
826+ /// #![feature(titlecase)]
827+ /// use core::char::CharCase;
828+ /// assert_eq!('a'.case(), Some(CharCase::Lower));
829+ /// assert_eq!('δ'.case(), Some(CharCase::Lower));
830+ /// assert_eq!('A'.case(), Some(CharCase::Upper));
831+ /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
832+ /// assert_eq!('Dž'.case(), Some(CharCase::Title));
833+ /// assert_eq!('中'.case(), None);
834+ /// ```
835+ #[ must_use]
836+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
837+ #[ inline]
838+ pub fn case ( self ) -> Option < CharCase > {
839+ match self {
840+ 'a' ..='z' => Some ( CharCase :: Lower ) ,
841+ 'A' ..='Z' => Some ( CharCase :: Upper ) ,
842+ '\0' ..='\u{A9}' => None ,
843+ _ if !unicode:: Cased ( self ) => None ,
844+ _ if unicode:: Lowercase ( self ) => Some ( CharCase :: Lower ) ,
845+ _ if unicode:: Uppercase ( self ) => Some ( CharCase :: Upper ) ,
846+ _ => Some ( CharCase :: Title ) ,
847+ }
848+ }
849+
786850 /// Returns `true` if this `char` has the `Lowercase` property.
787851 ///
788852 /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
@@ -825,6 +889,40 @@ impl char {
825889 }
826890 }
827891
892+ /// Returns `true` if this `char` has the general category for titlecase letters.
893+ /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
894+ ///
895+ /// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4
896+ /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
897+ /// Database][ucd] [`UnicodeData.txt`].
898+ ///
899+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
900+ /// [ucd]: https://www.unicode.org/reports/tr44/
901+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
902+ ///
903+ /// # Examples
904+ ///
905+ /// Basic usage:
906+ ///
907+ /// ```
908+ /// #![feature(titlecase)]
909+ /// assert!('Dž'.is_titlecase());
910+ /// assert!('ῼ'.is_titlecase());
911+ /// assert!(!'D'.is_titlecase());
912+ /// assert!(!'z'.is_titlecase());
913+ /// assert!(!'中'.is_titlecase());
914+ /// assert!(!' '.is_titlecase());
915+ /// ```
916+ #[ must_use]
917+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
918+ #[ inline]
919+ pub fn is_titlecase ( self ) -> bool {
920+ match self {
921+ '\0' ..='\u{01C4}' => false ,
922+ _ => self . is_cased ( ) && !self . is_lowercase ( ) && !self . is_uppercase ( ) ,
923+ }
924+ }
925+
828926 /// Returns `true` if this `char` has the `Uppercase` property.
829927 ///
830928 /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
@@ -925,7 +1023,7 @@ impl char {
9251023 #[ inline]
9261024 pub fn is_alphanumeric ( self ) -> bool {
9271025 match self {
928- '0 ' ..='9 ' | 'A' ..='Z' | 'a ' ..='z ' => true ,
1026+ 'a ' ..='z ' | 'A' ..='Z' | '0 ' ..='9 ' => true ,
9291027 '\0' ..='\u{A9}' => false ,
9301028 _ => unicode:: Alphabetic ( self ) || unicode:: N ( self ) ,
9311029 }
@@ -976,26 +1074,6 @@ impl char {
9761074 self > '\u{02FF}' && unicode:: Grapheme_Extend ( self )
9771075 }
9781076
979- /// Returns `true` if this `char` has the `Cased` property.
980- ///
981- /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
982- /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
983- ///
984- /// [Unicode Standard]: https://www.unicode.org/versions/latest/
985- /// [ucd]: https://www.unicode.org/reports/tr44/
986- /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
987- #[ must_use]
988- #[ inline]
989- #[ doc( hidden) ]
990- #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
991- pub fn is_cased ( self ) -> bool {
992- match self {
993- 'A' ..='Z' | 'a' ..='z' => true ,
994- '\0' ..='\u{A9}' => false ,
995- _ => unicode:: Cased ( self ) ,
996- }
997- }
998-
9991077 /// Returns `true` if this `char` has the `Case_Ignorable` property.
10001078 ///
10011079 /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
@@ -1119,17 +1197,123 @@ impl char {
11191197 /// // convert into themselves.
11201198 /// assert_eq!('山'.to_lowercase().to_string(), "山");
11211199 /// ```
1122- #[ must_use = "this returns the lowercase character as a new iterator, \
1200+ #[ must_use = "this returns the lowercased character as a new iterator, \
11231201 without modifying the original"]
11241202 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
11251203 #[ inline]
11261204 pub fn to_lowercase ( self ) -> ToLowercase {
11271205 ToLowercase ( CaseMappingIter :: new ( conversions:: to_lower ( self ) ) )
11281206 }
11291207
1208+ /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1209+ /// `char`s.
1210+ ///
1211+ /// This is usually, but not always, equivalent to the uppercase mapping
1212+ /// returned by [`Self::to_uppercase`]. Prefer this method when seeking to capitalize
1213+ /// Only The First Letter of a word, but use [`Self::to_uppercase`] for ALL CAPS.
1214+ ///
1215+ /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1216+ ///
1217+ /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1218+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1219+ ///
1220+ /// [ucd]: https://www.unicode.org/reports/tr44/
1221+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1222+ ///
1223+ /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1224+ /// the `char`(s) given by [`SpecialCasing.txt`].
1225+ ///
1226+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1227+ ///
1228+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1229+ /// is independent of context and language.
1230+ ///
1231+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1232+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1233+ ///
1234+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1235+ ///
1236+ /// # Examples
1237+ ///
1238+ /// As an iterator:
1239+ ///
1240+ /// ```
1241+ /// #![feature(titlecase)]
1242+ /// for c in 'ß'.to_titlecase() {
1243+ /// print!("{c}");
1244+ /// }
1245+ /// println!();
1246+ /// ```
1247+ ///
1248+ /// Using `println!` directly:
1249+ ///
1250+ /// ```
1251+ /// #![feature(titlecase)]
1252+ /// println!("{}", 'ß'.to_titlecase());
1253+ /// ```
1254+ ///
1255+ /// Both are equivalent to:
1256+ ///
1257+ /// ```
1258+ /// println!("Ss");
1259+ /// ```
1260+ ///
1261+ /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1262+ ///
1263+ /// ```
1264+ /// #![feature(titlecase)]
1265+ /// assert_eq!('c'.to_titlecase().to_string(), "C");
1266+ /// assert_eq!('dž'.to_titlecase().to_string(), "Dž");
1267+ /// assert_eq!('ῼ'.to_titlecase().to_string(), "ῼ");
1268+ ///
1269+ /// // Sometimes the result is more than one character:
1270+ /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1271+ ///
1272+ /// // Characters that do not have separate cased forms
1273+ /// // convert into themselves.
1274+ /// assert_eq!('山'.to_titlecase().to_string(), "山");
1275+ /// ```
1276+ ///
1277+ /// # Note on locale
1278+ ///
1279+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1280+ ///
1281+ /// * 'Dotless': I / ı, sometimes written ï
1282+ /// * 'Dotted': İ / i
1283+ ///
1284+ /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1285+ ///
1286+ /// ```
1287+ /// #![feature(titlecase)]
1288+ /// let upper_i = 'i'.to_titlecase().to_string();
1289+ /// ```
1290+ ///
1291+ /// The value of `upper_i` here relies on the language of the text: if we're
1292+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1293+ /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1294+ ///
1295+ /// ```
1296+ /// #![feature(titlecase)]
1297+ /// let upper_i = 'i'.to_titlecase().to_string();
1298+ ///
1299+ /// assert_eq!(upper_i, "I");
1300+ /// ```
1301+ ///
1302+ /// holds across languages.
1303+ #[ must_use = "this returns the titlecased character as a new iterator, \
1304+ without modifying the original"]
1305+ #[ unstable( feature = "titlecase" , issue = "153892" ) ]
1306+ #[ inline]
1307+ pub fn to_titlecase ( self ) -> ToTitlecase {
1308+ ToTitlecase ( CaseMappingIter :: new ( conversions:: to_title ( self ) ) )
1309+ }
1310+
11301311 /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
11311312 /// `char`s.
11321313 ///
1314+ /// Prefer this method when converting a word into ALL CAPS, but consider [`Self::to_titlecase`]
1315+ /// instead if you seek to capitalize Only The First Letter.
1316+ ///
11331317 /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
11341318 ///
11351319 /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
@@ -1179,9 +1363,11 @@ impl char {
11791363 ///
11801364 /// ```
11811365 /// assert_eq!('c'.to_uppercase().to_string(), "C");
1366+ /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ");
11821367 ///
11831368 /// // Sometimes the result is more than one character:
11841369 /// assert_eq!('ſt'.to_uppercase().to_string(), "ST");
1370+ /// assert_eq!('ῼ'.to_uppercase().to_string(), "ΩΙ");
11851371 ///
11861372 /// // Characters that do not have both uppercase and lowercase
11871373 /// // convert into themselves.
@@ -1190,7 +1376,7 @@ impl char {
11901376 ///
11911377 /// # Note on locale
11921378 ///
1193- /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1379+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
11941380 ///
11951381 /// * 'Dotless': I / ı, sometimes written ï
11961382 /// * 'Dotted': İ / i
@@ -1202,7 +1388,7 @@ impl char {
12021388 /// ```
12031389 ///
12041390 /// The value of `upper_i` here relies on the language of the text: if we're
1205- /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1391+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
12061392 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
12071393 ///
12081394 /// ```
@@ -1212,7 +1398,7 @@ impl char {
12121398 /// ```
12131399 ///
12141400 /// holds across languages.
1215- #[ must_use = "this returns the uppercase character as a new iterator, \
1401+ #[ must_use = "this returns the uppercased character as a new iterator, \
12161402 without modifying the original"]
12171403 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
12181404 #[ inline]
@@ -1455,7 +1641,7 @@ impl char {
14551641 #[ rustc_const_stable( feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" ) ]
14561642 #[ inline]
14571643 pub const fn is_ascii_alphabetic ( & self ) -> bool {
1458- matches ! ( * self , 'A ' ..='Z ' | 'a ' ..='z ' )
1644+ matches ! ( * self , 'a ' ..='z ' | 'A ' ..='Z ' )
14591645 }
14601646
14611647 /// Checks if the value is an ASCII uppercase character:
0 commit comments