@@ -61,7 +61,7 @@ use crate::alloc::Allocator;
6161#[ cfg( not( no_global_oom_handling) ) ]
6262use crate :: borrow:: { Cow , ToOwned } ;
6363use crate :: boxed:: Box ;
use crate::collections::TryReserveError;
#[cfg(not(no_global_oom_handling))]
use crate::collections::VecDeque;
6565use crate :: str:: { self , CharIndices , Chars , Utf8Error , from_utf8_unchecked_mut} ;
6666#[ cfg( not( no_global_oom_handling) ) ]
6767use crate :: str:: { FromStr , from_boxed_utf8_unchecked} ;
@@ -3604,3 +3604,203 @@ impl From<char> for String {
36043604 c. to_string ( )
36053605 }
36063606}
3607+
// In-place case changes
3609+
3610+ impl String {
3611+ /// Converts this string to its uppercase equivalent in-place.
3612+ ///
3613+ /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
3614+ /// `Uppercase`.
3615+ ///
3616+ /// Since some characters can expand into multiple characters when changing
3617+ /// the case, this method may change the length of the string. If the string
3618+ /// shrinks, the excess capacity is not reclaimed.
3619+ ///
3620+ /// # Examples
3621+ ///
3622+ /// Basic usage:
3623+ ///
3624+ /// ```
3625+ /// #![feature(string_make_uplowercase)]
3626+ ///
3627+ /// let mut s = String::from("hello");
3628+ /// s.make_uppercase();
3629+ ///
3630+ /// assert_eq!("HELLO", s);
3631+ /// ```
3632+ ///
3633+ /// Scripts without case are not changed:
3634+ ///
3635+ /// ```
3636+ /// #![feature(string_make_uplowercase)]
3637+ ///
3638+ /// let mut new_year = String::from("农历新年");
3639+ /// new_year.make_uppercase();
3640+ ///
3641+ /// assert_eq!("农历新年", new_year);
3642+ /// ```
3643+ ///
3644+ /// One character can become multiple:
3645+ ///
3646+ /// ```
3647+ /// #![feature(string_make_uplowercase)]
3648+ ///
3649+ /// let mut s = String::from("tschüß");
3650+ /// s.make_uppercase();
3651+ ///
3652+ /// assert_eq!("TSCHÜSS", s);
3653+ /// ```
3654+ #[ cfg( not( no_global_oom_handling) ) ]
3655+ #[ unstable( feature = "string_make_uplowercase" , issue = "135885" ) ]
3656+ pub fn make_uppercase ( & mut self ) {
3657+ let mut wc = WriteChars :: new ( self ) ;
3658+ while let Some ( l_c) = wc. pop ( ) {
3659+ l_c. to_uppercase ( ) . for_each ( |u_c| wc. write ( u_c) ) ;
3660+ }
3661+ }
3662+
3663+ /// Converts this string to its lowercase equivalent in-place.
3664+ ///
3665+ /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
3666+ /// `Lowercase`.
3667+ ///
3668+ /// Since some characters can expand into multiple characters when changing
3669+ /// the case, this method may change the length of the string. If the string
3670+ /// shrinks, the excess capacity is not reclaimed.
3671+ ///
3672+ /// # Examples
3673+ ///
3674+ /// Basic usage:
3675+ ///
3676+ /// ```
3677+ /// #![feature(string_make_uplowercase)]
3678+ ///
3679+ /// let mut s = String::from("HELLO");
3680+ /// s.make_lowercase();
3681+ ///
3682+ /// assert_eq!("hello", s);
3683+ /// ```
3684+ ///
3685+ /// ```
3686+ /// #![feature(string_make_uplowercase)]
3687+ ///
3688+ /// let mut odysseus = String::from("ὈΔΥΣΣΕΎΣ");
3689+ /// odysseus.make_lowercase();
3690+ ///
3691+ /// assert_eq!("ὀδυσσεύς", odysseus);
3692+ /// ```
3693+ ///
3694+ /// Languages without case are not changed:
3695+ ///
3696+ /// ```
3697+ /// #![feature(string_make_uplowercase)]
3698+ ///
3699+ /// let mut new_year = String::from("农历新年");
3700+ /// new_year.make_lowercase();
3701+ ///
3702+ /// assert_eq!("农历新年", new_year);
3703+ /// ```
3704+ #[ cfg( not( no_global_oom_handling) ) ]
3705+ #[ unstable( feature = "string_make_uplowercase" , issue = "135885" ) ]
3706+ pub fn make_lowercase ( & mut self ) {
3707+ let mut wc = WriteChars :: new ( self ) ;
3708+ // This is unfortunately paid whether or not you have sigmas in the str
3709+ // but it is kind of mandatory because as we are overwriting the source bytes
3710+ // we have to compute this information as we go.
3711+ let mut word_final_so_far = false ;
3712+ while let Some ( u_c) = wc. pop ( ) {
3713+ if u_c == 'Σ' {
3714+ if word_final_so_far && !crate :: str:: case_ignorable_then_cased ( wc. rest ( ) . chars ( ) ) {
3715+ // actually word final
3716+ wc. write ( 'ς' ) ;
3717+ } else {
3718+ wc. write ( 'σ' ) ;
3719+ }
3720+ } else {
3721+ u_c. to_lowercase ( ) . for_each ( |l_c| wc. write ( l_c) ) ;
3722+ }
3723+ word_final_so_far = u_c. is_cased ( ) || ( word_final_so_far && u_c. is_case_ignorable ( ) ) ;
3724+ }
3725+ }
3726+ }
3727+
3728+ /// A helper for in place modification of strings, where we gradually "pop" characters,
3729+ /// hereby making room to write back to the string buffer
3730+ #[ unstable( issue = "none" , feature = "std_internals" ) ]
3731+ struct WriteChars < ' a > {
3732+ // This is the internal buffer of the string temporarily changed to Vec<u8> because
3733+ // it will contain non utf8 bytes.
3734+ // invariant: self.v.len() == original string until drop is run
3735+ v : Vec < u8 > ,
3736+ // A reference kept to restore the string at the end
3737+ // (ie drop time)
3738+ s : & ' a mut String ,
3739+ // invariant: write_offset <= read_offset
3740+ write_offset : usize ,
3741+ // invariant: self.read_offset <= self.v.len()
3742+ // before the Drop
3743+ read_offset : usize ,
3744+ buffer : VecDeque < u8 > ,
3745+ }
3746+
3747+ impl < ' a > Drop for WriteChars < ' a > {
3748+ // Set the proper length of the strings storage
3749+ // or grow it to add what is still in the buffer.
3750+ fn drop ( & mut self ) {
3751+ if self . buffer . is_empty ( ) {
3752+ // SAFETY: if the queue is empty, then
3753+ // there were less bytes than in the original so we can simply shrink
3754+ unsafe {
3755+ self . v . set_len ( self . write_offset ) ;
3756+ }
3757+ } else {
3758+ let ( q1, q2) = self . buffer . as_slices ( ) ;
3759+ self . v . extend_from_slice ( q1) ;
3760+ self . v . extend_from_slice ( q2) ;
3761+ } ;
3762+ // SAFETY: this is valid utf8
3763+ * self . s = unsafe { String :: from_utf8_unchecked ( core:: mem:: take ( & mut self . v ) ) }
3764+ }
3765+ }
3766+
3767+ #[ unstable( issue = "none" , feature = "std_internals" ) ]
3768+ impl < ' a > WriteChars < ' a > {
3769+ fn new ( s : & ' a mut String ) -> Self {
3770+ let v = core:: mem:: take ( s) . into_bytes ( ) ;
3771+ WriteChars { s, v, write_offset : 0 , read_offset : 0 , buffer : VecDeque :: new ( ) }
3772+ }
3773+
3774+ fn rest ( & self ) -> & str {
3775+ // SAFETY: read_offset is always ok to read from
3776+ unsafe { str:: from_utf8_unchecked ( & self . v [ self . read_offset ..] ) }
3777+ }
3778+
3779+ fn pop ( & mut self ) -> Option < char > {
3780+ // SAFETY: The bytes from read_offset are valid UTF8
3781+ let ( code_point, width) = unsafe {
3782+ core:: str:: next_code_point_with_width ( & mut self . v [ self . read_offset ..] . iter ( ) ) ?
3783+ } ;
3784+ self . read_offset += width;
3785+ // Dump what is buffered in the newly freed space
3786+ while self . write_offset < self . read_offset
3787+ && let Some ( b) = self . buffer . pop_front ( )
3788+ {
3789+ self . v [ self . write_offset ] = b;
3790+ self . write_offset += 1 ;
3791+ }
3792+ // SAFETY: The code point is valid
3793+ let c = unsafe { char:: from_u32_unchecked ( code_point) } ;
3794+ Some ( c)
3795+ }
3796+
3797+ fn write ( & mut self , c : char ) {
3798+ let writable_slice = & mut self . v [ self . write_offset ..self . read_offset ] ;
3799+ let mut buffer = [ 0u8 ; 4 ] ;
3800+ let len = c. encode_utf8 ( & mut buffer) . len ( ) ;
3801+ let direct_copy_length = core:: cmp:: min ( len, writable_slice. len ( ) ) ;
3802+ writable_slice[ ..direct_copy_length] . copy_from_slice ( & buffer[ ..direct_copy_length] ) ;
3803+ self . write_offset += direct_copy_length;
3804+ self . buffer . extend ( & buffer[ direct_copy_length..len] ) ;
3805+ }
3806+ }
0 commit comments