File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 4545namespace TiCC {
4646 using namespace icu ;
4747
48- std::string UnicodeToUTF8 ( const UnicodeString& );
48+ std::string UnicodeToUTF8 ( const UnicodeString&,
49+ const std::string& = " " );
4950
5051 UnicodeString UnicodeFromEnc ( const std::string& ,
5152 const std::string& = " UTF8" );
5253
53- inline UnicodeString UnicodeFromUTF8 ( const std::string& s ){
54- return UnicodeString::fromUTF8 ( s );
55- }
54+ UnicodeString UnicodeFromUTF8 ( const std::string&,
55+ const std::string& = " " );
5656
5757 // / \brief a class that can normalize UnicodeStrings to NFC/NFD/NFKC/NFKD
5858 class UnicodeNormalizer {
Original file line number Diff line number Diff line change @@ -50,17 +50,28 @@ namespace TiCC {
5050 return UnicodeString ( s.c_str (), s.length (), enc.c_str () );
5151 }
5252
53- string UnicodeToUTF8 ( const UnicodeString& s ){
53+ string UnicodeToUTF8 ( const UnicodeString& s,
54+ const string& normalization ){
5455 /// normalize a UnicodeString and convert the result to a UTF-8 string
5556 /*!
5657 \param s the UnicodeString to convert
58+ \param normalization the mode string handed to the UnicodeNormalizer
    constructor. Noted as defaulting to NFC; NOTE(review): that default is
    not established by this definition, confirm it against the declaration
    and the UnicodeNormalizer constructor
5759 \return a UTF-8 encoded string holding the normalized form of \a s
5860 */
// normalize first, so the bytes written below are those of the normalized text
61+ UnicodeNormalizer UN ( normalization);
62+ UnicodeString normalized = UN.normalize ( s );
5963 string result;
60- s .toUTF8String (result);
// toUTF8String() fills 'result' with the UTF-8 form of the normalized string
64+ normalized .toUTF8String (result);
6165 return result;
6266 }
6367
68+ UnicodeString UnicodeFromUTF8 ( const string& s,
69+ const string& normalization ){
/// convert a UTF-8 string to a normalized UnicodeString
/*!
  \param s the UTF-8 encoded string to convert
  \param normalization the mode string handed to the UnicodeNormalizer
  constructor
  \return the decoded UnicodeString after UnicodeNormalizer::normalize()
*/
70+ UnicodeNormalizer UN ( normalization);
// decode first: normalize() is applied to the UnicodeString, not to the raw bytes
71+ UnicodeString result = UnicodeString::fromUTF8 ( s );
72+ return UN.normalize ( result );
73+ }
74+
6475 UnicodeNormalizer::UnicodeNormalizer ( const string& enc ): _normalizer(0 ) {
6576 // / create an UnicodeNormalizer object
6677 /* !
Original file line number Diff line number Diff line change @@ -889,13 +889,14 @@ void test_unicode( const string& path ){
889889 UnicodeString ng1 = N.normalize ( greek1 );
890890 UnicodeString ng2 = N.normalize ( greek2 );
891891 assertEqual ( UnicodeToUTF8 (ng1), UnicodeToUTF8 (ng2) );
892+ assertEqual ( UnicodeToUTF8 (ng1," NFD" ), UnicodeToUTF8 (ng2," NFD" ) );
892893 N.setMode (" NFD" );
893894 UnicodeString ng11 = N.normalize ( greek1 );
894895 UnicodeString ng12 = N.normalize ( greek2 );
895896 assertEqual ( UnicodeToUTF8 (ng11), UnicodeToUTF8 (ng12) );
896897 string utf8_1 = " ἀντιϰειμένου" ;
897- string utf8_2 = " ἀντικειμένου " ;
898- assertEqual ( TiCC::utf8_uppercase ( utf8_1 ), " ἈΝΤΙΚΕΙΜΈΝΟΥ " );
898+ string utf8_2 = " ἀντικειμένου " ;
899+ assertEqual ( TiCC::utf8_uppercase ( utf8_1 ), " ἈΝΤΙΚΕΙΜΈΝΟΥ " );
899900 assertEqual ( TiCC::utf8_lowercase ( " ἈΝΤΙΚΕΙΜΈΝΟΥ" ), utf8_2 );
900901 assertEqual ( TiCC::utf8_uppercase ( " æ en ß en œ" ), " Æ EN SS EN Œ" );
901902}
You can’t perform that action at this time.
0 commit comments