@@ -150,34 +150,47 @@ public static function toAscii(string $s): string
150150 ['" ' , '" ' , '" ' , "' " , "' " , "' " , '^ ' , 'Ya ' , 'ya ' , 'Yu ' , 'yu ' ],
151151 $ s
152152 );
153- // temporarily hide these characters to distinguish them from the garbage that iconv creates
154- $ s = strtr ($ s , '` \'"^~? ' , "\x01\x02\x03\x04\x05\x06" );
155- if ($ transliterator !== null ) {
153+
154+ if ($ transliterator ) {
156155 $ s = $ transliterator ->transliterate ($ s );
157- }
158- if (ICONV_IMPL === 'glibc ' ) {
159- // glibc implementation is very limited. replace some characters directly
160- $ s = str_replace (
161- ["\u{BB}" , "\u{AB}" , "\u{2026}" , "\u{2122}" , "\u{A9}" , "\u{AE}" ], // » « … ™ © ®
162- ['>> ' , '<< ' , '... ' , 'TM ' , '(c) ' , '(R) ' ],
163- $ s
164- );
165- // transliterate the rest into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
166- $ s = iconv ('UTF-8 ' , 'WINDOWS-1250//TRANSLIT//IGNORE ' , $ s );
167- $ s = strtr ($ s , "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
168- . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
169- . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
170- . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
171- . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7" ,
172- 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-. ' );
173- $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]);
174- } else {
156+ if (ICONV_IMPL === 'glibc ' ) {
157+ // temporarily hide ? to distinguish them from the garbage that iconv creates
158+ $ s = strtr ($ s , '? ' , "\x01" );
159+ }
160+ // use iconv because The transliterator leaves some characters out of ASCII, eg → ʾ
175161 $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
162+ if (ICONV_IMPL === 'glibc ' ) {
163+ // remove garbage and restore ? characters
164+ $ s = str_replace (['? ' , "\x01" ], ['' , '? ' ], $ s );
165+ }
166+ } else {
167+ // temporarily hide these characters to distinguish them from the garbage that iconv creates
168+ $ s = strtr ($ s , '` \'"^~? ' , "\x01\x02\x03\x04\x05\x06" );
169+ if (ICONV_IMPL === 'glibc ' ) {
170+ // glibc implementation is very limited. replace some characters directly
171+ $ s = str_replace (
172+ ["\u{BB}" , "\u{AB}" , "\u{2026}" , "\u{2122}" , "\u{A9}" , "\u{AE}" ], // » « … ™ © ®
173+ ['>> ' , '<< ' , '... ' , 'TM ' , '(c) ' , '(R) ' ],
174+ $ s
175+ );
176+ // transliterate the rest into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
177+ $ s = iconv ('UTF-8 ' , 'WINDOWS-1250//TRANSLIT//IGNORE ' , $ s );
178+ $ s = strtr ($ s , "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
179+ . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
180+ . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
181+ . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
182+ . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7" ,
183+ 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-. ' );
184+ $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]);
185+ } else {
186+ $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
187+ }
188+ // remove garbage that iconv creates during transliteration (eg Ý -> Y')
189+ $ s = str_replace (['` ' , "' " , '" ' , '^ ' , '~ ' , '? ' ], '' , $ s );
190+ // restore temporarily hidden characters
191+ $ s = strtr ($ s , "\x01\x02\x03\x04\x05\x06" , '` \'"^~? ' );
176192 }
177- // remove garbage that iconv creates during transliteration (eg Ý -> Y')
178- $ s = str_replace (['` ' , "' " , '" ' , '^ ' , '~ ' , '? ' ], '' , $ s );
179- // restore temporarily hidden characters
180- $ s = strtr ($ s , "\x01\x02\x03\x04\x05\x06" , '` \'"^~? ' );
193+
181194 return $ s ;
182195 }
183196
0 commit comments