@@ -2901,7 +2901,33 @@ function urldecode_deep( $value ) {
29012901}
29022902
29032903/**
2904- * Converts email addresses characters to HTML entities to block spam bots.
2904+ * Obscures email addresses in HTML to prevent spam bots from harvesting them.
2905+ *
2906+ * Typically this will randomly replace characters from the email address with
2907+ * HTML character references; however, when the hex encoding parameter is set,
2908+ * some characters will also be represented in their percent-encoded form.
2909+ *
2910+ * Because this function is randomized, the outputs for any given input may
2911+ * differ between calls. This helps diversify the ways the email addresses
2912+ * are obscured.
2913+ *
2914+ * When non-UTF-8 inputs are provided, any spans of invalid UTF-8 bytes will
2915+ * be passed through without any obfuscation.
2916+ *
2917+ * Example:
2918+ *
2919+ * $email = 'noreply@example.com';
2920+ * $obscured = antispambot( $email );
2921+ * $obscured === '&#110;o&#114;eply&#64;e&#120;amp&#108;e.c&#111;m';
2922+ *
2923+ * // Hex-encoding also obscures characters with percent-encoding.
2924+ * $obscured = antispambot( $email, 1 );
2925+ * $obscured === '%6eore%70l%79&#64;%65x%61mple%2e%63%6fm';
2926+ *
2927+ * // Non-UTF-8 characters are not obfuscated. "\xFC" is Latin1 "ü".
2928+ * $obscured = antispambot( "b\xFCcher@library.de" );
2929+ * $obscured === 'b�cher@library.de';
2930+ * $obscured === "b\xFCcher@library.de";
29052931 *
29062932 * @since 0.71
29072933 * @since {WP_VERSION} Masquerades multi-byte characters.
@@ -2911,45 +2937,49 @@ function urldecode_deep( $value ) {
29112937 * @return string Converted email address.
29122938 */
29132939function antispambot ( $ email_address , $ hex_encoding = 0 ) {
2914- /*
2915- * Email addresses passed into this function should not contain invalid UTF-8, but if they do,
2916- * enforce the constraint by refusing to print any email address.
2917- */
2918- if ( ! wp_check_invalid_utf8 ( $ email_address ) ) {
2919- return '' ;
2920- }
2921-
29222940 $ obfuscated = '' ;
29232941 $ at = 0 ;
2924- $ next_at = 0 ;
29252942 $ end = strlen ( $ email_address );
29262943 $ invalid_length = 0 ;
2944+
29272945 while ( $ at < $ end ) {
2928- if ( 0 === _wp_scan_utf8 ( $ email_address , $ next_at , $ invalid_length , null , 1 ) ) {
2946+ $ was_at = $ at ;
2947+ if (
2948+ 0 === _wp_scan_utf8 ( $ email_address , $ at , $ invalid_length , null , 1 ) &&
2949+ 0 === $ invalid_length
2950+ ) {
29292951 break ;
29302952 }
29312953
2932- $ character = substr ( $ email_address , $ at , $ next_at - $ at ) ;
2954+ $ character_length = $ at - $ was_at ;
29332955
2934- switch ( rand ( 0 , 1 + $ hex_encoding ) ) {
2935- case 0 :
2936- $ code_point = mb_ord ( $ character );
2937- $ obfuscated .= "&# {$ code_point }; " ;
2938- break ;
2956+ if ( $ character_length > 0 ) {
2957+ $ character = substr ( $ email_address , $ was_at , $ character_length );
29392958
2940- case 1 :
2941- $ obfuscated .= $ character ;
2942- break ;
2959+ switch ( rand ( 0 , 1 + $ hex_encoding ) ) {
2960+ case 0 :
2961+ $ code_point = mb_ord ( $ character );
2962+ $ obfuscated .= "&# {$ code_point }; " ;
2963+ break ;
29432964
2944- case 2 :
2945- for ( $ i = 0 , $ byte_count = strlen ( $ character ); $ i < $ byte_count ; $ i ++ ) {
2946- $ hex_value = bin2hex ( $ character [ $ i ] );
2947- $ obfuscated .= "% {$ hex_value }" ;
2948- }
2949- break ;
2965+ case 1 :
2966+ $ obfuscated .= $ character ;
2967+ break ;
2968+
2969+ case 2 :
2970+ for ( $ i = 0 ; $ i < $ character_length ; $ i ++ ) {
2971+ $ hex_value = bin2hex ( $ character [ $ i ] );
2972+ $ obfuscated .= "% {$ hex_value }" ;
2973+ }
2974+ break ;
2975+ }
2976+ }
2977+
2978+ if ( 0 !== $ invalid_length ) {
2979+ $ obfuscated .= substr ( $ email_address , $ at , $ invalid_length );
29502980 }
29512981
2952- $ at = $ next_at ;
2982+ $ at + = $ invalid_length ;
29532983 }
29542984
29552985 return str_replace ( '@ ' , '@ ' , $ obfuscated );
0 commit comments