Skip to content

Commit 6fac136

Browse files
committed
Update docs, fix bug, pass invalid UTF-8 un-obscured.
1 parent 329e5db commit 6fac136

1 file changed

Lines changed: 57 additions & 27 deletions

File tree

src/wp-includes/formatting.php

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2901,7 +2901,33 @@ function urldecode_deep( $value ) {
29012901
}
29022902

29032903
/**
2904-
* Converts email addresses characters to HTML entities to block spam bots.
2904+
* Obscures email addresses in HTML to prevent spam bots from harvesting them.
2905+
*
2906+
* Typically this will randomly replace characters from the email address with
2907+
* HTML character references; however, when the hex encoding parameter is set,
2908+
* some characters will also be represented in their percent-encoded form.
2909+
*
2910+
* Because this function is randomized, the outputs for any given input may
2911+
* differ between calls. This helps diversify the ways the email addresses
2912+
* are obscured.
2913+
*
2914+
* When non-UTF-8 inputs are provided, any spans of invalid UTF-8 bytes will
2915+
* be passed through without any obfuscation.
2916+
*
2917+
* Example:
2918+
*
2919+
* $email = 'noreply@example.com';
2920+
* $obscured = antispambot( $email );
2921+
* $obscured === '&#110;o&#114;eply&#64;e&#120;ample&#46;com';
2922+
*
2923+
* // Hex-encoding also obscures characters with percent-encoding.
2924+
* $obscured = antispambot( $email, 1 );
2925+
* $obscured === '%6eore%70l%79&#64;%65x%61mple%2e%63%6fm';
2926+
*
2927+
* // Non-UTF-8 characters are not obfuscated. "\xFC" is Latin1 "ü".
2928+
* $obscured = antispambot( "b\xFCcher@library.de" );
2929+
* $obscured === 'b�cher&#64;library.de';
2930+
* $obscured === "b\xFCcher&#64;library.de";
29052931
*
29062932
* @since 0.71
29072933
* @since {WP_VERSION} Masquerades multi-byte characters.
@@ -2911,45 +2937,49 @@ function urldecode_deep( $value ) {
29112937
* @return string Converted email address.
29122938
*/
29132939
function antispambot( $email_address, $hex_encoding = 0 ) {
2914-
/*
2915-
* Email addresses passed into this function should not contain invalid UTF-8, but if they do,
2916-
* enforce the constraint by refusing to print any email address.
2917-
*/
2918-
if ( ! wp_check_invalid_utf8( $email_address ) ) {
2919-
return '';
2920-
}
2921-
29222940
$obfuscated = '';
29232941
$at = 0;
2924-
$next_at = 0;
29252942
$end = strlen( $email_address );
29262943
$invalid_length = 0;
2944+
29272945
while ( $at < $end ) {
2928-
if ( 0 === _wp_scan_utf8( $email_address, $next_at, $invalid_length, null, 1 ) ) {
2946+
$was_at = $at;
2947+
if (
2948+
0 === _wp_scan_utf8( $email_address, $at, $invalid_length, null, 1 ) &&
2949+
0 === $invalid_length
2950+
) {
29292951
break;
29302952
}
29312953

2932-
$character = substr( $email_address, $at, $next_at - $at );
2954+
$character_length = $at - $was_at;
29332955

2934-
switch ( rand( 0, 1 + $hex_encoding ) ) {
2935-
case 0:
2936-
$code_point = mb_ord( $character );
2937-
$obfuscated .= "&#{$code_point};";
2938-
break;
2956+
if ( $character_length > 0 ) {
2957+
$character = substr( $email_address, $was_at, $character_length );
29392958

2940-
case 1:
2941-
$obfuscated .= $character;
2942-
break;
2959+
switch ( rand( 0, 1 + $hex_encoding ) ) {
2960+
case 0:
2961+
$code_point = mb_ord( $character );
2962+
$obfuscated .= "&#{$code_point};";
2963+
break;
29432964

2944-
case 2:
2945-
for ( $i = 0, $byte_count = strlen( $character ); $i < $byte_count; $i++ ) {
2946-
$hex_value = bin2hex( $character[ $i ] );
2947-
$obfuscated .= "%{$hex_value}";
2948-
}
2949-
break;
2965+
case 1:
2966+
$obfuscated .= $character;
2967+
break;
2968+
2969+
case 2:
2970+
for ( $i = 0; $i < $character_length; $i++ ) {
2971+
$hex_value = bin2hex( $character[ $i ] );
2972+
$obfuscated .= "%{$hex_value}";
2973+
}
2974+
break;
2975+
}
2976+
}
2977+
2978+
if ( 0 !== $invalid_length ) {
2979+
$obfuscated .= substr( $email_address, $at, $invalid_length );
29502980
}
29512981

2952-
$at = $next_at;
2982+
$at += $invalid_length;
29532983
}
29542984

29552985
return str_replace( '@', '&#64;', $obfuscated );

0 commit comments

Comments
 (0)