@@ -2173,6 +2173,20 @@ function sanitize_user( $username, $strict = false ) {
21732173 return apply_filters ( 'sanitize_user ' , $ username , $ raw_username , $ strict );
21742174}
21752175
2176+
/**
 * Returns a string with all control characters and all non-ASCII bytes removed.
 *
 * The input is treated as a sequence of bytes. Every byte in the C0 control
 * range (0x00-0x1F), the DEL byte (0x7F) and every byte with the high bit set
 * (0x80-0xFF) is dropped, so only printable ASCII (0x20-0x7E) remains.
 *
 * @since 7.0.0
 *
 * @param string $input The string to be sanitized.
 * @return string The modified string.
 */
function wp_ascii_without_controls( $input ) {
	// The C0 control range ends at 0x1F (unit separator), not 0x19; stopping
	// at 0x19 would let SUB, ESC, FS, GS, RS and US through unfiltered.
	return preg_replace( '/[\x00-\x1F\x7F-\xFF]/', '', $input );
}
2188+
2189+
21762190/**
21772191 * Sanitizes a string key.
21782192 *
@@ -2912,7 +2926,9 @@ function antispambot( $email_address, $hex_encoding = 0 ) {
29122926 for ( $ i = 0 , $ len = strlen ( $ email_address ); $ i < $ len ; $ i ++ ) {
29132927 $ j = rand ( 0 , 1 + $ hex_encoding );
29142928
2915- if ( 0 === $ j ) {
2929+ if ( ord ( $ email_address [ $ i ] ) > 127 ) {
2930+ $ email_no_spam_address .= $ email_address [ $ i ];
2931+ } elseif ( 0 === $ j ) {
29162932 $ email_no_spam_address .= '&# ' . ord ( $ email_address [ $ i ] ) . '; ' ;
29172933 } elseif ( 1 === $ j ) {
29182934 $ email_no_spam_address .= $ email_address [ $ i ];
@@ -3528,7 +3544,21 @@ function convert_smilies( $text ) {
35283544/**
35293545 * Verifies that an email is valid.
35303546 *
3531- * Does not grok i18n domains. Not RFC compliant.
3547+ * This mostly matches what people think is the format of email
3548+ * addresses, and is close to all three current specifications.
3549+ *
3550+ * Email address syntax is specified in RFC 5322 for ASCII-only email
3551+ * and in RFC 6532 for unicode email (both unicode domains and
3552+ * localparts). In addition, the HTML WHATWG specification contains a
3553+ * third syntax which is used for HTML form input (except that major
3554+ * browsers deviate a little from the WHATWG specification).
3555+ *
3556+ * This function matches the WHATWG and RFC 6532 specifications fairly
3557+ * well, although there are some differences. " "@example.com (quote
3558+ * space quote at ...) is allowed by the RFCs and rejected by this
3559+ * code, while ..@example.com is allowed by this code and prohibited
3560+ * by the RFCs. info@grå.org is allowed by this code and major
3561+ * browsers, but prohibited by WHATWG's regex (as of April 2023).
35323562 *
35333563 * @since 0.71
35343564 *
@@ -3572,7 +3602,7 @@ function is_email( $email, $deprecated = false ) {
35723602 * LOCAL PART
35733603 * Test for invalid characters.
35743604 */
3575- if ( ! preg_match ( '/^[a-zA-Z0-9!#$%& \'*+\/=?^_`{|}~\.-]+$/ ' , $ local ) ) {
3605+ if ( ! ( wp_is_valid_utf8 ( $ local ) && preg_match ( '/^[a-zA-Z0-9\x80-\xff !#$%& \'*+\/=?^_`{|}~\.-]+$/ ' , $ local ) && preg_match ( ' /^\X+$/ ' , $ local ) ) ) {
35763606 /** This filter is documented in wp-includes/formatting.php */
35773607 return apply_filters ( 'is_email ' , false , $ email , 'local_invalid_chars ' );
35783608 }
@@ -3610,7 +3640,7 @@ function is_email( $email, $deprecated = false ) {
36103640 }
36113641
36123642 // Test for invalid characters.
3613- if ( ! preg_match ( '/^[a-z0-9- ]+$/i ' , $ sub ) ) {
3643+ if ( ! ( wp_is_valid_utf8 ( $ sub ) && preg_match ( '/^[a-z0-9\x80-\xff- ]+$/i ' , $ sub ) && preg_match ( ' /^\X+$/ ' , $ sub ) ) ) {
36143644 /** This filter is documented in wp-includes/formatting.php */
36153645 return apply_filters ( 'is_email ' , false , $ email , 'sub_invalid_chars ' );
36163646 }
@@ -3786,8 +3816,8 @@ function sanitize_email( $email ) {
37863816 * LOCAL PART
37873817 * Test for invalid characters.
37883818 */
3789- $ local = preg_replace ( '/[^a-zA-Z0-9!#$%& \'*+\/=?^_`{|}~\.-]/ ' , '' , $ local );
3790- if ( '' === $ local ) {
3819+ $ local = preg_replace ( '/[^a-zA-Z0-9!#$%& \'*+\/=?^_`{|}~\.\x80-\xff -]/ ' , '' , $ local );
3820+ if ( '' === $ local || ! wp_is_valid_utf8 ( $ local ) ) {
37913821 /** This filter is documented in wp-includes/formatting.php */
37923822 return apply_filters ( 'sanitize_email ' , '' , $ email , 'local_invalid_chars ' );
37933823 }
@@ -3827,10 +3857,10 @@ function sanitize_email( $email ) {
38273857 $ sub = trim ( $ sub , " \t\n\r\0\x0B- " );
38283858
38293859 // Test for invalid characters.
3830- $ sub = preg_replace ( '/[^a-z0-9-]+/i ' , '' , $ sub );
3860+ $ sub = preg_replace ( '/[^a-z0-9\x80-\xff -]+/i ' , '' , $ sub );
38313861
38323862 // If there's anything left, add it to the valid subs.
3833- if ( '' !== $ sub ) {
3863+ if ( '' !== $ sub && wp_is_valid_utf8 ( $ sub ) ) {
38343864 $ new_subs [] = $ sub ;
38353865 }
38363866 }
0 commit comments