@@ -2176,6 +2176,7 @@ function sanitize_user( $username, $strict = false ) {
21762176 return apply_filters ( 'sanitize_user ' , $ username , $ raw_username , $ strict );
21772177}
21782178
2179+
21792180/**
21802181 * Sanitizes a string key.
21812182 *
@@ -3589,7 +3590,14 @@ function convert_smilies( $text ) {
35893590/**
35903591 * Verifies that an email is valid.
35913592 *
3592- * Does not grok i18n domains. Not RFC compliant.
3593+ * This accepts the addresses that match the WHATWG specification,
3594+ * i.e. what browsers use for `<input type=email>`. It also accepts some
3595+ * additional addresses.
3596+ *
3597+ * By default this accepts addresses like info@grå.org (also accepted
3598+ * by Firefox for `<input type=email>`). You can disable Unicode support by
3599+ * using the wp_is_ascii_email filter instead of wp_is_unicode_email,
3600+ * which is the default.
35933601 *
35943602 * @since 0.71
35953603 *
@@ -3602,84 +3610,65 @@ function is_email( $email, $deprecated = false ) {
36023610 _deprecated_argument ( __FUNCTION__ , '3.0.0 ' );
36033611 }
36043612
3605- // Test for the minimum length the email can be.
3606- if ( strlen ( $ email ) < 6 ) {
3607- /**
3608- * Filters whether an email address is valid.
3609- *
3610- * This filter is evaluated under several different contexts, such as 'email_too_short',
3611- * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
3612- * 'domain_no_periods', 'sub_hyphen_limits', 'sub_invalid_chars', or no specific context.
3613- *
3614- * @since 2.8.0
3615- *
3616- * @param string|false $is_email The email address if successfully passed the is_email() checks, false otherwise.
3617- * @param string $email The email address being checked.
3618- * @param string $context Context under which the email was tested.
3619- */
3620- return apply_filters ( 'is_email ' , false , $ email , 'email_too_short ' );
3621- }
3622-
3623- // Test for an @ character after the first position.
3624- if ( false === strpos ( $ email , '@ ' , 1 ) ) {
3625- /** This filter is documented in wp-includes/formatting.php */
3626- return apply_filters ( 'is_email ' , false , $ email , 'email_no_at ' );
3627- }
3628-
3629- // Split out the local and domain parts.
3630- list ( $ local , $ domain ) = explode ( '@ ' , $ email , 2 );
3631-
3632- /*
3633- * LOCAL PART
3634- * Test for invalid characters.
3635- */
3636- if ( ! preg_match ( '/^[a-zA-Z0-9!#$%& \'*+\/=?^_`{|}~\.-]+$/ ' , $ local ) ) {
3637- /** This filter is documented in wp-includes/formatting.php */
3638- return apply_filters ( 'is_email ' , false , $ email , 'local_invalid_chars ' );
3639- }
3640-
3641- /*
3642- * DOMAIN PART
3643- * Test for sequences of periods.
3613+ /**
3614+ * Filters whether an email address is valid.
3615+ *
3616+ * This filter is evaluated under several different contexts, such as
3617+ * 'local_invalid_chars', 'domain_no_periods', or no specific context.
3618+ * Filters registered on this hook perform the actual validation; the
3619+ * default filter is registered in default-filters.php.
3620+ *
3621+ * @since 2.8.0
3622+ *
3623+ * @param string|false $is_email The email address if successfully passed the is_email() checks, false otherwise.
3624+ * @param string $email The email address being checked.
3625+ * @param string|null $context Context under which the email was tested, or null for the initial call.
36443626 */
3645- if ( preg_match ( '/\.{2,}/ ' , $ domain ) ) {
3646- /** This filter is documented in wp-includes/formatting.php */
3647- return apply_filters ( 'is_email ' , false , $ email , 'domain_period_sequence ' );
3648- }
3649-
3650- // Test for leading and trailing periods and whitespace.
3651- if ( trim ( $ domain , " \t\n\r\0\x0B. " ) !== $ domain ) {
3652- /** This filter is documented in wp-includes/formatting.php */
3653- return apply_filters ( 'is_email ' , false , $ email , 'domain_period_limits ' );
3654- }
3655-
3656- // Split the domain into subs.
3657- $ subs = explode ( '. ' , $ domain );
3627+ return apply_filters ( 'is_email ' , false , $ email , null );
3628+ }
36583629
3659- // Assume the domain will have at least two subs.
3660- if ( 2 > count ( $ subs ) ) {
3661- /** This filter is documented in wp-includes/formatting.php */
3662- return apply_filters ( 'is_email ' , false , $ email , 'domain_no_periods ' );
3630+ /**
3631+ * Default is_email filter for databases that support Unicode (db charset is utf8mb4).
3632+ *
3633+ * Validates the email address using {@see WP_Email_Address::from_string()} with Unicode enabled.
3634+ * Only acts when $context is null (as it is in the initial validation call); calls with any other context return $value unchanged.
3635+ *
3636+ * @since 7.1.0
3637+ *
3638+ * @param string|false $value The current filter value.
3639+ * @param string $email The email address being checked.
3640+ * @param string|null $context Validation context, or null for the initial call.
3641+ * @return string|false The email address if valid, false otherwise.
3642+ */
3643+ function wp_is_unicode_email ( $ value , $ email , $ context ) {
3644+ if ( null !== $ context ) {
3645+ return $ value ;
36633646 }
36643647
3665- // Loop through each sub.
3666- foreach ( $ subs as $ sub ) {
3667- // Test for leading and trailing hyphens and whitespace.
3668- if ( trim ( $ sub , " \t\n\r\0\x0B- " ) !== $ sub ) {
3669- /** This filter is documented in wp-includes/formatting.php */
3670- return apply_filters ( 'is_email ' , false , $ email , 'sub_hyphen_limits ' );
3671- }
3648+ $ result = WP_Email_Address::from_string ( $ email , 'unicode ' );
3649+ return $ result ? $ result ->get_unicode_address () : false ;
3650+ }
36723651
3673- // Test for invalid characters.
3674- if ( ! preg_match ( '/^[a-z0-9-]+$/i ' , $ sub ) ) {
3675- /** This filter is documented in wp-includes/formatting.php */
3676- return apply_filters ( 'is_email ' , false , $ email , 'sub_invalid_chars ' );
3677- }
3652+ /**
3653+ * Default is_email filter for databases that do not support Unicode (db charset is not utf8mb4).
3654+ *
3655+ * Validates the email address using {@see WP_Email_Address::from_string()} with Unicode disabled.
3656+ * Only acts when $context is null (as it is in the initial validation call); calls with any other context return $value unchanged.
3657+ *
3658+ * @since 7.1.0
3659+ *
3660+ * @param string|false $value The current filter value.
3661+ * @param string $email The email address being checked.
3662+ * @param string|null $context Validation context, or null for the initial call.
3663+ * @return string|false The email address if valid, false otherwise.
3664+ */
3665+ function wp_is_ascii_email ( $ value , $ email , $ context ) {
3666+ if ( null !== $ context ) {
3667+ return $ value ;
36783668 }
36793669
3680- // Congratulations, your email made it!
3681- /** This filter is documented in wp-includes/formatting.php */
3682- return apply_filters ( 'is_email ' , $ email , $ email , null );
3670+ $ result = WP_Email_Address::from_string ( $ email , 'ascii ' );
3671+ return $ result ? $ result ->get_unicode_address () : false ;
36833672}
36843673
36853674/**
@@ -3808,109 +3797,96 @@ function iso8601_to_datetime( $date_string, $timezone = 'user' ) {
38083797}
38093798
38103799/**
3811- * Strips out all characters that are not allowable in an email.
3800+ * Sanitizes an email address.
3801+ *
3802+ * Trims the input, unwraps the address from "Display Name <address>" form, removes soft hyphens and whitespace around dots and @, then strips trailing dots from the domain.
3803+ * This is designed to recover from cut/paste mistakes without any risk of transforming
3804+ * the input into a different address than the user intended.
3805+ *
3806+ * Validation and final form are determined by the 'sanitize_email' filter; the default
3807+ * filter is registered in default-filters.php and delegates to {@see WP_Email_Address::from_string()}.
38123808 *
38133809 * @since 1.5.0
3810+ * @since 7.1.0 Accepts Unicode email addresses on supporting platforms.
38143811 *
3815- * @param string $email Email address to filter .
3816- * @return string Filtered email address.
3812+ * @param string $email Email address to sanitize.
3813+ * @return string The sanitized email address, or an empty string if invalid.
38173814 */
38183815function sanitize_email ( $ email ) {
3819- // Test for the minimum length the email can be.
3820- if ( strlen ( $ email ) < 6 ) {
3821- /**
3822- * Filters a sanitized email address.
3823- *
3824- * This filter is evaluated under several contexts, including 'email_too_short',
3825- * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
3826- * 'domain_no_periods', 'domain_no_valid_subs', or no context.
3827- *
3828- * @since 2.8.0
3829- *
3830- * @param string $sanitized_email The sanitized email address.
3831- * @param string $email The email address, as provided to sanitize_email().
3832- * @param string|null $message A message to pass to the user. null if email is sanitized.
3833- */
3834- return apply_filters ( 'sanitize_email ' , '' , $ email , 'email_too_short ' );
3835- }
3836-
3837- // Test for an @ character after the first position.
3838- if ( false === strpos ( $ email , '@ ' , 1 ) ) {
3839- /** This filter is documented in wp-includes/formatting.php */
3840- return apply_filters ( 'sanitize_email ' , '' , $ email , 'email_no_at ' );
3841- }
3842-
3843- // Split out the local and domain parts.
3844- list ( $ local , $ domain ) = explode ( '@ ' , $ email , 2 );
3816+ // Strip surrounding whitespace.
3817+ $ email = trim ( $ email );
38453818
3846- /*
3847- * LOCAL PART
3848- * Test for invalid characters.
3849- */
3850- $ local = preg_replace ( '/[^a-zA-Z0-9!#$%& \'*+\/=?^_`{|}~\.-]/ ' , '' , $ local );
3851- if ( '' === $ local ) {
3852- /** This filter is documented in wp-includes/formatting.php */
3853- return apply_filters ( 'sanitize_email ' , '' , $ email , 'local_invalid_chars ' );
3819+ // Extract the address from "Display Name <username@domain>" format.
3820+ if ( 1 === preg_match ( '/<([^>]+)>$/ ' , $ email , $ matches ) ) {
3821+ $ email = $ matches [1 ];
38543822 }
38553823
38563824 /*
3857- * DOMAIN PART
3858- * Test for sequences of periods.
3825+ * Strip soft hyphens and whitespace adjacent to structural separators (dots and @),
3826+ * e.g. copy-paste artifacts like "info@example\u{00AD}.com" or "info@example .com".
3827+ *
3828+ * Some older software (e.g. autocorrect) has been seen to add such a
3829+ * space for unrecognized TLDs. This re-joins the parts for proper examination.
38593830 */
3860- $ domain = preg_replace ( '/\.{2,}/ ' , '' , $ domain );
3861- if ( '' === $ domain ) {
3862- /** This filter is documented in wp-includes/formatting.php */
3863- return apply_filters ( 'sanitize_email ' , '' , $ email , 'domain_period_sequence ' );
3864- }
3865-
3866- // Test for leading and trailing periods and whitespace.
3867- $ domain = trim ( $ domain , " \t\n\r\0\x0B. " );
3868- if ( '' === $ domain ) {
3869- /** This filter is documented in wp-includes/formatting.php */
3870- return apply_filters ( 'sanitize_email ' , '' , $ email , 'domain_period_limits ' );
3871- }
3872-
3873- // Split the domain into subs.
3874- $ subs = explode ( '. ' , $ domain );
3875-
3876- // Assume the domain will have at least two subs.
3877- if ( 2 > count ( $ subs ) ) {
3878- /** This filter is documented in wp-includes/formatting.php */
3879- return apply_filters ( 'sanitize_email ' , '' , $ email , 'domain_no_periods ' );
3880- }
3881-
3882- // Create an array that will contain valid subs.
3883- $ new_subs = array ();
3884-
3885- // Loop through each sub.
3886- foreach ( $ subs as $ sub ) {
3887- // Test for leading and trailing hyphens.
3888- $ sub = trim ( $ sub , " \t\n\r\0\x0B- " );
3831+ $ email = preg_replace ( '/[\x{00AD}\s]*([.@])[\x{00AD}\s]*/u ' , '$1 ' , $ email ) ?? $ email ;
38893832
3890- // Test for invalid characters.
3891- $ sub = preg_replace ( '/[^a-z0-9-]+/i ' , '' , $ sub );
3892-
3893- // If there's anything left, add it to the valid subs.
3894- if ( '' !== $ sub ) {
3895- $ new_subs [] = $ sub ;
3896- }
3897- }
3898-
3899- // If there aren't 2 or more valid subs.
3900- if ( 2 > count ( $ new_subs ) ) {
3901- /** This filter is documented in wp-includes/formatting.php */
3902- return apply_filters ( 'sanitize_email ' , '' , $ email , 'domain_no_valid_subs ' );
3833+ // Strip trailing dots from the domain (e.g. if pasted from the end of a sentence).
3834+ if ( str_contains ( $ email , '@ ' ) ) {
3835+ list ( $ local , $ domain ) = explode ( '@ ' , $ email , 2 );
3836+ $ domain = rtrim ( $ domain , '. ' );
3837+ $ email = $ local . '@ ' . $ domain ;
39033838 }
39043839
3905- // Join valid subs into the new domain.
3906- $ domain = implode ( '. ' , $ new_subs );
3840+ /**
3841+ * Filters a sanitized email address.
3842+ *
3843+ * Filters registered on this hook perform the actual validation and return
3844+ * the canonical email string on success or an empty string on failure.
3845+ * The default filter is registered in default-filters.php.
3846+ *
3847+ * @since 2.8.0
3848+ *
3849+ * @param string $sanitized_email The sanitized email address, or empty string.
3850+ * @param string $email The email address as provided to sanitize_email().
3851+ * @param string|null $context Validation context, or null for the initial call.
3852+ */
3853+ return apply_filters ( 'sanitize_email ' , '' , $ email , null );
3854+ }
39073855
3908- // Put the email back together.
3909- $ sanitized_email = $ local . '@ ' . $ domain ;
3856+ /**
3857+ * Default sanitize_email filter for databases that support Unicode (db charset is utf8mb4).
3858+ *
3859+ * Returns the canonical address from {@see WP_Email_Address::from_string()} with Unicode
3860+ * enabled, or an empty string if the address is invalid.
3861+ *
3862+ * @since 7.1.0
3863+ *
3864+ * @param string $value The current filter value.
3865+ * @param string $email The email address being sanitized.
3866+ * @param string|null $context Sanitization context, always null.
3867+ * @return string The canonical email address if valid, empty string otherwise.
3868+ */
3869+ function wp_sanitize_unicode_email ( $ value , $ email , $ context ) {
3870+ $ result = WP_Email_Address::from_string ( $ email , 'unicode ' );
3871+ return $ result ? $ result ->get_unicode_address () : '' ;
3872+ }
39103873
3911- // Congratulations, your email made it!
3912- /** This filter is documented in wp-includes/formatting.php */
3913- return apply_filters ( 'sanitize_email ' , $ sanitized_email , $ email , null );
3874+ /**
3875+ * Default sanitize_email filter for databases that do not support Unicode (db charset is not utf8mb4).
3876+ *
3877+ * Returns the canonical address from {@see WP_Email_Address::from_string()} with Unicode
3878+ * disabled, or an empty string if the address is invalid.
3879+ *
3880+ * @since 7.1.0
3881+ *
3882+ * @param string $value The current filter value.
3883+ * @param string $email The email address being sanitized.
3884+ * @param string|null $context Sanitization context, always null.
3885+ * @return string The canonical email address if valid, empty string otherwise.
3886+ */
3887+ function wp_sanitize_ascii_email ( $ value , $ email , $ context ) {
3888+ $ result = WP_Email_Address::from_string ( $ email , 'ascii ' );
3889+ return $ result ? $ result ->get_unicode_address () : '' ;
39143890}
39153891
39163892/**
0 commit comments