Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion tests/phpunit/tests/formatting/isEmail.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,60 @@ public static function data_invalid_email_provider() {
);

foreach ( $invalid_emails as $email ) {
yield $email => array( $email );
yield self::invalid_utf8_as_ascii( $email ) => array( $email );
}
}

/**
 * Renders a string so that invalid UTF-8 byte sequences are visible as ASCII,
 * grouping the invalid bytes according to the maximal subpart rule.
 *
 * Spans of valid UTF-8 are copied through unchanged. Every invalid sequence
 * is replaced by a parenthesized, space-separated list of its byte values,
 * each written as `0x` followed by two lowercase hexadecimal digits (the
 * form returned by `bin2hex()`).
 *
 * Example:
 *
 *     // For valid UTF-8 the output is the input.
 *     'test' === invalid_utf8_as_ascii( 'test' );
 *
 *     // Invalid bytes are represented with their hex value.
 *     'a(0x80)b' === invalid_utf8_as_ascii( "a\x80b" );
 *
 *     // Invalid byte sequences form maximal subparts.
 *     '(0xc2)(0xef 0xbf)' === invalid_utf8_as_ascii( "\xC2\xEF\xBF" );
 *
 * @param string $text Text which may contain invalid UTF-8 byte sequences.
 * @return string The text with each invalid byte sequence shown as hex values.
 */
private static function invalid_utf8_as_ascii( string $text ): string {
	$output        = '';
	$at            = 0;
	$end           = strlen( $text );
	$invalid_bytes = 0;

	while ( $at < $end ) {
		$span_start = $at;

		/*
		 * NOTE(review): `_wp_scan_utf8()` is defined outside this file. This code
		 * relies on it moving `$at` past the valid span it scanned and storing the
		 * byte length of the invalid sequence found there (if any) into
		 * `$invalid_bytes`, returning how much valid text it scanned.
		 */
		$scanned = _wp_scan_utf8( $text, $at, $invalid_bytes );

		// Nothing valid was scanned and nothing invalid was found: no progress is possible.
		if ( 0 === $scanned && 0 === $invalid_bytes ) {
			break;
		}

		// Copy the valid span through as-is; this is an empty string when `$at` did not move.
		$output .= substr( $text, $span_start, $at - $span_start );

		if ( $invalid_bytes > 0 ) {
			// Two hex digits per byte, e.g. "efbf" becomes "(0xef 0xbf)".
			$hex_bytes = str_split( bin2hex( substr( $text, $at, $invalid_bytes ) ), 2 );
			$output   .= '(0x' . implode( ' 0x', $hex_bytes ) . ')';
		}

		// Resume scanning immediately after the invalid sequence.
		$at += $invalid_bytes;
	}

	return $output;
}
}
73 changes: 68 additions & 5 deletions tests/phpunit/tests/formatting/sanitizeEmail.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,21 @@ class Tests_Formatting_SanitizeEmail extends WP_UnitTestCase {
* @param string $expected The expected sanitized email address.
*/
public function test_returns_stripped_email_address( $address, $expected ) {
	// Sanitize once so the direct comparison and the fallback see the same value.
	$sanitized = sanitize_email( $address );

	/*
	 * An exact match is compared directly. Otherwise the sanitized output is
	 * compared through its ASCII rendering, in which invalid UTF-8 bytes are
	 * written out as hex values.
	 *
	 * A preliminary unconditional assertion on the raw output must not be
	 * made here: it would fail first on any mismatch and the ASCII fallback
	 * would never be reached.
	 */
	$actual = $expected === $sanitized
		? $sanitized
		: self::invalid_utf8_as_ascii( $sanitized );

	$this->assertSame(
		$expected,
		$actual,
		'Should have produced the known sanitized form of the email.'
	);
}

/**
Expand All @@ -39,4 +49,57 @@ public function data_sanitized_email_pairs() {
'all subdomains invalid utf8' => array( "abc@\x80.org", '' ),
);
}

/**
 * Renders a string so that invalid UTF-8 byte sequences are visible as ASCII,
 * grouping the invalid bytes according to the maximal subpart rule.
 *
 * Spans of valid UTF-8 are copied through unchanged. Every invalid sequence
 * is replaced by a parenthesized, space-separated list of its byte values,
 * each written as `0x` followed by two lowercase hexadecimal digits (the
 * form returned by `bin2hex()`).
 *
 * Example:
 *
 *     // For valid UTF-8 the output is the input.
 *     'test' === invalid_utf8_as_ascii( 'test' );
 *
 *     // Invalid bytes are represented with their hex value.
 *     'a(0x80)b' === invalid_utf8_as_ascii( "a\x80b" );
 *
 *     // Invalid byte sequences form maximal subparts.
 *     '(0xc2)(0xef 0xbf)' === invalid_utf8_as_ascii( "\xC2\xEF\xBF" );
 *
 * @param string $text Text which may contain invalid UTF-8 byte sequences.
 * @return string The text with each invalid byte sequence shown as hex values.
 */
private static function invalid_utf8_as_ascii( string $text ): string {
	$output        = '';
	$at            = 0;
	$end           = strlen( $text );
	$invalid_bytes = 0;

	while ( $at < $end ) {
		$span_start = $at;

		/*
		 * NOTE(review): `_wp_scan_utf8()` is defined outside this file. This code
		 * relies on it moving `$at` past the valid span it scanned and storing the
		 * byte length of the invalid sequence found there (if any) into
		 * `$invalid_bytes`, returning how much valid text it scanned.
		 */
		$scanned = _wp_scan_utf8( $text, $at, $invalid_bytes );

		// Nothing valid was scanned and nothing invalid was found: no progress is possible.
		if ( 0 === $scanned && 0 === $invalid_bytes ) {
			break;
		}

		// Copy the valid span through as-is; this is an empty string when `$at` did not move.
		$output .= substr( $text, $span_start, $at - $span_start );

		if ( $invalid_bytes > 0 ) {
			// Two hex digits per byte, e.g. "efbf" becomes "(0xef 0xbf)".
			$hex_bytes = str_split( bin2hex( substr( $text, $at, $invalid_bytes ) ), 2 );
			$output   .= '(0x' . implode( ' 0x', $hex_bytes ) . ')';
		}

		// Resume scanning immediately after the invalid sequence.
		$at += $invalid_bytes;
	}

	return $output;
}
}
Loading