Skip to content
Open
75 changes: 75 additions & 0 deletions src/wp-includes/compat.php
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,81 @@ function array_last( array $array ) { // phpcs:ignore Universal.NamingConvention
}
}

if ( ! function_exists( 'mb_trim' ) ) {
	/**
	 * Polyfill for `mb_trim()` function added in PHP 8.4.
	 *
	 * Strips the given characters (Unicode whitespace by default) from the
	 * beginning and end of a string. Characters in the middle of the string
	 * are never touched.
	 *
	 * This polyfill only supports UTF-8. When a non-UTF-8 encoding is explicitly
	 * requested, a warning is triggered and the input is returned unchanged.
	 * The input is also returned unchanged when the regular expression fails,
	 * e.g. because `$str` or `$characters` is not valid UTF-8.
	 *
	 * @since 6.9.0
	 *
	 * @param string      $str        The string to trim.
	 * @param string|null $characters Optional. The characters to trim from the string. Every character
	 *                                is taken literally; an empty string disables trimming.
	 *                                Without the second parameter, mb_trim() will strip these characters:
	 *                                - " " (Unicode U+0020), an ordinary space.
	 *                                - "\t" (Unicode U+0009), a tab.
	 *                                - "\n" (Unicode U+000A), a new line (line feed).
	 *                                - "\r" (Unicode U+000D), a carriage return.
	 *                                - "\0" (Unicode U+0000), the NUL-byte.
	 *                                - "\v" (Unicode U+000B), a vertical tab.
	 *                                - "\f" (Unicode U+000C), a form feed.
	 *                                - "\u00A0" (Unicode U+00A0), a NO-BREAK SPACE.
	 *                                - "\u1680" (Unicode U+1680), an OGHAM SPACE MARK.
	 *                                - "\u2000" (Unicode U+2000), an EN QUAD.
	 *                                - "\u2001" (Unicode U+2001), an EM QUAD.
	 *                                - "\u2002" (Unicode U+2002), an EN SPACE.
	 *                                - "\u2003" (Unicode U+2003), an EM SPACE.
	 *                                - "\u2004" (Unicode U+2004), a THREE-PER-EM SPACE.
	 *                                - "\u2005" (Unicode U+2005), a FOUR-PER-EM SPACE.
	 *                                - "\u2006" (Unicode U+2006), a SIX-PER-EM SPACE.
	 *                                - "\u2007" (Unicode U+2007), a FIGURE SPACE.
	 *                                - "\u2008" (Unicode U+2008), a PUNCTUATION SPACE.
	 *                                - "\u2009" (Unicode U+2009), a THIN SPACE.
	 *                                - "\u200A" (Unicode U+200A), a HAIR SPACE.
	 *                                - "\u2028" (Unicode U+2028), a LINE SEPARATOR.
	 *                                - "\u2029" (Unicode U+2029), a PARAGRAPH SEPARATOR.
	 *                                - "\u202F" (Unicode U+202F), a NARROW NO-BREAK SPACE.
	 *                                - "\u205F" (Unicode U+205F), a MEDIUM MATHEMATICAL SPACE.
	 *                                - "\u3000" (Unicode U+3000), an IDEOGRAPHIC SPACE.
	 *                                - "\u0085" (Unicode U+0085), a NEXT LINE (NEL).
	 *                                - "\u180E" (Unicode U+180E), a MONGOLIAN VOWEL SEPARATOR.
	 * @param string|null $encoding   Optional. The character encoding. If it is omitted or null, the
	 *                                string is treated as UTF-8 by this polyfill. Any encoding that is
	 *                                not UTF-8 is rejected with a warning.
	 * @return string The trimmed string, or the original string if trimming was not possible.
	 */
	function mb_trim( string $str, ?string $characters = null, ?string $encoding = null ) {
		if ( null === $characters ) {
			$characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
		}

		// Nothing to trim; also avoids building an empty (invalid) character class.
		if ( '' === $characters ) {
			return $str;
		}

		/*
		 * Keep this polyfill UTF-8-only: if a non-UTF-8 encoding is explicitly
		 * requested, bail out unchanged instead of attempting lossy conversions.
		 */
		if ( null !== $encoding && ! _is_utf8_charset( $encoding ) ) {
			wp_trigger_error(
				__FUNCTION__,
				'mb_trim() polyfill only supports UTF-8 encoding. The provided encoding "' . $encoding . '" is not supported.',
				E_USER_WARNING
			);
			return $str;
		}

		// preg_quote() escapes everything that is special inside a character class.
		$char_class = '[' . preg_quote( $characters, '/' ) . ']';

		/*
		 * The leading alternative is anchored at the start of the string. The
		 * trailing alternative is only attempted at the first character of a run
		 * (negative lookbehind), so a long run of trimmable characters in the
		 * middle of the string is scanned once instead of once per position.
		 * The `D` modifier makes `$` match only at the very end of the string.
		 */
		$pattern        = '/^' . $char_class . '+|(?<!' . $char_class . ')' . $char_class . '+$/uD';
		$trimmed_string = preg_replace( $pattern, '', $str );

		// preg_replace() returns null on failure (e.g. invalid UTF-8); keep the input as-is.
		if ( null === $trimmed_string ) {
			return $str;
		}

		return $trimmed_string;
	}
}

// IMAGETYPE_AVIF constant is only defined in PHP 8.x or later.
if ( ! defined( 'IMAGETYPE_AVIF' ) ) {
define( 'IMAGETYPE_AVIF', 19 );
Expand Down
29 changes: 29 additions & 0 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -6294,3 +6294,32 @@ function maybe_hash_hex_color( $color ) {

return $color;
}

/**
 * Global variable containing the characters to trim from the beginning and end of a string.
 *
 * This variable is used by the `js_trim()` function to define which characters
 * should be trimmed from a string. It includes common whitespace characters
 * as well as some Unicode whitespace characters supported by JavaScript,
 * including U+FEFF. It deliberately does not contain the NUL byte.
 *
 * @since 6.9.0
 *
 * @global string $js_trimmables
 *
 * @var string
 */
/*
 * Declare the variable as global explicitly: if this file is ever included
 * from inside a function, a plain assignment would only create a local
 * variable and `js_trim()` would not see it.
 */
global $js_trimmables;
$js_trimmables = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}";

/**
 * Trims whitespace from the beginning and end of a string.
 *
 * This function is similar to `trim()`, but it is multibyte-aware and uses the
 * set of characters defined in the global `$js_trimmables` variable, which is
 * intended to mirror the characters JavaScript's `String.prototype.trim()`
 * removes. Unlike `trim()` it does not strip NUL bytes.
 *
 * @since 6.9.0
 *
 * @global string $js_trimmables Characters to strip from both ends of the string.
 *
 * @param string $string The string to trim.
 * @return string The trimmed string.
 */
function js_trim( $string ) {
	global $js_trimmables;

	$characters = $js_trimmables;

	/*
	 * If the global is missing or has been overwritten with a non-string,
	 * passing it on would make mb_trim() fall back to its own default set,
	 * which differs from this one (it strips NUL, U+0085 and U+180E and keeps
	 * U+FEFF). Use the intended set instead.
	 */
	if ( ! is_string( $characters ) ) {
		$characters = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}";
	}

	return mb_trim( $string, $characters, 'UTF-8' );
}
125 changes: 125 additions & 0 deletions tests/phpunit/tests/compat/mbTrim.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
<?php

/**
 * Tests for the `mb_trim()` compatibility function.
 *
 * @group compat
 *
 * @covers ::mb_trim
 */
class Tests_Compat_mbTrim extends WP_UnitTestCase {

	/**
	 * Tests that mb_trim() is always available (either from PHP or WP).
	 *
	 * @ticket 63804
	 */
	public function test_mb_trim_availability(): void {
		$this->assertTrue( function_exists( 'mb_trim' ) );
	}

	/**
	 * Tests that mb_trim() strips the expected characters from both ends.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_mb_trim
	 *
	 * @param string      $input      The input string to be trimmed.
	 * @param string      $expected   The expected trimmed result.
	 * @param string|null $characters Optional. The characters to trim. Default null (whitespace).
	 * @param string|null $encoding   Optional. The character encoding. Default null.
	 */
	public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ): void {
		$this->assertSame(
			$expected,
			mb_trim( $input, $characters, $encoding )
		);
	}

	/**
	 * Data provider for mb_trim tests.
	 *
	 * @return array[]
	 */
	public static function data_mb_trim(): array {
		return array(
			'ASCII spaces'                    => array( ' hello ', 'hello' ),
			'ASCII tab, LF and CR'            => array( "\t\n\rhello\n\r\t", 'hello' ),
			'no-break space'                  => array( "\u{00A0}hello\u{00A0}", 'hello' ),
			'ideographic space'               => array( "\u{3000}hello\u{3000}", 'hello' ),
			'mixed Unicode and ASCII spaces'  => array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
			'single custom character'         => array( 'xxhelloxx', 'hello', 'x' ),
			'multiple custom characters'      => array( 'xyhelloyx', 'hello', 'xy' ),
			'no trimming needed'              => array( 'hello', 'hello' ),
			'empty string'                    => array( '', '' ),
			'explicit UTF-8 encoding'         => array( ' hello ', 'hello', null, 'UTF-8' ),
			'NUL bytes'                       => array( "\0hello\0", 'hello' ),
			'vertical tab and form feed'      => array( "\v\fhello\f\v", 'hello' ),
		);
	}

	/**
	 * Tests that passing a non-UTF-8 encoding to the WP polyfill triggers a
	 * warning and returns the original string unchanged, rather than attempting
	 * a lossy re-encoding that could silently corrupt data.
	 *
	 * Note: when PHP's native mb_trim() is in use this test is skipped, because
	 * the polyfill's bail-out behaviour does not apply to it.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_mb_trim_non_utf8
	 *
	 * @param string $input    The input string to be trimmed.
	 * @param string $encoding The non-UTF-8 character encoding to pass.
	 */
	public function test_mb_trim_non_utf8_encoding_bails_with_warning( string $input, string $encoding ): void {
		// An internal (non-user-defined) mb_trim() means the polyfill was not loaded.
		if ( ( new ReflectionFunction( 'mb_trim' ) )->isInternal() ) {
			$this->markTestSkipped( 'Native mb_trim() is available; polyfill bail-out behaviour does not apply.' );
		}

		/*
		 * Record the warning instead of throwing from the handler: throwing would
		 * abort mb_trim() before it returns, so the return value could never be
		 * checked. Returning true marks the warning as handled.
		 */
		$warnings = array();
		set_error_handler(
			static function ( int $errno, string $errstr ) use ( &$warnings ): bool {
				$warnings[] = $errstr;
				return true;
			},
			E_USER_WARNING
		);

		try {
			$result = mb_trim( $input, null, $encoding );
		} finally {
			// Always restore the previous handler, even if mb_trim() throws.
			restore_error_handler();
		}

		$this->assertCount( 1, $warnings, 'Exactly one warning should be triggered for an unsupported encoding.' );
		$this->assertStringContainsString( 'mb_trim() polyfill only supports UTF-8 encoding', $warnings[0] );
		$this->assertSame( $input, $result, 'Polyfill should return the original string unchanged for unsupported encodings.' );
	}

	/**
	 * Data provider for non-UTF-8 encoding bail-out tests.
	 *
	 * @return array[]
	 */
	public static function data_mb_trim_non_utf8(): array {
		return array(
			'ISO-8859-1 latin string' => array( ' café ', 'ISO-8859-1' ),
			'SJIS japanese string'    => array( ' test ', 'SJIS' ),
			'Windows-1252 string'     => array( ' hello ', 'Windows-1252' ),
		);
	}
}
55 changes: 55 additions & 0 deletions tests/phpunit/tests/formatting/jsTrim.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

/**
 * Tests for the `js_trim()` formatting function.
 *
 * @group formatting
 *
 * @covers ::js_trim
 */
class Tests_Formatting_JsTrim extends WP_UnitTestCase {

	/**
	 * Tests that js_trim() is available.
	 *
	 * @ticket 63804
	 */
	public function test_js_trim_availability(): void {
		$this->assertTrue( function_exists( 'js_trim' ) );
	}

	/**
	 * Tests that js_trim() strips exactly its own set of characters from both ends.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_js_trim
	 *
	 * @param string $input    The input string to be trimmed.
	 * @param string $expected The expected trimmed result.
	 */
	public function test_js_trim( $input, $expected ): void {
		$this->assertSame( $expected, js_trim( $input ) );
	}

	/**
	 * Data provider for js_trim tests.
	 *
	 * @return array[]
	 */
	public static function data_js_trim(): array {
		return array(
			'ASCII spaces'                           => array( ' hello ', 'hello' ),
			'ASCII tab, LF and CR'                   => array( "\t\n\rhello\n\r\t", 'hello' ),
			'no-break space'                         => array( "\u{00A0}hello\u{00A0}", 'hello' ),
			'ideographic space'                      => array( "\u{3000}hello\u{3000}", 'hello' ),
			'mixed Unicode and ASCII spaces'         => array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
			// U+FEFF is in the js_trim() set but not in mb_trim()'s default set.
			'zero width no-break space is trimmed'   => array( "\u{FEFF}hello\u{FEFF}", 'hello' ),
			// Null characters should not be trimmed by js_trim().
			'NUL bytes are preserved'                => array( "\0hello\0", "\0hello\0" ),
			// These are in mb_trim()'s default set but not in the js_trim() set.
			'next line (NEL) is preserved'           => array( "\u{0085}hello\u{0085}", "\u{0085}hello\u{0085}" ),
			'mongolian vowel separator is preserved' => array( "\u{180E}hello\u{180E}", "\u{180E}hello\u{180E}" ),
			'vertical tab and form feed'             => array( "\v\fhello\f\v", 'hello' ),
			'interior whitespace is preserved'       => array( " hello \u{3000} world ", "hello \u{3000} world" ),
			'no trimming needed'                     => array( 'hello', 'hello' ),
			'empty string'                           => array( '', '' ),
		);
	}
}
Loading