From 17cd6171db472d8a7190847ead5659e1a130709c Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Mon, 18 Aug 2025 21:25:29 +0530 Subject: [PATCH 1/7] feat: add js_trim() and mb_trim() compat --- src/wp-includes/compat.php | 80 +++++++++++++++++++++ src/wp-includes/formatting.php | 30 ++++++++ tests/phpunit/tests/compat/mbTrim.php | 87 +++++++++++++++++++++++ tests/phpunit/tests/formatting/jsTrim.php | 35 +++++++++ 4 files changed, 232 insertions(+) create mode 100644 tests/phpunit/tests/compat/mbTrim.php create mode 100644 tests/phpunit/tests/formatting/jsTrim.php diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php index f0bdf079742f1..4f6e923f9c018 100644 --- a/src/wp-includes/compat.php +++ b/src/wp-includes/compat.php @@ -539,6 +539,86 @@ function array_all( array $array, callable $callback ): bool { // phpcs:ignore U } } +if ( ! function_exists( 'mb_trim' ) ) { + /** + * Polyfill for `mb_trim()` function added in PHP 8.4. + * + * Trims whitespace from the beginning and end of a string. + * + * @since 6.8.0 + * + * @param string $string The string to trim. + * @param string|null $characters Optional. The characters to trim from the string. + * Without the second parameter, mb_trim() will strip these characters: + * - " " (Unicode U+0020), an ordinary space. + * - "\t" (Unicode U+0009), a tab. + * - "\n" (Unicode U+000A), a new line (line feed). + * - "\r" (Unicode U+000D), a carriage return. + * - "\0" (Unicode U+0000), the NUL-byte. + * - "\v" (Unicode U+000B), a vertical tab. + * - "\f" (Unicode U+000C), a form feed. + * - "\u00A0" (Unicode U+00A0), a NO-BREAK SPACE. + * - "\u1680" (Unicode U+1680), an OGHAM SPACE MARK. + * - "\u2000" (Unicode U+2000), an EN QUAD. + * - "\u2001" (Unicode U+2001), an EM QUAD. + * - "\u2002" (Unicode U+2002), an EN SPACE. + * - "\u2003" (Unicode U+2003), an EM SPACE. + * - "\u2004" (Unicode U+2004), a THREE-PER-EM SPACE. + * - "\u2005" (Unicode U+2005), a FOUR-PER-EM SPACE. 
+ * - "\u2006" (Unicode U+2006), a SIX-PER-EM SPACE. + * - "\u2007" (Unicode U+2007), a FIGURE SPACE. + * - "\u2008" (Unicode U+2008), a PUNCTUATION SPACE. + * - "\u2009" (Unicode U+2009), a THIN SPACE. + * - "\u200A" (Unicode U+200A), a HAIR SPACE. + * - "\u2028" (Unicode U+2028), a LINE SEPARATOR. + * - "\u2029" (Unicode U+2029), a PARAGRAPH SEPARATOR. + * - "\u202F" (Unicode U+202F), a NARROW NO-BREAK SPACE. + * - "\u205F" (Unicode U+205F), a MEDIUM MATHEMATICAL SPACE. + * - "\u3000" (Unicode U+3000), an IDEOGRAPHIC SPACE. + * - "\u0085" (Unicode U+0085), a NEXT LINE (NEL). + * - "\u180E" (Unicode U+180E), a MONGOLIAN VOWEL SEPARATOR. + * @param string|null $encoding Optional. The encoding parameter is the character encoding. If it is omitted or null, the internal character encoding value will be used. + * @return string The trimmed string. + */ + function mb_trim( string $str, ?string $characters = null, ?string $encoding = null ) { + if ( is_null( $characters ) ) { + $characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u180E"; + } + + if ( is_null( $encoding ) ) { + $encoding = mb_internal_encoding(); + } + + if ( ! mb_check_encoding( '', $encoding ) ) { + return $str; // If the encoding is invalid, return the original string. + } + + if ( '' === $characters ) { + return $str; + } + + if ( 'UTF-8' !== $encoding ) { + $characters = mb_convert_encoding( $characters, 'UTF-8', $encoding ); + $str = mb_convert_encoding( $str, 'UTF-8', $encoding ); + } + + // Use preg_replace to trim the characters from both ends of the string. + $pattern = '/^[' . preg_quote( $characters, '/' ) . ']+|[' . preg_quote( $characters, '/' ) . ']+$/uD'; + $trimmed_string = preg_replace( $pattern, '', $str ); + + if ( false === $trimmed_string ) { + return $str; // If preg_replace fails, return the original string. 
+ } + + // Convert back to the original encoding if it was not UTF-8. + if ( 'UTF-8' !== $encoding ) { + $trimmed_string = mb_convert_encoding( $trimmed_string, $encoding, 'UTF-8' ); + } + + return $trimmed_string; + } +} + // IMAGETYPE_AVIF constant is only defined in PHP 8.x or later. if ( ! defined( 'IMAGETYPE_AVIF' ) ) { define( 'IMAGETYPE_AVIF', 19 ); diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 234d71a2a175a..0be432500306b 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -6265,3 +6265,33 @@ function maybe_hash_hex_color( $color ) { return $color; } + +/** + * Global variable containing the characters to trim from the beginning and end of a string. + * + * This variable is used by the `js_trim()` function to define which characters + * should be trimmed from a string. It includes common whitespace characters + * as well as some Unicode whitespace characters supported by JavaScript. + * + * @since 6.9.0 + * + * @var string + */ +$js_trimmables = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}"; + +/** + * Trims whitespace from the beginning and end of a string. + * + * This function is similar to `trim()`, but it uses a custom set of characters + * defined in the global `$js_trimmables` variable. + * + * @since 6.9.0 + * + * @param string $string The string to trim. + * @return string The trimmed string. 
+ */ +function js_trim( $string ) { + global $js_trimmables; + + return mb_trim( $string, $js_trimmables, 'UTF-8' ); +} diff --git a/tests/phpunit/tests/compat/mbTrim.php b/tests/phpunit/tests/compat/mbTrim.php new file mode 100644 index 0000000000000..b1eab5b57e8fd --- /dev/null +++ b/tests/phpunit/tests/compat/mbTrim.php @@ -0,0 +1,87 @@ +assertTrue( function_exists( 'mb_trim' ) ); + } + + /** + * @dataProvider data_mb_trim + */ + public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ) { + $this->assertSame( + $expected, + mb_trim( $input, $characters, $encoding ) + ); + } + + /** + * Data provider for mb_trim tests. + * + * @return array[] + */ + public function data_mb_trim() { + return array( + // Basic ASCII whitespace. + array( ' hello ', 'hello' ), + array( "\t\n\rhello\n\r\t", 'hello' ), + // Unicode whitespace. + array( "\u{00A0}hello\u{00A0}", 'hello' ), + array( "\u{3000}hello\u{3000}", 'hello' ), + array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ), + // Custom characters. + array( 'xxhelloxx', 'hello', 'x' ), + array( 'xyhelloyx', 'hello', 'xy' ), + // No trimming needed. + array( 'hello', 'hello' ), + // Empty string. + array( '', '' ), + // With encoding. + array( " hello ", 'hello', null, 'UTF-8' ), + // Null characters. + array( "\0hello\0", 'hello' ), + // Vertical tab and form feed. + array( "\v\fhello\f\v", 'hello' ), + ); + } + + /** + * @dataProvider data_mb_trim_non_utf8 + */ + public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ) { + $this->assertSame( + $expected, + mb_trim( $input, null, $encoding ) + ); + } + + /** + * Data provider for non-UTF-8 encoding tests. + * + * @return array[] + */ + public function data_mb_trim_non_utf8() { + // Japanese "ヒス" (HIS) in Shift_JIS, with ASCII spaces around. + $shift_jis_str = mb_convert_encoding(' ヒス ', 'SJIS', 'UTF-8'); + $shift_jis_expected = mb_convert_encoding('ヒス', 'SJIS', 'UTF-8'); + + // Latin1 example with spaces. 
+ $latin1_str = mb_convert_encoding(' café ', 'ISO-8859-1', 'UTF-8'); + $latin1_expected = mb_convert_encoding('café', 'ISO-8859-1', 'UTF-8'); + + return array( + array( $shift_jis_str, $shift_jis_expected, 'SJIS' ), + array( $latin1_str, $latin1_expected, 'ISO-8859-1' ), + ); + } + +} diff --git a/tests/phpunit/tests/formatting/jsTrim.php b/tests/phpunit/tests/formatting/jsTrim.php new file mode 100644 index 0000000000000..50327eec4f201 --- /dev/null +++ b/tests/phpunit/tests/formatting/jsTrim.php @@ -0,0 +1,35 @@ +assertSame( 'hello', js_trim( " hello " ) ); + $this->assertSame( 'hello', js_trim( "\t\n\rhello\n\r\t" ) ); + } + + public function test_trims_unicode_whitespace() { + // NO-BREAK SPACE (U+00A0) + $this->assertSame( 'hello', js_trim( "\u{00A0}hello\u{00A0}" ) ); + // IDEOGRAPHIC SPACE (U+3000) + $this->assertSame( 'hello', js_trim( "\u{3000}hello\u{3000}" ) ); + // MIXED + $this->assertSame( 'hello', js_trim( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}" ) ); + } + + public function test_trims_null_and_control_chars() { + $this->assertSame( "\0hello\0", js_trim( "\0hello\0" ) ); + $this->assertSame( 'hello', js_trim( "\v\fhello\f\v" ) ); + } + + public function test_no_trimming_needed() { + $this->assertSame( 'hello', js_trim( 'hello' ) ); + } + + public function test_empty_string_returns_empty() { + $this->assertSame( '', js_trim( '' ) ); + } +} From 7a54f093c1b9a3fb79e1cdbb206b66255fe45763 Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Mon, 18 Aug 2025 21:27:36 +0530 Subject: [PATCH 2/7] docs: change version to 6.9.0 --- src/wp-includes/compat.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php index 4f6e923f9c018..2882ca1fa2d4f 100644 --- a/src/wp-includes/compat.php +++ b/src/wp-includes/compat.php @@ -545,7 +545,7 @@ function array_all( array $array, callable $callback ): bool { // phpcs:ignore U * * Trims whitespace from the beginning and end of a string. 
* - * @since 6.8.0 + * @since 6.9.0 * * @param string $string The string to trim. * @param string|null $characters Optional. The characters to trim from the string. From b33b6ce2618edc92009504a10eb0f1a49e4acf66 Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Mon, 18 Aug 2025 21:29:25 +0530 Subject: [PATCH 3/7] refactor: fix phpcs errors --- src/wp-includes/compat.php | 2 +- tests/phpunit/tests/compat/mbTrim.php | 11 +++++------ tests/phpunit/tests/formatting/jsTrim.php | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php index 2882ca1fa2d4f..bf90dc1ceed72 100644 --- a/src/wp-includes/compat.php +++ b/src/wp-includes/compat.php @@ -599,7 +599,7 @@ function mb_trim( string $str, ?string $characters = null, ?string $encoding = n if ( 'UTF-8' !== $encoding ) { $characters = mb_convert_encoding( $characters, 'UTF-8', $encoding ); - $str = mb_convert_encoding( $str, 'UTF-8', $encoding ); + $str = mb_convert_encoding( $str, 'UTF-8', $encoding ); } // Use preg_replace to trim the characters from both ends of the string. diff --git a/tests/phpunit/tests/compat/mbTrim.php b/tests/phpunit/tests/compat/mbTrim.php index b1eab5b57e8fd..a1bf9fb99c9be 100644 --- a/tests/phpunit/tests/compat/mbTrim.php +++ b/tests/phpunit/tests/compat/mbTrim.php @@ -46,7 +46,7 @@ public function data_mb_trim() { // Empty string. array( '', '' ), // With encoding. - array( " hello ", 'hello', null, 'UTF-8' ), + array( ' hello ', 'hello', null, 'UTF-8' ), // Null characters. array( "\0hello\0", 'hello' ), // Vertical tab and form feed. @@ -71,17 +71,16 @@ public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ) */ public function data_mb_trim_non_utf8() { // Japanese "ヒス" (HIS) in Shift_JIS, with ASCII spaces around. 
- $shift_jis_str = mb_convert_encoding(' ヒス ', 'SJIS', 'UTF-8'); - $shift_jis_expected = mb_convert_encoding('ヒス', 'SJIS', 'UTF-8'); + $shift_jis_str = mb_convert_encoding( ' ヒス ', 'SJIS', 'UTF-8' ); + $shift_jis_expected = mb_convert_encoding( 'ヒス', 'SJIS', 'UTF-8' ); // Latin1 example with spaces. - $latin1_str = mb_convert_encoding(' café ', 'ISO-8859-1', 'UTF-8'); - $latin1_expected = mb_convert_encoding('café', 'ISO-8859-1', 'UTF-8'); + $latin1_str = mb_convert_encoding( ' café ', 'ISO-8859-1', 'UTF-8' ); + $latin1_expected = mb_convert_encoding( 'café', 'ISO-8859-1', 'UTF-8' ); return array( array( $shift_jis_str, $shift_jis_expected, 'SJIS' ), array( $latin1_str, $latin1_expected, 'ISO-8859-1' ), ); } - } diff --git a/tests/phpunit/tests/formatting/jsTrim.php b/tests/phpunit/tests/formatting/jsTrim.php index 50327eec4f201..4e65426506e7a 100644 --- a/tests/phpunit/tests/formatting/jsTrim.php +++ b/tests/phpunit/tests/formatting/jsTrim.php @@ -7,7 +7,7 @@ */ class Tests_Formatting_JsTrim extends WP_UnitTestCase { public function test_trims_ascii_whitespace() { - $this->assertSame( 'hello', js_trim( " hello " ) ); + $this->assertSame( 'hello', js_trim( ' hello ' ) ); $this->assertSame( 'hello', js_trim( "\t\n\rhello\n\r\t" ) ); } From a58c949f5e532100c6f8b0429f5f3e7f90a5085a Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Tue, 19 Aug 2025 12:45:20 +0530 Subject: [PATCH 4/7] refactor: fix phpcs errors in formatting --- src/wp-includes/formatting.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index b5e7c66eeb666..550a90ee2b009 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -6465,7 +6465,6 @@ function maybe_hash_hex_color( $color ) { * @return string The trimmed string. 
*/ function js_trim( $string ) { - global $js_trimmables; - - return mb_trim( $string, $js_trimmables, 'UTF-8' ); + global $js_trimmables; + return mb_trim( $string, $js_trimmables, 'UTF-8' ); } From 63a5ddc27be1cd4df7399fd7dc3aba92035c4e42 Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Fri, 22 Aug 2025 16:35:37 +0530 Subject: [PATCH 5/7] tests: update tests comments --- tests/phpunit/tests/compat/mbTrim.php | 25 ++++++++-- tests/phpunit/tests/formatting/jsTrim.php | 60 +++++++++++++++-------- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/tests/phpunit/tests/compat/mbTrim.php b/tests/phpunit/tests/compat/mbTrim.php index a1bf9fb99c9be..6f51bdccf0d9c 100644 --- a/tests/phpunit/tests/compat/mbTrim.php +++ b/tests/phpunit/tests/compat/mbTrim.php @@ -8,16 +8,25 @@ class Tests_Compat_mbTrim extends WP_UnitTestCase { /** + * @ticket 63804 + * * Test that mb_trim() is always available (either from PHP or WP). */ - public function test_mb_trim_availability() { + public function test_mb_trim_availability(): void { $this->assertTrue( function_exists( 'mb_trim' ) ); } /** + * @ticket 63804 + * * @dataProvider data_mb_trim + * + * @param string $input The input string to be trimmed. + * @param string $expected The expected trimmed result. + * @param string|null $characters Optional. The characters to trim. Default null (whitespace). + * @param string|null $encoding Optional. The character encoding. Default null (internal encoding). */ - public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ) { + public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ): void { $this->assertSame( $expected, mb_trim( $input, $characters, $encoding ) @@ -29,7 +38,7 @@ public function test_mb_trim( $input, $expected, $characters = null, $encoding = * * @return array[] */ - public function data_mb_trim() { + public function data_mb_trim(): array { return array( // Basic ASCII whitespace. 
array( ' hello ', 'hello' ), @@ -55,9 +64,15 @@ public function data_mb_trim() { } /** + * @ticket 63804 + * * @dataProvider data_mb_trim_non_utf8 + * + * @param string $input The input string to be trimmed. + * @param string $expected The expected trimmed result. + * @param string $encoding The character encoding. */ - public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ) { + public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ): void { $this->assertSame( $expected, mb_trim( $input, null, $encoding ) @@ -69,7 +84,7 @@ public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ) * * @return array[] */ - public function data_mb_trim_non_utf8() { + public function data_mb_trim_non_utf8(): array { // Japanese "ヒス" (HIS) in Shift_JIS, with ASCII spaces around. $shift_jis_str = mb_convert_encoding( ' ヒス ', 'SJIS', 'UTF-8' ); $shift_jis_expected = mb_convert_encoding( 'ヒス', 'SJIS', 'UTF-8' ); diff --git a/tests/phpunit/tests/formatting/jsTrim.php b/tests/phpunit/tests/formatting/jsTrim.php index 4e65426506e7a..ff19cdbe7a0bd 100644 --- a/tests/phpunit/tests/formatting/jsTrim.php +++ b/tests/phpunit/tests/formatting/jsTrim.php @@ -6,30 +6,50 @@ * @covers ::js_trim */ class Tests_Formatting_JsTrim extends WP_UnitTestCase { - public function test_trims_ascii_whitespace() { - $this->assertSame( 'hello', js_trim( ' hello ' ) ); - $this->assertSame( 'hello', js_trim( "\t\n\rhello\n\r\t" ) ); - } - - public function test_trims_unicode_whitespace() { - // NO-BREAK SPACE (U+00A0) - $this->assertSame( 'hello', js_trim( "\u{00A0}hello\u{00A0}" ) ); - // IDEOGRAPHIC SPACE (U+3000) - $this->assertSame( 'hello', js_trim( "\u{3000}hello\u{3000}" ) ); - // MIXED - $this->assertSame( 'hello', js_trim( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}" ) ); - } - public function test_trims_null_and_control_chars() { - $this->assertSame( "\0hello\0", js_trim( "\0hello\0" ) ); - $this->assertSame( 'hello', js_trim( "\v\fhello\f\v" 
) ); + /** + * @ticket 63804 + * + * Test that js_trim() is defined by WordPress. + */ + public function test_js_trim_availability(): void { + $this->assertTrue( function_exists( 'js_trim' ) ); } - public function test_no_trimming_needed() { - $this->assertSame( 'hello', js_trim( 'hello' ) ); + /** + * @ticket 63804 + * + * @dataProvider data_js_trim + * + * @param string $input The input string to be trimmed. + * @param string $expected The expected trimmed result. + */ + public function test_js_trim( $input, $expected ): void { + $this->assertSame( $expected, js_trim( $input ) ); } - public function test_empty_string_returns_empty() { - $this->assertSame( '', js_trim( '' ) ); + /** + * Data provider for js_trim tests. + * + * @return array[] + */ + public function data_js_trim(): array { + return array( + // Basic ASCII whitespace. + array( ' hello ', 'hello' ), + array( "\t\n\rhello\n\r\t", 'hello' ), + // Unicode whitespace. + array( "\u{00A0}hello\u{00A0}", 'hello' ), + array( "\u{3000}hello\u{3000}", 'hello' ), + array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ), + // Null characters should not be trimmed by js_trim(). + array( "\0hello\0", "\0hello\0" ), + // Vertical tab and form feed are trimmed. + array( "\v\fhello\f\v", 'hello' ), + // No trimming needed. + array( 'hello', 'hello' ), + // Empty string. 
+ array( '', '' ), + ); } } From 1db297d044b54de0126a4dd505dd3422154cd680 Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Tue, 7 Apr 2026 12:59:21 +0530 Subject: [PATCH 6/7] Fix mb_trim() polyfill to only support UTF-8 encoding and trigger warning error to inform the dev --- src/wp-includes/compat.php | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php index a516612ba2ebc..469ecd6e9248e 100644 --- a/src/wp-includes/compat.php +++ b/src/wp-includes/compat.php @@ -578,39 +578,34 @@ function array_last( array $array ) { // phpcs:ignore Universal.NamingConvention */ function mb_trim( string $str, ?string $characters = null, ?string $encoding = null ) { if ( is_null( $characters ) ) { - $characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u180E"; - } - - if ( is_null( $encoding ) ) { - $encoding = mb_internal_encoding(); - } - - if ( ! mb_check_encoding( '', $encoding ) ) { - return $str; // If the encoding is invalid, return the original string. + $characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}"; } if ( '' === $characters ) { return $str; } - if ( 'UTF-8' !== $encoding ) { - $characters = mb_convert_encoding( $characters, 'UTF-8', $encoding ); - $str = mb_convert_encoding( $str, 'UTF-8', $encoding ); + /* + * Keep this polyfill UTF-8-only: if a non-UTF-8 encoding is explicitly + * requested, bail out unchanged instead of attempting lossy conversions. + */ + if ( ! is_null( $encoding ) && ! _is_utf8_charset( $encoding ) ) { + wp_trigger_error( + __FUNCTION__, + 'mb_trim() polyfill only supports UTF-8 encoding. The provided encoding "' . $encoding . 
'" is not supported.', + E_USER_WARNING + ); + return $str; } // Use preg_replace to trim the characters from both ends of the string. $pattern = '/^[' . preg_quote( $characters, '/' ) . ']+|[' . preg_quote( $characters, '/' ) . ']+$/uD'; $trimmed_string = preg_replace( $pattern, '', $str ); - if ( false === $trimmed_string ) { + if ( false === $trimmed_string || null === $trimmed_string ) { return $str; // If preg_replace fails, return the original string. } - // Convert back to the original encoding if it was not UTF-8. - if ( 'UTF-8' !== $encoding ) { - $trimmed_string = mb_convert_encoding( $trimmed_string, $encoding, 'UTF-8' ); - } - return $trimmed_string; } } From b062abc437f9a5b60df499de3f29d25caf56db54 Mon Sep 17 00:00:00 2001 From: USERSATOSHI Date: Tue, 7 Apr 2026 15:25:45 +0530 Subject: [PATCH 7/7] tests: skip tests if mbstring extension is present --- tests/phpunit/tests/compat/mbTrim.php | 58 +++++++++++++++++++-------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/tests/phpunit/tests/compat/mbTrim.php b/tests/phpunit/tests/compat/mbTrim.php index 6f51bdccf0d9c..6afda3951777b 100644 --- a/tests/phpunit/tests/compat/mbTrim.php +++ b/tests/phpunit/tests/compat/mbTrim.php @@ -66,36 +66,60 @@ public function data_mb_trim(): array { /** * @ticket 63804 * + * Tests that passing a non-UTF-8 encoding to the WP polyfill triggers a + * warning and returns the original string unchanged, rather than attempting + * a lossy re-encoding that could silently corrupt data. + * + * Note: when PHP's native mb_trim() is available this test is skipped, + * because the native function does handle other encodings (via code-point + * boundary iteration, not re-encoding) and no warning is issued. + * * @dataProvider data_mb_trim_non_utf8 * * @param string $input The input string to be trimmed. - * @param string $expected The expected trimmed result. - * @param string $encoding The character encoding. 
+ * @param string $encoding The non-UTF-8 character encoding to pass. */ - public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ): void { - $this->assertSame( - $expected, - mb_trim( $input, null, $encoding ) + public function test_mb_trim_non_utf8_encoding_bails_with_warning( string $input, string $encoding ): void { + if ( extension_loaded( 'mbstring' ) && version_compare( PHP_VERSION, '8.4', '>=' ) ) { + $this->markTestSkipped( 'Native mb_trim() is available; polyfill bail-out behaviour does not apply.' ); + } + + $this->expectException( 'WP_Exception' ); + $this->expectExceptionMessage( 'mb_trim() polyfill only supports UTF-8 encoding' ); + + // wp_trigger_error() raises E_USER_WARNING; convert it to an exception so + // PHPUnit can catch it cleanly. + set_error_handler( + static function ( int $errno, string $errstr ) use ( $encoding ): bool { + if ( E_USER_WARNING === $errno ) { + throw new WP_Exception( $errstr ); + } + return false; + }, + E_USER_WARNING ); + + try { + $result = mb_trim( $input, null, $encoding ); + + // If wp_trigger_error() did not throw (e.g. errors are suppressed), + // assert that the original string is returned unchanged. + $this->assertSame( $input, $result, 'Polyfill should return the original string unchanged for unsupported encodings.' ); + } finally { + restore_error_handler(); + } } /** - * Data provider for non-UTF-8 encoding tests. + * Data provider for non-UTF-8 encoding bail-out tests. * * @return array[] */ public function data_mb_trim_non_utf8(): array { - // Japanese "ヒス" (HIS) in Shift_JIS, with ASCII spaces around. - $shift_jis_str = mb_convert_encoding( ' ヒス ', 'SJIS', 'UTF-8' ); - $shift_jis_expected = mb_convert_encoding( 'ヒス', 'SJIS', 'UTF-8' ); - - // Latin1 example with spaces. 
- $latin1_str = mb_convert_encoding( ' café ', 'ISO-8859-1', 'UTF-8' ); - $latin1_expected = mb_convert_encoding( 'café', 'ISO-8859-1', 'UTF-8' ); - return array( - array( $shift_jis_str, $shift_jis_expected, 'SJIS' ), - array( $latin1_str, $latin1_expected, 'ISO-8859-1' ), + 'ISO-8859-1 latin string' => array( ' café ', 'ISO-8859-1' ), + 'SJIS japanese string' => array( ' test ', 'SJIS' ), + 'Windows-1252 string' => array( ' hello ', 'Windows-1252' ), ); } }