@@ -166,8 +166,8 @@ function safe_strlen( $str, $encoding = false ) {
166166 // Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strlen(), "other" strlen().
167167 $ test_safe_strlen = (int ) getenv ( 'PHP_CLI_TOOLS_TEST_SAFE_STRLEN ' );
168168
169- // Assume UTF-8 if no encoding given - `grapheme_strlen()` will return null if given non-UTF-8 string .
170- if ( ( ! $ encoding || 'UTF-8 ' === $ encoding ) && can_use_icu () && null !== ( $ length = grapheme_strlen ( $ str ) ) ) {
169+ // Assume UTF-8 if no encoding given - `grapheme_strlen()` will return false on failure .
170+ if ( ( ! $ encoding || 'UTF-8 ' === $ encoding ) && can_use_icu () && is_int ( $ length = grapheme_strlen ( $ str ) ) ) {
171171 if ( ! $ test_safe_strlen || ( $ test_safe_strlen & 1 ) ) {
172172 return $ length ;
173173 }
@@ -183,10 +183,12 @@ function safe_strlen( $str, $encoding = false ) {
183183 if ( ! $ encoding ) {
184184 $ encoding = mb_detect_encoding ( $ str , null , true /*strict*/ );
185185 }
186- $ length = $ encoding ? mb_strlen ( $ str , $ encoding ) : mb_strlen ( $ str ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
186+ $ length = is_string ( $ encoding ) ? mb_strlen ( $ str , $ encoding ) : mb_strlen ( $ str ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
187187 if ( 'UTF-8 ' === $ encoding ) {
188188 // Subtract combining characters.
189- $ length -= preg_match_all ( get_unicode_regexs ( 'm ' ), $ str , $ dummy /*needed for PHP 5.3*/ );
189+ $ m_regex = get_unicode_regexs ( 'm ' );
190+ assert ( is_string ( $ m_regex ) );
191+ $ length -= preg_match_all ( $ m_regex , $ str , $ dummy /*needed for PHP 5.3*/ );
190192 }
191193 if ( ! $ test_safe_strlen || ( $ test_safe_strlen & 4 ) ) {
192194 return $ length ;
@@ -217,6 +219,8 @@ function safe_substr( $str, $start, $length = false, $is_width = false, $encodin
217219 // Normalize `$length` when not specified - PHP 5.3 substr takes false as full length, PHP > 5.3 takes null.
218220 if ( null === $ length || false === $ length ) {
219221 $ length = $ safe_strlen ;
222+ } else {
223+ $ length = (int ) $ length ;
220224 }
221225 // Normalize `$start` - various methods treat this differently.
222226 if ( $ start > $ safe_strlen ) {
@@ -250,7 +254,7 @@ function safe_substr( $str, $start, $length = false, $is_width = false, $encodin
250254 $ encoding = mb_detect_encoding ( $ str , null , true /*strict*/ );
251255 }
252256 // Bug: not adjusting for combining chars.
253- $ try = $ encoding ? mb_substr ( $ str , $ start , $ length , $ encoding ) : mb_substr ( $ str , $ start , $ length ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
257+ $ try = is_string ( $ encoding ) ? mb_substr ( $ str , $ start , $ length , $ encoding ) : mb_substr ( $ str , $ start , $ length ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
254258 if ( 'UTF-8 ' === $ encoding && $ is_width ) {
255259 $ try = _safe_substr_eaw ( $ try , $ length );
256260 }
@@ -271,6 +275,7 @@ function safe_substr( $str, $start, $length = false, $is_width = false, $encodin
271275function _safe_substr_eaw ( $ str , $ length ) {
272276 // Set the East Asian Width regex.
273277 $ eaw_regex = get_unicode_regexs ( 'eaw ' );
278+ assert ( is_string ( $ eaw_regex ) );
274279
275280 // If there's any East Asian double-width chars...
276281 if ( preg_match ( $ eaw_regex , $ str ) ) {
@@ -283,6 +288,9 @@ function _safe_substr_eaw( $str, $length ) {
283288 } else {
284289 // Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
285290 $ chars = preg_split ( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/ ' , $ str , $ length + 1 , PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
291+ if ( false === $ chars ) {
292+ $ chars = array ( $ str );
293+ }
286294 $ cnt = min ( count ( $ chars ), $ length );
287295 $ width = $ length ;
288296
@@ -326,7 +334,9 @@ function strwidth( $string, $encoding = false ) {
326334 $ string = (string ) $ string ;
327335
328336 // Set the East Asian Width and Mark regexs.
329- list ( $ eaw_regex , $ m_regex ) = get_unicode_regexs ();
337+ $ regexs = get_unicode_regexs ();
338+ assert ( is_array ( $ regexs ) );
339+ list ( $ eaw_regex , $ m_regex ) = $ regexs ;
330340
331341 // Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
332342 $ test_strwidth = (int ) getenv ( 'PHP_CLI_TOOLS_TEST_STRWIDTH ' );
@@ -348,7 +358,7 @@ function strwidth( $string, $encoding = false ) {
348358 if ( ! $ encoding ) {
349359 $ encoding = mb_detect_encoding ( $ string , null , true /*strict*/ );
350360 }
351- $ width = $ encoding ? mb_strwidth ( $ string , $ encoding ) : mb_strwidth ( $ string ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
361+ $ width = is_string ( $ encoding ) ? mb_strwidth ( $ string , $ encoding ) : mb_strwidth ( $ string ); // mbstring funcs can fail if given `$encoding` arg that evals to false.
352362 if ( 'UTF-8 ' === $ encoding ) {
353363 // Subtract combining characters.
354364 $ width -= preg_match_all ( $ m_regex , $ string , $ dummy /*needed for PHP 5.3*/ );
0 commit comments