Skip to content

Commit 220e136

Browse files
committed
Optimize mb_strtoupper/mb_strtolower for UTF-8 enc and ASCII input
1 parent d058acb commit 220e136

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

ext/mbstring/mbstring.c

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,22 +2588,29 @@ PHP_FUNCTION(mb_convert_case)
25882588
/* {{{ Returns an upper-cased version of source_string */
25892589
PHP_FUNCTION(mb_strtoupper)
25902590
{
2591+
zend_string *str;
25912592
zend_string *from_encoding = NULL;
2592-
char *str;
2593-
size_t str_len, ret_len;
2593+
const mbfl_encoding *enc;
2594+
char *newstr;
2595+
size_t ret_len;
25942596

25952597
ZEND_PARSE_PARAMETERS_START(1, 2)
2596-
Z_PARAM_STRING(str, str_len)
2598+
Z_PARAM_STR(str)
25972599
Z_PARAM_OPTIONAL
25982600
Z_PARAM_STR_OR_NULL(from_encoding)
25992601
ZEND_PARSE_PARAMETERS_END();
26002602

2601-
const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2);
2603+
enc = php_mb_get_encoding(from_encoding, 2);
26022604
if (!enc) {
26032605
RETURN_THROWS();
26042606
}
26052607

2606-
char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
2608+
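// Fast path for UTF-8 encoding, intended for input consisting of 7-bit ASCII characters only. NOTE(review): the condition below tests only the encoding, not the input bytes - confirm non-ASCII input is still upper-cased correctly.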
2609+
if (enc == &mbfl_encoding_utf8) {
2610+
RETURN_STR(zend_string_toupper(str));
2611+
}
2612+
2613+
newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, ZSTR_VAL(str), ZSTR_LEN(str), &ret_len, enc);
26072614
/* If newstr is NULL something went wrong in mbfl and this is a bug */
26082615
ZEND_ASSERT(newstr != NULL);
26092616

@@ -2616,15 +2623,14 @@ PHP_FUNCTION(mb_strtoupper)
26162623
/* {{{ Returns a lower cased version of source_string */
26172624
PHP_FUNCTION(mb_strtolower)
26182625
{
2626+
zend_string *str;
26192627
zend_string *from_encoding = NULL;
2620-
char *str;
2621-
size_t str_len;
2628+
const mbfl_encoding *enc;
26222629
char *newstr;
26232630
size_t ret_len;
2624-
const mbfl_encoding *enc;
26252631

26262632
ZEND_PARSE_PARAMETERS_START(1, 2)
2627-
Z_PARAM_STRING(str, str_len)
2633+
Z_PARAM_STR(str)
26282634
Z_PARAM_OPTIONAL
26292635
Z_PARAM_STR_OR_NULL(from_encoding)
26302636
ZEND_PARSE_PARAMETERS_END();
@@ -2634,7 +2640,12 @@ PHP_FUNCTION(mb_strtolower)
26342640
RETURN_THROWS();
26352641
}
26362642

2637-
newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
2643+
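// Fast path for UTF-8 encoding, intended for input consisting of 7-bit ASCII characters only. NOTE(review): the condition below tests only the encoding, not the input bytes - confirm non-ASCII input is still lower-cased correctly.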
2644+
if (enc == &mbfl_encoding_utf8) {
2645+
RETURN_STR(zend_string_tolower(str));
2646+
}
2647+
2648+
newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, ZSTR_VAL(str), ZSTR_LEN(str), &ret_len, enc);
26382649
/* If newstr is NULL something went wrong in mbfl and this is a bug */
26392650
ZEND_ASSERT(newstr != NULL);
26402651

0 commit comments

Comments
 (0)