diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c
index 3690162418fa3..3ffa30fa31737 100644
--- a/Zend/zend_operators.c
+++ b/Zend/zend_operators.c
@@ -2952,6 +2952,48 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, boo
 }
 /* }}} */
 
+/**
+ * Report whether a byte buffer holds only 7-bit ASCII bytes (each byte < 0x80).
+ *
+ * @param str    Start of the buffer to scan; only `length` bytes are read.
+ * @param length Number of bytes to scan.
+ * @return true if no byte has its high bit set (including length == 0),
+ *         false as soon as a byte >= 0x80 is found.
+ */
+ZEND_API bool ZEND_FASTCALL zend_str_is_utf8_pure_ascii(const char *str, size_t length) /* {{{ */
+{
+	/* Scan as unsigned bytes so the >= 0x80 test works whether or not plain char is signed. */
+	const unsigned char *p = (const unsigned char *) str;
+	const unsigned char *end = p + length;
+
+#ifdef HAVE_BLOCKCONV
+	__m128i blconv_80 = _mm_set1_epi8(0x80), blconv_operand, blconv_mingle;
+	/* Compare the remaining byte count instead of forming p + BLOCKCONV_STRIDE,
+	 * which could point more than one past the end of the buffer. */
+	while ((size_t) (end - p) >= BLOCKCONV_STRIDE) {
+		blconv_operand = _mm_loadu_si128((const __m128i*)(p));
+		/* Unsigned max(b, 0x80) == b marks exactly the lanes holding bytes >= 0x80.
+		 * BLOCKCONV_FOUND() is defined outside this hunk; presumably it tests blconv_mingle. */
+		blconv_mingle = _mm_cmpeq_epi8(_mm_max_epu8(blconv_operand, blconv_80), blconv_operand);
+		if (BLOCKCONV_FOUND()) {
+			return false;
+		}
+		p += BLOCKCONV_STRIDE;
+	}
+#endif
+
+	/* Scalar scan; when the block loop above is compiled in, this covers the leftover tail. */
+	while (p < end) {
+		if (*p >= 0x80) {
+			return false;
+		}
+		p++;
+	}
+
+	return true;
+}
+/* }}} */
+
 ZEND_API int ZEND_FASTCALL zend_binary_strcmp(const char *s1, size_t len1, const char *s2, size_t len2) /* {{{ */
 {
 	int retval;
diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h
index de02a406d575e..d4f18e6e2dca8 100644
--- a/Zend/zend_operators.h
+++ b/Zend/zend_operators.h
@@ -449,6 +449,7 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source,
 ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length);
 ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent);
 ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent);
+ZEND_API bool ZEND_FASTCALL zend_str_is_utf8_pure_ascii(const char *str, size_t length);
 
 #define zend_string_tolower(str) zend_string_tolower_ex(str, 0)
 #define zend_string_toupper(str) zend_string_toupper_ex(str, 0)
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 265a6ba0e398d..c407831c95814 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -2588,22 +2588,29 @@ 
PHP_FUNCTION(mb_convert_case) /* {{{ Returns a upper cased version of source_string */ PHP_FUNCTION(mb_strtoupper) { + zend_string *str; zend_string *from_encoding = NULL; - char *str; - size_t str_len, ret_len; + const mbfl_encoding *enc; + char *newstr; + size_t ret_len; ZEND_PARSE_PARAMETERS_START(1, 2) - Z_PARAM_STRING(str, str_len) + Z_PARAM_STR(str) Z_PARAM_OPTIONAL Z_PARAM_STR_OR_NULL(from_encoding) ZEND_PARSE_PARAMETERS_END(); - const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2); + enc = php_mb_get_encoding(from_encoding, 2); if (!enc) { RETURN_THROWS(); } - char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); + // optimize performance for UTF-8 encoding and input string consisting of lower/7-bit ASCII characters only + if (enc == &mbfl_encoding_utf8 && zend_str_is_utf8_pure_ascii(ZSTR_VAL(str), ZSTR_LEN(str))) { + RETURN_STR(zend_string_toupper(str)); + } + + newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, ZSTR_VAL(str), ZSTR_LEN(str), &ret_len, enc); /* If newstr is NULL something went wrong in mbfl and this is a bug */ ZEND_ASSERT(newstr != NULL); @@ -2616,15 +2623,14 @@ PHP_FUNCTION(mb_strtoupper) /* {{{ Returns a lower cased version of source_string */ PHP_FUNCTION(mb_strtolower) { + zend_string *str; zend_string *from_encoding = NULL; - char *str; - size_t str_len; + const mbfl_encoding *enc; char *newstr; size_t ret_len; - const mbfl_encoding *enc; ZEND_PARSE_PARAMETERS_START(1, 2) - Z_PARAM_STRING(str, str_len) + Z_PARAM_STR(str) Z_PARAM_OPTIONAL Z_PARAM_STR_OR_NULL(from_encoding) ZEND_PARSE_PARAMETERS_END(); @@ -2634,7 +2640,12 @@ PHP_FUNCTION(mb_strtolower) RETURN_THROWS(); } - newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc); + // optimize performance for UTF-8 encoding and input string consisting of lower/7-bit ASCII characters only + if (enc == &mbfl_encoding_utf8 && zend_str_is_utf8_pure_ascii(ZSTR_VAL(str), ZSTR_LEN(str))) { + 
RETURN_STR(zend_string_tolower(str)); + } + + newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, ZSTR_VAL(str), ZSTR_LEN(str), &ret_len, enc); /* If newstr is NULL something went wrong in mbfl and this is a bug */ ZEND_ASSERT(newstr != NULL);