@@ -355,12 +355,20 @@ public static function from_precomputed_table( $key_length, $groups, $large_word
355355 *
356356 * @since 6.6.0
357357 *
358- * @param string $word Determine if this word is a lookup key in the map.
358+ * @param string $word Determine if this word is a lookup key in the map.
359+ * @param ?string $case_sensitivity 'case-insensitive' to ignore ASCII case or default of 'case-sensitive'.
359360 * @return bool Whether there's an entry for the given word in the map.
360361 */
361- public function contains ( $ word ) {
362+ public function contains ( $ word , $ case_sensitivity = 'case-sensitive ' ) {
363+ $ ignore_case = 'case-insensitive ' === $ case_sensitivity ;
364+
362365 if ( $ this ->key_length >= strlen ( $ word ) ) {
363- $ word_at = strpos ( $ this ->small_words , str_pad ( $ word , $ this ->key_length + 1 , "\x00" ), STR_PAD_RIGHT );
366+ if ( 0 === strlen ( $ this ->small_words ) ) {
367+ return false ;
368+ }
369+
370+ $ term = str_pad ( $ word , $ this ->key_length + 1 , "\x00" , STR_PAD_RIGHT );
371+ $ word_at = $ ignore_case ? stripos ( $ this ->small_words , $ term ) : strpos ( $ this ->small_words , $ term );
364372 if ( false === $ word_at ) {
365373 return false ;
366374 }
@@ -369,7 +377,7 @@ public function contains( $word ) {
369377 }
370378
371379 $ group_key = substr ( $ word , 0 , $ this ->key_length );
372- $ group_at = strpos ( $ this ->groups , $ group_key );
380+ $ group_at = $ ignore_case ? stripos ( $ this -> groups , $ group_key ) : strpos ( $ this ->groups , $ group_key );
373381 if ( false === $ group_at ) {
374382 return false ;
375383 }
@@ -386,7 +394,7 @@ public function contains( $word ) {
386394 $ mapping_length = unpack ( 'C ' , $ group [ $ at ++ ] )[1 ];
387395 $ mapping_at = $ at ;
388396
389- if ( $ token_length === $ length && 0 === substr_compare ( $ group , $ slug , $ token_at , $ token_length ) ) {
397+ if ( $ token_length === $ length && 0 === substr_compare ( $ group , $ slug , $ token_at , $ token_length, $ ignore_case ) ) {
390398 return true ;
391399 }
392400
@@ -432,22 +440,26 @@ public function contains( $word ) {
432440 *
433441 * @since 6.6.0
434442 *
435- * @param string $text String in which to search for a lookup key.
436- * @param ?int $offset How many bytes into the string where the lookup key ought to start.
437- * @param ?int &$skip_bytes Holds byte-length of found lookup key if matched, otherwise not set.
443+ * @param string $text String in which to search for a lookup key.
444+ * @param ?int $offset How many bytes into the string where the lookup key ought to start.
445+ * @param ?int &$skip_bytes Holds byte-length of found lookup key if matched, otherwise not set.
446+ * @param ?string $case_sensitivity 'case-insensitive' to ignore ASCII case or default of 'case-sensitive'.
438447 * @return string|false Mapped value of lookup key if found, otherwise `false`.
439448 */
440- public function read_token ( $ text , $ offset = 0 , &$ skip_bytes = null ) {
449+ public function read_token ( $ text , $ offset = 0 , &$ skip_bytes = null , $ case_sensitivity = 'case-sensitive ' ) {
450+ $ ignore_case = 'case-insensitive ' === $ case_sensitivity ;
441451 $ text_length = strlen ( $ text );
442452
443453 // Search for a long word first, if the text is long enough, and if that fails, a short one.
444454 if ( $ text_length > $ this ->key_length ) {
445455 $ group_key = substr ( $ text , $ offset , $ this ->key_length );
446456
447- $ group_at = strpos ( $ this ->groups , $ group_key );
457+ $ group_at = $ ignore_case ? stripos ( $ this -> groups , $ group_key ) : strpos ( $ this ->groups , $ group_key );
448458 if ( false === $ group_at ) {
449459 // Perhaps a short word then.
450- return $ this ->read_small_token ( $ text , $ offset , $ skip_bytes );
460+ return strlen ( $ this ->small_words ) > 0
461+ ? $ this ->read_small_token ( $ text , $ offset , $ skip_bytes , $ case_sensitivity )
462+ : false ;
451463 }
452464
453465 $ group = $ this ->large_words [ $ group_at / ( $ this ->key_length + 1 ) ];
@@ -460,7 +472,7 @@ public function read_token( $text, $offset = 0, &$skip_bytes = null ) {
460472 $ mapping_length = unpack ( 'C ' , $ group [ $ at ++ ] )[1 ];
461473 $ mapping_at = $ at ;
462474
463- if ( 0 === substr_compare ( $ text , $ token , $ offset + $ this ->key_length , $ token_length ) ) {
475+ if ( 0 === substr_compare ( $ text , $ token , $ offset + $ this ->key_length , $ token_length, $ ignore_case ) ) {
464476 $ skip_bytes = $ this ->key_length + $ token_length ;
465477 return substr ( $ group , $ mapping_at , $ mapping_length );
466478 }
@@ -470,26 +482,37 @@ public function read_token( $text, $offset = 0, &$skip_bytes = null ) {
470482 }
471483
472484 // Perhaps a short word then.
473- return $ this ->read_small_token ( $ text , $ offset , $ skip_bytes );
485+ return strlen ( $ this ->small_words ) > 0
486+ ? $ this ->read_small_token ( $ text , $ offset , $ skip_bytes , $ case_sensitivity )
487+ : false ;
474488 }
475489
476490 /**
477491 * Finds a match for a short word at the index.
478492 *
479493 * @since 6.6.0
480494 *
481- * @param string $text String in which to search for a lookup key.
482- * @param ?int $offset How many bytes into the string where the lookup key ought to start.
483- * @param ?int &$skip_bytes Holds byte-length of found lookup key if matched, otherwise not set.
495+ * @param string $text String in which to search for a lookup key.
496+ * @param ?int $offset How many bytes into the string where the lookup key ought to start.
497+ * @param ?int &$skip_bytes Holds byte-length of found lookup key if matched, otherwise not set.
498+ * @param ?string $case_sensitivity 'case-insensitive' to ignore ASCII case or default of 'case-sensitive'.
484499 * @return string|false Mapped value of lookup key if found, otherwise `false`.
485500 */
486- private function read_small_token ( $ text , $ offset , &$ skip_bytes ) {
487- $ small_length = strlen ( $ this ->small_words );
488- $ starting_char = $ text [ $ offset ];
501+ private function read_small_token ( $ text , $ offset , &$ skip_bytes , $ case_sensitivity = 'case-sensitive ' ) {
502+ $ ignore_case = 'case-insensitive ' === $ case_sensitivity ;
503+ $ small_length = strlen ( $ this ->small_words );
504+ $ search_text = substr ( $ text , $ offset , $ this ->key_length );
505+ if ( $ ignore_case ) {
506+ $ search_text = strtoupper ( $ search_text );
507+ }
508+ $ starting_char = $ search_text [0 ];
489509
490510 $ at = 0 ;
491511 while ( $ at < $ small_length ) {
492- if ( $ starting_char !== $ this ->small_words [ $ at ] ) {
512+ if (
513+ $ starting_char !== $ this ->small_words [ $ at ] &&
514+ ( ! $ ignore_case || strtoupper ( $ this ->small_words [ $ at ] ) !== $ starting_char )
515+ ) {
493516 $ at += $ this ->key_length + 1 ;
494517 continue ;
495518 }
@@ -500,7 +523,10 @@ private function read_small_token( $text, $offset, &$skip_bytes ) {
500523 return $ this ->small_mappings [ $ at / ( $ this ->key_length + 1 ) ];
501524 }
502525
503- if ( $ text [ $ offset + $ adjust ] !== $ this ->small_words [ $ at + $ adjust ] ) {
526+ if (
527+ $ search_text [ $ adjust ] !== $ this ->small_words [ $ at + $ adjust ] &&
528+ ( ! $ ignore_case || strtoupper ( $ this ->small_words [ $ at + $ adjust ] !== $ search_text [ $ adjust ] ) )
529+ ) {
504530 $ at += $ this ->key_length + 1 ;
505531 continue 2 ;
506532 }
0 commit comments