@@ -2274,12 +2274,31 @@ function sanitize_title_for_query( $title ) {
22742274 * @param string $title The title to be sanitized.
22752275 * @param string $raw_title Optional. Not used. Default empty.
22762276 * @param string $context Optional. The operation for which the string is sanitized.
2277- * When set to 'save', additional entities are converted to hyphens
2278- * or stripped entirely. Default 'display'.
2277+ * When set to 'save', HTML entities are decoded to raw UTF-8 and
2278+ * Unicode dash punctuation and separators are converted to hyphens.
2279+ * Default 'display'.
22792280 * @return string The sanitized title.
22802281 */
22812282function sanitize_title_with_dashes ( $ title , $ raw_title = '' , $ context = 'display ' ) {
22822283 $ title = strip_tags ( $ title );
2284+
2285+ if ( 'save ' === $ context ) {
2286+ /*
2287+ * Decode HTML entities to raw UTF-8, ensuring all representations of the same
2288+ * character are treated identically.
2289+ */
2290+ $ title = WP_HTML_Decoder::decode_text_node ( $ title );
2291+
2292+ $ title = str_replace ( '& ' , '' , $ title );
2293+
2294+ if ( _wp_can_use_pcre_u () ) {
2295+ $ title = preg_replace ( '~[\p{Pd}\p{Z}]~u ' , '- ' , $ title );
2296+ }
2297+
2298+ // Convert forward slash to hyphen.
2299+ $ title = str_replace ( '/ ' , '- ' , $ title );
2300+ }
2301+
22832302 // Preserve escaped octets.
22842303 $ title = preg_replace ( '|%([a-fA-F0-9][a-fA-F0-9])| ' , '---$1--- ' , $ title );
22852304 // Remove percent signs that are not part of an octet.
@@ -2297,12 +2316,38 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
22972316 $ title = strtolower ( $ title );
22982317
22992318 if ( 'save ' === $ context ) {
2300- // Convert &nbsp, non-breaking hyphen, &ndash, and &mdash to hyphens.
2319+ /*
2320+ * Convert known dash punctuation and space separator variants to hyphens.
2321+ *
2322+ * These are the percent-encoded UTF-8 forms produced by utf8_uri_encode().
2323+ * When _wp_can_use_pcre_u() is true, raw UTF-8 dash/space chars were already
2324+ * replaced by PCRE above, so these str_replace() calls become no-ops for those.
2325+ * They remain necessary to handle inputs that arrived as pre-encoded percent
2326+ * sequences.
2327+ */
23012328 $ title = str_replace ( array ( '%c2%a0 ' , '%e2%80%91 ' , '%e2%80%93 ' , '%e2%80%94 ' ), '- ' , $ title );
2302- // Convert &nbsp, non-breaking hyphen, &ndash, and &mdash HTML entities to hyphens.
2303- $ title = str_replace ( array ( ' ' , '‑ ' , '  ' , '– ' , '– ' , '— ' , '— ' ), '- ' , $ title );
2304- // Convert forward slash to hyphen.
2305- $ title = str_replace ( '/ ' , '- ' , $ title );
2329+
2330+ // Convert space separator variants (percent-encoded) to hyphen.
2331+ $ title = str_replace (
2332+ array (
2333+ '%e2%80%80 ' , // En quad.
2334+ '%e2%80%81 ' , // Em quad.
2335+ '%e2%80%82 ' , // En space.
2336+ '%e2%80%83 ' , // Em space.
2337+ '%e2%80%84 ' , // Three-per-em space.
2338+ '%e2%80%85 ' , // Four-per-em space.
2339+ '%e2%80%86 ' , // Six-per-em space.
2340+ '%e2%80%87 ' , // Figure space.
2341+ '%e2%80%88 ' , // Punctuation space.
2342+ '%e2%80%89 ' , // Thin space.
2343+ '%e2%80%8a ' , // Hair space.
2344+ '%e2%80%a8 ' , // Line separator.
2345+ '%e2%80%a9 ' , // Paragraph separator.
2346+ '%e2%80%af ' , // Narrow no-break space.
2347+ ),
2348+ '- ' ,
2349+ $ title
2350+ );
23062351
23072352 // Strip these characters entirely.
23082353 $ title = str_replace (
@@ -2361,28 +2406,6 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
23612406 $ title
23622407 );
23632408
2364- // Convert non-visible characters that display with a width to hyphen.
2365- $ title = str_replace (
2366- array (
2367- '%e2%80%80 ' , // En quad.
2368- '%e2%80%81 ' , // Em quad.
2369- '%e2%80%82 ' , // En space.
2370- '%e2%80%83 ' , // Em space.
2371- '%e2%80%84 ' , // Three-per-em space.
2372- '%e2%80%85 ' , // Four-per-em space.
2373- '%e2%80%86 ' , // Six-per-em space.
2374- '%e2%80%87 ' , // Figure space.
2375- '%e2%80%88 ' , // Punctuation space.
2376- '%e2%80%89 ' , // Thin space.
2377- '%e2%80%8a ' , // Hair space.
2378- '%e2%80%a8 ' , // Line separator.
2379- '%e2%80%a9 ' , // Paragraph separator.
2380- '%e2%80%af ' , // Narrow no-break space.
2381- ),
2382- '- ' ,
2383- $ title
2384- );
2385-
23862409 // Convert × to 'x'.
23872410 $ title = str_replace ( '%c3%97 ' , 'x ' , $ title );
23882411 }
0 commit comments