From d29bbc8d6696e580cf320eb560e410042c3de71b Mon Sep 17 00:00:00 2001 From: Shawn Bulen Date: Mon, 8 Jun 2026 23:53:54 -0700 Subject: [PATCH 1/3] Fix json_encode issues with non-utf8 chars Signed-off-by: Shawn Bulen --- other/upgrade.php | 57 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/other/upgrade.php b/other/upgrade.php index 2dc4bccb62..f2ddfa6df8 100644 --- a/other/upgrade.php +++ b/other/upgrade.php @@ -3510,22 +3510,27 @@ function upgrade_unserialize($string) $data = @safe_unserialize($string); // The serialized data is broken. - if ($data === false) + // OR... Has strings that are not utf8. + if (($data === false) || (mb_check_encoding($string, 'UTF-8') === false)) { // This bit fixes incorrect string lengths, which can happen if the character encoding was changed (e.g. conversion to UTF-8) $new_string = preg_replace_callback( '~\bs:(\d+):"(.*?)";(?=$|[bidsaO]:|[{}}]|N;)~s', function ($matches) { + // If not utf8, use cheezy-21-encoding, because json_encode ONLY works on utf8 + // Will decode this after json_encode; utf8 conversion can then proceed properly on the non-utf8 data + if (mb_check_encoding($matches[2], 'UTF-8') === false) + { + $matches[2] = 'czy21enc:' . bin2hex($matches[2]); + } return 's:' . strlen($matches[2]) . ':"' . $matches[2] . '";'; }, $string ); - // @todo Add more possible fixes here. For example, fix incorrect array lengths, try to handle truncated strings gracefully, etc. - // Did it work? - $data = @safe_unserialize($string); + $data = @safe_unserialize($new_string); } } // Just a plain string, then. @@ -3648,7 +3653,23 @@ function serialize_to_json() if (!$temp && $command_line) echo "\n - Failed to unserialize the '" . $var . "' setting. Skipping."; elseif ($temp !== false) + { $new_settings[$var] = json_encode($temp); + if ($new_settings[$var] === false) + $new_settings[$var] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $new_settings[$var] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $new_settings[$var] + ); + } + } } } @@ -3686,6 +3707,20 @@ function serialize_to_json() if ($temp !== false) { $row['value'] = json_encode($temp); + if ($row['value'] === false) + $row['value'] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $row['value'] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $row['value'] + ); + } // Even though we have all values from the table, UPDATE is still faster than REPLACE $smcFunc['db_query']('', ' @@ -3762,6 +3797,20 @@ function serialize_to_json() } $row[$col] = json_encode($temp); + if ($row[$col] === false) + $row[$col] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $row[$col] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $row[$col] + ); + } // Build our SET string and variables array $update .= (empty($update) ? '' : ', ') . $col . ' = {string:' . $col . '}'; From e1099f6435c67579f1a05cbf2521c604e252f179 Mon Sep 17 00:00:00 2001 From: Shawn Bulen Date: Wed, 10 Jun 2026 08:53:33 -0700 Subject: [PATCH 2/3] No longer needed, json at this point Signed-off-by: Shawn Bulen --- other/upgrade.php | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/other/upgrade.php b/other/upgrade.php index f2ddfa6df8..7c729832a9 100644 --- a/other/upgrade.php +++ b/other/upgrade.php @@ -3445,31 +3445,6 @@ function ConvertUtf8() require_once($sourcedir . '/Subs-Admin.php'); updateSettingsFile(array('db_character_set' => 'utf8')); - // The conversion might have messed up some serialized strings. Fix them! - $request = $smcFunc['db_query']('', ' - SELECT id_action, extra - FROM {db_prefix}log_actions - WHERE action IN ({string:remove}, {string:delete})', - array( - 'remove' => 'remove', - 'delete' => 'delete', - ) - ); - while ($row = $smcFunc['db_fetch_assoc']($request)) - { - if (@safe_unserialize($row['extra']) === false && preg_match('~^(a:3:{s:5:"topic";i:\d+;s:7:"subject";s:)(\d+):"(.+)"(;s:6:"member";s:5:"\d+";})$~', $row['extra'], $matches) === 1) - $smcFunc['db_query']('', ' - UPDATE {db_prefix}log_actions - SET extra = {string:extra} - WHERE id_action = {int:current_action}', - array( - 'current_action' => $row['id_action'], - 'extra' => $matches[1] . strlen($matches[3]) . ':"' . $matches[3] . '"' . $matches[4], - ) - ); - } - $smcFunc['db_free_result']($request); - if ($upcontext['dropping_index'] && $command_line) { echo "\n" . '', $txt['upgrade_fulltext_error'], ''; From 66ffa773ed0bd5462df43a0f2c8134104a3b4cc1 Mon Sep 17 00:00:00 2001 From: Shawn Bulen Date: Thu, 18 Jun 2026 22:58:15 -0700 Subject: [PATCH 3/3] Just do what you're told, json_encode Signed-off-by: Shawn Bulen --- other/upgrade.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/other/upgrade.php b/other/upgrade.php index 7c729832a9..b91cfd25bf 100644 --- a/other/upgrade.php +++ b/other/upgrade.php @@ -3629,7 +3629,7 @@ function serialize_to_json() echo "\n - Failed to unserialize the '" . $var . "' setting. Skipping."; elseif ($temp !== false) { - $new_settings[$var] = json_encode($temp); + $new_settings[$var] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); if ($new_settings[$var] === false) $new_settings[$var] = ''; else @@ -3681,7 +3681,7 @@ function ($matches) if ($temp !== false) { - $row['value'] = json_encode($temp); + $row['value'] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); if ($row['value'] === false) $row['value'] = ''; else @@ -3771,7 +3771,7 @@ function ($matches) echo "\nFailed to unserialize " . $row[$col] . ". Setting to empty value.\n"; } - $row[$col] = json_encode($temp); + $row[$col] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); if ($row[$col] === false) $row[$col] = ''; else