diff --git a/other/upgrade.php b/other/upgrade.php index 2dc4bccb62..b91cfd25bf 100644 --- a/other/upgrade.php +++ b/other/upgrade.php @@ -3445,31 +3445,6 @@ function ConvertUtf8() require_once($sourcedir . '/Subs-Admin.php'); updateSettingsFile(array('db_character_set' => 'utf8')); - // The conversion might have messed up some serialized strings. Fix them! - $request = $smcFunc['db_query']('', ' - SELECT id_action, extra - FROM {db_prefix}log_actions - WHERE action IN ({string:remove}, {string:delete})', - array( - 'remove' => 'remove', - 'delete' => 'delete', - ) - ); - while ($row = $smcFunc['db_fetch_assoc']($request)) - { - if (@safe_unserialize($row['extra']) === false && preg_match('~^(a:3:{s:5:"topic";i:\d+;s:7:"subject";s:)(\d+):"(.+)"(;s:6:"member";s:5:"\d+";})$~', $row['extra'], $matches) === 1) - $smcFunc['db_query']('', ' - UPDATE {db_prefix}log_actions - SET extra = {string:extra} - WHERE id_action = {int:current_action}', - array( - 'current_action' => $row['id_action'], - 'extra' => $matches[1] . strlen($matches[3]) . ':"' . $matches[3] . '"' . $matches[4], - ) - ); - } - $smcFunc['db_free_result']($request); - if ($upcontext['dropping_index'] && $command_line) { echo "\n" . '', $txt['upgrade_fulltext_error'], ''; @@ -3510,22 +3485,27 @@ function upgrade_unserialize($string) $data = @safe_unserialize($string); // The serialized data is broken. - if ($data === false) + // OR... Has strings that are not utf8. + if (($data === false) || (mb_check_encoding($string, 'UTF-8') === false)) { // This bit fixes incorrect string lengths, which can happen if the character encoding was changed (e.g. conversion to UTF-8) $new_string = preg_replace_callback( '~\bs:(\d+):"(.*?)";(?=$|[bidsaO]:|[{}}]|N;)~s', function ($matches) { + // If not utf8, use cheezy-21-encoding, because json_encode ONLY works on utf8 + // Will decode this after json_encode; utf8 conversion can then proceed properly on the non-utf8 data + if (mb_check_encoding($matches[2], 'UTF-8') === false) + { + $matches[2] = 'czy21enc:' . bin2hex($matches[2]); + } return 's:' . strlen($matches[2]) . ':"' . $matches[2] . '";'; }, $string ); - // @todo Add more possible fixes here. For example, fix incorrect array lengths, try to handle truncated strings gracefully, etc. - // Did it work? - $data = @safe_unserialize($string); + $data = @safe_unserialize($new_string); } } // Just a plain string, then. @@ -3648,7 +3628,23 @@ function serialize_to_json() if (!$temp && $command_line) echo "\n - Failed to unserialize the '" . $var . "' setting. Skipping."; elseif ($temp !== false) - $new_settings[$var] = json_encode($temp); + { + $new_settings[$var] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + if ($new_settings[$var] === false) + $new_settings[$var] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $new_settings[$var] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $new_settings[$var] + ); + } + } } } @@ -3685,7 +3681,21 @@ function serialize_to_json() if ($temp !== false) { - $row['value'] = json_encode($temp); + $row['value'] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + if ($row['value'] === false) + $row['value'] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $row['value'] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $row['value'] + ); + } // Even though we have all values from the table, UPDATE is still faster than REPLACE $smcFunc['db_query']('', ' @@ -3761,7 +3771,21 @@ function serialize_to_json() echo "\nFailed to unserialize " . $row[$col] . ". Setting to empty value.\n"; } - $row[$col] = json_encode($temp); + $row[$col] = json_encode($temp, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + if ($row[$col] === false) + $row[$col] = ''; + else + { + // Decode cheezy-21-encoding to preserve non-utf8 strings before utf8 conversion + $row[$col] = preg_replace_callback( + '~"czy21enc:((?:[0-9a-f]{2})*)"~', + function ($matches) + { + return '"' . hex2bin($matches[1]) . '"'; + }, + $row[$col] + ); + } // Build our SET string and variables array $update .= (empty($update) ? '' : ', ') . $col . ' = {string:' . $col . '}';