Skip to content

Commit ffad939

Browse files
authored
Re-work escapeForValue & unescapeForValue (#1558)
Support escape sequences for all FCITX_WHITESPACE characters. Fix an inconsistency where a character was escaped but the result was not quoted.
1 parent d6cf7f7 commit ffad939

3 files changed

Lines changed: 77 additions & 51 deletions

File tree

src/lib/fcitx-utils/stringutils.cpp

Lines changed: 62 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55
*
66
*/
77
#include "stringutils.h"
8+
#include <array>
89
#include <cassert>
910
#include <climits>
11+
#include <cstdint>
1012
#include <cstring>
1113
#include <initializer_list>
14+
#include <limits>
1215
#include <optional>
1316
#include <string>
1417
#include <string_view>
@@ -78,6 +81,38 @@ std::string concatPathPieces(
7881
}
7982
} // namespace details
8083

84+
namespace {

// The lookup tables below are indexed by a character converted to uint8_t,
// so they need one slot for every possible byte value (0..255). That is
// max() + 1 entries; sizing them with max() alone leaves byte 0xFF out of
// bounds.
using EscapeTable = std::array<char, std::numeric_limits<uint8_t>::max() + 1>;

// Maps a raw character to the letter that follows the backslash in its
// escaped form. An entry of '\0' means the character has no escape sequence
// and is written unchanged.
constexpr EscapeTable escapeMap = [] {
    // Value-initialization sets every entry to '\0'.
    EscapeTable table{};
    table['\\'] = '\\';
    table['"'] = '"';
    table['\n'] = 'n';
    table['\f'] = 'f';
    table['\r'] = 'r';
    table['\t'] = 't';
    table['\v'] = 'v';
    return table;
}();

// Inverse of escapeMap: maps the letter that follows a backslash to the
// character it stands for. An entry of '\0' marks an unknown escape sequence.
constexpr EscapeTable unescapeMap = [] {
    // Value-initialization sets every entry to '\0'.
    EscapeTable table{};
    table['\\'] = '\\';
    table['"'] = '"';
    table['n'] = '\n';
    table['f'] = '\f';
    table['r'] = '\r';
    table['t'] = '\t';
    table['v'] = '\v';
    return table;
}();

} // namespace
115+
81116
FCITXUTILS_DEPRECATED_EXPORT bool startsWith(const std::string &str,
82117
const std::string &prefix) {
83118
return str.starts_with(prefix);
@@ -337,69 +372,52 @@ bool unescape(std::string &str, bool unescapeQuote) {
337372
}
338373
break;
339374
case UnescapeState::ESCAPE:
340-
if (str[i] == '\\') {
341-
str[j] = '\\';
342-
j++;
343-
} else if (str[i] == 'n') {
344-
str[j] = '\n';
375+
if (auto c = unescapeMap[str[i]];
376+
c && (unescapeQuote || c != '"')) {
377+
str[j] = c;
345378
j++;
346-
} else if (str[i] == '\"' && unescapeQuote) {
347-
str[j] = '\"';
348-
j++;
349-
} else {
350-
return false;
379+
state = UnescapeState::NORMAL;
380+
break;
351381
}
352-
state = UnescapeState::NORMAL;
353-
break;
382+
// invalid escape sequence
383+
return false;
354384
}
355385
} while (str[i++]);
356386
str.resize(j - 1);
357387
return true;
358388
}
359389

360390
std::optional<std::string> unescapeForValue(std::string_view str) {
361-
bool unescapeQuote = false;
362391
// having quote at beginning and end, escape
363392
if (str.size() >= 2 && str.front() == '"' && str.back() == '"') {
364-
unescapeQuote = true;
365-
str = str.substr(1, str.size() - 2);
366-
}
367-
if (str.empty()) {
368-
return std::string();
369-
}
370-
371-
std::string value(str);
372-
if (!stringutils::unescape(value, unescapeQuote)) {
393+
std::string result;
394+
auto originLength = str.size();
395+
auto consumed = consumeMaybeEscapedValue(str, "", &result);
396+
if (consumed.size() == originLength) {
397+
return result;
398+
}
373399
return std::nullopt;
374400
}
375-
return value;
401+
return std::string{str};
376402
}
377403

378404
std::string escapeForValue(std::string_view str) {
379405
std::string value;
380406
value.reserve(str.size());
381-
const bool needQuote =
382-
str.find_first_of("\f\r\t\v \"") != std::string::npos;
383-
if (needQuote) {
407+
const bool needEscape =
408+
str.find_first_of("\f\r\t\v \"\\\n") != std::string::npos;
409+
if (needEscape) {
384410
value.push_back('"');
385411
}
386412
for (char c : str) {
387-
switch (c) {
388-
case '\\':
389-
value.append("\\\\");
390-
break;
391-
case '\n':
392-
value.append("\\n");
393-
break;
394-
case '"':
395-
value.append("\\\"");
396-
break;
397-
default:
413+
if (auto escape = escapeMap[static_cast<uint8_t>(c)]) {
414+
value.push_back('\\');
415+
value.push_back(escape);
416+
} else {
398417
value.push_back(c);
399-
break;
400418
}
401419
}
402-
if (needQuote) {
420+
if (needEscape) {
403421
value.push_back('"');
404422
}
405423

@@ -450,14 +468,12 @@ std::string_view consumeMaybeEscapedValue(std::string_view &input,
450468
}
451469
break;
452470
case UnescapeState::ESCAPE:
453-
if (input[i] == '\\') {
454-
result.push_back('\\');
455-
} else if (input[i] == 'n') {
456-
result.push_back('\n');
457-
} else if (input[i] == '"') {
458-
result.push_back('"');
471+
if (auto c = unescapeMap[input[i]]) {
472+
result.push_back(c);
459473
} else {
460-
break;
474+
// invalid escape sequence
475+
// and treat it as normal character.
476+
result.push_back(input[i]);
461477
}
462478
state = UnescapeState::NORMAL;
463479
break;

src/lib/fcitx-utils/stringutils.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,19 @@ constexpr bool literalEqual(char const *a, char const *b) {
159159
return *a == *b && (*a == '\0' || literalEqual(a + 1, b + 1));
160160
}
161161

162-
/// \brief Inplace unescape a string contains slash, new line, optionally quote.
162+
/**
163+
* \brief In-place unescape a string that contains backslash, new line, and optionally quote.
164+
*
165+
* \param str the string to unescape, will be modified.
166+
* \param unescapeQuote whether to unescape quote.
167+
*/
163168
FCITXUTILS_EXPORT bool unescape(std::string &str, bool unescapeQuote);
164169

165170
/**
166-
* \brief unescape a string, that is potentially quoted.
171+
* \brief unescape a string if it is quoted, otherwise return the original
172+
* string.
173+
*
174+
* Will return nullopt if the escape is invalid.
167175
*
168176
* \param str input string.
169177
* \return unescaped string
@@ -174,10 +182,12 @@ FCITXUTILS_EXPORT std::optional<std::string>
174182
unescapeForValue(std::string_view str);
175183

176184
/**
177-
* \brief escape a string, add quote if needed.
185+
* \brief escape a string if str contains certain characters.
186+
*
187+
* The characters include all FCITX_WHITESPACE, backslash, quote, space.
178188
*
179189
* \param str input string.
180-
* \return escaped string
190+
* \return quoted escaped string, or the original string if no escape is needed.
181191
* \see unescapeForValue
182192
* \since 5.0.16
183193
*/

src/modules/quickphrase/quickphrase.d/latex.mb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
\backslash \\
1+
\backslash \
22
\hat ^
33
\grave `
44
\tilde ~

0 commit comments

Comments
 (0)