Skip to content

Commit 5764389

Browse files
committed
tests: Fix json formatter string encoding
1 parent 3aeccbb commit 5764389

4 files changed

Lines changed: 75 additions & 27 deletions

File tree

generators/include/pl/formatters/formatter_json.hpp

Lines changed: 72 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -64,34 +64,83 @@ namespace pl::gen::fmt {
6464
if (pattern->getVisibility() == ptrn::Visibility::Hidden) return;
6565
if (pattern->getVisibility() == ptrn::Visibility::TreeHidden) return;
6666

67-
const auto string = pattern->toString();
67+
const auto rawBytes = pattern->toString();
6868

6969
std::string result;
70-
const auto decodedString = wolv::util::utf8ToUtf32(string, true).value();
71-
for (const auto &ch : decodedString) {
72-
switch (ch) {
73-
case U'"': result += "\\\""; break;
74-
case U'\\': result += "\\\\"; break;
75-
case U'\b': result += "\\b"; break;
76-
case U'\f': result += "\\f"; break;
77-
case U'\n': result += "\\n"; break;
78-
case U'\r': result += "\\r"; break;
79-
case U'\t': result += "\\t"; break;
80-
default:
81-
if (ch < 0x20) {
82-
result += ::fmt::format("\\u{:04x}", static_cast<u32>(ch));
83-
} else if (ch <= 0xFFFF) {
84-
result += static_cast<char>(ch);
85-
} else {
86-
u32 code = static_cast<u32>(ch) - 0x10000;
87-
u16 highSurrogate = 0xD800 + ((code >> 10) & 0x3FF);
88-
u16 lowSurrogate = 0xDC00 + (code & 0x3FF);
89-
result += ::fmt::format("\\u{:04x}\\u{:04x}", highSurrogate, lowSurrogate);
90-
}
91-
break;
70+
for (size_t i = 0; i < rawBytes.size(); ) {
71+
const u8 ch = static_cast<u8>(rawBytes[i]);
72+
73+
// Determine UTF-8 sequence length
74+
size_t seqLen = 0;
75+
u32 codepoint = 0;
76+
if (ch < 0x80) {
77+
seqLen = 1;
78+
codepoint = ch;
79+
} else if ((ch & 0xE0) == 0xC0 && ch >= 0xC2) {
80+
seqLen = 2;
81+
codepoint = ch & 0x1F;
82+
} else if ((ch & 0xF0) == 0xE0) {
83+
seqLen = 3;
84+
codepoint = ch & 0x0F;
85+
} else if ((ch & 0xF8) == 0xF0 && ch <= 0xF4) {
86+
seqLen = 4;
87+
codepoint = ch & 0x07;
88+
}
89+
90+
// Validate continuation bytes
91+
bool valid = seqLen > 0;
92+
for (size_t j = 1; j < seqLen && valid; ++j) {
93+
const u8 cb = static_cast<u8>(rawBytes[i + j]);
94+
if (i + j >= rawBytes.size() || (cb & 0xC0) != 0x80) {
95+
valid = false;
96+
} else {
97+
codepoint = (codepoint << 6) | (cb & 0x3F);
9298
}
9399
}
94100

101+
// Reject overlong sequences and surrogates
102+
if (valid) {
103+
if (seqLen == 2 && codepoint < 0x80) valid = false;
104+
if (seqLen == 3 && codepoint < 0x800) valid = false;
105+
if (seqLen == 4 && codepoint < 0x10000) valid = false;
106+
if (codepoint >= 0xD800 && codepoint <= 0xDFFF) valid = false;
107+
if (codepoint > 0x10FFFF) valid = false;
108+
}
109+
110+
if (!valid) {
111+
// Escape the single invalid byte
112+
result += ::fmt::format("\\u{:04x}", static_cast<u32>(ch));
113+
i += 1;
114+
continue;
115+
}
116+
117+
// Emit the codepoint
118+
switch (codepoint) {
119+
case U'"': result += "\\\""; break;
120+
case U'\\': result += "\\\\"; break;
121+
case U'\b': result += "\\b"; break;
122+
case U'\f': result += "\\f"; break;
123+
case U'\n': result += "\\n"; break;
124+
case U'\r': result += "\\r"; break;
125+
case U'\t': result += "\\t"; break;
126+
default:
127+
if (codepoint < 0x20 || codepoint == 0x7F) {
128+
result += ::fmt::format("\\u{:04x}", codepoint);
129+
} else if (codepoint <= 0xFFFF) {
130+
// Append the original UTF-8 bytes directly
131+
for (size_t j = 0; j < seqLen; ++j)
132+
result += rawBytes[i + j];
133+
} else {
134+
u32 code = codepoint - 0x10000;
135+
u16 highSurrogate = 0xD800 + ((code >> 10) & 0x3FF);
136+
u16 lowSurrogate = 0xDC00 + (code & 0x3FF);
137+
result += ::fmt::format("\\u{:04x}\\u{:04x}", highSurrogate, lowSurrogate);
138+
}
139+
break;
140+
}
141+
i += seqLen;
142+
}
143+
95144
addLine(pattern->getVariableName(), ::fmt::format("\"{}\",", result));
96145
}
97146

tests/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@ set(AVAILABLE_TESTS
4242
add_executable(pattern_language_tests
4343
source/main.cpp
4444
source/tests.cpp
45-
include/test_patterns/test_pattern_import.hpp
4645
)
4746

4847

@@ -63,7 +62,7 @@ add_custom_command(TARGET pattern_language_tests
6362

6463
add_custom_command(TARGET pattern_language_tests
6564
POST_BUILD
66-
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}//files/export/yaml.yml" ${CMAKE_BINARY_DIR}/bin/files/export/yaml.yml)
65+
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/files/export/yaml.yml" ${CMAKE_BINARY_DIR}/bin/files/export/yaml.yml)
6766

6867
foreach (test IN LISTS AVAILABLE_TESTS)
6968
add_test(NAME "PatternLanguage/${test}" COMMAND pattern_language_tests "${test}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

tests/files/export/json.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"data": {
3-
"s": "%89PNG%0D%0A%1A%0A%00",
3+
"s": "\u0089PNG\r\n\u001a\n\u0000",
44
"ua": 0,
55
"ub": 3328,
66
"uc": 1380206665,

tests/files/export/yaml.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,4 @@ data:
1111
sb: -11044
1212
sc: -25165923
1313
sd: 29773251444219
14-
se: -1463797564129820304
14+
se: -1463797564129820304

0 commit comments

Comments
 (0)