Skip to content

Commit ba41ffc

Browse files
committed
String/Encoding: toUtf8
1 parent c8e0644 commit ba41ffc

5 files changed

Lines changed: 176 additions & 2 deletions

File tree

modules/String/Encoding.mpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
export module CppUtils.String.Encoding;
2+
3+
import std;
4+
5+
export namespace CppUtils::String
6+
{
7+
/// Appends the UTF-8 encoding of one Unicode code point to a string.
///
/// Code points that are not Unicode scalar values (the UTF-16 surrogates
/// U+D800..U+DFFF and anything above U+10FFFF) have no well-formed UTF-8
/// encoding. U+FFFD REPLACEMENT CHARACTER is encoded in their place, so the
/// output is always valid UTF-8 and invalid input is never silently dropped.
///
/// @param codePoint Code point to encode.
/// @param string    Buffer to append to (defaults to an empty string).
/// @return The buffer followed by the 1 to 4 bytes that encode the code point.
[[nodiscard]] inline auto toUtf8(char32_t codePoint, std::string&& string = {}) -> std::string
{
	const auto isSurrogate = codePoint >= 0xD8'00 && codePoint <= 0xDF'FF;
	const auto isOutOfRange = codePoint > 0x10'FF'FF;
	if (isSurrogate || isOutOfRange)
		codePoint = 0xFF'FD; // U+FFFD REPLACEMENT CHARACTER

	if (codePoint < 0x80)
	{
		// 1 byte: 0xxxxxxx
		string += static_cast<char>(codePoint);
	}
	else if (codePoint < 0x8'00)
	{
		// 2 bytes: 110xxxxx 10xxxxxx
		string += static_cast<char>(0xC0 | (codePoint >> 6));
		string += static_cast<char>(0x80 | (codePoint & 0x3F));
	}
	else if (codePoint < 0x1'00'00)
	{
		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		string += static_cast<char>(0xE0 | (codePoint >> 12));
		string += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
		string += static_cast<char>(0x80 | (codePoint & 0x3F));
	}
	else
	{
		// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		// (the substitution above guarantees codePoint <= 0x10FFFF here)
		string += static_cast<char>(0xF0 | (codePoint >> 18));
		string += static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
		string += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
		string += static_cast<char>(0x80 | (codePoint & 0x3F));
	}
	return string;
}
33+
34+
inline auto toUtf8(std::u32string unicodeString, std::string&& string = {}) -> std::string
35+
{
36+
for (const auto codePoint : unicodeString)
37+
string = toUtf8(codePoint, std::move(string));
38+
return string;
39+
}
40+
}

modules/String/String.mpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ export module CppUtils.String;
22

33
export import CppUtils.String.Concept;
44
export import CppUtils.String.Cursor;
5+
export import CppUtils.String.Encoding;
56
export import CppUtils.String.Hash;
67
export import CppUtils.String.HashTable;
78
export import CppUtils.String.Utility;

modules/Terminal/DynamicAreaBuffer.mpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export namespace CppUtils::Terminal
1414
using Buffer = std::vector<Line>;
1515

1616
inline DynamicAreaBuffer(const Container::Size2& size, CharAttributes defaultCharAttributes = {}):
17-
DynamicAreaBuffer{size, Buffer{size.height(), Line{size.width(), defaultCharAttributes}}}
17+
DynamicAreaBuffer{size, Buffer(size.height(), Line(size.width(), defaultCharAttributes))}
1818
{}
1919

2020
inline DynamicAreaBuffer(const Container::Size2& size, char defaultChar):
@@ -30,7 +30,7 @@ export namespace CppUtils::Terminal
3030
{
3131
auto lock = std::unique_lock{m_mutex};
3232
m_size = size;
33-
m_buffer = Buffer{m_size.height(), Line{m_size.width()}};
33+
m_buffer = Buffer(m_size.height(), Line(m_size.width()));
3434
}
3535

3636
[[nodiscard]] inline auto getSize() const noexcept -> Container::Size2 override

tests/String/Encoding.mpp

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
export module CppUtils.UnitTests.String.Encoding;
2+
3+
import std;
4+
import CppUtils;
5+
6+
export namespace CppUtils::UnitTest::String::Encoding
7+
{
8+
// "String/Encoding" test suite: exercises CppUtils::String::toUtf8 on single code
// points of each UTF-8 sequence length, then on whole UTF-32 strings.
inline auto _ = TestSuite{"String/Encoding", {"Logger"}, [](auto& suite) {
	using namespace std::literals;
	using Logger = CppUtils::Logger<"CppUtils">;
	using CppUtils::String::toUtf8;

	suite.addTest("toUtf8(char32_t)", [&] {
		// One-byte sequences (U+0000 - U+007F)
		{
			static_assert(U'$' == char32_t{0x24});

			auto fromValue = toUtf8(char32_t{0x24});
			Logger::print("toUtf8(0x24) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\x24");

			auto fromLiteral = toUtf8(U'$');
			Logger::print("toUtf8(U'$') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\x24");
		}

		{
			static_assert(U'A' == char32_t{0x41});

			auto fromValue = toUtf8(char32_t{0x41});
			Logger::print("toUtf8(0x41) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "A");

			auto fromLiteral = toUtf8(U'A');
			Logger::print("toUtf8(U'A') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "A");
		}

		// Two-byte sequences (U+0080 - U+07FF)
		{
			static_assert(U'¢' == char32_t{0xA2});

			auto fromValue = toUtf8(char32_t{0xA2});
			Logger::print("toUtf8(0xA2) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xC2\xA2");

			auto fromLiteral = toUtf8(U'¢');
			Logger::print("toUtf8(U'¢') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xC2\xA2");
		}

		{
			static_assert(U'α' == char32_t{0x3'B1});

			auto fromValue = toUtf8(char32_t{0x3'B1});
			Logger::print("toUtf8(0x3B1) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xCE\xB1");

			auto fromLiteral = toUtf8(U'α');
			Logger::print("toUtf8(U'α') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xCE\xB1");
		}

		// Three-byte sequences (U+0800 - U+FFFF)
		{
			static_assert(U'€' == char32_t{0x20'AC});

			auto fromValue = toUtf8(char32_t{0x20'AC});
			Logger::print("toUtf8(0x20AC) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xE2\x82\xAC");

			auto fromLiteral = toUtf8(U'€');
			Logger::print("toUtf8(U'€') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xE2\x82\xAC");
		}

		{
			static_assert(U'ん' == char32_t{0x30'93});

			auto fromValue = toUtf8(char32_t{0x30'93});
			Logger::print("toUtf8(0x3093) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xE3\x82\x93");

			auto fromLiteral = toUtf8(U'ん');
			Logger::print("toUtf8(U'ん') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xE3\x82\x93");
		}

		// Four-byte sequences (U+10000 - U+10FFFF)
		{
			static_assert(U'𐍈' == char32_t{0x1'03'48});

			auto fromValue = toUtf8(char32_t{0x1'03'48});
			Logger::print("toUtf8(0x10348) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xF0\x90\x8D\x88"); // '𐍈'

			auto fromLiteral = toUtf8(U'𐍈');
			Logger::print("toUtf8(U'𐍈') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xF0\x90\x8D\x88");
		}

		{
			static_assert(U'😀' == char32_t{0x1'F6'00});

			auto fromValue = toUtf8(char32_t{0x1'F6'00});
			Logger::print("toUtf8(0x1F600) -> {}\n", fromValue);
			suite.expectEqual(fromValue, "\xF0\x9F\x98\x80");

			auto fromLiteral = toUtf8(U'😀');
			Logger::print("toUtf8(U'😀') -> {}\n", fromLiteral);
			suite.expectEqual(fromLiteral, "\xF0\x9F\x98\x80");
		}
	});

	suite.addTest("toUtf8(std::u32string_view)", [&] {
		// Pure ASCII text
		auto ascii = toUtf8(U"Hello World!"sv);
		Logger::print("{}\n", ascii);
		suite.expectEqual(ascii, "Hello World!");

		// A single three-byte character
		auto euro = toUtf8(U"€"sv);
		Logger::print("{}\n", euro);
		suite.expectEqual(euro, "\xE2\x82\xAC");

		// A single four-byte character
		auto emoji = toUtf8(U"😀"sv);
		Logger::print("{}\n", emoji);
		suite.expectEqual(emoji, "\xF0\x9F\x98\x80");

		// Sequences of different lengths in one string
		auto mixed = toUtf8(U"A€😀"sv);
		Logger::print("{}\n", mixed);
		suite.expectEqual(mixed, "A\xE2\x82\xAC\xF0\x9F\x98\x80");
	});
}};
132+
}

tests/UnitTests.mpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export import CppUtils.UnitTests.Math.Random;
3232
export import CppUtils.UnitTests.Memory;
3333
export import CppUtils.UnitTests.Network;
3434
export import CppUtils.UnitTests.Stl.Format;
35+
export import CppUtils.UnitTests.String.Encoding;
3536
export import CppUtils.UnitTests.String.Utility;
3637
export import CppUtils.UnitTests.System.Error;
3738
export import CppUtils.UnitTests.Terminal.Canvas;

0 commit comments

Comments
 (0)