Skip to content

Commit 09cd90e

Browse files
committed
[attr_line_t] stricter utf check
1 parent 8170da1 commit 09cd90e

2 files changed

Lines changed: 162 additions & 21 deletions

File tree

src/base/attr_line.cc

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,11 @@ attr_line_t::from_table_cell_content(const string_fragment& content,
5959
attr_line_t retval;
6060
std::string_view replacement;
6161
int copy_start = 0;
62+
const auto* ucontent = content.udata();
6263

6364
retval.al_string.reserve(max_char_width);
6465
for (int index = 0; index < content.length(); ++index) {
65-
const auto ch = content.udata()[index];
66+
const auto ch = ucontent[index];
6667

6768
switch (ch) {
6869
case '\t':
@@ -84,48 +85,105 @@ attr_line_t::from_table_cell_content(const string_fragment& content,
8485
default:
8586
if (ch < 0x80) {
8687
char_width += 1;
87-
} else if (ch < 0xc0) {
88+
} else if (ch < 0xC2) {
8889
replacement = REP_SYMBOL;
8990
char_width += 1;
90-
} else if (ch < 0xe0) {
91-
auto next_ch = content[index + 1];
92-
if (next_ch != 0) {
91+
} else if (ch >= 0xC2 && ch <= 0xDF) {
92+
if (index + 1 < content.length()
93+
&& ucontent[index + 1] >= 0x80
94+
&& ucontent[index + 1] <= 0xBF)
95+
{
9396
index += 1;
9497
} else {
9598
replacement = REP_SYMBOL;
9699
}
97100
char_width += 1;
98-
} else if (ch < 0xf0) {
99-
if (content[index + 1] != 0 && content[index + 2] != 0) {
101+
} else if (ch == 0xE0) {
102+
if (index + 2 < content.length()
103+
&& ucontent[index + 1] >= 0xA0
104+
&& ucontent[index + 1] <= 0xBF
105+
&& ucontent[index + 2] >= 0x80
106+
&& ucontent[index + 2] <= 0xBF)
107+
{
108+
index += 2;
109+
} else {
110+
replacement = REP_SYMBOL;
111+
}
112+
char_width += 1;
113+
} else if (ch >= 0xE1 && ch <= 0xEC) {
114+
if (index + 2 < content.length()
115+
&& ucontent[index + 1] >= 0x80
116+
&& ucontent[index + 1] <= 0xBF
117+
&& ucontent[index + 2] >= 0x80
118+
&& ucontent[index + 2] <= 0xBF)
119+
{
120+
index += 2;
121+
} else {
122+
replacement = REP_SYMBOL;
123+
}
124+
char_width += 1;
125+
} else if (ch == 0xED) {
126+
if (index + 2 < content.length()
127+
&& ucontent[index + 1] >= 0x80
128+
&& ucontent[index + 1] <= 0x9F
129+
&& ucontent[index + 2] >= 0x80
130+
&& ucontent[index + 2] <= 0xBF)
131+
{
100132
index += 2;
101133
} else {
102134
replacement = REP_SYMBOL;
103135
}
104136
char_width += 1;
105-
} else if (ch < 0xf8) {
106-
if (content[index + 1] != 0 && content[index + 2] != 0
107-
&& content[index + 3] != 0)
137+
} else if (ch >= 0xEE && ch <= 0xEF) {
138+
if (index + 2 < content.length()
139+
&& ucontent[index + 1] >= 0x80
140+
&& ucontent[index + 1] <= 0xBF
141+
&& ucontent[index + 2] >= 0x80
142+
&& ucontent[index + 2] <= 0xBF)
143+
{
144+
index += 2;
145+
} else {
146+
replacement = REP_SYMBOL;
147+
}
148+
char_width += 1;
149+
} else if (ch == 0xF0) {
150+
if (index + 3 < content.length()
151+
&& ucontent[index + 1] >= 0x90
152+
&& ucontent[index + 1] <= 0xBF
153+
&& ucontent[index + 2] >= 0x80
154+
&& ucontent[index + 2] <= 0xBF
155+
&& ucontent[index + 3] >= 0x80
156+
&& ucontent[index + 3] <= 0xBF)
108157
{
109158
index += 3;
110159
} else {
111160
replacement = REP_SYMBOL;
112161
}
113162
char_width += 1;
114-
} else if (ch < 0xfc) {
115-
if (content[index + 1] != 0 && content[index + 2] != 0
116-
&& content[index + 3] != 0 && content[index + 4] != 0)
163+
} else if (ch >= 0xF1 && ch <= 0xF3) {
164+
if (index + 3 < content.length()
165+
&& ucontent[index + 1] >= 0x80
166+
&& ucontent[index + 1] <= 0xBF
167+
&& ucontent[index + 2] >= 0x80
168+
&& ucontent[index + 2] <= 0xBF
169+
&& ucontent[index + 3] >= 0x80
170+
&& ucontent[index + 3] <= 0xBF)
117171
{
118-
index += 4;
172+
index += 3;
119173
} else {
120174
replacement = REP_SYMBOL;
121175
}
122176
char_width += 1;
123-
} else if (ch < 0xfe) {
124-
if (content[index + 1] != 0 && content[index + 2] != 0
125-
&& content[index + 3] != 0 && content[index + 4] != 0
126-
&& content[index + 5] != 0)
177+
} else if (ch == 0xF4) {
178+
if (index + 3 < content.length()
179+
&& ucontent[index + 1] >= 0x80
180+
&& ucontent[index + 1] <= 0x8F
181+
&& ucontent[index + 2] >= 0x80
182+
&& ucontent[index + 2] <= 0xBF
183+
&& ucontent[index + 3] >= 0x80
184+
&& ucontent[index + 3] <= 0xBF)
127185
{
128-
index += 5;
186+
index += 3;
129187
} else {
130188
replacement = REP_SYMBOL;
131189
}
@@ -331,8 +389,9 @@ attr_line_t::insert(size_t index,
331389
return *this;
332390
}
333391

334-
auto starting_line_index = index == 0 ? std::string::npos
335-
: this->al_string.rfind('\n', index - 1);
392+
auto starting_line_index = index == 0
393+
? std::string::npos
394+
: this->al_string.rfind('\n', index - 1);
336395
if (starting_line_index == std::string::npos) {
337396
starting_line_index = 0;
338397
} else {

src/base/attr_line.tests.cc

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,85 @@ TEST_CASE("attr_line_t::pre-wrap")
113113

114114
printf("body\n%s\n", body.get_string().c_str());
115115
}
116+
117+
// Smoke test: feeds a blob of binary (non-text) bytes through
// attr_line_t::from_table_cell_content().  The test makes no assertions
// about the returned attr_line_t; it only exercises the call on input that
// contains many bytes >= 0x80 and embedded 0x00 bytes.
// NOTE(review): presumably a regression test for the stricter UTF-8
// validation added in the same commit -- confirm, and consider asserting on
// the result.
TEST_CASE("attr_line_t::from_table_cell_content")
118+
{
119+
// The fixture begins with 0x1f 0x8b 0x08, which matches the gzip magic
// number followed by the "deflate" compression-method id, so this appears
// to be a gzip-compressed payload used purely as arbitrary binary data.
const unsigned char bits[] = {
120+
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xed, 0x9c,
121+
0x5b, 0x6f, 0xda, 0x30, 0x14, 0x80, 0xdf, 0xfb, 0x2b, 0xaa, 0x88, 0xc7,
122+
0x29, 0x69, 0x81, 0x02, 0xdd, 0x1b, 0x50, 0x28, 0xb4, 0x5c, 0x52, 0x6e,
123+
0xdd, 0x58, 0xa7, 0xc8, 0x4d, 0x0c, 0x04, 0x82, 0x13, 0x6c, 0x87, 0x4b,
124+
0xab, 0xfe, 0xf7, 0x25, 0x30, 0xb6, 0xb5, 0x50, 0xc0, 0xac, 0x12, 0x9b,
125+
0x39, 0x91, 0x10, 0x22, 0xf9, 0x72, 0x8e, 0xe3, 0x93, 0x0f, 0x47, 0x89,
126+
0xe1, 0xf9, 0xe4, 0x34, 0x58, 0x94, 0x7a, 0xa3, 0xd6, 0xcc, 0x36, 0x9a,
127+
0xb5, 0x9c, 0xf2, 0xf9, 0xf4, 0x79, 0xbe, 0x6a, 0xbe, 0xda, 0x74, 0x87,
128+
0xea, 0x78, 0x38, 0x41, 0x14, 0xab, 0x98, 0x4d, 0x55, 0x86, 0x39, 0xb7,
129+
0x49, 0x97, 0x19, 0x16, 0xc2, 0x43, 0x97, 0xa8, 0xa6, 0x4b, 0x3a, 0x76,
130+
0xd7, 0xa7, 0x88, 0xdb, 0xe1, 0xa7, 0x1e, 0x36, 0x07, 0x46, 0xb0, 0x8b,
131+
0xe7, 0xd8, 0x88, 0x98, 0xd8, 0xa0, 0x98, 0xf9, 0x0e, 0x7f, 0x15, 0x71,
132+
0x19, 0x75, 0x89, 0xd8, 0xa4, 0xe3, 0xae, 0x00, 0x73, 0xa8, 0xaa, 0x37,
133+
0x8a, 0xd5, 0x4a, 0xba, 0xb4, 0x76, 0xeb, 0x86, 0x36, 0xaf, 0x60, 0x62,
134+
0xc7, 0xb0, 0x43, 0xd3, 0x5e, 0x45, 0x47, 0x96, 0xc5, 0xb6, 0x52, 0x6f,
135+
0x0f, 0x88, 0xf8, 0x8e, 0xb3, 0x71, 0x87, 0x97, 0x4f, 0x9b, 0xb3, 0x5a,
136+
0xd8, 0xc1, 0x1c, 0x8b, 0x27, 0xfe, 0xb6, 0x95, 0x0e, 0x97, 0xed, 0x31,
137+
0x7f, 0xc5, 0xde, 0xad, 0x06, 0x6b, 0x77, 0x15, 0xaa, 0xcb, 0xcf, 0xad,
138+
0x7f, 0x9c, 0x5d, 0xc2, 0x09, 0x17, 0x49, 0x7d, 0x4a, 0x31, 0x59, 0x3d,
139+
0x27, 0x77, 0x0e, 0x20, 0x52, 0xc5, 0x75, 0xcb, 0x96, 0xca, 0xae, 0xcd,
140+
0xe9, 0x21, 0xde, 0x0b, 0xf2, 0x29, 0x9a, 0x47, 0xdd, 0x8e, 0xed, 0x60,
141+
0x2d, 0xe8, 0x2f, 0x6d, 0x3c, 0x1c, 0x60, 0x4a, 0xb0, 0xa3, 0x0d, 0x5d,
142+
0xcb, 0x77, 0xb0, 0xb1, 0xec, 0x3f, 0xed, 0x4c, 0xd9, 0x23, 0x05, 0x47,
143+
0xb4, 0x8b, 0x0f, 0xd9, 0x2d, 0x42, 0x7b, 0xec, 0x4e, 0xef, 0x46, 0xee,
144+
0x58, 0x14, 0xf0, 0xe2, 0xfd, 0x00, 0xff, 0xbe, 0x17, 0xe7, 0xe0, 0x85,
145+
0x28, 0x09, 0x5e, 0x1c, 0x81, 0x17, 0x51, 0xf0, 0x42, 0x94, 0x04, 0x2f,
146+
0x8e, 0xc0, 0x8b, 0x18, 0x78, 0x21, 0x4a, 0x82, 0x17, 0x47, 0xe0, 0x45,
147+
0x1c, 0xbc, 0x10, 0x25, 0xc1, 0x8b, 0x23, 0xf0, 0xe2, 0x02, 0xbc, 0x10,
148+
0x25, 0xc1, 0x0b, 0x79, 0xbc, 0x20, 0x98, 0x4f, 0x5c, 0x3a, 0x08, 0xfd,
149+
0x20, 0xb6, 0x09, 0xb7, 0xa1, 0xf6, 0x20, 0x41, 0x07, 0x79, 0x75, 0x80,
150+
0xbb, 0x4f, 0xc2, 0x24, 0xe8, 0x20, 0x9f, 0x0e, 0xc1, 0xbb, 0xc1, 0x38,
151+
0x32, 0x07, 0x30, 0x40, 0xec, 0x41, 0x82, 0x11, 0xd2, 0x19, 0x61, 0x8c,
152+
0x19, 0xd3, 0xd8, 0xc4, 0xe6, 0x66, 0x0f, 0x83, 0x13, 0x7b, 0x90, 0xe0,
153+
0x84, 0xe4, 0x4e, 0xc0, 0x95, 0x93, 0x30, 0x09, 0x4e, 0xc8, 0xe3, 0x04,
154+
0x9b, 0x31, 0xc7, 0xed, 0x6a, 0xc1, 0xab, 0x8b, 0x29, 0xcc, 0xf2, 0xf8,
155+
0x0b, 0x12, 0xac, 0x90, 0xde, 0x0a, 0x18, 0x2b, 0x84, 0x49, 0xb0, 0x42,
156+
0x7a, 0x2b, 0x60, 0x86, 0x87, 0x30, 0x09, 0x56, 0x48, 0x6f, 0x05, 0xcc,
157+
0xef, 0x10, 0x26, 0xc1, 0x0a, 0xe9, 0xad, 0x80, 0xd9, 0x1d, 0xc2, 0x24,
158+
0x58, 0x21, 0xbd, 0x15, 0x30, 0xb7, 0x43, 0x98, 0x04, 0x2b, 0xa4, 0xb7,
159+
0x22, 0x01, 0x56, 0x88, 0x92, 0x60, 0x85, 0xf4, 0x56, 0x24, 0xc1, 0x0a,
160+
0x51, 0x12, 0xac, 0x90, 0xde, 0x8a, 0x14, 0x58, 0x21, 0x4a, 0x82, 0x15,
161+
0xd2, 0x5b, 0x71, 0x09, 0x56, 0x88, 0x92, 0x60, 0x85, 0xf4, 0x56, 0x9c,
162+
0xc3, 0xc3, 0x6d, 0x61, 0x12, 0xb4, 0x90, 0x5f, 0x0b, 0x78, 0xba, 0x2d,
163+
0x4c, 0x82, 0x16, 0xf2, 0x6b, 0x01, 0x8f, 0xb7, 0x85, 0xc9, 0xed, 0xd4,
164+
0xf7, 0x8d, 0xc4, 0xb6, 0xff, 0xa7, 0x0a, 0x8a, 0x03, 0x7f, 0x4e, 0x25,
165+
0x92, 0x70, 0x91, 0xf4, 0x03, 0x55, 0x55, 0x38, 0xf5, 0xb1, 0x72, 0x38,
166+
0x59, 0x97, 0x3f, 0xf8, 0xe8, 0xd8, 0x14, 0x4f, 0x90, 0xe3, 0x68, 0x98,
167+
0xa0, 0x47, 0x07, 0x5b, 0xe0, 0xaa, 0x28, 0x09, 0x43, 0xd8, 0x87, 0x7a,
168+
0xf1, 0x10, 0x1e, 0xc3, 0xf9, 0x83, 0x72, 0x40, 0x37, 0x90, 0x35, 0x0e,
169+
0x3b, 0xc3, 0x32, 0x5c, 0x2f, 0xec, 0x27, 0xa6, 0x0d, 0x6d, 0x66, 0x86,
170+
0xe3, 0x9a, 0xe1, 0xb9, 0x94, 0x83, 0x21, 0xa2, 0x24, 0x18, 0xf2, 0xc1,
171+
0x86, 0x44, 0x12, 0x91, 0xda, 0xa8, 0xd3, 0xcd, 0x4f, 0xed, 0xa7, 0x88,
172+
0x7a, 0x59, 0xf7, 0x12, 0xfe, 0x6d, 0xaf, 0xdf, 0x9a, 0xe5, 0x47, 0xbd,
173+
0x4c, 0x31, 0x31, 0xba, 0xbd, 0xd6, 0x13, 0xd9, 0x2a, 0x69, 0xb6, 0x73,
174+
0x85, 0x68, 0xdf, 0xd1, 0x4d, 0xaf, 0xd5, 0xef, 0x66, 0xd5, 0xc9, 0xe3,
175+
0xac, 0xdd, 0x2f, 0x50, 0x52, 0x47, 0x63, 0x32, 0xbc, 0x99, 0x9e, 0x95,
176+
0xe2, 0x9e, 0xd7, 0xb4, 0x1a, 0x76, 0xda, 0x4b, 0xa7, 0xf2, 0xb1, 0x62,
177+
0xa2, 0x7d, 0x7d, 0x31, 0x6a, 0xc7, 0x6b, 0xd1, 0x6c, 0x13, 0xf1, 0x8c,
178+
0x7a, 0x58, 0xff, 0x7c, 0xde, 0x0b, 0x7a, 0xcb, 0x36, 0xe7, 0x45, 0xd0,
179+
0x7c, 0x16, 0x5c, 0x4e, 0x22, 0xd3, 0x74, 0x7d, 0xc2, 0x99, 0x76, 0xa6,
180+
0x79, 0x88, 0xb1, 0x60, 0xec, 0xb2, 0x8c, 0x1e, 0x62, 0xbd, 0x03, 0xdb,
181+
0xb8, 0x28, 0x46, 0xd0, 0xce, 0x5c, 0xf1, 0xaa, 0x50, 0x56, 0x53, 0xb1,
182+
0x04, 0xfa, 0xda, 0xf2, 0x22, 0xc3, 0x69, 0xdd, 0xee, 0x5f, 0x96, 0x6f,
183+
0x72, 0x56, 0x3e, 0xdb, 0xb8, 0x4f, 0x60, 0xd2, 0xaa, 0x97, 0xd1, 0xcd,
184+
0x04, 0x75, 0x4c, 0xbd, 0x91, 0x2c, 0xdf, 0x8d, 0x6a, 0xa5, 0xd6, 0x24,
185+
0x3d, 0xbb, 0x1b, 0xf9, 0xf1, 0x64, 0xa1, 0x92, 0x4a, 0xeb, 0xed, 0xfb,
186+
0x8b, 0x6c, 0xe1, 0x29, 0xa3, 0x4e, 0xfa, 0xd9, 0xab, 0x72, 0xa6, 0x79,
187+
0xd5, 0x1f, 0x37, 0xf5, 0x5c, 0x7b, 0xe0, 0x3e, 0xc5, 0x8a, 0xd1, 0xe8,
188+
0xb8, 0xe0, 0xc7, 0x5b, 0x5f, 0xf4, 0xe4, 0x7e, 0x45, 0xf9, 0xbf, 0xaf,
189+
0x63, 0xdf, 0xdd, 0xba, 0x7e, 0xcb, 0xea, 0xda, 0xd7, 0x6b, 0xde, 0x9c,
190+
0xa2, 0x0a, 0xa6, 0xd4, 0xa5, 0xeb, 0xaf, 0x84, 0x37, 0x7e, 0xe7, 0xbe,
191+
0x8d, 0xc3, 0x38, 0xe2, 0x7e, 0x18, 0x47, 0xa9, 0x54, 0x2b, 0x46, 0xb6,
192+
0x5a, 0xd6, 0x4b, 0xc5, 0x74, 0xa5, 0xf1, 0xbb, 0x5c, 0x8b, 0x66, 0xbc,
193+
0x9c, 0xbc, 0xfc, 0x00, 0xa7, 0xa7, 0x1d, 0x26, 0xd0, 0x57, 0x00, 0x00};
194+
// Build a string_fragment from the full array; the explicit sizeof(bits)
// length matters because the data contains embedded 0x00 bytes.
auto sf = string_fragment::from_bytes(bits, sizeof(bits));
195+
196+
// Run the conversion with a second argument of 100.
// NOTE(review): presumably the maximum cell width in characters -- confirm
// against the from_table_cell_content() declaration.
auto al = attr_line_t::from_table_cell_content(sf, 100);
197+
}

0 commit comments

Comments
 (0)