Skip to content

Commit a9309ff

Browse files
committed
simplified UTF-16 checks
1 parent 96fbbd5 commit a9309ff

1 file changed

Lines changed: 8 additions & 6 deletions

File tree

simplecpp.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,9 @@ class simplecpp::TokenList::Stream {
217217
public:
218218
Stream(std::istream &istr)
219219
: istr(istr)
220+
, bom(getAndSkipBOM())
221+
, isUtf16(bom == 0xfeff || bom == 0xfffe)
220222
{
221-
bom = getAndSkipBOM();
222223
}
223224

224225
int get() {
@@ -240,7 +241,7 @@ class simplecpp::TokenList::Stream {
240241

241242
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
242243
// character is a non-ASCII character then replace it with 0xff
243-
if (bom == 0xfeff || bom == 0xfffe) {
244+
if (isUtf16) {
244245
const unsigned char ch2 = static_cast<unsigned char>(get());
245246
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
246247
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
@@ -251,7 +252,7 @@ class simplecpp::TokenList::Stream {
251252
ch = '\n';
252253
if (bom == 0 && static_cast<char>(peek()) == '\n')
253254
(void)get();
254-
else if (bom == 0xfeff || bom == 0xfffe) {
255+
else if (isUtf16) {
255256
int c1 = get();
256257
int c2 = get();
257258
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
@@ -271,7 +272,7 @@ class simplecpp::TokenList::Stream {
271272

272273
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
273274
// character is a non-ASCII character then replace it with 0xff
274-
if (bom == 0xfeff || bom == 0xfffe) {
275+
if (isUtf16) {
275276
(void)get();
276277
const unsigned char ch2 = static_cast<unsigned char>(peek());
277278
unget();
@@ -289,7 +290,7 @@ class simplecpp::TokenList::Stream {
289290
void ungetChar()
290291
{
291292
unget();
292-
if (bom == 0xfeff || bom == 0xfffe)
293+
if (isUtf16)
293294
unget();
294295
}
295296

@@ -322,7 +323,8 @@ class simplecpp::TokenList::Stream {
322323
}
323324

324325
std::istream &istr;
325-
unsigned short bom;
326+
const unsigned short bom;
327+
const bool isUtf16;
326328
};
327329

328330
simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {}

0 commit comments

Comments
 (0)