Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions code/globalincs/pstypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
#define DIR_SEPARATOR_STR "/"
#endif

// Single-byte character constants, spelled as char literals so no cast is needed.
constexpr char COMMENT_CHAR{';'};        // semicolon
constexpr char EOLN{'\x0a'};             // line feed, byte value 0x0a
constexpr char CARRIAGE_RETURN{'\x0d'};  // carriage return, byte value 0x0d

#ifndef NDEBUG
constexpr bool FSO_DEBUG = true;
#else
Expand Down
4 changes: 0 additions & 4 deletions code/parse/parselo.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ extern int fred_parse_flag;
extern int Token_found_flag;


#define COMMENT_CHAR (char)';'
#define EOLN (char)0x0a
#define CARRIAGE_RETURN (char)0x0d

// Kinds of line ending: CR, CRLF, LF, or UNKNOWN.
// NOTE(review): UNKNOWN presumably means "not yet determined" -- confirm against the code that uses this enum.
enum class LineEndingType { UNKNOWN, CR, CRLF, LF };

#define F_NAME 1
Expand Down
23 changes: 16 additions & 7 deletions code/utils/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@ text_iterator::text_iterator(const char* in_current_byte, const char* in_range_s
#endif
}
}

// printf-style message shared by the text_iterator warnings on UTF-8 failures.
// Arguments, in order: a description of the operation being attempted (%s),
// the text at the current position (%.16s prints at most 16 bytes of it),
// and the message of the caught exception (%s).
constexpr const char* warning_text = "Exception while %s near '%.16s': %s\n\nThis is most likely caused by text created in another encoding, such as Windows-1252, that cannot be interpreted as UTF-8.";

text_iterator& unicode::text_iterator::operator++() {
if (Unicode_text_mode) {
try {
// Increment by UTF-8 encoded codepoints
utf8::next(current_byte, range_end_byte);
} catch(const std::exception& e) {
Error(LOCATION, "Exception while incrementing UTF-8 sequence near '%.16s': %s", current_byte, e.what());
return *this;
Warning(LOCATION, warning_text, "incrementing text iterator", current_byte, e.what());
// Increment by byte, so we still make progress
if (current_byte < range_end_byte) {
++current_byte;
}
}
} else {
// Increment by byte
Expand All @@ -41,11 +47,14 @@ text_iterator& text_iterator::operator--() {
// Decrement by UTF-8 encoded codepoints
utf8::prior(current_byte, range_start_byte);
} catch(const std::exception& e) {
Error(LOCATION, "Exception while decrementing text iterator near '%.16s': %s", current_byte, e.what());
return *this;
Warning(LOCATION, warning_text, "decrementing text iterator", current_byte, e.what());
// Decrement by byte, so we still make progress
if (current_byte > range_start_byte) {
--current_byte;
}
}
} else {
// Increment by byte
// Decrement by byte
--current_byte;
}

Expand All @@ -66,8 +75,8 @@ text_iterator::value_type text_iterator::operator*() const {
try {
return utf8::peek_next(current_byte, range_end_byte);
} catch(const std::exception& e) {
Error(LOCATION, "Exception while decoding UTF-8 sequence near '%.16s': %s", current_byte, e.what());
return 0;
Warning(LOCATION, warning_text, "decoding UTF-8 sequence", current_byte, e.what());
return replacement_char;
}
} else {
// Use the unsigned byte value here to avoid integer overflows
Expand Down
14 changes: 14 additions & 0 deletions code/utils/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ namespace unicode {
*/
typedef char32_t codepoint_t;

/**
 * @brief Sentinel codepoint obtained by converting -1; invalid and ignorable
 */
constexpr codepoint_t invalid_char{static_cast<codepoint_t>(-1)};

/**
 * @brief U+FFFD, substituted for malformed UTF-8 so that a bad decode can degrade gracefully
 */
constexpr codepoint_t replacement_char = U'\uFFFD';

/**
 * @brief Codepoint-typed equivalents of the byte constants COMMENT_CHAR, EOLN and CARRIAGE_RETURN
 */
constexpr codepoint_t comment_char{static_cast<codepoint_t>(COMMENT_CHAR)};
constexpr codepoint_t eoln{static_cast<codepoint_t>(EOLN)};
constexpr codepoint_t carriage_return{static_cast<codepoint_t>(CARRIAGE_RETURN)};

class text_iterator {
const char* current_byte = nullptr;
const char* range_end_byte = nullptr;
Expand Down
Loading