scp-fs2open · Goober5000 · Jun 21, 2026 · Jun 19, 2026
diff --git a/code/globalincs/pstypes.h b/code/globalincs/pstypes.h
@@ -47,6 +47,10 @@
 #define DIR_SEPARATOR_STR  "/"
 #endif
 
+constexpr char COMMENT_CHAR =    static_cast<char>(';');	// semicolon; going by the name, the comment marker (usage not shown here)
+constexpr char EOLN =            static_cast<char>(0x0a);	// ASCII line feed (LF)
+constexpr char CARRIAGE_RETURN = static_cast<char>(0x0d);	// ASCII carriage return (CR)
+
 #ifndef NDEBUG
 constexpr bool FSO_DEBUG = true;
 #else

diff --git a/code/parse/parselo.h b/code/parse/parselo.h
@@ -34,10 +34,6 @@ extern int fred_parse_flag;
 extern int Token_found_flag;
 
 
-#define	COMMENT_CHAR	(char)';'
-#define	EOLN			(char)0x0a
-#define CARRIAGE_RETURN (char)0x0d
-
 enum class LineEndingType { UNKNOWN, CR, CRLF, LF };
 
 #define	F_NAME					1

diff --git a/code/utils/unicode.cpp b/code/utils/unicode.cpp
@@ -19,14 +19,20 @@ text_iterator::text_iterator(const char* in_current_byte, const char* in_range_s
 #endif
 	}
 }
+// Shared Warning() format: %s = what the iterator was doing, %.16s = at most 16 bytes of the text at the failure point, %s = exception message
+constexpr auto warning_text = "Exception while %s near '%.16s': %s\n\nThis is most likely caused by text created in another encoding, such as Windows-1252, that cannot be interpreted as UTF-8.";
+
 text_iterator& unicode::text_iterator::operator++() {
 	if (Unicode_text_mode) {
 		try {
 			// Increment by UTF-8 encoded codepoints
 			utf8::next(current_byte, range_end_byte);
 		} catch(const std::exception& e) {
-			Error(LOCATION, "Exception while incrementing UTF-8 sequence near '%.16s': %s", current_byte, e.what());
-			return *this;
+			Warning(LOCATION, warning_text, "incrementing text iterator", current_byte, e.what());
+			// Increment by byte, so we still make progress without stepping past range_end_byte. NOTE(review): assumes utf8::next() left current_byte untouched when it threw - confirm
+			if (current_byte < range_end_byte) {
+				++current_byte;
+			}
 		}
 	} else {
 		// Increment by byte
@@ -41,11 +47,17 @@ text_iterator& text_iterator::operator--() {
 			// Decrement by UTF-8 encoded codepoints
-			utf8::prior(current_byte, range_start_byte);
+			// utf8::prior() moves its iterator before it can throw, so decode on a copy and commit only on success
+			const char* prior_byte = current_byte;
+			utf8::prior(prior_byte, range_start_byte);
+			current_byte = prior_byte;
 		} catch(const std::exception& e) {
-			Error(LOCATION, "Exception while decrementing text iterator near '%.16s': %s", current_byte, e.what());
-			return *this;
+			Warning(LOCATION, warning_text, "decrementing text iterator", current_byte, e.what());
+			// current_byte is still at its original position here; decrement by exactly one byte, so we still make progress
+			if (current_byte > range_start_byte) {
+				--current_byte;
+			}
 		}
 	} else {
-		// Increment by byte
+		// Decrement by byte
 		--current_byte;
 	}

@@ -66,8 +78,8 @@ text_iterator::value_type text_iterator::operator*() const {
 		try {
 			return utf8::peek_next(current_byte, range_end_byte);
 		} catch(const std::exception& e) {
-			Error(LOCATION, "Exception while decoding UTF-8 sequence near '%.16s': %s", current_byte, e.what());
-			return 0;
+			Warning(LOCATION, warning_text, "decoding UTF-8 sequence", current_byte, e.what());
+			return replacement_char;	// substitute codepoint standing in for the sequence that could not be decoded
 		}
 	} else {
 		// Use the unsigned byte value here to avoid integer overflows

diff --git a/code/utils/unicode.h b/code/utils/unicode.h
@@ -33,6 +33,20 @@ namespace unicode {
  */
 typedef char32_t codepoint_t;
 
+/**
+ * @brief An invalid and ignorable character, equivalent to -1 (i.e. the largest value codepoint_t can hold)
+ */
+constexpr codepoint_t invalid_char = static_cast<codepoint_t>(-1);
+
+/**
+ * @brief Substitute for malformed UTF-8 so that a bad decode can degrade gracefully (U+FFFD, the Unicode replacement character)
+ */
+constexpr codepoint_t replacement_char = 0xFFFD;
+
+constexpr codepoint_t comment_char =    static_cast<codepoint_t>(COMMENT_CHAR);	// COMMENT_CHAR as a codepoint_t
+constexpr codepoint_t eoln =            static_cast<codepoint_t>(EOLN);	// EOLN as a codepoint_t
+constexpr codepoint_t carriage_return = static_cast<codepoint_t>(CARRIAGE_RETURN);	// CARRIAGE_RETURN as a codepoint_t
+
 class text_iterator {
 	const char* current_byte = nullptr;
 	const char* range_end_byte = nullptr;