@@ -19,14 +19,20 @@ text_iterator::text_iterator(const char* in_current_byte, const char* in_range_s
1919#endif
2020 }
2121}
22+
23+ constexpr auto warning_text = " Exception while %s near '%.16s': %s\n\n This is most likely caused by text created in another encoding, such as Windows-1252, that cannot be interpreted as UTF-8." ;
24+
2225text_iterator& unicode::text_iterator::operator ++() {
2326 if (Unicode_text_mode) {
2427 try {
2528 // Increment by UTF-8 encoded codepoints
2629 utf8::next (current_byte, range_end_byte);
2730 } catch (const std::exception& e) {
28- Error (LOCATION , " Exception while incrementing UTF-8 sequence near '%.16s': %s" , current_byte, e.what ());
29- return *this ;
31+ Warning (LOCATION , warning_text, " incrementing text iterator" , current_byte, e.what ());
32+ // Increment by byte, so we still make progress
33+ if (current_byte < range_end_byte) {
34+ ++current_byte;
35+ }
3036 }
3137 } else {
3238 // Increment by byte
@@ -41,11 +47,14 @@ text_iterator& text_iterator::operator--() {
4147 // Decrement by UTF-8 encoded codepoints
4248 utf8::prior (current_byte, range_start_byte);
4349 } catch (const std::exception& e) {
44- Error (LOCATION , " Exception while decrementing text iterator near '%.16s': %s" , current_byte, e.what ());
45- return *this ;
50+ Warning (LOCATION , warning_text, " decrementing text iterator" , current_byte, e.what ());
51+ // Decrement by byte, so we still make progress
52+ if (current_byte > range_start_byte) {
53+ --current_byte;
54+ }
4655 }
4756 } else {
48- // Increment by byte
57+ // Decrement by byte
4958 --current_byte;
5059 }
5160
@@ -66,8 +75,8 @@ text_iterator::value_type text_iterator::operator*() const {
6675 try {
6776 return utf8::peek_next (current_byte, range_end_byte);
6877 } catch (const std::exception& e) {
69- Error (LOCATION , " Exception while decoding UTF-8 sequence near '%.16s': %s " , current_byte, e.what ());
70- return 0 ;
78+ Warning (LOCATION , warning_text, " decoding UTF-8 sequence" , current_byte, e.what ());
79+ return replacement_char ;
7180 }
7281 } else {
7382 // Use the unsigned byte value here to avoid integer overflows
0 commit comments