|
1 | 1 | // Copyright (c) 2021 midnightBITS |
2 | 2 | // This code is licensed under MIT license (see LICENSE for details) |
3 | 3 |
|
| 4 | +#include <array> |
4 | 5 | #include <cassert> |
5 | 6 | #include <cmath> |
6 | 7 | #include <iostream> |
7 | 8 | #include <limits> |
8 | 9 | #include <memory> |
| 10 | +#include <print> |
9 | 11 | #include <stack> |
10 | 12 | #include <json/json.hpp> |
11 | 13 |
|
@@ -84,17 +86,18 @@ namespace json { |
84 | 86 | return from_json_impl(value, path); |
85 | 87 | } |
86 | 88 |
|
| 89 | + node read_string(string_view::iterator&, |
| 90 | + string_view::iterator const&, |
| 91 | + read_mode mode); |
| 92 | + |
87 | 93 | namespace { |
88 | 94 | using uchar = unsigned char; |
89 | 95 | using iterator = string_view::iterator; |
90 | 96 |
|
91 | 97 | void skip_ws(iterator&, iterator const&, read_mode mode); |
92 | | - node read_string(iterator&, iterator const&, read_mode mode); |
93 | 98 | node read_number(iterator&, iterator const&, read_mode mode); |
94 | 99 | node read_keyword(iterator&, iterator const&, read_mode mode); |
95 | 100 |
|
96 | | - void encode(uint32_t ch, string& target); |
97 | | - |
98 | 101 | void skip_ws(iterator& it, iterator const& end, read_mode mode) { |
99 | 102 | if (mode == read_mode::strict) { |
100 | 103 | while (it != end) { |
@@ -438,154 +441,6 @@ namespace json { |
438 | 441 | return val; |
439 | 442 | } |
440 | 443 |
|
441 | | - unsigned hex_escape(iterator& it, iterator const& end) { |
442 | | - // sanity check for read_string |
443 | | - assert(*it == 'x'); |
444 | | - |
445 | | - ++it; |
446 | | - auto const result = hex_digit(it, end); |
447 | | - if (result == INV_HEX) return INV_HEX_SQUARE; |
448 | | - |
449 | | - auto const lower = hex_digit(it, end); |
450 | | - if (lower == INV_HEX) return INV_HEX_SQUARE; |
451 | | - return result * 16 + lower; // NOLINT(readability-magic-numbers) |
452 | | - } |
453 | | - |
454 | | - uint32_t unicode_escape(iterator& it, iterator const& end) { |
455 | | - static constexpr auto max = std::numeric_limits<uint32_t>::max(); |
456 | | - // sanity check for read_string |
457 | | - assert(*it == 'u'); |
458 | | - |
459 | | - ++it; |
460 | | - if (it == end) return max; |
461 | | - if (*it == '{') { |
462 | | - ++it; |
463 | | - uint32_t val = 0; |
464 | | - while (it != end && *it != '}') { |
465 | | - auto const dig = hex_digit(it, end); |
466 | | - if (dig == INV_HEX) return max; |
467 | | - auto const overflow_guard = val; |
468 | | - val *= 16; // NOLINT(readability-magic-numbers) |
469 | | - val += dig; |
470 | | - if (overflow_guard > val) return max; |
471 | | - } |
472 | | - if (it != end) ++it; |
473 | | - return val; |
474 | | - } |
475 | | - |
476 | | - uint32_t val = 0; |
477 | | - for (int i = 0; i < 4; ++i) { |
478 | | - auto const dig = hex_digit(it, end); |
479 | | - if (dig == INV_HEX) return max; |
480 | | - val *= 16; // NOLINT(readability-magic-numbers) |
481 | | - val += dig; |
482 | | - } |
483 | | - return val; |
484 | | - } |
485 | | - |
486 | | - node read_string(iterator& it, iterator const& end, read_mode mode) { |
487 | | - // sanity check for value_reader::read and |
488 | | - // object_reader::read_object_key |
489 | | - assert(it != end && (*it == '"' || *it == '\'')); |
490 | | - |
491 | | - if (mode == read_mode::strict && *it == '\'') return {}; |
492 | | - |
493 | | - auto tmplt = *it; |
494 | | - ++it; |
495 | | - |
496 | | - string result{}; |
497 | | - bool in_string = true; |
498 | | - bool in_escape = false; |
499 | | - while (it != end && in_string) { |
500 | | - if (in_escape) { |
501 | | - switch (*it) { |
502 | | - case '\r': |
503 | | - if (mode == read_mode::strict) return {}; |
504 | | - ++it; |
505 | | - if (it != end && *it == '\n') break; |
506 | | - --it; |
507 | | - break; |
508 | | - case '\n': |
509 | | - if (mode == read_mode::strict) return {}; |
510 | | - ++it; |
511 | | - if (it != end && *it == '\r') break; |
512 | | - --it; |
513 | | - break; |
514 | | - case 'b': |
515 | | - result.push_back('\b'); |
516 | | - break; |
517 | | - case 'f': |
518 | | - result.push_back('\f'); |
519 | | - break; |
520 | | - case 'n': |
521 | | - result.push_back('\n'); |
522 | | - break; |
523 | | - case 'r': |
524 | | - result.push_back('\r'); |
525 | | - break; |
526 | | - case 't': |
527 | | - result.push_back('\t'); |
528 | | - break; |
529 | | - case 'v': |
530 | | - if (mode == read_mode::strict) return {}; |
531 | | - result.push_back('\v'); |
532 | | - break; |
533 | | - case 'x': { |
534 | | - if (mode == read_mode::strict) return {}; |
535 | | - auto const val = hex_escape(it, end); |
536 | | - if (val > 255) // NOLINT(readability-magic-numbers) |
537 | | - return {}; |
538 | | - result.push_back(static_cast<string::value_type>( |
539 | | - static_cast<uchar>(val))); |
540 | | - --it; |
541 | | - break; |
542 | | - } |
543 | | - case 'u': { |
544 | | - auto const val = unicode_escape(it, end); |
545 | | - if (val == std::numeric_limits<uint32_t>::max()) |
546 | | - return {}; |
547 | | - encode(val, result); |
548 | | - --it; |
549 | | - break; |
550 | | - } |
551 | | - case '"': |
552 | | - case '\\': |
553 | | - case '/': |
554 | | - result.push_back(*it); |
555 | | - break; |
556 | | - default: |
557 | | - if (mode == read_mode::strict) return {}; |
558 | | - result.push_back(*it); |
559 | | - break; |
560 | | - } |
561 | | - ++it; |
562 | | - in_escape = false; |
563 | | - continue; |
564 | | - } |
565 | | - |
566 | | - if (*it == tmplt) { |
567 | | - ++it; |
568 | | - in_string = false; |
569 | | - continue; |
570 | | - } |
571 | | - |
572 | | - switch (*it) { |
573 | | - case '\\': |
574 | | - in_escape = true; |
575 | | - break; |
576 | | - default: |
577 | | - if (mode == read_mode::strict && |
578 | | - static_cast<unsigned char>(*it) < CHAR_SPACE) |
579 | | - return {}; |
580 | | - result.push_back(*it); |
581 | | - } |
582 | | - ++it; |
583 | | - } |
584 | | - |
585 | | - if (mode == read_mode::strict && in_string) return {}; |
586 | | - return node{std::move(result)}; |
587 | | - } |
588 | | - |
589 | 444 | node read_int(iterator& it, |
590 | 445 | iterator const& end, |
591 | 446 | unsigned base, |
@@ -763,67 +618,6 @@ namespace json { |
763 | 618 | return {}; |
764 | 619 | } |
765 | 620 |
|
766 | | - constexpr uchar firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, |
767 | | - 0xF0, 0xF8, 0xFC}; |
768 | | - |
769 | | - enum : uint32_t { |
770 | | - UNI_SUR_HIGH_START = 0xD800, |
771 | | - UNI_SUR_HIGH_END = 0xDBFF, |
772 | | - UNI_SUR_LOW_START = 0xDC00, |
773 | | - UNI_SUR_LOW_END = 0xDFFF, |
774 | | - UNI_REPLACEMENT_CHAR = 0x0000FFFD, |
775 | | - UNI_MAX_BMP = 0x0000FFFF, |
776 | | - UNI_MAX_UTF16 = 0x0010FFFF, |
777 | | - UNI_MAX_LEGAL_UTF32 = 0x0010FFFF |
778 | | - }; |
779 | | - |
780 | | - constexpr uint32_t byteMask = 0xBF; |
781 | | - constexpr uint32_t byteMark = 0x80; |
782 | | - |
783 | | - void encode(uint32_t ch, string& target) { |
784 | | - unsigned short bytesToWrite = 0; |
785 | | - |
786 | | - /* Figure out how many bytes the result will require */ |
787 | | - if (ch < 0x80u) // NOLINT |
788 | | - bytesToWrite = 1; |
789 | | - else if (ch < 0x800u) // NOLINT |
790 | | - bytesToWrite = 2; // NOLINT |
791 | | - else if (ch >= UNI_SUR_HIGH_START && |
792 | | - ch <= UNI_SUR_LOW_END) { // NOLINT |
793 | | - bytesToWrite = 3; // NOLINT |
794 | | - ch = UNI_REPLACEMENT_CHAR; |
795 | | - } else if (ch < 0x10000u) // NOLINT |
796 | | - bytesToWrite = 3; // NOLINT |
797 | | - else if (ch <= UNI_MAX_LEGAL_UTF32) |
798 | | - bytesToWrite = 4; // NOLINT |
799 | | - else { |
800 | | - bytesToWrite = 3; // NOLINT |
801 | | - ch = UNI_REPLACEMENT_CHAR; |
802 | | - } |
803 | | - |
804 | | - uchar mid[4]; |
805 | | - uchar* midp = mid + sizeof(mid); |
806 | | - switch (bytesToWrite) { /* note: everything falls through. */ |
807 | | - case 4: // NOLINT |
808 | | - *--midp = static_cast<uchar>((ch | byteMark) & byteMask); |
809 | | - ch >>= 6; // NOLINT |
810 | | - [[fallthrough]]; |
811 | | - case 3: |
812 | | - *--midp = static_cast<uchar>((ch | byteMark) & byteMask); |
813 | | - ch >>= 6; // NOLINT |
814 | | - [[fallthrough]]; |
815 | | - case 2: |
816 | | - *--midp = static_cast<uchar>((ch | byteMark) & byteMask); |
817 | | - ch >>= 6; // NOLINT |
818 | | - [[fallthrough]]; |
819 | | - case 1: |
820 | | - *--midp = |
821 | | - static_cast<uchar>(ch | firstByteMark[bytesToWrite]); |
822 | | - } |
823 | | - for (int i = 0; i < bytesToWrite; ++i) |
824 | | - target.push_back(static_cast<string::value_type>(*midp++)); |
825 | | - } |
826 | | - |
827 | 621 | struct size_judge { |
828 | 622 | size_t allowed_space; |
829 | 623 |
|
|
0 commit comments