1515#include < sstream>
1616#include < stdexcept>
1717#include < unordered_map>
18+ #include < utility>
19+ #include < vector>
1820
1921#include " TIVarTypes.h"
2022#include " TypeHandlers/TypeHandlers.h"
@@ -58,9 +60,24 @@ namespace tivars::EvoFormat
5860
// Reports whether `codepoint` lies inside one of the accepted inclusive
// [first, last] ranges listed in the table below.
//
// @param codepoint 16-bit code unit to classify.
// @return true when the value is covered by any listed range, false otherwise.
bool is_displayable_ucs2_scalar(uint16_t codepoint)
{
    // Each entry is an inclusive range; single values are written as {v, v}.
    static constexpr std::pair<uint16_t, uint16_t> acceptedRanges[] = {
        {0x0020, 0x007E}, {0x00A0, 0x00FF}, {0x0177, 0x0177}, {0x0394, 0x0394},
        {0x03A3, 0x03A3}, {0x03A9, 0x03A9}, {0x03B1, 0x03B5}, {0x03B8, 0x03B8},
        {0x03BB, 0x03BC}, {0x03C0, 0x03C1}, {0x03C3, 0x03C4}, {0x03C6, 0x03C7},
        {0x2010, 0x2010}, {0x2026, 0x2026}, {0x2070, 0x2070}, {0x2074, 0x2079},
        {0x2080, 0x2089}, {0x2122, 0x2122}, {0x2190, 0x2193}, {0x221A, 0x221A},
        {0x2220, 0x2220}, {0x222B, 0x222B}, {0x2260, 0x2260}, {0x2264, 0x2265},
        {0x238C, 0x238C}, {0x25A0, 0x25A0}, {0x25AB, 0x25AB}, {0x25B2, 0x25B2},
        {0x25B6, 0x25B6}, {0x25B8, 0x25B8}, {0x25BC, 0x25BC}, {0x25C0, 0x25C0},
        {0x25C2, 0x25C2}, {0xF000, 0xF032}, {0xF038, 0xF03A}, {0xF041, 0xF04D},
        {0xF04F, 0xF058}, {0xF05B, 0xF061},
    };

    // A plain loop keeps this function free of any <algorithm> dependency.
    for (const auto& [first, last] : acceptedRanges)
    {
        if (codepoint >= first && codepoint <= last)
        {
            return true;
        }
    }
    return false;
}
6582
6683 bool utf8_to_single_codepoint (const std::string& text, uint16_t & codepoint)
@@ -752,13 +769,117 @@ static const char* evo_token_name(uint16_t token)
752769}
753770
754771static bool direct_legacy_token_for_evo (uint16_t evoToken, uint16_t & legacyToken);
772+ static bool direct_legacy_payload_for_evo (uint16_t evoToken, data_t & payload);
755773static bool direct_evo_token_for_legacy (uint16_t legacyToken, uint16_t & evoToken);
756774static void append_evo_token (data_t & out, uint16_t evoToken);
757775static bool legacy_token_to_evo_ucs2 (uint16_t legacyToken, uint16_t & evoToken);
758776
// Returns the lookup table mapping selected 16-bit tokens between 0xF000 and
// 0xF061 to the UTF-8 text used to display them.
// The table is a function-local static: it is built on the first call and the
// same instance is returned by reference on every later call.
// Some tokens share one text (0xF01E and 0xF022; 0xF03A and 0xF056), so the
// text -> token direction of this table is not unique.
// NOTE(review): the 0xF02C entry shows up as a lone backslash in this view;
// confirm the literal is correctly escaped in the real file.
777+ static const std::unordered_map<uint16_t , std::string>& evo_private_display_aliases ()
778+ {
779+ static const std::unordered_map<uint16_t , std::string> aliases = {
780+ {0xF000 , " ᴇ" }, {0xF001 , " E" }, {0xF002 , " e" }, {0xF003 , " 𝙵" },
781+ {0xF004 , " 𝑖" }, {0xF005 , " ʟ" }, {0xF006 , " 𝗡" }, {0xF007 , " 𝑛" },
782+ {0xF008 , " p̂" }, {0xF009 , " ʳ" }, {0xF00A , " ᵀ" }, {0xF00B , " ᴛ" },
783+ {0xF00C , " ˣ" }, {0xF00D , " x̄" }, {0xF00E , " ȳ" }, {0xF00F , " ⁺" },
784+ {0xF010 , " ⁻" }, {0xF011 , " ⁻¹" }, {0xF012 , " ₁₀" },{0xF013 , " ²" },
785+ {0xF014 , " ³" }, {0xF015 , " ⧸" }, {0xF016 , " ⟋" }, {0xF017 , " ␣" },
786+ {0xF018 , " ⁄" }, {0xF019 , " `" },
787+ {0xF01A , " ⸩" }, {0xF01B , " 🡅" }, {0xF01C , " 🡇" }, {0xF01D , " 🠺" },
788+ {0xF01E , " ↑" }, {0xF01F , " ↓" },
789+ {0xF020 , " ⌸" }, {0xF021 , " ▮" }, {0xF022 , " ↑" },
790+ {0xF023 , " A" }, {0xF024 , " a" }, {0xF025 , " _" }, {0xF026 , " ↑͟" },
791+ {0xF027 , " A͟" }, {0xF028 , " a͟" }, {0xF029 , " ░" }, {0xF02A , " ⬚" },
792+ {0xF02B , " ╲" }, {0xF02C , " \" }, {0xF02D , " ◥" }, {0xF02E , " ◣" },
793+ {0xF02F , " ⌕" }, {0xF030 , " ⁰" }, {0xF031 , " ⧵" }, {0xF032 , " ⧹" },
794+ {0xF038 , " ⬩" }, {0xF039 , " ⎵" }, {0xF03A , " 🔒" }, {0xF041 , " ⋅" },
795+ {0xF042 , " ₀" }, {0xF043 , " ₁" }, {0xF044 , " ₂" }, {0xF045 , " ₃" },
796+ {0xF046 , " ₄" }, {0xF047 , " ₅" }, {0xF048 , " ₆" }, {0xF049 , " ₇" },
797+ {0xF04A , " ₈" }, {0xF04B , " ₉" }, {0xF04C , " □" },
798+ {0xF04F , " ⬌" }, {0xF050 , " ▯" }, {0xF051 , " ⸋" }, {0xF052 , " 𝅆" },
799+ {0xF053 , " ᵍ" }, {0xF054 , " ▫" }, {0xF055 , " ᵃ" }, {0xF056 , " 🔒" },
800+ {0xF057 , " ◣̏" }, {0xF058 , " ◥̤" },
801+ {0xF05B , " Β" }, {0xF05C , " Ε" }, {0xF05D , " F" },
802+ {0xF05E , " ‹" }, {0xF05F , " ›" }, {0xF060 , " ≤" }, {0xF061 , " ≥" }
803+ };
804+ return aliases;
805+ }
806+
807+ static bool source_text_tokenizes_without_private_alias (const std::string& text)
808+ {
809+ const auto scanned = TypeHandlers::TH_Tokenized::scanSourceTokens (text);
810+ if (scanned.size () != 1 )
811+ {
812+ return false ;
813+ }
814+
815+ const auto & [scannedText, legacyToken, matched] = scanned[0 ];
816+ if (!matched || scannedText != text)
817+ {
818+ return false ;
819+ }
820+
821+ uint16_t evoToken = 0 ;
822+ return direct_evo_token_for_legacy (legacyToken, evoToken);
823+ }
824+
825+ static const std::vector<std::pair<std::string, uint16_t >>& evo_private_source_aliases ()
826+ {
827+ static const std::vector<std::pair<std::string, uint16_t >> aliases = [] {
828+ std::vector<std::pair<std::string, uint16_t >> result;
829+ for (const auto & [token, text] : evo_private_display_aliases ())
830+ {
831+ if (source_text_tokenizes_without_private_alias (text))
832+ {
833+ continue ;
834+ }
835+ result.emplace_back (text, token);
836+ }
837+ std::ranges::sort (result, [](const auto & lhs, const auto & rhs) {
838+ if (lhs.first .size () != rhs.first .size ())
839+ {
840+ return lhs.first .size () > rhs.first .size ();
841+ }
842+ return lhs.second < rhs.second ;
843+ });
844+ return result;
845+ }();
846+ return aliases;
847+ }
848+
849+ static std::string normalize_evo_private_source_aliases (const std::string& source)
850+ {
851+ std::string normalized;
852+ normalized.reserve (source.size ());
853+
854+ for (size_t pos = 0 ; pos < source.size ();)
855+ {
856+ bool matched = false ;
857+ for (const auto & [text, token] : evo_private_source_aliases ())
858+ {
859+ if (source.compare (pos, text.size (), text) != 0 )
860+ {
861+ continue ;
862+ }
863+
864+ normalized += " \\ u" + dechex (static_cast <uint8_t >(token >> 8 )) + dechex (static_cast <uint8_t >(token & 0xFF ));
865+ pos += text.size ();
866+ matched = true ;
867+ break ;
868+ }
869+
870+ if (!matched)
871+ {
872+ normalized += source[pos++];
873+ }
874+ }
875+
876+ return normalized;
877+ }
878+
759879static std::string evo_token_to_string (uint16_t token)
760880{
761881 if (token == 0x0000 ) return " " ;
882+ if (const auto it = evo_private_display_aliases ().find (token); it != evo_private_display_aliases ().end ()) return it->second ;
762883 if (is_displayable_ucs2_scalar (token)) return utf8_from_codepoint (token);
763884 if (token >= 0xE800 && token <= 0xE819 ) return std::string (1 , static_cast <char >(' A' + (token - 0xE800 )));
764885 if (token == 0xE81A ) return " θ" ;
@@ -770,6 +891,9 @@ static std::string evo_token_to_string(uint16_t token)
770891 if (token == 0xE41A ) return " '" ;
771892 if (token == 0xE424 ) return " ᵍ" ;
772893 if (token == 0xE589 ) return " Grad" ;
894+ if (token == 0xE9D6 ) return " ►ʳ" ;
895+ if (token == 0xE9D7 ) return " ►ᵍ" ;
896+ if (token == 0xE9D8 ) return " ►º" ;
773897 if (token >= 0xE850 && token <= 0xE85B )
774898 {
775899 const uint16_t idx = static_cast <uint16_t >((token - 0xE850 ) / 2 + 1 );
@@ -848,6 +972,7 @@ data_t tokenize_evo_token_words(const std::string& source, const options_t& opti
848972 {
849973 normalizedSource = sourceText;
850974 }
975+ normalizedSource = normalize_evo_private_source_aliases (normalizedSource);
851976
852977 static constexpr uint16_t legacyStore = 0x04 ;
853978 static constexpr uint16_t legacyQuote = 0x2A ;
@@ -953,6 +1078,28 @@ static bool legacy_payload_for_evo_ucs2(uint16_t evoToken, data_t& payload)
9531078 }
9541079}
9551080
1081+ static bool direct_legacy_payload_for_evo (uint16_t evoToken, data_t & payload)
1082+ {
1083+ payload.clear ();
1084+
1085+ // ►{angle} conv token
1086+ if (evoToken == 0xE9D6 || evoToken == 0xE9D7 || evoToken == 0xE9D8 )
1087+ {
1088+ append_legacy_token (payload, 0xBBEC );
1089+ append_legacy_token (payload, evoToken == 0xE9D6 ? 0x0A : evoToken == 0xE9D7 ? 0xAF : 0x0B );
1090+ return true ;
1091+ }
1092+
1093+ uint16_t legacyToken = 0 ;
1094+ if (!direct_legacy_token_for_evo (evoToken, legacyToken))
1095+ {
1096+ return false ;
1097+ }
1098+
1099+ append_legacy_token (payload, legacyToken);
1100+ return true ;
1101+ }
1102+
9561103static bool legacy_token_to_evo_ucs2 (uint16_t legacyToken, uint16_t & evoToken)
9571104{
9581105 const std::string text = TypeHandlers::TH_Tokenized::oneTokenBytesToString (legacyToken);
@@ -1099,7 +1246,7 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
10991246 {0x03B4 , 0xBBA3 }, {0x03B5 , 0xBBA4 }, {0x03BB , 0xBBA5 }, {0x03BC , 0xBBA6 },
11001247 {0x03C0 , 0xBBA7 }, {0x03C1 , 0xBBA8 }, {0x03A3 , 0xBBA9 }, {0x03A6 , 0xBBAB },
11011248 {0x03A9 , 0xBBAC }, {0x03C7 , 0xBBAE }, {0x007C , 0xBBD8 }, {0x2026 , 0xBBDB },
1102- {0x00D7 , 0xBBF0 }, {0x222B , 0xBBF1 }, { 0x2338 , 0xBBF5 },
1249+ {0x00D7 , 0xBBF0 }, {0x222B , 0xBBF1 },
11031250 {0x007E , 0xBBCF }, {0x03C3 , 0xBBCB }, {0x03C4 , 0xBBCC }, {0x00CD , 0xBBCD },
11041251 {0x0040 , 0xBBD1 }, {0x0023 , 0xBBD2 }, {0x0024 , 0xBBD3 }, {0x0026 , 0xBBD4 },
11051252 {0x003B , 0xBBD6 }, {0x005C , 0xBBD7 }, {0x0025 , 0xBBDA }, {0x2220 , 0xBBDC },
@@ -1112,6 +1259,7 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
11121259 {0xE5BD , 0x7F }, {0xE5BE , 0x80 }, {0xE5BF , 0x81 }, {0xE5C0 , 0xEF73 },
11131260 {0xE5C1 , 0xEF74 }, {0xE5C2 , 0xEF75 },
11141261 {0xE4F9 , 0xBB57 }, {0xE593 , 0xBB64 }, {0xE6C6 , 0xE8 }, {0xE6C7 , 0xE7 },
1262+ {0xE6AE , 0xEF79 },
11151263 {0xE900 , 0x6201 }, {0xE901 , 0x6202 }, {0xE902 , 0x6203 }, {0xE903 , 0x6204 },
11161264 {0xE904 , 0x6205 }, {0xE905 , 0x6206 }, {0xE906 , 0x6207 }, {0xE907 , 0x6208 },
11171265 {0xE908 , 0x6209 }, {0xE909 , 0x620A }, {0xE90A , 0x620B }, {0xE90B , 0x620C },
@@ -1128,6 +1276,9 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
11281276 {0xE932 , 0x6234 }, {0xE933 , 0x6235 }, {0xE934 , 0x6236 }, {0xE935 , 0x6237 },
11291277 {0xE936 , 0x6238 }, {0xE937 , 0x6239 }, {0xE938 , 0x623A }, {0xE939 , 0x623B },
11301278 {0xE93A , 0x623C }, {0xE980 , 0x6304 }, {0xE981 , 0x6305 }, {0xE982 , 0x6332 },
1279+ {0xE93B , 0x6203 }, {0xE93C , 0x622B }, {0xE93D , 0x622E }, {0xE93E , 0xBBA6 },
1280+ {0xE941 , 0x622D }, {0xE942 , 0x6230 }, {0xE943 , 0x6206 }, {0xE944 , 0x622C },
1281+ {0xE945 , 0x622F }, {0xE946 , 0xBBCB }, {0xE95C , 0x6227 },
11311282 {0xE983 , 0x6306 }, {0xE984 , 0x6307 }, {0xE985 , 0x6308 }, {0xE986 , 0x6309 },
11321283 {0xE987 , 0x6333 }, {0xE98F , 0x630A }, {0xE990 , 0x630B }, {0xE991 , 0x6302 },
11331284 {0xE992 , 0x6336 }, {0xE993 , 0x630C }, {0xE994 , 0x630D }, {0xE995 , 0x6303 },
@@ -1451,6 +1602,51 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
14511602 {0xE6C5 , 0xEF31 },
14521603 {0xE81B , 0x0072 },
14531604 {0xE81C , 0x005F },
1605+ {0xF000 , 0x3B },
1606+ {0xF001 , 0x45 },
1607+ {0xF002 , 0xBBB4 },
1608+ {0xF003 , 0xBBAF },
1609+ {0xF004 , 0x2C },
1610+ {0xF005 , 0xEB },
1611+ {0xF006 , 0x632B },
1612+ {0xF007 , 0x6221 },
1613+ {0xF008 , 0xBBAD },
1614+ {0xF009 , 0x0A },
1615+ {0xF00A , 0x0E },
1616+ {0xF00B , 0xBBDF },
1617+ {0xF00C , 0xBBDE },
1618+ {0xF00D , 0x6203 },
1619+ {0xF00E , 0x620C },
1620+ {0xF010 , 0xB0 },
1621+ {0xF011 , 0x0C },
1622+ {0xF012 , 0xBBEA },
1623+ {0xF013 , 0x0D },
1624+ {0xF014 , 0x0F },
1625+ {0xF018 , 0xEF2E },
1626+ {0xF019 , 0xBB9B },
1627+ {0xF01B , 0xBBF2 },
1628+ {0xF01C , 0xBBF3 },
1629+ {0xF01E , 0xBBED },
1630+ {0xF01F , 0xBBEE },
1631+ {0xF020 , 0xBBF5 },
1632+ {0xF022 , 0xBBED },
1633+ {0xF023 , 0x41 },
1634+ {0xF024 , 0xBBB0 },
1635+ {0xF025 , 0xBBD9 },
1636+ {0xF02A , 0xEF1E },
1637+ {0xF042 , 0xBBE0 },
1638+ {0xF043 , 0xBBE1 },
1639+ {0xF044 , 0xBBE2 },
1640+ {0xF045 , 0xBBE3 },
1641+ {0xF046 , 0xBBE4 },
1642+ {0xF047 , 0xBBE5 },
1643+ {0xF048 , 0xBBE6 },
1644+ {0xF049 , 0xBBE7 },
1645+ {0xF04A , 0xBBE8 },
1646+ {0xF04B , 0xBBE9 },
1647+ {0xF04C , 0x7F },
1648+ {0xF060 , 0x6D },
1649+ {0xF061 , 0x6E },
14541650 };
14551651
14561652 const auto it = direct.find (evoToken);
@@ -1595,7 +1791,7 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
15951791 {0xBBA3 , 0x03B4 }, {0xBBA4 , 0x03B5 }, {0xBBA5 , 0x03BB }, {0xBBA6 , 0x03BC },
15961792 {0xBBA7 , 0x03C0 }, {0xBBA8 , 0x03C1 }, {0xBBA9 , 0x03A3 }, {0xBBAB , 0x03A6 },
15971793 {0xBBAC , 0x03A9 }, {0xBBAE , 0x03C7 }, {0xBBD8 , 0x007C }, {0xBBDB , 0x2026 },
1598- {0xBBF0 , 0x00D7 }, {0xBBF1 , 0x222B }, { 0xBBF5 , 0x2338 },
1794+ {0xBBF0 , 0x00D7 }, {0xBBF1 , 0x222B },
15991795 {0xBBCF , 0x007E }, {0xBBCB , 0x03C3 }, {0xBBCC , 0x03C4 }, {0xBBCD , 0x00CD },
16001796 {0xBBD1 , 0x0040 }, {0xBBD2 , 0x0023 }, {0xBBD3 , 0x0024 }, {0xBBD4 , 0x0026 },
16011797 {0xBBD5 , 0x0060 }, {0xBBD6 , 0x003B }, {0xBBD7 , 0x005C }, {0xBBDA , 0x0025 }, {0xBBDC , 0x2220 },
@@ -1607,6 +1803,7 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
16071803 {0x7F , 0xE5BD }, {0x80 , 0xE5BE }, {0x81 , 0xE5BF }, {0xEF73 , 0xE5C0 },
16081804 {0xEF74 , 0xE5C1 }, {0xEF75 , 0xE5C2 },
16091805 {0xBB57 , 0xE4F9 }, {0xBB64 , 0xE593 }, {0xE8 , 0xE6C6 }, {0xE7 , 0xE6C7 },
1806+ {0xEF79 , 0xE6AE },
16101807 {0x6201 , 0xE900 }, {0x6202 , 0xE901 }, {0x6203 , 0xE902 }, {0x6204 , 0xE903 },
16111808 {0x6205 , 0xE904 }, {0x6206 , 0xE905 }, {0x6207 , 0xE906 }, {0x6208 , 0xE907 },
16121809 {0x6209 , 0xE908 }, {0x620A , 0xE909 }, {0x620B , 0xE90A }, {0x620C , 0xE90B },
@@ -1942,6 +2139,13 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
19422139 {0xEF31 , 0xE6C5 },
19432140 {0x0072 , 0xE81B },
19442141 {0x005F , 0xE81C },
2142+ {0xBBAF , 0xF003 },
2143+ {0xBBAD , 0xF008 },
2144+ {0xBBEA , 0xF012 },
2145+ {0xBBF2 , 0xF01B },
2146+ {0xBBF3 , 0xF01C },
2147+ {0xBBF5 , 0xF020 },
2148+ {0xEF1E , 0xF02A },
19452149 };
19462150
19472151 const auto it = direct.find (legacyToken);
@@ -1955,20 +2159,8 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
19552159
19562160static bool tokenized_legacy_payload_for_evo (uint16_t evoToken, data_t & payload)
19572161{
1958- if (legacy_payload_for_evo_ucs2 (evoToken, payload))
1959- {
1960- return true ;
1961- }
1962-
1963- uint16_t legacyToken = 0 ;
1964- if (!direct_legacy_token_for_evo (evoToken, legacyToken))
1965- {
1966- return false ;
1967- }
1968-
1969- payload.clear ();
1970- append_legacy_token (payload, legacyToken);
1971- return true ;
2162+ return direct_legacy_payload_for_evo (evoToken, payload)
2163+ || legacy_payload_for_evo_ucs2 (evoToken, payload);
19722164}
19732165
19742166data_t evo_tokenized_data_to_legacy (const data_t & evoData)
0 commit comments