Skip to content

Commit 0ff349f

Browse files
committed
Evo: handle even more UCS-2 ranges and direct token mappings.
Credits to Zeroko mostly for painfully testing ranges.
1 parent fc10afc commit 0ff349f

4 files changed

Lines changed: 383 additions & 20 deletions

File tree

TIVarsLib.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

TIVarsLib.wasm

601 KB
Binary file not shown.

src/EvoFormat.cpp

Lines changed: 211 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include <sstream>
1616
#include <stdexcept>
1717
#include <unordered_map>
18+
#include <utility>
19+
#include <vector>
1820

1921
#include "TIVarTypes.h"
2022
#include "TypeHandlers/TypeHandlers.h"
@@ -58,9 +60,24 @@ namespace tivars::EvoFormat
5860

5961
// Tells whether a UCS-2 code unit is accepted as a directly displayable character.
//
// The accepted set is the table of inclusive [first, last] ranges below: 0x0020-0x007E,
// 0x00A0-0x00FF, 0x0177, individual Greek letters, a selection of punctuation, super/subscript,
// arrow, math and geometric-shape code units, and several sub-ranges of 0xF000-0xF061.
//
// @param codepoint  UCS-2 code unit to classify.
// @return true when `codepoint` lies inside at least one accepted range, false otherwise.
bool is_displayable_ucs2_scalar(uint16_t codepoint)
{
    static constexpr std::pair<uint16_t, uint16_t> acceptedRanges[] = {
        {0x0020, 0x007E}, {0x00A0, 0x00FF}, {0x0177, 0x0177}, {0x0394, 0x0394},
        {0x03A3, 0x03A3}, {0x03A9, 0x03A9}, {0x03B1, 0x03B5}, {0x03B8, 0x03B8},
        {0x03BB, 0x03BC}, {0x03C0, 0x03C1}, {0x03C3, 0x03C4}, {0x03C6, 0x03C7},
        {0x2010, 0x2010}, {0x2026, 0x2026}, {0x2070, 0x2070}, {0x2074, 0x2079},
        {0x2080, 0x2089}, {0x2122, 0x2122}, {0x2190, 0x2193}, {0x221A, 0x221A},
        {0x2220, 0x2220}, {0x222B, 0x222B}, {0x2260, 0x2260}, {0x2264, 0x2265},
        {0x238C, 0x238C}, {0x25A0, 0x25A0}, {0x25AB, 0x25AB}, {0x25B2, 0x25B2},
        {0x25B6, 0x25B6}, {0x25B8, 0x25B8}, {0x25BC, 0x25BC}, {0x25C0, 0x25C0},
        {0x25C2, 0x25C2}, {0xF000, 0xF032}, {0xF038, 0xF03A}, {0xF041, 0xF04D},
        {0xF04F, 0xF058}, {0xF05B, 0xF061},
    };

    // The table is short, so a linear scan is sufficient; both bounds are inclusive.
    for (const auto& range : acceptedRanges)
    {
        if (codepoint >= range.first && codepoint <= range.second)
        {
            return true;
        }
    }
    return false;
}
6582

6683
bool utf8_to_single_codepoint(const std::string& text, uint16_t& codepoint)
@@ -752,13 +769,117 @@ static const char* evo_token_name(uint16_t token)
752769
}
753770

754771
static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken);
772+
static bool direct_legacy_payload_for_evo(uint16_t evoToken, data_t& payload);
755773
static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken);
756774
static void append_evo_token(data_t& out, uint16_t evoToken);
757775
static bool legacy_token_to_evo_ucs2(uint16_t legacyToken, uint16_t& evoToken);
758776

777+
// Returns the table that maps Evo tokens in the 0xF000-0xF061 range to the UTF-8 text used to
// display them. The map is a function-local static and is handed out by const reference.
// evo_token_to_string() consults this table before its other display rules, and
// evo_private_source_aliases() derives the source-side alias list from it.
// NOTE(review): several entries carry an empty string as shown here — confirm whether that is
// intentional or whether the glyphs were lost when this text was captured.
static const std::unordered_map<uint16_t, std::string>& evo_private_display_aliases()
778+
{
779+
static const std::unordered_map<uint16_t, std::string> aliases = {
780+
{0xF000, ""}, {0xF001, "E"}, {0xF002, "e"}, {0xF003, "𝙵"},
781+
{0xF004, "𝑖"}, {0xF005, "ʟ"}, {0xF006, "𝗡"}, {0xF007, "𝑛"},
782+
{0xF008, ""}, {0xF009, "ʳ"}, {0xF00A, ""}, {0xF00B, ""},
783+
{0xF00C, "ˣ"}, {0xF00D, ""}, {0xF00E, "ȳ"}, {0xF00F, ""},
784+
{0xF010, ""}, {0xF011, "⁻¹"}, {0xF012, "₁₀"},{0xF013, "²"},
785+
{0xF014, "³"}, {0xF015, ""}, {0xF016, ""}, {0xF017, ""},
786+
{0xF018, ""}, {0xF019, "`"},
787+
{0xF01A, ""}, {0xF01B, "🡅"}, {0xF01C, "🡇"}, {0xF01D, "🠺"},
788+
{0xF01E, ""}, {0xF01F, ""},
789+
{0xF020, ""}, {0xF021, ""}, {0xF022, ""},
790+
{0xF023, "A"}, {0xF024, "a"}, {0xF025, "_"}, {0xF026, "↑͟"},
791+
{0xF027, ""}, {0xF028, ""}, {0xF029, ""}, {0xF02A, ""},
792+
{0xF02B, ""}, {0xF02C, ""}, {0xF02D, ""}, {0xF02E, ""},
793+
{0xF02F, ""}, {0xF030, ""}, {0xF031, ""}, {0xF032, ""},
794+
{0xF038, ""}, {0xF039, ""}, {0xF03A, "🔒"}, {0xF041, ""},
795+
{0xF042, ""}, {0xF043, ""}, {0xF044, ""}, {0xF045, ""},
796+
{0xF046, ""}, {0xF047, ""}, {0xF048, ""}, {0xF049, ""},
797+
{0xF04A, ""}, {0xF04B, ""}, {0xF04C, ""},
798+
{0xF04F, ""}, {0xF050, ""}, {0xF051, ""}, {0xF052, "𝅆"},
799+
{0xF053, ""}, {0xF054, ""}, {0xF055, ""}, {0xF056, "🔒"},
800+
{0xF057, "◣̏"}, {0xF058, "◥̤"},
801+
{0xF05B, "Β"}, {0xF05C, "Ε"}, {0xF05D, ""},
802+
{0xF05E, ""}, {0xF05F, ""}, {0xF060, ""}, {0xF061, ""}
803+
};
804+
return aliases;
805+
}
806+
807+
static bool source_text_tokenizes_without_private_alias(const std::string& text)
808+
{
809+
const auto scanned = TypeHandlers::TH_Tokenized::scanSourceTokens(text);
810+
if (scanned.size() != 1)
811+
{
812+
return false;
813+
}
814+
815+
const auto& [scannedText, legacyToken, matched] = scanned[0];
816+
if (!matched || scannedText != text)
817+
{
818+
return false;
819+
}
820+
821+
uint16_t evoToken = 0;
822+
return direct_evo_token_for_legacy(legacyToken, evoToken);
823+
}
824+
825+
static const std::vector<std::pair<std::string, uint16_t>>& evo_private_source_aliases()
826+
{
827+
static const std::vector<std::pair<std::string, uint16_t>> aliases = [] {
828+
std::vector<std::pair<std::string, uint16_t>> result;
829+
for (const auto& [token, text] : evo_private_display_aliases())
830+
{
831+
if (source_text_tokenizes_without_private_alias(text))
832+
{
833+
continue;
834+
}
835+
result.emplace_back(text, token);
836+
}
837+
std::ranges::sort(result, [](const auto& lhs, const auto& rhs) {
838+
if (lhs.first.size() != rhs.first.size())
839+
{
840+
return lhs.first.size() > rhs.first.size();
841+
}
842+
return lhs.second < rhs.second;
843+
});
844+
return result;
845+
}();
846+
return aliases;
847+
}
848+
849+
static std::string normalize_evo_private_source_aliases(const std::string& source)
850+
{
851+
std::string normalized;
852+
normalized.reserve(source.size());
853+
854+
for (size_t pos = 0; pos < source.size();)
855+
{
856+
bool matched = false;
857+
for (const auto& [text, token] : evo_private_source_aliases())
858+
{
859+
if (source.compare(pos, text.size(), text) != 0)
860+
{
861+
continue;
862+
}
863+
864+
normalized += "\\u" + dechex(static_cast<uint8_t>(token >> 8)) + dechex(static_cast<uint8_t>(token & 0xFF));
865+
pos += text.size();
866+
matched = true;
867+
break;
868+
}
869+
870+
if (!matched)
871+
{
872+
normalized += source[pos++];
873+
}
874+
}
875+
876+
return normalized;
877+
}
878+
759879
static std::string evo_token_to_string(uint16_t token)
760880
{
761881
if (token == 0x0000) return "";
882+
if (const auto it = evo_private_display_aliases().find(token); it != evo_private_display_aliases().end()) return it->second;
762883
if (is_displayable_ucs2_scalar(token)) return utf8_from_codepoint(token);
763884
if (token >= 0xE800 && token <= 0xE819) return std::string(1, static_cast<char>('A' + (token - 0xE800)));
764885
if (token == 0xE81A) return "θ";
@@ -770,6 +891,9 @@ static std::string evo_token_to_string(uint16_t token)
770891
if (token == 0xE41A) return "'";
771892
if (token == 0xE424) return "";
772893
if (token == 0xE589) return "Grad";
894+
if (token == 0xE9D6) return "►ʳ";
895+
if (token == 0xE9D7) return "►ᵍ";
896+
if (token == 0xE9D8) return "►º";
773897
if (token >= 0xE850 && token <= 0xE85B)
774898
{
775899
const uint16_t idx = static_cast<uint16_t>((token - 0xE850) / 2 + 1);
@@ -848,6 +972,7 @@ data_t tokenize_evo_token_words(const std::string& source, const options_t& opti
848972
{
849973
normalizedSource = sourceText;
850974
}
975+
normalizedSource = normalize_evo_private_source_aliases(normalizedSource);
851976

852977
static constexpr uint16_t legacyStore = 0x04;
853978
static constexpr uint16_t legacyQuote = 0x2A;
@@ -953,6 +1078,28 @@ static bool legacy_payload_for_evo_ucs2(uint16_t evoToken, data_t& payload)
9531078
}
9541079
}
9551080

1081+
static bool direct_legacy_payload_for_evo(uint16_t evoToken, data_t& payload)
1082+
{
1083+
payload.clear();
1084+
1085+
// ►{angle} conv token
1086+
if (evoToken == 0xE9D6 || evoToken == 0xE9D7 || evoToken == 0xE9D8)
1087+
{
1088+
append_legacy_token(payload, 0xBBEC);
1089+
append_legacy_token(payload, evoToken == 0xE9D6 ? 0x0A : evoToken == 0xE9D7 ? 0xAF : 0x0B);
1090+
return true;
1091+
}
1092+
1093+
uint16_t legacyToken = 0;
1094+
if (!direct_legacy_token_for_evo(evoToken, legacyToken))
1095+
{
1096+
return false;
1097+
}
1098+
1099+
append_legacy_token(payload, legacyToken);
1100+
return true;
1101+
}
1102+
9561103
static bool legacy_token_to_evo_ucs2(uint16_t legacyToken, uint16_t& evoToken)
9571104
{
9581105
const std::string text = TypeHandlers::TH_Tokenized::oneTokenBytesToString(legacyToken);
@@ -1099,7 +1246,7 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
10991246
{0x03B4, 0xBBA3}, {0x03B5, 0xBBA4}, {0x03BB, 0xBBA5}, {0x03BC, 0xBBA6},
11001247
{0x03C0, 0xBBA7}, {0x03C1, 0xBBA8}, {0x03A3, 0xBBA9}, {0x03A6, 0xBBAB},
11011248
{0x03A9, 0xBBAC}, {0x03C7, 0xBBAE}, {0x007C, 0xBBD8}, {0x2026, 0xBBDB},
1102-
{0x00D7, 0xBBF0}, {0x222B, 0xBBF1}, {0x2338, 0xBBF5},
1249+
{0x00D7, 0xBBF0}, {0x222B, 0xBBF1},
11031250
{0x007E, 0xBBCF}, {0x03C3, 0xBBCB}, {0x03C4, 0xBBCC}, {0x00CD, 0xBBCD},
11041251
{0x0040, 0xBBD1}, {0x0023, 0xBBD2}, {0x0024, 0xBBD3}, {0x0026, 0xBBD4},
11051252
{0x003B, 0xBBD6}, {0x005C, 0xBBD7}, {0x0025, 0xBBDA}, {0x2220, 0xBBDC},
@@ -1112,6 +1259,7 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
11121259
{0xE5BD, 0x7F}, {0xE5BE, 0x80}, {0xE5BF, 0x81}, {0xE5C0, 0xEF73},
11131260
{0xE5C1, 0xEF74}, {0xE5C2, 0xEF75},
11141261
{0xE4F9, 0xBB57}, {0xE593, 0xBB64}, {0xE6C6, 0xE8}, {0xE6C7, 0xE7},
1262+
{0xE6AE, 0xEF79},
11151263
{0xE900, 0x6201}, {0xE901, 0x6202}, {0xE902, 0x6203}, {0xE903, 0x6204},
11161264
{0xE904, 0x6205}, {0xE905, 0x6206}, {0xE906, 0x6207}, {0xE907, 0x6208},
11171265
{0xE908, 0x6209}, {0xE909, 0x620A}, {0xE90A, 0x620B}, {0xE90B, 0x620C},
@@ -1128,6 +1276,9 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
11281276
{0xE932, 0x6234}, {0xE933, 0x6235}, {0xE934, 0x6236}, {0xE935, 0x6237},
11291277
{0xE936, 0x6238}, {0xE937, 0x6239}, {0xE938, 0x623A}, {0xE939, 0x623B},
11301278
{0xE93A, 0x623C}, {0xE980, 0x6304}, {0xE981, 0x6305}, {0xE982, 0x6332},
1279+
{0xE93B, 0x6203}, {0xE93C, 0x622B}, {0xE93D, 0x622E}, {0xE93E, 0xBBA6},
1280+
{0xE941, 0x622D}, {0xE942, 0x6230}, {0xE943, 0x6206}, {0xE944, 0x622C},
1281+
{0xE945, 0x622F}, {0xE946, 0xBBCB}, {0xE95C, 0x6227},
11311282
{0xE983, 0x6306}, {0xE984, 0x6307}, {0xE985, 0x6308}, {0xE986, 0x6309},
11321283
{0xE987, 0x6333}, {0xE98F, 0x630A}, {0xE990, 0x630B}, {0xE991, 0x6302},
11331284
{0xE992, 0x6336}, {0xE993, 0x630C}, {0xE994, 0x630D}, {0xE995, 0x6303},
@@ -1451,6 +1602,51 @@ static bool direct_legacy_token_for_evo(uint16_t evoToken, uint16_t& legacyToken
14511602
{0xE6C5, 0xEF31},
14521603
{0xE81B, 0x0072},
14531604
{0xE81C, 0x005F},
1605+
{0xF000, 0x3B},
1606+
{0xF001, 0x45},
1607+
{0xF002, 0xBBB4},
1608+
{0xF003, 0xBBAF},
1609+
{0xF004, 0x2C},
1610+
{0xF005, 0xEB},
1611+
{0xF006, 0x632B},
1612+
{0xF007, 0x6221},
1613+
{0xF008, 0xBBAD},
1614+
{0xF009, 0x0A},
1615+
{0xF00A, 0x0E},
1616+
{0xF00B, 0xBBDF},
1617+
{0xF00C, 0xBBDE},
1618+
{0xF00D, 0x6203},
1619+
{0xF00E, 0x620C},
1620+
{0xF010, 0xB0},
1621+
{0xF011, 0x0C},
1622+
{0xF012, 0xBBEA},
1623+
{0xF013, 0x0D},
1624+
{0xF014, 0x0F},
1625+
{0xF018, 0xEF2E},
1626+
{0xF019, 0xBB9B},
1627+
{0xF01B, 0xBBF2},
1628+
{0xF01C, 0xBBF3},
1629+
{0xF01E, 0xBBED},
1630+
{0xF01F, 0xBBEE},
1631+
{0xF020, 0xBBF5},
1632+
{0xF022, 0xBBED},
1633+
{0xF023, 0x41},
1634+
{0xF024, 0xBBB0},
1635+
{0xF025, 0xBBD9},
1636+
{0xF02A, 0xEF1E},
1637+
{0xF042, 0xBBE0},
1638+
{0xF043, 0xBBE1},
1639+
{0xF044, 0xBBE2},
1640+
{0xF045, 0xBBE3},
1641+
{0xF046, 0xBBE4},
1642+
{0xF047, 0xBBE5},
1643+
{0xF048, 0xBBE6},
1644+
{0xF049, 0xBBE7},
1645+
{0xF04A, 0xBBE8},
1646+
{0xF04B, 0xBBE9},
1647+
{0xF04C, 0x7F},
1648+
{0xF060, 0x6D},
1649+
{0xF061, 0x6E},
14541650
};
14551651

14561652
const auto it = direct.find(evoToken);
@@ -1595,7 +1791,7 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
15951791
{0xBBA3, 0x03B4}, {0xBBA4, 0x03B5}, {0xBBA5, 0x03BB}, {0xBBA6, 0x03BC},
15961792
{0xBBA7, 0x03C0}, {0xBBA8, 0x03C1}, {0xBBA9, 0x03A3}, {0xBBAB, 0x03A6},
15971793
{0xBBAC, 0x03A9}, {0xBBAE, 0x03C7}, {0xBBD8, 0x007C}, {0xBBDB, 0x2026},
1598-
{0xBBF0, 0x00D7}, {0xBBF1, 0x222B}, {0xBBF5, 0x2338},
1794+
{0xBBF0, 0x00D7}, {0xBBF1, 0x222B},
15991795
{0xBBCF, 0x007E}, {0xBBCB, 0x03C3}, {0xBBCC, 0x03C4}, {0xBBCD, 0x00CD},
16001796
{0xBBD1, 0x0040}, {0xBBD2, 0x0023}, {0xBBD3, 0x0024}, {0xBBD4, 0x0026},
16011797
{0xBBD5, 0x0060}, {0xBBD6, 0x003B}, {0xBBD7, 0x005C}, {0xBBDA, 0x0025}, {0xBBDC, 0x2220},
@@ -1607,6 +1803,7 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
16071803
{0x7F, 0xE5BD}, {0x80, 0xE5BE}, {0x81, 0xE5BF}, {0xEF73, 0xE5C0},
16081804
{0xEF74, 0xE5C1}, {0xEF75, 0xE5C2},
16091805
{0xBB57, 0xE4F9}, {0xBB64, 0xE593}, {0xE8, 0xE6C6}, {0xE7, 0xE6C7},
1806+
{0xEF79, 0xE6AE},
16101807
{0x6201, 0xE900}, {0x6202, 0xE901}, {0x6203, 0xE902}, {0x6204, 0xE903},
16111808
{0x6205, 0xE904}, {0x6206, 0xE905}, {0x6207, 0xE906}, {0x6208, 0xE907},
16121809
{0x6209, 0xE908}, {0x620A, 0xE909}, {0x620B, 0xE90A}, {0x620C, 0xE90B},
@@ -1942,6 +2139,13 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
19422139
{0xEF31, 0xE6C5},
19432140
{0x0072, 0xE81B},
19442141
{0x005F, 0xE81C},
2142+
{0xBBAF, 0xF003},
2143+
{0xBBAD, 0xF008},
2144+
{0xBBEA, 0xF012},
2145+
{0xBBF2, 0xF01B},
2146+
{0xBBF3, 0xF01C},
2147+
{0xBBF5, 0xF020},
2148+
{0xEF1E, 0xF02A},
19452149
};
19462150

19472151
const auto it = direct.find(legacyToken);
@@ -1955,20 +2159,8 @@ static bool direct_evo_token_for_legacy(uint16_t legacyToken, uint16_t& evoToken
19552159

19562160
static bool tokenized_legacy_payload_for_evo(uint16_t evoToken, data_t& payload)
19572161
{
1958-
if (legacy_payload_for_evo_ucs2(evoToken, payload))
1959-
{
1960-
return true;
1961-
}
1962-
1963-
uint16_t legacyToken = 0;
1964-
if (!direct_legacy_token_for_evo(evoToken, legacyToken))
1965-
{
1966-
return false;
1967-
}
1968-
1969-
payload.clear();
1970-
append_legacy_token(payload, legacyToken);
1971-
return true;
2162+
return direct_legacy_payload_for_evo(evoToken, payload)
2163+
|| legacy_payload_for_evo_ucs2(evoToken, payload);
19722164
}
19732165

19742166
data_t evo_tokenized_data_to_legacy(const data_t& evoData)

0 commit comments

Comments
 (0)