Skip to content

Commit 218dd5b

Browse files
committed
TH_Tokenized: add an "accessible" option to the detokenizer.
To use accessible token names for non-US-keyboard-typable display tokens.
1 parent d5a0044 commit 218dd5b

4 files changed

Lines changed: 85 additions & 11 deletions

File tree

TIVarsLib.wasm

3.45 KB
Binary file not shown.

cli/cli.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ int main(int argc, char** argv)
4646
("l,lang", "Language", cxxopts::value<string>()->default_value("en"))
4747
("a,archive", "Archive status", cxxopts::value<bool>())
4848
("r,reindent", "Re-indent", cxxopts::value<bool>())
49+
("accessible", "Use accessible token names for non-US-keyboard display tokens", cxxopts::value<bool>())
4950
("p,prettify", "Prettify (display-oriented, may not roundtrip)", cxxopts::value<bool>())
5051
("s,detect_strings", "Detect strings", cxxopts::value<bool>())
5152
("h,help", "Print usage");
@@ -317,6 +318,7 @@ int main(int argc, char** argv)
317318

318319
options_t contentOptions;
319320
contentOptions["reindent"] = result["reindent"].as<bool>();
321+
contentOptions["accessible"] = result["accessible"].as<bool>();
320322
contentOptions["prettify"] = result["prettify"].as<bool>();
321323

322324
if (result.count("lang"))

src/TypeHandlers/TH_Tokenized.cpp

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,17 @@ static std::string format_raw_token_escape(uint16_t tokenValue)
143143
+ tivars::dechex(static_cast<unsigned char>(tokenValue & 0xFF));
144144
}
145145

146+
static std::string format_token_hex(uint16_t tokenValue)
147+
{
148+
if (tokenValue <= 0xFF)
149+
{
150+
return tivars::dechex(static_cast<unsigned char>(tokenValue));
151+
}
152+
153+
return tivars::dechex(static_cast<unsigned char>(tokenValue >> 8))
154+
+ tivars::dechex(static_cast<unsigned char>(tokenValue & 0xFF));
155+
}
156+
146157
static void ltrim_program_whitespace(std::string& s)
147158
{
148159
size_t pos = 0;
@@ -202,6 +213,21 @@ static std::string prettify_token_string(std::string str)
202213
return str;
203214
}
204215

216+
// Reports whether `str` consists solely of newline characters and printable
// ASCII bytes (0x20 through 0x7E inclusive). Any other byte — control
// characters such as tab, DEL, or any byte of a multi-byte UTF-8 sequence —
// makes the result false. An empty string is accepted.
static bool is_standard_keyboard_typable(const std::string& str)
{
    for (const char rawChar : str)
    {
        // Compare as unsigned so bytes >= 0x80 are not seen as negative values.
        const auto byte = static_cast<unsigned char>(rawChar);
        const bool printableAscii = byte >= 0x20 && byte <= 0x7E;
        if (byte != '\n' && !printableAscii)
        {
            return false;
        }
    }

    return true;
}
220+
221+
static std::string format_last_resort_detok_string(uint16_t tokenValue)
222+
{
223+
return tokenValue == 0x3F ? "\n" : format_raw_token_escape(tokenValue);
224+
}
225+
226+
// Makes a detokenized string safe to print on a single log line: a string that
// is exactly one newline character is shown as the two characters `\n`.
// Every other string (including ones that merely contain a newline) is
// returned unchanged.
static std::string format_detok_string_for_log(const std::string& str)
{
    if (str == "\n")
    {
        return "\\n";
    }

    return str;
}
230+
205231
static void split_var_keyword_lines(std::string& str, const std::string& keyword)
206232
{
207233
size_t pos = 0;
@@ -668,6 +694,33 @@ namespace tivars::TypeHandlers
668694
return candidates;
669695
}
670696

697+
static std::string get_detok_primary_string(uint16_t bytesKey, uint8_t langIdx, const options_t& options)
698+
{
699+
using TH_Tokenized::LANG_EN;
700+
701+
const auto it = tokens_BytesToNames.find(bytesKey);
702+
if (it == tokens_BytesToNames.end())
703+
{
704+
return format_raw_token_escape(bytesKey);
705+
}
706+
707+
const TokenNames& tokenNames = it->second;
708+
const std::string& display = tokenNames.display[langIdx];
709+
const bool accessibleDetok = options.contains("accessible") && options.at("accessible") == 1;
710+
if (!accessibleDetok || is_standard_keyboard_typable(display))
711+
{
712+
return display;
713+
}
714+
715+
const std::vector<std::string>* accessibles = &tokenNames.accessibles[langIdx];
716+
if (accessibles->empty() && langIdx != LANG_EN)
717+
{
718+
accessibles = &tokenNames.accessibles[LANG_EN];
719+
}
720+
721+
return accessibles->empty() ? display : accessibles->front();
722+
}
723+
671724
// Shared XML parsing routine used by both standard and Qt/CEmu builds
672725
static void parse_tokens_xml_and_register(const std::string& xml)
673726
{
@@ -917,6 +970,7 @@ namespace tivars::TypeHandlers
917970
}
918971

919972
const bool prettify = options.contains("prettify") && options.at("prettify") == 1;
973+
const bool accessibleDetok = options.contains("accessible") && options.at("accessible") == 1;
920974

921975
std::string str;
922976
data_t verifiedRawBytes;
@@ -983,8 +1037,8 @@ namespace tivars::TypeHandlers
9831037

9841038
if (tokens_BytesToNames.contains(bytesKey))
9851039
{
986-
const std::string tokStr = tokens_BytesToNames[bytesKey].display[langIdx];
987-
if (prettify)
1040+
const std::string tokStr = get_detok_primary_string(bytesKey, langIdx, options);
1041+
if (prettify || accessibleDetok)
9881042
{
9891043
str += tokStr;
9901044
}
@@ -1018,16 +1072,17 @@ namespace tivars::TypeHandlers
10181072

10191073
if (!acceptedFallback)
10201074
{
1021-
const std::string rawEscape = format_raw_token_escape(bytesKey);
1022-
std::cerr << "[Warning] Appending token 0x" << rawEscape.substr(2)
1075+
const std::string rawEscape = format_last_resort_detok_string(bytesKey);
1076+
std::cerr << "[Warning] Appending token 0x" << format_token_hex(bytesKey)
10231077
<< " (" << tokStr << ") made the accumulated detokenized string non-roundtrippable, using "
1024-
<< rawEscape << " instead!" << std::endl;
1078+
<< format_detok_string_for_log(rawEscape) << " instead!" << std::endl;
10251079
accept_detok_token(rawEscape, currentRawBytes);
10261080
}
10271081
}
10281082
} else {
1029-
const std::string rawEscape = format_raw_token_escape(bytesKey);
1030-
std::cerr << "[Warning] Unknown token 0x" << rawEscape.substr(2) << " detokenized as " << rawEscape << "!" << std::endl;
1083+
const std::string rawEscape = format_last_resort_detok_string(bytesKey);
1084+
std::cerr << "[Warning] Unknown token 0x" << format_token_hex(bytesKey)
1085+
<< " detokenized as " << format_detok_string_for_log(rawEscape) << "!" << std::endl;
10311086
if (prettify) {
10321087
str += rawEscape;
10331088
} else {
@@ -1244,7 +1299,7 @@ namespace tivars::TypeHandlers
12441299
std::string tokStr;
12451300
if (tokens_BytesToNames.contains(bytesKey))
12461301
{
1247-
tokStr = tokens_BytesToNames[bytesKey].display[langIdx];
1302+
tokStr = get_detok_primary_string(bytesKey, langIdx, options);
12481303
} else {
12491304
tokStr = format_raw_token_escape(bytesKey);
12501305
}
@@ -1267,7 +1322,7 @@ namespace tivars::TypeHandlers
12671322
std::string tokStr;
12681323
if (tokens_BytesToNames.contains(tokenBytes))
12691324
{
1270-
tokStr = tokens_BytesToNames[tokenBytes].display[LANG_EN];
1325+
tokStr = get_detok_primary_string(tokenBytes, LANG_EN, {});
12711326
} else {
12721327
tokStr = format_raw_token_escape(tokenBytes);
12731328
}
@@ -1334,7 +1389,7 @@ namespace tivars::TypeHandlers
13341389
std::string tokStr;
13351390
if (tokens_BytesToNames.contains(bytesKey))
13361391
{
1337-
tokStr = tokens_BytesToNames[bytesKey].display[langIdx];
1392+
tokStr = get_detok_primary_string(bytesKey, langIdx, options);
13381393
} else {
13391394
tokStr = format_raw_token_escape(bytesKey);
13401395
}

tests.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,13 +649,26 @@ int main(int argc, char** argv)
649649
}
650650

651651
{
652-
const std::string menuSource = "Menu(Str8,\"k(a+b)\",1,\"k(a-b)\",1S,\"\",A,\"\",A,\"\",A,\"\",A,\"\",A,Str9,F,Str7,Q";
652+
const std::string menuSource = R"lit(Menu(Str8,"k(a+b)",1,"k(a-b)",1S,"",A,"",A,"",A,"",A,"",A,Str9,F,Str7,Q)lit";
653653
TIVarFile menuPrgm = TIVarFile::createNew("Program", "MENU");
654654
menuPrgm.setContentFromString(menuSource);
655+
assert(menuPrgm.getReadableContent({{"accessible", true}, {"prettify", true}, {"reindent", false}}) == menuSource);
656+
assert(menuPrgm.getReadableContent({{"accessible", true}, {"prettify", false}, {"reindent", false}}) == menuSource);
655657
assert(menuPrgm.getReadableContent({{"prettify", true}, {"reindent", false}}) == menuSource);
656658
assert(menuPrgm.getReadableContent({{"prettify", false}, {"reindent", false}}) == menuSource);
657659
}
658660

661+
{
662+
const std::string menuSource = R"lit(Menu(Str8,"k(a…b)",A," (a…b)ʳ",B,"k(a…b)ʳ",D," (a…b)(c…d)",5,"k(a…b)(c…d)",6,"",F,"",F,"",F,Str7,Q)lit";
663+
const std::string accessibleMenuSource = R"lit(Menu(Str8,"k(a...b)",A," (a...b)^^r",B,"k(a...b)^^r",D," (a...b)(c...d)",5,"k(a...b)(c...d)",6,"",F,"",F,"",F,Str7,Q)lit";
664+
TIVarFile menuPrgm = TIVarFile::createNew("Program", "MENU2");
665+
menuPrgm.setContentFromString(menuSource);
666+
const auto& tmp = menuPrgm.getReadableContent({{"accessible", true}, {"reindent", false}});
667+
assert(menuPrgm.getReadableContent({{"accessible", true}, {"reindent", false}}) == accessibleMenuSource);
668+
assert(menuPrgm.getReadableContent({{"accessible", true}, {"prettify", true}, {"reindent", false}}) == accessibleMenuSource);
669+
assert(menuPrgm.getReadableContent({{"prettify", true}, {"reindent", false}}) == menuSource);
670+
}
671+
659672
{
660673
ScopedStderrCapture unknownTokenStderr;
661674
const std::string readable = TH_Tokenized::makeStringFromData(data_t{0xBB, 0xD0}, {{"fromRawBytes", 1}});
@@ -705,6 +718,10 @@ int main(int argc, char** argv)
705718
assert(trim(testPrgmStr1.getReadableContent({{"prettify", true}, {"reindent", false}})) == "42→Str1:Str2:123");
706719
assert(trim(testPrgmStr1.getReadableContent({{"prettify", false}, {"reindent", false}})) == "42→Str1:Str2:123");
707720
assert(trim(testPrgmStr1.getReadableContent()) == "42→Str1:Str2:123");
721+
722+
testPrgmStr1.setContentFromString("AUTO:chi^2pdf(:a+bi:42->Str1");
723+
assert(trim(testPrgmStr1.getReadableContent({{"reindent", false}})) == "AUTO:χ²pdf(:a+b𝑖:42→Str1");
724+
assert(trim(testPrgmStr1.getReadableContent({{"accessible", true}, {"reindent", false}})) == "AUTO:chi^2pdf(:a+bi:42->Str1");
708725
}
709726

710727
{

0 commit comments

Comments
 (0)