Skip to content

Commit dd40093

Browse files
committed
TH_Tokenized: enforce trying detok with variants before accessibles.
1 parent 53d161f commit dd40093

2 files changed

Lines changed: 38 additions & 42 deletions

File tree

src/TypeHandlers/TH_Tokenized.cpp

Lines changed: 17 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -698,41 +698,6 @@ namespace tivars::TypeHandlers
698698
return false;
699699
}
700700

701-
static std::set<std::string> get_detok_alias_candidates(uint16_t bytesKey, uint8_t langIdx, const std::string& primaryDisplay)
702-
{
703-
using TH_Tokenized::LANG_EN;
704-
705-
const auto it = tokens_BytesToNames.find(bytesKey);
706-
if (it == tokens_BytesToNames.end())
707-
{
708-
return {};
709-
}
710-
711-
const TokenNames& tokenNames = it->second;
712-
std::set<std::string> candidates;
713-
auto append_aliases = [&](const auto& aliases, uint8_t idx)
714-
{
715-
for (const auto& candidate : aliases[idx])
716-
{
717-
if (candidate != primaryDisplay)
718-
{
719-
candidates.insert(candidate);
720-
}
721-
}
722-
};
723-
724-
append_aliases(tokenNames.accessibles, langIdx);
725-
append_aliases(tokenNames.variants, langIdx);
726-
727-
if (langIdx != LANG_EN)
728-
{
729-
append_aliases(tokenNames.accessibles, LANG_EN);
730-
append_aliases(tokenNames.variants, LANG_EN);
731-
}
732-
733-
return candidates;
734-
}
735-
736701
static std::string get_detok_primary_string(uint16_t bytesKey, uint8_t langIdx, const options_t& options)
737702
{
738703
using TH_Tokenized::LANG_EN;
@@ -1050,8 +1015,10 @@ namespace tivars::TypeHandlers
10501015
currentRawBytes.push_back(nextToken);
10511016
}
10521017

1053-
if (tokens_BytesToNames.contains(bytesKey))
1018+
const auto tokenNamesIt = tokens_BytesToNames.find(bytesKey);
1019+
if (tokenNamesIt != tokens_BytesToNames.end())
10541020
{
1021+
const TokenNames& tokenNames = tokenNamesIt->second;
10551022
const std::string tokStr = get_detok_primary_string(bytesKey, langIdx, options);
10561023
if (prettify || accessibleDetok)
10571024
{
@@ -1069,15 +1036,23 @@ namespace tivars::TypeHandlers
10691036
{
10701037
bool acceptedFallback = false;
10711038

1072-
for (const auto& aliasCandidate : get_detok_alias_candidates(bytesKey, langIdx, tokStr))
1039+
auto try_aliases = [&](const auto& aliases, uint8_t idx)
10731040
{
1074-
if (validate_detok_token(aliasCandidate, currentRawBytes))
1041+
for (const auto& aliasCandidate : aliases[idx])
10751042
{
1076-
accept_detok_token(aliasCandidate, currentRawBytes);
1077-
acceptedFallback = true;
1078-
break;
1043+
if (aliasCandidate != tokStr && validate_detok_token(aliasCandidate, currentRawBytes))
1044+
{
1045+
accept_detok_token(aliasCandidate, currentRawBytes);
1046+
acceptedFallback = true;
1047+
return;
1048+
}
10791049
}
1080-
}
1050+
};
1051+
1052+
try_aliases(tokenNames.variants, langIdx);
1053+
if (!acceptedFallback) try_aliases(tokenNames.accessibles, langIdx);
1054+
if (!acceptedFallback && langIdx != LANG_EN) try_aliases(tokenNames.variants, LANG_EN);
1055+
if (!acceptedFallback && langIdx != LANG_EN) try_aliases(tokenNames.accessibles, LANG_EN);
10811056

10821057
const std::string escapedToken = "\\" + tokStr;
10831058
if (!acceptedFallback && validate_detok_token(escapedToken, currentRawBytes))

tests.cpp

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -586,6 +586,27 @@ int main(int argc, char** argv)
586586
assert(prettifiedIntrinsicBadTokenStderr.str().find("roundtrippable way") == std::string::npos);
587587
}
588588

589+
{
590+
ScopedStderrCapture variantBeforeAccessibleStderr;
591+
const data_t greekPiToken = {0xBB, 0xA7};
592+
const std::string readable = TH_Tokenized::makeStringFromData(greekPiToken, {{"fromRawBytes", 1}});
593+
assert(readable == "");
594+
assert(variantBeforeAccessibleStderr.str().empty());
595+
assert(TH_Tokenized::makeDataFromString(readable) == data_t({0x02, 0x00, 0xBB, 0xA7}));
596+
assert(TH_Tokenized::makeDataFromString("greek_pi") == data_t({0x02, 0x00, 0xBB, 0xA7}));
597+
assert(TH_Tokenized::makeDataFromString("π") == data_t({0x01, 0x00, 0xAC}));
598+
assert(TH_Tokenized::makeStringFromData(greekPiToken, {{"fromRawBytes", 1}, {"accessible", 1}}) == "greek_pi");
599+
}
600+
601+
{
602+
ScopedStderrCapture stringVariantBeforeAccessibleStderr;
603+
const data_t rawString = {0x2A, 0x51, 0xBB, 0xA7, 0x2A};
604+
const std::string readable = TH_Tokenized::makeStringFromData(rawString, {{"fromRawBytes", 1}});
605+
assert(readable == "\"Q|π\"");
606+
assert(stringVariantBeforeAccessibleStderr.str().empty());
607+
assert(TH_Tokenized::makeDataFromString(readable) == data_t({0x05, 0x00, 0x2A, 0x51, 0xBB, 0xA7, 0x2A}));
608+
}
609+
589610
{
590611
struct SingleLetterTokenCase
591612
{

0 commit comments

Comments
 (0)