Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ awgs
azurewebsites
Baz
bcp
BEBOM
BEFACEF
bfd
BFirst
Expand Down Expand Up @@ -268,6 +269,7 @@ kool
ktf
LCID
learnxinyminutes
LEBOM
lhs
LIBYAML
liv
Expand Down
26 changes: 19 additions & 7 deletions src/AppInstallerSharedLib/YamlWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,23 +343,35 @@ namespace AppInstaller::YAML::Wrapper

void Parser::PrepareInput()
{
constexpr char c_utf16BOM[2] = { static_cast<char>(0xFF), static_cast<char>(0xFE) };
constexpr char c_utf16LEBOM[2] = { static_cast<char>(0xFF), static_cast<char>(0xFE) };
constexpr char c_utf16BEBOM[2] = { static_cast<char>(0xFE), static_cast<char>(0xFF) };
constexpr char c_utf8BOM[3] = { static_cast<char>(0xEF), static_cast<char>(0xBB), static_cast<char>(0xBF) };

// If input has a BOM, we do not pass it on through.
// Instead, we remove it to prevent errors with checking for comments within the input document.

// Check for UTF-16 BOMs
if (m_input.size() >= 2 &&
((m_input[0] == c_utf16BOM[0] && m_input[1] == c_utf16BOM[1]) || (m_input[0] == c_utf16BOM[1] && m_input[1] == c_utf16BOM[0])))
if (m_input.size() >= sizeof(c_utf16LEBOM) && std::memcmp(m_input.data(), c_utf16LEBOM, sizeof(c_utf16LEBOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-16 LE BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF16LE_ENCODING); // Without the BOM, the encoding must be explicitly set
m_input.erase(0, sizeof(c_utf16LEBOM)); // Remove the BOM from the input
return;
}

if (m_input.size() >= sizeof(c_utf16BEBOM) && std::memcmp(m_input.data(), c_utf16BEBOM, sizeof(c_utf16BEBOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-16 BOM");
AICLI_LOG(YAML, Verbose, << "Found UTF-16 BE BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF16BE_ENCODING); // Without the BOM, the encoding must be explicitly set
m_input.erase(0, sizeof(c_utf16BEBOM)); // Remove the BOM from the input
return;
}

// Check for UTF-8 BOM
if (m_input.size() >= 3 &&
(m_input[0] == c_utf8BOM[0] && m_input[1] == c_utf8BOM[1] && m_input[2] == c_utf8BOM[2]))
if (m_input.size() >= sizeof(c_utf8BOM) && std::memcmp(m_input.data(), c_utf8BOM, sizeof(c_utf8BOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-8 BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF8_ENCODING); // Without the BOM, the encoding must be explicitly set
m_input.erase(0, sizeof(c_utf8BOM)); // Remove the BOM from the input
return;
}

Expand Down