Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ awgs
azurewebsites
Baz
bcp
BEBOM
BEFACEF
bfd
BFirst
Expand Down Expand Up @@ -268,6 +269,7 @@ kool
ktf
LCID
learnxinyminutes
LEBOM
lhs
LIBYAML
liv
Expand Down
26 changes: 19 additions & 7 deletions src/AppInstallerSharedLib/YamlWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,23 +343,35 @@ namespace AppInstaller::YAML::Wrapper

void Parser::PrepareInput()
{
constexpr char c_utf16BOM[2] = { static_cast<char>(0xFF), static_cast<char>(0xFE) };
constexpr char c_utf16LEBOM[2] = { static_cast<char>(0xFF), static_cast<char>(0xFE) };
constexpr char c_utf16BEBOM[2] = { static_cast<char>(0xFE), static_cast<char>(0xFF) };
constexpr char c_utf8BOM[3] = { static_cast<char>(0xEF), static_cast<char>(0xBB), static_cast<char>(0xBF) };

// If input has a BOM, we want to pass it on through.
// If input has a BOM, we want to remove it to prevent errors with checking for comments within the input document.

// Check for UTF-16 BOMs
if (m_input.size() >= 2 &&
((m_input[0] == c_utf16BOM[0] && m_input[1] == c_utf16BOM[1]) || (m_input[0] == c_utf16BOM[1] && m_input[1] == c_utf16BOM[0])))
if (m_input.size() >= sizeof(c_utf16LEBOM) && std::memcmp(m_input.data(), c_utf16LEBOM, sizeof(c_utf16LEBOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-16 BOM");
AICLI_LOG(YAML, Verbose, << "Found UTF-16 LE BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF16LE_ENCODING); // Without the BOM, the encoding must be explicitly set
Comment thread
Trenly marked this conversation as resolved.
Outdated
m_input.erase(0, sizeof(c_utf16LEBOM)); // Remove the BOM from the input
return;
}

if (m_input.size() >= sizeof(c_utf16BEBOM) && std::memcmp(m_input.data(), c_utf16BEBOM, sizeof(c_utf16BEBOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-16 BE BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF16BE_ENCODING); // Without the BOM, the encoding must be explicitly set
m_input.erase(0, sizeof(c_utf16BEBOM)); // Remove the BOM from the input
return;
}

// Check for UTF-8 BOM
if (m_input.size() >= 3 &&
(m_input[0] == c_utf8BOM[0] && m_input[1] == c_utf8BOM[1] && m_input[2] == c_utf8BOM[2]))
if (m_input.size() >= sizeof(c_utf8BOM) && std::memcmp(m_input.data(), c_utf8BOM, sizeof(c_utf8BOM)) == 0)
{
AICLI_LOG(YAML, Verbose, << "Found UTF-8 BOM");
yaml_parser_set_encoding(&m_parser, YAML_UTF8_ENCODING); // Without the BOM, the encoding must be explicitly set
m_input.erase(0, sizeof(c_utf8BOM)); // Remove the BOM from the input
return;
}

Expand Down