Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions absl/strings/internal/str_format/arg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ inline bool ConvertStringArg(const wchar_t *v,
if (chars == static_cast<size_t>(-1)) { return false; }
chars_written += chars;
}
// A trailing high surrogate with no following low surrogate leaves only the
// first two bytes of a 4-byte sequence written; reject it instead of emitting
// invalid UTF-8, matching the single-character path in ConvertWCharTImpl.
if (s.saw_high_surrogate) { return false; }
return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink);
}

Expand Down
22 changes: 22 additions & 0 deletions absl/strings/internal/str_format/convert_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,28 @@ TEST_F(FormatConvertTest, StringPrecision) {
EXPECT_EQ("ABC", FormatPack(wformat2, {FormatArgImpl(wp)}));
}

TEST_F(FormatConvertTest, WideStringUnpairedHighSurrogate) {
  UntypedFormatSpecImpl wide_spec("%ls");

  // "AB" followed by a lone UTF-16 high surrogate (0xD800) and nothing after
  // it. Converting this would stop halfway through a 4-byte UTF-8 sequence, so
  // the whole format is expected to fail, the same way "%lc" does for a single
  // character; the failure shows up here as an empty result.
  std::wstring dangling(L"AB");
  dangling += static_cast<wchar_t>(0xD800);
  EXPECT_EQ("", FormatPack(wide_spec, {FormatArgImpl(dangling)}));

  // Well-formed input must still convert. U+1F600 is supplied in whichever
  // form fits this platform's wchar_t: one code unit when wchar_t is wider
  // than 16 bits, otherwise the surrogate pair 0xD83D 0xDE00. Both forms must
  // produce the same four UTF-8 bytes.
  constexpr bool kWCharIsAtMost16Bits = sizeof(wchar_t) * CHAR_BIT <= 16;
  std::wstring emoji;
  if (!kWCharIsAtMost16Bits) {
    emoji.push_back(static_cast<wchar_t>(0x1F600));
  } else {
    emoji.push_back(static_cast<wchar_t>(0xD83D));
    emoji.push_back(static_cast<wchar_t>(0xDE00));
  }
  EXPECT_EQ("\xF0\x9F\x98\x80", FormatPack(wide_spec, {FormatArgImpl(emoji)}));
}

// Pointer formatting is implementation-defined. This checks that the argument
// can be matched to `ptr`.
MATCHER_P(MatchesPointerString, ptr, "") {
Expand Down