Skip to content

Commit 1f0d3f3

Browse files
committed
review-comment: Correctly handle utf-8 truncation within multi-byte boundaries
#7585 (comment)
1 parent dfefb4e commit 1f0d3f3

2 files changed

Lines changed: 53 additions & 2 deletions

File tree

Sources/Swift/Integrations/SessionReplay/SentryReplayNetworkDetails.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,14 @@ enum NetworkBodyWarning: String {
184184
}
185185

186186
private static func parseText(_ data: Data, encoding: String.Encoding = .utf8, warnings: inout [NetworkBodyWarning]) -> Body {
187-
if let string = String(data: data, encoding: encoding) ?? String(data: data, encoding: .utf8) {
188-
return Body(content: string, warnings: warnings)
187+
// Truncation in the middle of a multi-byte character (e.g. UTF-8 CJK, emoji) makes
188+
// String(data:encoding:) return nil. Try dropping up to 3 trailing bytes
189+
// to find a valid boundary before giving up.
190+
for drop in 0...min(3, data.count) {
191+
let slice = drop == 0 ? data : data.dropLast(drop)
192+
if let string = String(data: slice, encoding: encoding) ?? String(data: slice, encoding: .utf8) {
193+
return Body(content: string, warnings: warnings)
194+
}
189195
}
190196
warnings.append(.bodyParseError)
191197
return Body(content: "", warnings: warnings)

Tests/SentryTests/Networking/SentryReplayNetworkDetailsBodyTests.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,51 @@ class SentryReplayNetworkDetailsBodyTests: XCTestCase {
295295
XCTAssertEqual(dict["name"] as? String, "Jane Doe")
296296
}
297297

298+
// MARK: - Multi-byte Truncation
299+
300+
func testInit_withTruncatedMultiByteUTF8_shouldRecoverValidPrefix() throws {
301+
// UTF-8 byte widths:
302+
// 3-byte chars: CJK (e.g. "你" = E4 BD A0)
303+
// 4-byte chars: emoji (e.g. "😀" = F0 9F 98 80)
304+
// 2-byte chars: accented Latin (e.g. "é" = C3 A9)
305+
306+
// -- dropLast(2): 3-byte char split after 2 bytes --
307+
// "你好" = 6 bytes; prefix(5) cuts second char after 2 of 3 bytes
308+
let cjk = "你好".data(using: .utf8)!
309+
XCTAssertEqual(cjk.count, 6)
310+
let body1 = try XCTUnwrap(Body(data: cjk.prefix(5), contentType: "text/plain; charset=utf-8"))
311+
XCTAssertEqual(body1.serialize()["body"] as? String, "你")
312+
313+
// -- dropLast(1): 3-byte char split after 1 byte --
314+
// prefix(4) cuts second char after 1 of 3 bytes
315+
let body2 = try XCTUnwrap(Body(data: cjk.prefix(4), contentType: "text/plain; charset=utf-8"))
316+
XCTAssertEqual(body2.serialize()["body"] as? String, "你")
317+
318+
// -- dropLast(1): 4-byte emoji split after 1 byte --
319+
// "A😀" = 1 + 4 = 5 bytes; prefix(2) cuts emoji after 1 of 4 bytes
320+
let emoji = "A😀".data(using: .utf8)!
321+
XCTAssertEqual(emoji.count, 5)
322+
let body3 = try XCTUnwrap(Body(data: emoji.prefix(2), contentType: "text/plain; charset=utf-8"))
323+
XCTAssertEqual(body3.serialize()["body"] as? String, "A")
324+
325+
// -- no truncation needed: clean boundary --
326+
// prefix(3) is exactly "你", no bytes to drop
327+
let body4 = try XCTUnwrap(Body(data: cjk.prefix(3), contentType: "text/plain; charset=utf-8"))
328+
XCTAssertEqual(body4.serialize()["body"] as? String, "你")
329+
330+
// -- pure ASCII: never affected --
331+
let ascii = "hello".data(using: .utf8)!
332+
let body5 = try XCTUnwrap(Body(data: ascii.prefix(3), contentType: "text/plain; charset=utf-8"))
333+
XCTAssertEqual(body5.serialize()["body"] as? String, "hel")
334+
335+
// -- 2-byte char split after 1 byte --
336+
// "Aé" = 1 + 2 = 3 bytes; prefix(2) cuts "é" after 1 of 2 bytes
337+
let accented = "Aé".data(using: .utf8)!
338+
XCTAssertEqual(accented.count, 3)
339+
let body6 = try XCTUnwrap(Body(data: accented.prefix(2), contentType: "text/plain; charset=utf-8"))
340+
XCTAssertEqual(body6.serialize()["body"] as? String, "A")
341+
}
342+
298343
// MARK: - Serialization Tests
299344

300345
func testSerialize_withStringBody_shouldReturnDictionary() {

0 commit comments

Comments
 (0)