Skip to content

Commit 8e38c23

Browse files
antonis and claude committed
fix(core): Sanitize lone surrogates in log body and attributes
Lone UTF-16 surrogates (U+D800–U+DFFF) in log message bodies or attribute strings cause serde_json on the server to reject the entire log batch. This replaces unpaired surrogates with U+FFFD at capture time, scoped to the logs path only. Fixes getsentry/sentry-react-native#5186 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 45d7b06 commit 8e38c23

File tree

2 files changed

+216
-4
lines changed

2 files changed

+216
-4
lines changed

packages/core/src/logs/internal.ts

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import type { Attributes } from '../attributes';
12
import { serializeAttributes } from '../attributes';
23
import { getGlobalSingleton } from '../carrier';
34
import type { Client } from '../client';
@@ -161,14 +162,14 @@ export function _INTERNAL_captureLog(
161162
const serializedLog: SerializedLog = {
162163
timestamp,
163164
level,
164-
body: message,
165+
body: typeof message === 'string' ? _INTERNAL_removeLoneSurrogates(message) : message,
165166
trace_id: traceContext?.trace_id,
166167
severity_number: severityNumber ?? SEVERITY_TEXT_TO_SEVERITY_NUMBER[level],
167-
attributes: {
168+
attributes: sanitizeLogAttributes({
168169
...serializeAttributes(scopeAttributes),
169170
...serializeAttributes(logAttributes, true),
170171
[sequenceAttr.key]: sequenceAttr.value,
171-
},
172+
}),
172173
};
173174

174175
captureSerializedLog(client, serializedLog);
@@ -220,3 +221,79 @@ function _getBufferMap(): WeakMap<Client, Array<SerializedLog>> {
220221
// The reference to the Client <> LogBuffer map is stored on the carrier to ensure it's always the same
221222
return getGlobalSingleton('clientToLogBufferMap', () => new WeakMap<Client, Array<SerializedLog>>());
222223
}
224+
225+
/**
 * Builds a copy of the given serialized log attributes in which lone
 * surrogates are replaced with U+FFFD, both in every attribute key and in
 * the value of every attribute whose `type` is `'string'`.
 *
 * Attributes of any other type are carried over by reference, unchanged.
 *
 * @param attributes - The serialized attributes to sanitize.
 * @returns A new attributes object; the input object is not mutated.
 */
function sanitizeLogAttributes(attributes: Attributes): Attributes {
  const result: Attributes = {};
  for (const [rawKey, entry] of Object.entries(attributes)) {
    const cleanKey = _INTERNAL_removeLoneSurrogates(rawKey);
    // Only string-typed attributes carry text that needs sanitizing.
    result[cleanKey] =
      entry.type === 'string' ? { ...entry, value: _INTERNAL_removeLoneSurrogates(entry.value as string) } : entry;
  }
  return result;
}
241+
242+
/**
 * Replaces unpaired UTF-16 surrogates with U+FFFD (replacement character).
 *
 * Lone surrogates (U+D800–U+DFFF not part of a valid pair) cause `serde_json`
 * on the server to reject the entire log/span batch when they appear in
 * JSON-escaped form (e.g. `\uD800`). Replacing them at the SDK level ensures
 * only the offending characters are lost instead of the whole payload.
 *
 * @param str - The string to sanitize.
 * @returns `str` unchanged when it is already well-formed; otherwise a copy in
 *   which every lone surrogate code unit has been replaced by U+FFFD. Valid
 *   surrogate pairs are always preserved.
 */
export function _INTERNAL_removeLoneSurrogates(str: string): string {
  // Use the native implementation when available (Node 20+, Safari 16.4+, Chrome 111+).
  // These are ES2024 additions that may be absent from the configured `lib`, so they are
  // described structurally here. Both are checked because both are called.
  const native = str as string & { isWellFormed?: () => boolean; toWellFormed?: () => string };
  if (typeof native.isWellFormed === 'function' && typeof native.toWellFormed === 'function') {
    return native.isWellFormed() ? str : native.toWellFormed();
  }

  // Fallback: single pass. Nothing is allocated until the first lone surrogate is found,
  // and untouched runs are copied with `slice` rather than one code unit at a time.
  let result = '';
  let copiedUpTo = 0;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code < 0xd800 || code > 0xdfff) {
      continue;
    }

    if (code <= 0xdbff && i + 1 < str.length) {
      const next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Valid surrogate pair – skip the low surrogate
        i++;
        continue;
      }
    }

    // Lone surrogate: flush the clean run before it, then emit the replacement character.
    result += `${str.slice(copiedUpTo, i)}\uFFFD`;
    copiedUpTo = i + 1;
  }

  // `copiedUpTo` only moves once a lone surrogate has been replaced.
  return copiedUpTo === 0 ? str : result + str.slice(copiedUpTo);
}

packages/core/test/lib/logs/internal.test.ts

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { beforeEach, describe, expect, it, vi } from 'vitest';
22
import { fmt, Scope } from '../../../src';
3-
import { _INTERNAL_captureLog, _INTERNAL_flushLogsBuffer, _INTERNAL_getLogBuffer } from '../../../src/logs/internal';
3+
import {
4+
_INTERNAL_captureLog,
5+
_INTERNAL_flushLogsBuffer,
6+
_INTERNAL_getLogBuffer,
7+
_INTERNAL_removeLoneSurrogates,
8+
} from '../../../src/logs/internal';
49
import type { Log } from '../../../src/types-hoist/log';
510
import * as loggerModule from '../../../src/utils/debug-logger';
611
import * as timeModule from '../../../src/utils/time';
@@ -1261,4 +1266,134 @@ describe('_INTERNAL_captureLog', () => {
12611266
expect(buffer2?.[0]?.attributes?.['sentry.timestamp.sequence']).toEqual({ value: 0, type: 'integer' });
12621267
});
12631268
});
1269+
1270+
describe('lone surrogate sanitization', () => {
1271+
it('sanitizes lone surrogates in log message body', () => {
1272+
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
1273+
const client = new TestClient(options);
1274+
const scope = new Scope();
1275+
scope.setClient(client);
1276+
1277+
_INTERNAL_captureLog({ level: 'error', message: 'bad surrogate \uD800 here' }, scope);
1278+
1279+
const logBuffer = _INTERNAL_getLogBuffer(client);
1280+
expect(logBuffer?.[0]?.body).toBe('bad surrogate \uFFFD here');
1281+
});
1282+
1283+
it('sanitizes lone surrogates in log attribute values', () => {
1284+
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
1285+
const client = new TestClient(options);
1286+
const scope = new Scope();
1287+
scope.setClient(client);
1288+
1289+
_INTERNAL_captureLog(
1290+
{
1291+
level: 'error',
1292+
message: 'test',
1293+
attributes: { bad: '{"a":"\uD800"}' },
1294+
},
1295+
scope,
1296+
);
1297+
1298+
const logBuffer = _INTERNAL_getLogBuffer(client);
1299+
expect(logBuffer?.[0]?.attributes?.['bad']).toEqual({
1300+
value: '{"a":"\uFFFD"}',
1301+
type: 'string',
1302+
});
1303+
});
1304+
1305+
it('sanitizes lone surrogates in log attribute keys', () => {
1306+
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
1307+
const client = new TestClient(options);
1308+
const scope = new Scope();
1309+
scope.setClient(client);
1310+
1311+
_INTERNAL_captureLog(
1312+
{
1313+
level: 'error',
1314+
message: 'test',
1315+
attributes: { ['bad\uD800key']: 'value' },
1316+
},
1317+
scope,
1318+
);
1319+
1320+
const logBuffer = _INTERNAL_getLogBuffer(client);
1321+
expect(logBuffer?.[0]?.attributes?.['bad\uFFFDkey']).toEqual({
1322+
value: 'value',
1323+
type: 'string',
1324+
});
1325+
});
1326+
1327+
it('preserves valid emoji in log messages and attributes', () => {
1328+
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
1329+
const client = new TestClient(options);
1330+
const scope = new Scope();
1331+
scope.setClient(client);
1332+
1333+
_INTERNAL_captureLog(
1334+
{
1335+
level: 'info',
1336+
message: 'hello 😀 world',
1337+
attributes: { emoji: '🎉 party' },
1338+
},
1339+
scope,
1340+
);
1341+
1342+
const logBuffer = _INTERNAL_getLogBuffer(client);
1343+
expect(logBuffer?.[0]?.body).toBe('hello 😀 world');
1344+
expect(logBuffer?.[0]?.attributes?.['emoji']).toEqual({
1345+
value: '🎉 party',
1346+
type: 'string',
1347+
});
1348+
});
1349+
});
1350+
});
1351+
1352+
describe('_INTERNAL_removeLoneSurrogates', () => {
1353+
it('returns the same string when there are no surrogates', () => {
1354+
expect(_INTERNAL_removeLoneSurrogates('hello world')).toBe('hello world');
1355+
});
1356+
1357+
it('returns the same string for empty input', () => {
1358+
expect(_INTERNAL_removeLoneSurrogates('')).toBe('');
1359+
});
1360+
1361+
it('preserves valid surrogate pairs (emoji)', () => {
1362+
expect(_INTERNAL_removeLoneSurrogates('hello 😀 world')).toBe('hello 😀 world');
1363+
});
1364+
1365+
it('replaces a lone high surrogate with U+FFFD', () => {
1366+
expect(_INTERNAL_removeLoneSurrogates('before\uD800after')).toBe('before\uFFFDafter');
1367+
});
1368+
1369+
it('replaces a lone low surrogate with U+FFFD', () => {
1370+
expect(_INTERNAL_removeLoneSurrogates('before\uDC00after')).toBe('before\uFFFDafter');
1371+
});
1372+
1373+
it('replaces lone high surrogate at end of string', () => {
1374+
expect(_INTERNAL_removeLoneSurrogates('end\uD800')).toBe('end\uFFFD');
1375+
});
1376+
1377+
it('replaces lone low surrogate at start of string', () => {
1378+
expect(_INTERNAL_removeLoneSurrogates('\uDC00start')).toBe('\uFFFDstart');
1379+
});
1380+
1381+
it('replaces multiple lone surrogates', () => {
1382+
expect(_INTERNAL_removeLoneSurrogates('\uD800\uD801\uDC00')).toBe('\uFFFD\uD801\uDC00');
1383+
});
1384+
1385+
it('handles two consecutive lone high surrogates', () => {
1386+
expect(_INTERNAL_removeLoneSurrogates('\uD800\uD800')).toBe('\uFFFD\uFFFD');
1387+
});
1388+
1389+
it('handles mixed valid pairs and lone surrogates', () => {
1390+
expect(_INTERNAL_removeLoneSurrogates('\uD83D\uDE00\uD800')).toBe('😀\uFFFD');
1391+
});
1392+
1393+
it('handles the exact reproduction case from issue #5186', () => {
1394+
const badValue = '{"a":"\uD800"}';
1395+
const result = _INTERNAL_removeLoneSurrogates(badValue);
1396+
expect(result).toBe('{"a":"\uFFFD"}');
1397+
expect(() => JSON.parse(result)).not.toThrow();
1398+
});
12641399
});

0 commit comments

Comments
 (0)