Skip to content
Open
45 changes: 42 additions & 3 deletions packages/core/src/logs/internal.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type { Attributes } from '../attributes';
import { serializeAttributes } from '../attributes';
import { getGlobalSingleton } from '../carrier';
import type { Client } from '../client';
Expand Down Expand Up @@ -161,14 +162,14 @@
const serializedLog: SerializedLog = {
timestamp,
level,
body: message,
body: _INTERNAL_removeLoneSurrogates(String(message)),
trace_id: traceContext?.trace_id,
severity_number: severityNumber ?? SEVERITY_TEXT_TO_SEVERITY_NUMBER[level],
attributes: {
attributes: sanitizeLogAttributes({
...serializeAttributes(scopeAttributes),
...serializeAttributes(logAttributes, true),
[sequenceAttr.key]: sequenceAttr.value,
},
}),
};

captureSerializedLog(client, serializedLog);
Expand Down Expand Up @@ -220,3 +221,41 @@
// The reference to the Client <> LogBuffer map is stored on the carrier to ensure it's always the same
return getGlobalSingleton('clientToLogBufferMap', () => new WeakMap<Client, Array<SerializedLog>>());
}

/**
* Sanitizes serialized log attributes by replacing lone surrogates in both
* keys and string values with U+FFFD.
*/
function sanitizeLogAttributes(attributes: Attributes): Attributes {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

m: just thinking out loud here but wdyt about doing this within serializeAttributes so that this applies to all telemetry attributes? I'm assuming the same limitation occurs for metrics and span attributes? Ideally we can keep this code path as performant as possible, so if we do it in the same loop instead of having to loop over the object again, it should be a bit faster.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's good feedback @Lms24 👍 I hesitated a bit to make a wider change in the scope of this PR to avoid possible side effects and limited the scope just to logs which I guess are more vulnerable to free-form user/developer text input. Looking at it again I think it makes sense to do this in serializeAttributes.
I'll be happy to revise to cover everything if it makes more sense from the Js maintainer's perspective 🙇

const sanitized: Attributes = {};
for (const [key, attr] of Object.entries(attributes)) {
const sanitizedKey = _INTERNAL_removeLoneSurrogates(key);
if (attr.type === 'string') {
sanitized[sanitizedKey] = { ...attr, value: _INTERNAL_removeLoneSurrogates(attr.value as string) };

Check failure on line 234 in packages/core/src/logs/internal.ts

View workflow job for this annotation

GitHub Actions / Lint

typescript-eslint(no-unnecessary-type-assertion)

This assertion is unnecessary since it does not change the type of the expression.
} else {
sanitized[sanitizedKey] = attr;
}
}
return sanitized;
}

/**
* Replaces unpaired UTF-16 surrogates with U+FFFD (replacement character).
*
* Lone surrogates (U+D800–U+DFFF not part of a valid pair) cause `serde_json`
* on the server to reject the entire log/span batch when they appear in
* JSON-escaped form (e.g. `\uD800`). Replacing them at the SDK level ensures
* only the offending characters are lost instead of the whole payload.
*
* Uses the native `String.prototype.toWellFormed()` when available
* (Node 20+, Chrome 111+, Safari 15.4+, Firefox 119+, Hermes).
* On older runtimes without native support, returns the string as-is.
*/
export function _INTERNAL_removeLoneSurrogates(str: string): string {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm given this is quite a lot of code and hence a bundle size impact, have we considered alternatives?

  • can we remove prior to serde parsing in Relay?
  • would it be the end of the world if we only use the toWellFormed native method but avoid the fallback? Not sure if ReactNative supports this already.

If both options are a "No", that's fine, too. Just want to make sure we have our bases covered.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we remove prior to serde parsing in Relay?

Yes, handling this in Relay makes a lot more sense. The SDK side fix is probably more of an extra check if a relay fix is deployed. I'll check what would this involve.

would it be the end of the world if we only use the toWellFormed native method but avoid the fallback? Not sure if ReactNative supports this already.

I think it is supported. I'll further look into this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Lms24 This is now ready for another pass 🤞
Also opened a Relay PR

// isWellFormed/toWellFormed are ES2024 (not in our TS lib target), so we feature-detect via Object().
const strObj: Record<string, unknown> = Object(str);
if (typeof strObj['isWellFormed'] === 'function') {
return (strObj['isWellFormed'] as () => boolean)() ? str : (strObj['toWellFormed'] as () => string)();
}
return str;
}
161 changes: 160 additions & 1 deletion packages/core/test/lib/logs/internal.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { fmt, Scope } from '../../../src';
import { _INTERNAL_captureLog, _INTERNAL_flushLogsBuffer, _INTERNAL_getLogBuffer } from '../../../src/logs/internal';
import {
_INTERNAL_captureLog,
_INTERNAL_flushLogsBuffer,
_INTERNAL_getLogBuffer,
_INTERNAL_removeLoneSurrogates,
} from '../../../src/logs/internal';
import type { Log } from '../../../src/types-hoist/log';
import * as loggerModule from '../../../src/utils/debug-logger';
import * as timeModule from '../../../src/utils/time';
Expand Down Expand Up @@ -1261,4 +1266,158 @@ describe('_INTERNAL_captureLog', () => {
expect(buffer2?.[0]?.attributes?.['sentry.timestamp.sequence']).toEqual({ value: 0, type: 'integer' });
});
});

// toWellFormed() is only available in Node 20+, Chrome 111+, Safari 15.4+, Firefox 119+, Hermes
const hasToWellFormed = typeof ''.isWellFormed === 'function';

describe.runIf(hasToWellFormed)('lone surrogate sanitization', () => {
it('sanitizes lone surrogates in log message body', () => {
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
const client = new TestClient(options);
const scope = new Scope();
scope.setClient(client);

_INTERNAL_captureLog({ level: 'error', message: 'bad surrogate \uD800 here' }, scope);

const logBuffer = _INTERNAL_getLogBuffer(client);
expect(logBuffer?.[0]?.body).toBe('bad surrogate \uFFFD here');
});

it('sanitizes lone surrogates in parameterized (fmt) log message body', () => {
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
const client = new TestClient(options);
const scope = new Scope();
scope.setClient(client);

const badValue = 'bad\uD800value';
_INTERNAL_captureLog({ level: 'error', message: fmt`parameterized ${badValue} message` }, scope);

const logBuffer = _INTERNAL_getLogBuffer(client);
expect(logBuffer?.[0]?.body).toBe('parameterized bad\uFFFDvalue message');
});

it('sanitizes lone surrogates in log attribute values', () => {
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
const client = new TestClient(options);
const scope = new Scope();
scope.setClient(client);

_INTERNAL_captureLog(
{
level: 'error',
message: 'test',
attributes: { bad: '{"a":"\uD800"}' },
},
scope,
);

const logBuffer = _INTERNAL_getLogBuffer(client);
expect(logBuffer?.[0]?.attributes?.['bad']).toEqual({
value: '{"a":"\uFFFD"}',
type: 'string',
});
});

it('sanitizes lone surrogates in log attribute keys', () => {
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
const client = new TestClient(options);
const scope = new Scope();
scope.setClient(client);

_INTERNAL_captureLog(
{
level: 'error',
message: 'test',
attributes: { ['bad\uD800key']: 'value' },
},
scope,
);

const logBuffer = _INTERNAL_getLogBuffer(client);
expect(logBuffer?.[0]?.attributes?.['bad\uFFFDkey']).toEqual({
value: 'value',
type: 'string',
});
});

it('preserves valid emoji in log messages and attributes', () => {
const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true });
const client = new TestClient(options);
const scope = new Scope();
scope.setClient(client);

_INTERNAL_captureLog(
{
level: 'info',
message: 'hello 😀 world',
attributes: { emoji: '🎉 party' },
},
scope,
);

const logBuffer = _INTERNAL_getLogBuffer(client);
expect(logBuffer?.[0]?.body).toBe('hello 😀 world');
expect(logBuffer?.[0]?.attributes?.['emoji']).toEqual({
value: '🎉 party',
type: 'string',
});
});
});
});

// toWellFormed() is only available in Node 20+, Chrome 111+, Safari 15.4+, Firefox 119+, Hermes
const hasToWellFormedGlobal = typeof ''.isWellFormed === 'function';

describe('_INTERNAL_removeLoneSurrogates', () => {
it('returns the same string when there are no surrogates', () => {
expect(_INTERNAL_removeLoneSurrogates('hello world')).toBe('hello world');
});

it('returns the same string for empty input', () => {
expect(_INTERNAL_removeLoneSurrogates('')).toBe('');
});

it('preserves valid surrogate pairs (emoji)', () => {
expect(_INTERNAL_removeLoneSurrogates('hello 😀 world')).toBe('hello 😀 world');
});

it.runIf(hasToWellFormedGlobal)('replaces a lone high surrogate with U+FFFD', () => {
expect(_INTERNAL_removeLoneSurrogates('before\uD800after')).toBe('before\uFFFDafter');
});

it.runIf(hasToWellFormedGlobal)('replaces a lone low surrogate with U+FFFD', () => {
expect(_INTERNAL_removeLoneSurrogates('before\uDC00after')).toBe('before\uFFFDafter');
});

it.runIf(hasToWellFormedGlobal)('replaces lone high surrogate at end of string', () => {
expect(_INTERNAL_removeLoneSurrogates('end\uD800')).toBe('end\uFFFD');
});

it.runIf(hasToWellFormedGlobal)('replaces lone low surrogate at start of string', () => {
expect(_INTERNAL_removeLoneSurrogates('\uDC00start')).toBe('\uFFFDstart');
});

it.runIf(hasToWellFormedGlobal)('replaces multiple lone surrogates', () => {
expect(_INTERNAL_removeLoneSurrogates('\uD800\uD801\uDC00')).toBe('\uFFFD\uD801\uDC00');
});

it.runIf(hasToWellFormedGlobal)('handles two consecutive lone high surrogates', () => {
expect(_INTERNAL_removeLoneSurrogates('\uD800\uD800')).toBe('\uFFFD\uFFFD');
});

it.runIf(hasToWellFormedGlobal)('handles mixed valid pairs and lone surrogates', () => {
expect(_INTERNAL_removeLoneSurrogates('\uD83D\uDE00\uD800')).toBe('😀\uFFFD');
});

it.runIf(hasToWellFormedGlobal)('handles the exact reproduction case from issue #5186', () => {
const badValue = '{"a":"\uD800"}';
const result = _INTERNAL_removeLoneSurrogates(badValue);
expect(result).toBe('{"a":"\uFFFD"}');
expect(() => JSON.parse(result)).not.toThrow();
});

it('returns the string as-is when toWellFormed is not available', () => {
// Verify the function doesn't throw regardless of runtime support
expect(_INTERNAL_removeLoneSurrogates('normal string')).toBe('normal string');
});
});
Loading