Skip to content

Commit 2a7a6a4

Browse files
committed
fix(core): Sanitize data URLs in http.client spans
1 parent 2c041b3 commit 2a7a6a4

8 files changed

Lines changed: 126 additions & 21 deletions

File tree

packages/browser/src/integrations/globalhandlers.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
getLocationHref,
1010
isPrimitive,
1111
isString,
12+
stripDataUrlContent,
1213
UNKNOWN_FUNCTION,
1314
} from '@sentry/core';
1415
import type { BrowserClient } from '../client';
@@ -208,14 +209,7 @@ function getFilenameFromUrl(url: string | undefined): string | undefined {
208209
return undefined;
209210
}
210211

211-
// stack frame urls can be data urls, for example when initializing a Worker with a base64 encoded script
212-
// in this case we just show the data prefix and mime type to avoid too long raw data urls
213-
if (url.startsWith('data:')) {
214-
const match = url.match(/^data:([^;]+)/);
215-
const mimeType = match ? match[1] : 'text/javascript';
216-
const isBase64 = url.includes('base64,');
217-
return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
218-
}
219-
220-
return url; // it's fine to not truncate it as it's not put in a regex (https://codeql.github.com/codeql-query-help/javascript/js-polynomial-redos)
212+
// Strip data URL content to avoid long base64 strings in stack frames
213+
// (e.g. when initializing a Worker with a base64 encoded script)
214+
return stripDataUrlContent(url);
221215
}

packages/browser/src/tracing/request.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
spanToJSON,
2424
startInactiveSpan,
2525
stringMatchesSomePattern,
26+
stripDataUrlContent,
2627
stripUrlQueryAndFragment,
2728
} from '@sentry/core';
2829
import type { XhrHint } from '@sentry-internal/browser-utils';
@@ -199,7 +200,7 @@ export function instrumentOutgoingRequests(client: Client, _options?: Partial<Re
199200
const fullUrl = getFullURL(handlerData.fetchData.url);
200201
const host = fullUrl ? parseUrl(fullUrl).host : undefined;
201202
createdSpan.setAttributes({
202-
'http.url': fullUrl,
203+
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
203204
'server.address': host,
204205
});
205206

@@ -355,7 +356,7 @@ function xhrCallback(
355356
const fullUrl = getFullURL(url);
356357
const parsedUrl = fullUrl ? parseUrl(fullUrl) : parseUrl(url);
357358

358-
const urlForSpanName = stripUrlQueryAndFragment(url);
359+
const urlForSpanName = stripDataUrlContent(stripUrlQueryAndFragment(url));
359360

360361
const hasParent = !!getActiveSpan();
361362

@@ -364,10 +365,10 @@ function xhrCallback(
364365
? startInactiveSpan({
365366
name: `${method} ${urlForSpanName}`,
366367
attributes: {
367-
url,
368+
url: stripDataUrlContent(url),
368369
type: 'xhr',
369370
'http.method': method,
370-
'http.url': fullUrl,
371+
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
371372
'server.address': parsedUrl?.host,
372373
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.browser',
373374
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',

packages/core/src/fetch.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ import { hasSpansEnabled } from './utils/hasSpansEnabled';
1111
import { isInstanceOf, isRequest } from './utils/is';
1212
import { getActiveSpan } from './utils/spanUtils';
1313
import { getTraceData } from './utils/traceData';
14-
import { getSanitizedUrlStringFromUrlObject, isURLObjectRelative, parseStringToURLObject } from './utils/url';
14+
import {
15+
getSanitizedUrlStringFromUrlObject,
16+
isURLObjectRelative,
17+
parseStringToURLObject,
18+
stripDataUrlContent,
19+
} from './utils/url';
1520

1621
type PolymorphicRequestHeaders =
1722
| Record<string, string | undefined>
@@ -318,8 +323,9 @@ function getSpanStartOptions(
318323
spanOrigin: SpanOrigin,
319324
): Parameters<typeof startInactiveSpan>[0] {
320325
const parsedUrl = parseStringToURLObject(url);
326+
const sanitizedUrl = parsedUrl ? stripDataUrlContent(getSanitizedUrlStringFromUrlObject(parsedUrl)) : undefined;
321327
return {
322-
name: parsedUrl ? `${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method,
328+
name: sanitizedUrl ? `${method} ${sanitizedUrl}` : method,
323329
attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin),
324330
};
325331
}
@@ -331,15 +337,15 @@ function getFetchSpanAttributes(
331337
spanOrigin: SpanOrigin,
332338
): SpanAttributes {
333339
const attributes: SpanAttributes = {
334-
url,
340+
url: stripDataUrlContent(url),
335341
type: 'fetch',
336342
'http.method': method,
337343
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
338344
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
339345
};
340346
if (parsedUrl) {
341347
if (!isURLObjectRelative(parsedUrl)) {
342-
attributes['http.url'] = parsedUrl.href;
348+
attributes['http.url'] = stripDataUrlContent(parsedUrl.href);
343349
attributes['server.address'] = parsedUrl.host;
344350
}
345351
if (parsedUrl.search) {

packages/core/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ export {
313313
getHttpSpanDetailsFromUrlObject,
314314
isURLObjectRelative,
315315
getSanitizedUrlStringFromUrlObject,
316+
stripDataUrlContent,
316317
} from './utils/url';
317318
export {
318319
eventFromMessage,

packages/core/src/utils/url.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,26 @@ export function getSanitizedUrlString(url: PartialURL): string {
263263

264264
return `${protocol ? `${protocol}://` : ''}${filteredHost}${path}`;
265265
}
266+
267+
/**
 * Strips the content from a data URL, returning a placeholder with the MIME type.
 *
 * Data URLs can be very long (e.g. base64 encoded scripts for Web Workers),
 * with little valuable information, often leading to envelopes getting dropped due
 * to size limit violations. Therefore, we strip data URLs and replace them with a
 * placeholder.
 *
 * Only the header (the text between `data:` and the first `,`) is inspected.
 * The payload is never scanned, so payload text that merely contains `;base64,`
 * (e.g. `data:text/plain,a;base64,b`) is not mistaken for the base64 flag.
 *
 * @param url - The URL to process
 * @returns For data URLs, returns a short format like `<data:text/javascript,base64>`
 *          (the MIME type defaults to `text/plain` when the URL does not declare one).
 *          For non-data URLs, returns the original URL unchanged.
 */
export function stripDataUrlContent(url: string): string {
  const scheme = 'data:';
  if (!url.startsWith(scheme)) {
    return url;
  }

  // The first comma separates the header (MIME type, parameters, base64 flag) from the payload.
  const commaIndex = url.indexOf(',');
  const header = commaIndex === -1 ? url.slice(scheme.length) : url.slice(scheme.length, commaIndex);

  // The MIME type is everything in the header up to the first ';' (parameters follow it).
  const semicolonIndex = header.indexOf(';');
  const declaredMimeType = semicolonIndex === -1 ? header : header.slice(0, semicolonIndex);
  const mimeType = declaredMimeType || 'text/plain';

  // The base64 flag is the last header token, directly in front of the comma.
  const isBase64 = commaIndex !== -1 && header.endsWith(';base64');

  return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
}

packages/core/test/lib/utils/url.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
isURLObjectRelative,
77
parseStringToURLObject,
88
parseUrl,
9+
stripDataUrlContent,
910
stripUrlQueryAndFragment,
1011
} from '../../../src/utils/url';
1112

@@ -638,3 +639,46 @@ describe('getHttpSpanDetailsFromUrlObject', () => {
638639
});
639640
});
640641
});
642+
643+
// Unit tests for `stripDataUrlContent`: non-data URLs must come back unchanged, while
// data URLs must collapse to a `<data:MIME[,base64]>` placeholder without the payload.
describe('stripDataUrlContent', () => {
644+
// Absolute and relative URLs that do not start with `data:` are returned as-is.
it('returns regular URLs unchanged', () => {
645+
expect(stripDataUrlContent('https://example.com/api')).toBe('https://example.com/api');
646+
expect(stripDataUrlContent('http://localhost:3000/test')).toBe('http://localhost:3000/test');
647+
expect(stripDataUrlContent('/relative/path')).toBe('/relative/path');
648+
});
649+
650+
// A `;base64,` header yields the `,base64` suffix in the placeholder; the payload is dropped.
it('strips content from base64 data URLs', () => {
651+
expect(stripDataUrlContent('data:text/javascript;base64,SGVsbG8gV29ybGQ=')).toBe('<data:text/javascript,base64>');
652+
expect(stripDataUrlContent('data:application/json;base64,eyJrZXkiOiJ2YWx1ZSJ9')).toBe(
653+
'<data:application/json,base64>',
654+
);
655+
expect(stripDataUrlContent('data:text/html;base64,PGh0bWw+PC9odG1sPg==')).toBe('<data:text/html,base64>');
656+
});
657+
658+
// Without the base64 flag the placeholder carries only the MIME type.
it('strips content from non-base64 data URLs', () => {
659+
expect(stripDataUrlContent('data:text/plain,Hello%20World')).toBe('<data:text/plain>');
660+
expect(stripDataUrlContent('data:text/html,<h1>Hello</h1>')).toBe('<data:text/html>');
661+
});
662+
663+
// MIME types containing `/`, `+` and `-` are expected to be copied into the placeholder verbatim.
it('handles data URLs with various MIME types', () => {
664+
expect(stripDataUrlContent('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA')).toBe('<data:image/png,base64>');
665+
expect(stripDataUrlContent('data:image/svg+xml;base64,PHN2Zz4=')).toBe('<data:image/svg+xml,base64>');
666+
expect(stripDataUrlContent('data:application/octet-stream;base64,AQIDBA==')).toBe(
667+
'<data:application/octet-stream,base64>',
668+
);
669+
});
670+
671+
// When no MIME type precedes the first `;` or `,`, the placeholder falls back to `text/plain`.
it('defaults to text/plain for data URLs without MIME type', () => {
672+
expect(stripDataUrlContent('data:,Hello')).toBe('<data:text/plain>');
673+
expect(stripDataUrlContent('data:;base64,SGVsbG8=')).toBe('<data:text/plain,base64>');
674+
});
675+
676+
// A bare `data:` prefix with nothing after it also maps to the `text/plain` placeholder.
it('handles empty data URLs', () => {
677+
expect(stripDataUrlContent('data:')).toBe('<data:text/plain>');
678+
});
679+
680+
// The placeholder is the same regardless of payload size (10,000 characters here).
it('handles very long base64 encoded data URLs', () => {
681+
const longBase64 = 'A'.repeat(10000);
682+
expect(stripDataUrlContent(`data:text/javascript;base64,${longBase64}`)).toBe('<data:text/javascript,base64>');
683+
});
684+
});

packages/node/src/integrations/http.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@ import { diag } from '@opentelemetry/api';
33
import type { HttpInstrumentationConfig } from '@opentelemetry/instrumentation-http';
44
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';
55
import type { Span } from '@sentry/core';
6-
import { defineIntegration, getClient, hasSpansEnabled } from '@sentry/core';
6+
import {
7+
defineIntegration,
8+
getClient,
9+
hasSpansEnabled,
10+
SEMANTIC_ATTRIBUTE_URL_FULL,
11+
stripDataUrlContent,
12+
} from '@sentry/core';
713
import type { HTTPModuleRequestIncomingMessage, NodeClient, SentryHttpInstrumentationOptions } from '@sentry/node-core';
814
import {
915
addOriginToSpan,
@@ -282,6 +288,15 @@ function getConfigWithDefaults(options: Partial<HttpOptions> = {}): HttpInstrume
282288
requestHook: (span, req) => {
283289
addOriginToSpan(span, 'auto.http.otel.http');
284290

291+
// Sanitize data URLs to prevent long base64 strings in span attributes
292+
const url = getRequestUrl(req as ClientRequest);
293+
if (url.startsWith('data:')) {
294+
const sanitizedUrl = stripDataUrlContent(url);
295+
span.setAttribute('http.url', sanitizedUrl);
296+
span.setAttribute(SEMANTIC_ATTRIBUTE_URL_FULL, sanitizedUrl);
297+
span.updateName(`${(req as ClientRequest).method || 'GET'} ${sanitizedUrl}`);
298+
}
299+
285300
options.instrumentation?.requestHook?.(span, req);
286301
},
287302
responseHook: (span, res) => {

packages/node/src/integrations/node-fetch.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
import type { UndiciInstrumentationConfig } from '@opentelemetry/instrumentation-undici';
22
import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici';
33
import type { IntegrationFn } from '@sentry/core';
4-
import { defineIntegration, getClient, hasSpansEnabled, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '@sentry/core';
4+
import {
5+
defineIntegration,
6+
getClient,
7+
hasSpansEnabled,
8+
SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME,
9+
SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN,
10+
SEMANTIC_ATTRIBUTE_URL_FULL,
11+
stripDataUrlContent,
12+
} from '@sentry/core';
513
import type { NodeClient } from '@sentry/node-core';
614
import { generateInstrumentOnce, SentryNodeFetchInstrumentation } from '@sentry/node-core';
715
import type { NodeClientOptions } from '../types';
@@ -101,7 +109,20 @@ function getConfigWithDefaults(options: Partial<NodeFetchOptions> = {}): UndiciI
101109

102110
return !!shouldIgnore;
103111
},
104-
startSpanHook: () => {
112+
startSpanHook: request => {
113+
const url = getAbsoluteUrl(request.origin, request.path);
114+
115+
// Sanitize data URLs to prevent long base64 strings in span attributes
116+
if (url.startsWith('data:')) {
117+
const sanitizedUrl = stripDataUrlContent(url);
118+
return {
119+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch',
120+
'http.url': sanitizedUrl,
121+
[SEMANTIC_ATTRIBUTE_URL_FULL]: sanitizedUrl,
122+
[SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME]: `${request.method || 'GET'} ${sanitizedUrl}`,
123+
};
124+
}
125+
105126
return {
106127
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch',
107128
};

0 commit comments

Comments
 (0)