Skip to content

Commit 4695148

Browse files
authored
fix(core): Sanitize data URLs in http.client spans (#18896)
Our `http.client` span instrumentations currently treat data URLs (blobs or base64 encoded data) like regular raw URLs. While this is in general fine, the problem is that this leads to incredibly long span names and attribute values, especially because the URL is sent in up to three different attributes per span. This makes Relay reject the sent events due to exceeding size limits. This patch extracts the already existing stack trace URL sanitization logic for data URLs and applies it to `http.client` spans and attributes.
1 parent 2f0d9dc commit 4695148

15 files changed

Lines changed: 324 additions & 19 deletions

File tree

.size-limit.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ module.exports = [
3838
path: 'packages/browser/build/npm/esm/prod/index.js',
3939
import: createImport('init', 'browserTracingIntegration'),
4040
gzip: true,
41-
limit: '42 KB',
41+
limit: '43 KB',
4242
},
4343
{
4444
name: '@sentry/browser (incl. Tracing, Profiling)',
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import * as Sentry from '@sentry/browser';
2+
3+
window.Sentry = Sentry;
4+
5+
Sentry.init({
6+
dsn: 'https://public@dsn.ingest.sentry.io/1337',
7+
integrations: [Sentry.browserTracingIntegration()],
8+
tracesSampleRate: 1,
9+
autoSessionTracking: false,
10+
});
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Fetch a data URL to verify that the span name and attributes are sanitized
2+
// Data URLs are used for inline resources, e.g., Web Workers with inline scripts
3+
const dataUrl = 'data:text/plain;base64,SGVsbG8gV29ybGQh';
4+
fetch(dataUrl).catch(() => {
5+
// Data URL fetch might fail in some browsers, but the span should still be created
6+
});
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { expect } from '@playwright/test';
2+
import { sentryTest } from '../../../../utils/fixtures';
3+
import {
4+
envelopeRequestParser,
5+
shouldSkipTracingTest,
6+
waitForTransactionRequestOnUrl,
7+
} from '../../../../utils/helpers';
8+
9+
sentryTest('sanitizes data URLs in fetch span name and attributes', async ({ getLocalTestUrl, page }) => {
10+
if (shouldSkipTracingTest()) {
11+
sentryTest.skip();
12+
}
13+
14+
const url = await getLocalTestUrl({ testDir: __dirname });
15+
16+
const req = await waitForTransactionRequestOnUrl(page, url);
17+
const transactionEvent = envelopeRequestParser(req);
18+
19+
const requestSpans = transactionEvent.spans?.filter(({ op }) => op === 'http.client');
20+
21+
expect(requestSpans).toHaveLength(1);
22+
23+
const span = requestSpans?.[0];
24+
25+
const sanitizedUrl = 'data:text/plain,base64,SGVsbG8gV2... [truncated]';
26+
expect(span?.description).toBe(`GET ${sanitizedUrl}`);
27+
28+
expect(span?.data).toMatchObject({
29+
'http.method': 'GET',
30+
url: sanitizedUrl,
31+
type: 'fetch',
32+
});
33+
34+
expect(span?.data?.['http.url']).toBe(sanitizedUrl);
35+
});
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import * as Sentry from '@sentry/browser';
2+
3+
window.Sentry = Sentry;
4+
5+
Sentry.init({
6+
dsn: 'https://public@dsn.ingest.sentry.io/1337',
7+
integrations: [Sentry.browserTracingIntegration()],
8+
tracesSampleRate: 1,
9+
autoSessionTracking: false,
10+
});
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// XHR request to a data URL to verify that the span name and attributes are sanitized
2+
const dataUrl = 'data:text/plain;base64,SGVsbG8gV29ybGQh';
3+
const xhr = new XMLHttpRequest();
4+
xhr.open('GET', dataUrl);
5+
xhr.send();
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import { expect } from '@playwright/test';
2+
import type { Event } from '@sentry/core';
3+
import { sentryTest } from '../../../../utils/fixtures';
4+
import { getFirstSentryEnvelopeRequest, shouldSkipTracingTest } from '../../../../utils/helpers';
5+
6+
sentryTest('sanitizes data URLs in XHR span name and attributes', async ({ getLocalTestUrl, page }) => {
7+
if (shouldSkipTracingTest()) {
8+
sentryTest.skip();
9+
}
10+
11+
const url = await getLocalTestUrl({ testDir: __dirname });
12+
13+
const eventData = await getFirstSentryEnvelopeRequest<Event>(page, url);
14+
const requestSpans = eventData.spans?.filter(({ op }) => op === 'http.client');
15+
16+
expect(requestSpans).toHaveLength(1);
17+
18+
const span = requestSpans?.[0];
19+
20+
const sanitizedUrl = 'data:text/plain,base64,SGVsbG8gV2... [truncated]';
21+
expect(span?.description).toBe(`GET ${sanitizedUrl}`);
22+
23+
expect(span?.data).toMatchObject({
24+
'http.method': 'GET',
25+
url: sanitizedUrl,
26+
type: 'xhr',
27+
});
28+
29+
expect(span?.data?.['http.url']).toBe(sanitizedUrl);
30+
});

packages/browser/src/integrations/globalhandlers.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
getLocationHref,
1010
isPrimitive,
1111
isString,
12+
stripDataUrlContent,
1213
UNKNOWN_FUNCTION,
1314
} from '@sentry/core';
1415
import type { BrowserClient } from '../client';
@@ -208,14 +209,13 @@ function getFilenameFromUrl(url: string | undefined): string | undefined {
208209
return undefined;
209210
}
210211

211-
// stack frame urls can be data urls, for example when initializing a Worker with a base64 encoded script
212-
// in this case we just show the data prefix and mime type to avoid too long raw data urls
212+
// Strip data URL content to avoid long base64 strings in stack frames
213+
// (e.g. when initializing a Worker with a base64 encoded script)
214+
// Don't include data prefix for filenames as it's not useful for stack traces
215+
// Wrap with < > to indicate it's a placeholder
213216
if (url.startsWith('data:')) {
214-
const match = url.match(/^data:([^;]+)/);
215-
const mimeType = match ? match[1] : 'text/javascript';
216-
const isBase64 = url.includes('base64,');
217-
return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
217+
return `<${stripDataUrlContent(url, false)}>`;
218218
}
219219

220-
return url; // it's fine to not truncate it as it's not put in a regex (https://codeql.github.com/codeql-query-help/javascript/js-polynomial-redos)
220+
return url;
221221
}

packages/browser/src/tracing/request.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
spanToJSON,
2424
startInactiveSpan,
2525
stringMatchesSomePattern,
26+
stripDataUrlContent,
2627
stripUrlQueryAndFragment,
2728
} from '@sentry/core';
2829
import type { XhrHint } from '@sentry-internal/browser-utils';
@@ -199,7 +200,7 @@ export function instrumentOutgoingRequests(client: Client, _options?: Partial<Re
199200
const fullUrl = getFullURL(handlerData.fetchData.url);
200201
const host = fullUrl ? parseUrl(fullUrl).host : undefined;
201202
createdSpan.setAttributes({
202-
'http.url': fullUrl,
203+
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
203204
'server.address': host,
204205
});
205206

@@ -355,7 +356,7 @@ function xhrCallback(
355356
const fullUrl = getFullURL(url);
356357
const parsedUrl = fullUrl ? parseUrl(fullUrl) : parseUrl(url);
357358

358-
const urlForSpanName = stripUrlQueryAndFragment(url);
359+
const urlForSpanName = stripDataUrlContent(stripUrlQueryAndFragment(url));
359360

360361
const hasParent = !!getActiveSpan();
361362

@@ -364,10 +365,10 @@ function xhrCallback(
364365
? startInactiveSpan({
365366
name: `${method} ${urlForSpanName}`,
366367
attributes: {
367-
url,
368+
url: stripDataUrlContent(url),
368369
type: 'xhr',
369370
'http.method': method,
370-
'http.url': fullUrl,
371+
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
371372
'server.address': parsedUrl?.host,
372373
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.browser',
373374
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',

packages/core/src/fetch.ts

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ import { hasSpansEnabled } from './utils/hasSpansEnabled';
1111
import { isInstanceOf, isRequest } from './utils/is';
1212
import { getActiveSpan } from './utils/spanUtils';
1313
import { getTraceData } from './utils/traceData';
14-
import { getSanitizedUrlStringFromUrlObject, isURLObjectRelative, parseStringToURLObject } from './utils/url';
14+
import {
15+
getSanitizedUrlStringFromUrlObject,
16+
isURLObjectRelative,
17+
parseStringToURLObject,
18+
stripDataUrlContent,
19+
} from './utils/url';
1520

1621
type PolymorphicRequestHeaders =
1722
| Record<string, string | undefined>
@@ -317,9 +322,22 @@ function getSpanStartOptions(
317322
method: string,
318323
spanOrigin: SpanOrigin,
319324
): Parameters<typeof startInactiveSpan>[0] {
325+
// Data URLs need special handling because parseStringToURLObject treats them as "relative"
326+
// (no "://"), causing getSanitizedUrlStringFromUrlObject to return just the pathname
327+
// without the "data:" prefix, making later stripDataUrlContent calls ineffective.
328+
// So for data URLs, we strip the content first and use that directly.
329+
if (url.startsWith('data:')) {
330+
const sanitizedUrl = stripDataUrlContent(url);
331+
return {
332+
name: `${method} ${sanitizedUrl}`,
333+
attributes: getFetchSpanAttributes(url, undefined, method, spanOrigin),
334+
};
335+
}
336+
320337
const parsedUrl = parseStringToURLObject(url);
338+
const sanitizedUrl = parsedUrl ? getSanitizedUrlStringFromUrlObject(parsedUrl) : url;
321339
return {
322-
name: parsedUrl ? `${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method,
340+
name: `${method} ${sanitizedUrl}`,
323341
attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin),
324342
};
325343
}
@@ -331,15 +349,15 @@ function getFetchSpanAttributes(
331349
spanOrigin: SpanOrigin,
332350
): SpanAttributes {
333351
const attributes: SpanAttributes = {
334-
url,
352+
url: stripDataUrlContent(url),
335353
type: 'fetch',
336354
'http.method': method,
337355
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
338356
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
339357
};
340358
if (parsedUrl) {
341359
if (!isURLObjectRelative(parsedUrl)) {
342-
attributes['http.url'] = parsedUrl.href;
360+
attributes['http.url'] = stripDataUrlContent(parsedUrl.href);
343361
attributes['server.address'] = parsedUrl.host;
344362
}
345363
if (parsedUrl.search) {

0 commit comments

Comments
 (0)