Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions packages/browser/src/integrations/globalhandlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
getLocationHref,
isPrimitive,
isString,
stripDataUrlContent,
UNKNOWN_FUNCTION,
} from '@sentry/core';
import type { BrowserClient } from '../client';
Expand Down Expand Up @@ -208,14 +209,7 @@ function getFilenameFromUrl(url: string | undefined): string | undefined {
return undefined;
}

// stack frame urls can be data urls, for example when initializing a Worker with a base64 encoded script
// in this case we just show the data prefix and mime type to avoid too long raw data urls
if (url.startsWith('data:')) {
const match = url.match(/^data:([^;]+)/);
const mimeType = match ? match[1] : 'text/javascript';
const isBase64 = url.includes('base64,');
return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
}

return url; // it's fine to not truncate it as it's not put in a regex (https://codeql.github.com/codeql-query-help/javascript/js-polynomial-redos)
// Strip data URL content to avoid long base64 strings in stack frames
// (e.g. when initializing a Worker with a base64 encoded script)
return stripDataUrlContent(url);
}
9 changes: 5 additions & 4 deletions packages/browser/src/tracing/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
spanToJSON,
startInactiveSpan,
stringMatchesSomePattern,
stripDataUrlContent,
stripUrlQueryAndFragment,
} from '@sentry/core';
import type { XhrHint } from '@sentry-internal/browser-utils';
Expand Down Expand Up @@ -199,7 +200,7 @@ export function instrumentOutgoingRequests(client: Client, _options?: Partial<Re
const fullUrl = getFullURL(handlerData.fetchData.url);
const host = fullUrl ? parseUrl(fullUrl).host : undefined;
createdSpan.setAttributes({
'http.url': fullUrl,
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
'server.address': host,
});

Expand Down Expand Up @@ -355,7 +356,7 @@ function xhrCallback(
const fullUrl = getFullURL(url);
const parsedUrl = fullUrl ? parseUrl(fullUrl) : parseUrl(url);

const urlForSpanName = stripUrlQueryAndFragment(url);
const urlForSpanName = stripDataUrlContent(stripUrlQueryAndFragment(url));

const hasParent = !!getActiveSpan();

Expand All @@ -364,10 +365,10 @@ function xhrCallback(
? startInactiveSpan({
name: `${method} ${urlForSpanName}`,
attributes: {
url,
url: stripDataUrlContent(url),
type: 'xhr',
'http.method': method,
'http.url': fullUrl,
'http.url': fullUrl ? stripDataUrlContent(fullUrl) : undefined,
'server.address': parsedUrl?.host,
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.browser',
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
Expand Down
14 changes: 10 additions & 4 deletions packages/core/src/fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ import { hasSpansEnabled } from './utils/hasSpansEnabled';
import { isInstanceOf, isRequest } from './utils/is';
import { getActiveSpan } from './utils/spanUtils';
import { getTraceData } from './utils/traceData';
import { getSanitizedUrlStringFromUrlObject, isURLObjectRelative, parseStringToURLObject } from './utils/url';
import {
getSanitizedUrlStringFromUrlObject,
isURLObjectRelative,
parseStringToURLObject,
stripDataUrlContent,
} from './utils/url';

type PolymorphicRequestHeaders =
| Record<string, string | undefined>
Expand Down Expand Up @@ -318,8 +323,9 @@ function getSpanStartOptions(
spanOrigin: SpanOrigin,
): Parameters<typeof startInactiveSpan>[0] {
const parsedUrl = parseStringToURLObject(url);
const sanitizedUrl = parsedUrl ? stripDataUrlContent(getSanitizedUrlStringFromUrlObject(parsedUrl)) : undefined;
return {
name: parsedUrl ? `${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method,
name: sanitizedUrl ? `${method} ${sanitizedUrl}` : method,
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated
attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin),
};
}
Expand All @@ -331,15 +337,15 @@ function getFetchSpanAttributes(
spanOrigin: SpanOrigin,
): SpanAttributes {
const attributes: SpanAttributes = {
url,
url: stripDataUrlContent(url),
type: 'fetch',
'http.method': method,
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
};
if (parsedUrl) {
if (!isURLObjectRelative(parsedUrl)) {
attributes['http.url'] = parsedUrl.href;
attributes['http.url'] = stripDataUrlContent(parsedUrl.href);
attributes['server.address'] = parsedUrl.host;
}
if (parsedUrl.search) {
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ export {
getHttpSpanDetailsFromUrlObject,
isURLObjectRelative,
getSanitizedUrlStringFromUrlObject,
stripDataUrlContent,
} from './utils/url';
export {
eventFromMessage,
Expand Down
23 changes: 23 additions & 0 deletions packages/core/src/utils/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,26 @@ export function getSanitizedUrlString(url: PartialURL): string {

return `${protocol ? `${protocol}://` : ''}${filteredHost}${path}`;
}

/**
 * Strips the content from a data URL, returning a placeholder with the MIME type.
 *
 * Data URLs can be very long (e.g. base64 encoded scripts for Web Workers),
 * with little valuable information, often leading to envelopes getting dropped due
 * to size limit violations. Therefore, we strip data URLs and replace them with a
 * placeholder.
 *
 * Only the header of the data URL (everything before the first `,`) is inspected.
 * The payload is never scanned, so payload text that happens to contain `;base64,`
 * cannot cause a plain-text data URL to be reported as base64 encoded.
 *
 * @param url - The URL to process
 * @returns For data URLs, returns a short format like `<data:text/javascript,base64>`.
 *          The MIME type falls back to `text/plain` when the data URL does not declare one.
 *          For non-data URLs, returns the original URL unchanged.
 */
export function stripDataUrlContent(url: string): string {
  if (!url.startsWith('data:')) {
    return url;
  }

  // A data URL has the shape `data:[<mime type>][;<parameter>...][;base64],<payload>`.
  // The first comma terminates the header; everything after it is payload.
  const payloadSeparatorIndex = url.indexOf(',');
  const hasPayloadSeparator = payloadSeparatorIndex !== -1;
  const header = hasPayloadSeparator ? url.slice(0, payloadSeparatorIndex) : url;

  // Match the MIME type (everything after 'data:' until the first ';' or ',')
  const match = header.match(/^data:([^;,]+)/);
  const mimeType = match?.[1] ?? 'text/plain';

  // `;base64` only counts when it is the last header token, directly before the
  // payload separator. Without a separator the URL is not treated as base64.
  const isBase64 = hasPayloadSeparator && header.endsWith(';base64');

  return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
}
44 changes: 44 additions & 0 deletions packages/core/test/lib/utils/url.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
isURLObjectRelative,
parseStringToURLObject,
parseUrl,
stripDataUrlContent,
stripUrlQueryAndFragment,
} from '../../../src/utils/url';

Expand Down Expand Up @@ -638,3 +639,46 @@ describe('getHttpSpanDetailsFromUrlObject', () => {
});
});
});

// Test suite for `stripDataUrlContent`: checks that non-data URLs are returned as-is and that
// data URLs are replaced by a `<data:MIME[,base64]>` placeholder.
describe('stripDataUrlContent', () => {
// Absolute and relative non-data URLs must come back unchanged.
it('returns regular URLs unchanged', () => {
expect(stripDataUrlContent('https://example.com/api')).toBe('https://example.com/api');
expect(stripDataUrlContent('http://localhost:3000/test')).toBe('http://localhost:3000/test');
expect(stripDataUrlContent('/relative/path')).toBe('/relative/path');
});

// base64 data URLs keep their MIME type and gain a `,base64` marker; the payload is dropped.
it('strips content from base64 data URLs', () => {
expect(stripDataUrlContent('data:text/javascript;base64,SGVsbG8gV29ybGQ=')).toBe('<data:text/javascript,base64>');
expect(stripDataUrlContent('data:application/json;base64,eyJrZXkiOiJ2YWx1ZSJ9')).toBe(
'<data:application/json,base64>',
);
expect(stripDataUrlContent('data:text/html;base64,PGh0bWw+PC9odG1sPg==')).toBe('<data:text/html,base64>');
});

// Data URLs without a `;base64` marker produce a placeholder with only the MIME type.
it('strips content from non-base64 data URLs', () => {
expect(stripDataUrlContent('data:text/plain,Hello%20World')).toBe('<data:text/plain>');
expect(stripDataUrlContent('data:text/html,<h1>Hello</h1>')).toBe('<data:text/html>');
});

// MIME types containing `/`, `+` and `-` are carried over verbatim into the placeholder.
it('handles data URLs with various MIME types', () => {
expect(stripDataUrlContent('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA')).toBe('<data:image/png,base64>');
expect(stripDataUrlContent('data:image/svg+xml;base64,PHN2Zz4=')).toBe('<data:image/svg+xml,base64>');
expect(stripDataUrlContent('data:application/octet-stream;base64,AQIDBA==')).toBe(
'<data:application/octet-stream,base64>',
);
});

// When nothing precedes the first `;` or `,`, the placeholder falls back to `text/plain`.
it('defaults to text/plain for data URLs without MIME type', () => {
expect(stripDataUrlContent('data:,Hello')).toBe('<data:text/plain>');
expect(stripDataUrlContent('data:;base64,SGVsbG8=')).toBe('<data:text/plain,base64>');
});

// A bare `data:` prefix with no header and no payload still yields the default placeholder.
it('handles empty data URLs', () => {
expect(stripDataUrlContent('data:')).toBe('<data:text/plain>');
});

// The placeholder length must not depend on the payload size (10,000 characters here).
it('handles very long base64 encoded data URLs', () => {
const longBase64 = 'A'.repeat(10000);
expect(stripDataUrlContent(`data:text/javascript;base64,${longBase64}`)).toBe('<data:text/javascript,base64>');
});
});
17 changes: 16 additions & 1 deletion packages/node/src/integrations/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ import { diag } from '@opentelemetry/api';
import type { HttpInstrumentationConfig } from '@opentelemetry/instrumentation-http';
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';
import type { Span } from '@sentry/core';
import { defineIntegration, getClient, hasSpansEnabled } from '@sentry/core';
import {
defineIntegration,
getClient,
hasSpansEnabled,
SEMANTIC_ATTRIBUTE_URL_FULL,
stripDataUrlContent,
} from '@sentry/core';
import type { HTTPModuleRequestIncomingMessage, NodeClient, SentryHttpInstrumentationOptions } from '@sentry/node-core';
import {
addOriginToSpan,
Expand Down Expand Up @@ -282,6 +288,15 @@ function getConfigWithDefaults(options: Partial<HttpOptions> = {}): HttpInstrume
requestHook: (span, req) => {
addOriginToSpan(span, 'auto.http.otel.http');

// Sanitize data URLs to prevent long base64 strings in span attributes
const url = getRequestUrl(req as ClientRequest);
if (url.startsWith('data:')) {
const sanitizedUrl = stripDataUrlContent(url);
span.setAttribute('http.url', sanitizedUrl);
span.setAttribute(SEMANTIC_ATTRIBUTE_URL_FULL, sanitizedUrl);
span.updateName(`${(req as ClientRequest).method || 'GET'} ${sanitizedUrl}`);
}

options.instrumentation?.requestHook?.(span, req);
},
responseHook: (span, res) => {
Expand Down
25 changes: 23 additions & 2 deletions packages/node/src/integrations/node-fetch.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import type { UndiciInstrumentationConfig } from '@opentelemetry/instrumentation-undici';
import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici';
import type { IntegrationFn } from '@sentry/core';
import { defineIntegration, getClient, hasSpansEnabled, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '@sentry/core';
import {
defineIntegration,
getClient,
hasSpansEnabled,
SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME,
SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN,
SEMANTIC_ATTRIBUTE_URL_FULL,
stripDataUrlContent,
} from '@sentry/core';
import type { NodeClient } from '@sentry/node-core';
import { generateInstrumentOnce, SentryNodeFetchInstrumentation } from '@sentry/node-core';
import type { NodeClientOptions } from '../types';
Expand Down Expand Up @@ -101,7 +109,20 @@ function getConfigWithDefaults(options: Partial<NodeFetchOptions> = {}): UndiciI

return !!shouldIgnore;
},
startSpanHook: () => {
startSpanHook: request => {
const url = getAbsoluteUrl(request.origin, request.path);

// Sanitize data URLs to prevent long base64 strings in span attributes
if (url.startsWith('data:')) {
const sanitizedUrl = stripDataUrlContent(url);
return {
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch',
'http.url': sanitizedUrl,
[SEMANTIC_ATTRIBUTE_URL_FULL]: sanitizedUrl,
[SEMANTIC_ATTRIBUTE_SENTRY_CUSTOM_SPAN_NAME]: `${request.method || 'GET'} ${sanitizedUrl}`,
Comment on lines +119 to +122
Copy link
Copy Markdown
Member

@s1gr1d s1gr1d Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About the additional comment from @brunohaid: Maybe we could add the content of the first few bytes as an attribute here 🤔

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up adding the first 10 bytes to the span description and data. I went for adding this directly to the URL attributes/description because there isn't really a fitting attribute defined in SemConv for data url content. So I just stuck with the "url.full" one.

};
Comment thread
Lms24 marked this conversation as resolved.
}

return {
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.http.otel.node_fetch',
};
Expand Down
Loading