-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Expand file tree
/
Copy pathurl.ts
More file actions
288 lines (258 loc) · 8.99 KB
/
url.ts
File metadata and controls
288 lines (258 loc) · 8.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import {
SEMANTIC_ATTRIBUTE_HTTP_REQUEST_METHOD,
SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN,
SEMANTIC_ATTRIBUTE_SENTRY_SOURCE,
SEMANTIC_ATTRIBUTE_URL_FULL,
} from '../semanticAttributes';
import type { SpanAttributes } from '../types-hoist/span';
/**
 * Loosely parsed pieces of a URL string, as produced by `parseUrl`.
 *
 * Every field is optional because `parseUrl` returns an empty object for
 * empty or unparseable input.
 *
 * - `host`: the authority component after `//` (may still contain userinfo and a port)
 * - `path`: the path component
 * - `protocol`: the scheme, without the trailing `:`
 * - `relative`: `path` + `search` + `hash`, i.e. everything after the origin
 * - `search`: the query string including the leading `?`, or `''`
 * - `hash`: the fragment including the leading `#`, or `''`
 */
type PartialURL = {
host?: string;
path?: string;
protocol?: string;
relative?: string;
search?: string;
hash?: string;
};
/**
 * `URL` augmented with a `canParse` method.
 *
 * Used only as a cast target for the global `URL` in `parseStringToURLObject`,
 * after a runtime `'canParse' in URL` check has confirmed the method exists.
 * NOTE(review): presumably needed because the configured TS lib typings do not
 * declare the static `URL.canParse` - confirm before removing.
 */
interface URLwithCanParse extends URL {
canParse: (url: string, base?: string | URL | undefined) => boolean;
}
/**
 * The subset of `URL` fields that is meaningful for a relative URL.
 *
 * The `URL` constructor cannot parse a relative URL without a base, so relative
 * URLs are represented by this reduced shape instead. Origin-related fields
 * (host, protocol, port, ...) are deliberately absent. The `isRelative` marker
 * is what `isURLObjectRelative` checks to tell this shape apart from a real `URL`.
 */
type RelativeURL = {
isRelative: true;
pathname: URL['pathname'];
search: URL['search'];
hash: URL['hash'];
};
/**
 * Result of parsing a URL string: a native `URL` for absolute inputs, or a
 * `RelativeURL` for relative ones. Narrow with `isURLObjectRelative`.
 */
type URLObject = RelativeURL | URL;
// Placeholder base URL used by `parseStringToURLObject` when it has to parse a relative URL
// and the caller did not supply a base of their own.
//
// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html
// > When the methods above do not yield an absolute URI, a base URL
// > of "thismessage:/" MUST be employed. This base URL has been
// > defined for the sole purpose of resolving relative references
// > within a multipart/related structure when no other base URI is
// > specified.
//
// We need to provide a base URL to `parseStringToURLObject` because the fetch API sometimes
// gives us a relative URL, and a relative URL is not valid on its own.
//
// This is the only case in which this default base is applied; absolute URLs are parsed
// without it.
const DEFAULT_BASE_URL = 'thismessage:/';
/**
 * Type guard that tells a relative URL object apart from a native `URL`.
 *
 * The check is purely structural: a value counts as relative when it exposes an
 * `isRelative` property.
 *
 * @param url - The parsed URL object to inspect
 * @returns `true` (narrowing to `RelativeURL`) when the marker property is present, `false` otherwise
 */
export function isURLObjectRelative(url: URLObject): url is RelativeURL {
  const hasRelativeMarker = 'isRelative' in url;
  return hasRelativeMarker;
}
/**
 * Parses a string into a URL object.
 *
 * A string is treated as relative when it neither contains `://` past its first
 * character nor starts with `//`. Relative strings are resolved against `urlBase`
 * (or a placeholder base when none is given) and returned as a reduced
 * `RelativeURL`; everything else is returned as a native `URL`.
 *
 * @param url - The URL string to parse
 * @param urlBase - Optional base used to resolve `url`; when omitted, a placeholder
 * base is used for relative inputs only
 * @returns The parsed URL object, or `undefined` if the URL is invalid
 */
export function parseStringToURLObject(url: string, urlBase?: string | URL | undefined): URLObject | undefined {
  const hasSchemeSeparator = url.indexOf('://') > 0;
  const isProtocolRelative = url.indexOf('//') === 0;
  const isRelative = !hasSchemeSeparator && !isProtocolRelative;

  // The placeholder base is only consulted when the caller gave no base and the input is relative.
  const base = urlBase ?? (isRelative ? DEFAULT_BASE_URL : undefined);

  try {
    // Where available, `canParse` lets us reject invalid input without paying for
    // a thrown exception from the URL constructor.
    // Node 20+, Chrome 120+, Firefox 115+, Safari 17+
    const supportsCanParse = 'canParse' in URL;
    if (supportsCanParse && !(URL as unknown as URLwithCanParse).canParse(url, base)) {
      return undefined;
    }

    const parsed = new URL(url, base);
    if (!isRelative) {
      return parsed;
    }

    // For relative input the origin, host, etc. come from the base (possibly the
    // placeholder one), so only the path, query and fragment are handed back.
    const { pathname, search, hash } = parsed;
    return { isRelative, pathname, search, hash };
  } catch {
    // The URL constructor rejected the input.
    return undefined;
  }
}
/**
 * Produces a sanitized string from a parsed URL object that is safe to use as a span name.
 * see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
 *
 * Relative URL objects are reduced to their pathname. For absolute URLs the query
 * string and fragment are dropped, ports `80` and `443` are removed, and any
 * username/password is replaced with `%filtered%`.
 *
 * @param url - The parsed URL object to sanitize (never mutated)
 * @returns The sanitized URL string
 */
export function getSanitizedUrlStringFromUrlObject(url: URLObject): string {
  if (isURLObjectRelative(url)) {
    return url.pathname;
  }

  // Operate on a copy so the caller's URL instance stays untouched.
  const sanitized = new URL(url);
  sanitized.search = '';
  sanitized.hash = '';

  const { port } = sanitized;
  if (port === '80' || port === '443') {
    sanitized.port = '';
  }

  // Credentials are masked rather than removed.
  if (sanitized.password) {
    sanitized.password = '%filtered%';
  }
  if (sanitized.username) {
    sanitized.username = '%filtered%';
  }

  return sanitized.href;
}
/**
 * The minimal slice of a request needed to build HTTP span names and attributes.
 * `method` is upper-cased by its consumers in this file; when it is absent the
 * span name falls back to `GET`.
 */
type PartialRequest = {
method?: string;
};
/**
 * Builds the `<METHOD> <target>` name for an HTTP span.
 *
 * The target is chosen in this order:
 * 1. `routeName`, when provided
 * 2. `/`, when there is no URL object
 * 3. the sanitized URL string for client spans, or the pathname for server spans
 *
 * @param urlObject - The parsed URL, if any
 * @param kind - Whether the span describes a server or a client operation
 * @param request - Optional request whose `method` is upper-cased; defaults to `GET` when absent
 * @param routeName - Optional route name that takes precedence over the URL
 * @returns The span name
 */
function getHttpSpanNameFromUrlObject(
  urlObject: URLObject | undefined,
  kind: 'server' | 'client',
  request?: PartialRequest,
  routeName?: string,
): string {
  const verb = request?.method?.toUpperCase() ?? 'GET';

  let target: string;
  if (routeName) {
    target = routeName;
  } else if (!urlObject) {
    target = '/';
  } else if (kind === 'client') {
    target = getSanitizedUrlStringFromUrlObject(urlObject);
  } else {
    target = urlObject.pathname;
  }

  return `${verb} ${target}`;
}
/**
 * Derives the span name and span attributes describing the HTTP request for a
 * parsed URL. Shared by server and client HTTP spans.
 *
 * Follows https://opentelemetry.io/docs/specs/semconv/http/.
 *
 * The span source is `url` by default and becomes `route` when a `routeName` is
 * given or when the URL's pathname is exactly `/`.
 *
 * @param urlObject - see {@link parseStringToURLObject}
 * @param kind - The type of HTTP operation (server or client)
 * @param spanOrigin - The origin of the span
 * @param request - The request object, see {@link PartialRequest}
 * @param routeName - The name of the route, must be low cardinality
 * @returns The span name and attributes for the HTTP operation
 */
export function getHttpSpanDetailsFromUrlObject(
  urlObject: URLObject | undefined,
  kind: 'server' | 'client',
  spanOrigin: string,
  request?: PartialRequest,
  routeName?: string,
): [name: string, attributes: SpanAttributes] {
  const isServer = kind === 'server';

  const spanAttributes: SpanAttributes = {
    [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
    [SEMANTIC_ATTRIBUTE_SENTRY_SOURCE]: routeName ? 'route' : 'url',
  };

  if (routeName) {
    // This is based on https://opentelemetry.io/docs/specs/semconv/http/http-spans/#name
    spanAttributes[isServer ? 'http.route' : 'url.template'] = routeName;
  }

  const httpMethod = request?.method;
  if (httpMethod) {
    spanAttributes[SEMANTIC_ATTRIBUTE_HTTP_REQUEST_METHOD] = httpMethod.toUpperCase();
  }

  if (urlObject) {
    // These three fields exist on both relative and absolute URL objects.
    const { search, hash, pathname } = urlObject;

    if (search) {
      spanAttributes['url.query'] = search;
    }
    if (hash) {
      spanAttributes['url.fragment'] = hash;
    }
    if (pathname) {
      spanAttributes['url.path'] = pathname;
      if (pathname === '/') {
        spanAttributes[SEMANTIC_ATTRIBUTE_SENTRY_SOURCE] = 'route';
      }
    }

    // Origin-related attributes are only available on absolute URLs.
    if (!isURLObjectRelative(urlObject)) {
      const { href, port, protocol, hostname } = urlObject;

      spanAttributes[SEMANTIC_ATTRIBUTE_URL_FULL] = href;
      if (port) {
        spanAttributes['url.port'] = port;
      }
      if (protocol) {
        spanAttributes['url.scheme'] = protocol;
      }
      if (hostname) {
        spanAttributes[isServer ? 'server.address' : 'url.domain'] = hostname;
      }
    }
  }

  const spanName = getHttpSpanNameFromUrlObject(urlObject, kind, request, routeName);
  return [spanName, spanAttributes];
}
/**
 * Splits the string form of a URL into its components.
 *
 * The pattern is borrowed from https://tools.ietf.org/html/rfc3986#appendix-B.
 * A regex is used on purpose instead of the `<a/>` href parsing trick, because
 * React Native and other environments may not have a DOM available.
 *
 * @param url - The URL string to split
 * @returns The parsed pieces, or an empty object when `url` is empty or does not match
 */
export function parseUrl(url: string): PartialURL {
  const parts = url ? url.match(/^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/) : null;
  if (!parts) {
    return {};
  }

  const protocol = parts[2];
  const host = parts[4];
  const path = parts[5];
  // Missing query/fragment groups are normalized to '' so that the concatenation
  // below never yields the text 'undefined'.
  const search = parts[6] || '';
  const hash = parts[8] || '';

  return {
    host,
    path,
    protocol,
    search,
    hash,
    relative: path + search + hash, // everything minus origin
  };
}
/**
 * Removes the query string and fragment (if any) from a URL or path by cutting
 * the input at the first `?` or `#`.
 *
 * @param urlPath Full URL or path, including possible query string and/or fragment
 * @returns URL or path without query string or fragment
 */
export function stripUrlQueryAndFragment(urlPath: string): string {
  const cutAt = urlPath.search(/[?#]/);
  return cutAt === -1 ? urlPath : urlPath.slice(0, cutAt);
}
/**
 * Takes the parsed pieces of a URL (see `parseUrl`) and returns a sanitized
 * string which is safe to use as span name
 * see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
 *
 * The result is `protocol://` (when a protocol is present) followed by the
 * filtered host and the path. Query string and fragment are never included.
 * Missing pieces contribute an empty string, so an empty `PartialURL` yields `''`.
 *
 * @param url - The parsed URL pieces
 * @returns The sanitized URL string
 */
export function getSanitizedUrlString(url: PartialURL): string {
  const { protocol, host, path } = url;

  const filteredHost =
    host
      // Always mask credentials: everything up to the last `@` in the authority
      ?.replace(/^.*@/, '[filtered]:[filtered]@')
      // Don't show standard :80 (http) and :443 (https) ports to reduce the noise
      // TODO: Use new URL global if it exists
      .replace(/(:80)$/, '')
      .replace(/(:443)$/, '') || '';

  // `path` is optional on `PartialURL`; without the fallback a missing path would be
  // rendered as the literal text "undefined".
  return `${protocol ? `${protocol}://` : ''}${filteredHost}${path ?? ''}`;
}
/**
 * Strips the content from a data URL, returning a placeholder with the MIME type.
 *
 * Data URLs can be very long (e.g. base64 encoded scripts for Web Workers),
 * with little valuable information, often leading to envelopes getting dropped due
 * to size limit violations. Therefore, we strip data URLs and replace them with a
 * placeholder.
 *
 * The base64 marker is only looked for in the header, i.e. the part before the
 * first `,`, so payload text that happens to contain `;base64,` is not mistaken
 * for the encoding flag.
 *
 * @param url - The URL to process
 * @returns For data URLs, returns a short format like `<data:text/javascript,base64>`.
 * For non-data URLs, returns the original URL unchanged.
 */
export function stripDataUrlContent(url: string): string {
  if (!url.startsWith('data:')) {
    return url;
  }

  // Match the MIME type (everything after 'data:' until the first ';' or ',')
  const match = url.match(/^data:([^;,]+)/);
  const mimeType = match ? match[1] : 'text/plain';

  // The first comma separates the header (`data:<mediatype>[;base64]`) from the payload.
  const payloadSeparator = url.indexOf(',');
  const isBase64 = payloadSeparator !== -1 && url.slice(0, payloadSeparator).endsWith(';base64');

  return `<data:${mimeType}${isBase64 ? ',base64' : ''}>`;
}