-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Expand file tree
/
Copy pathisURL.js
More file actions
265 lines (223 loc) · 8.35 KB
/
isURL.js
File metadata and controls
265 lines (223 loc) · 8.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
import assertString from './util/assertString';
import checkHost from './util/checkHost';
import includes from './util/includesString';
import isFQDN from './isFQDN';
import isIP from './isIP';
import merge from './util/merge';
/*
options for isURL method
protocols - valid protocols can be modified with this option.
require_tld - if set to false isURL will not check if the URL's host includes a top-level domain.
require_protocol - if set to true isURL will return false if protocol is not present in the URL.
require_host - if set to false isURL will not check if host is present in the URL.
require_port - if set to true isURL will check if port is present in the URL.
require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option.
allow_underscores - if set to true, the validator will allow underscores in the URL.
host_whitelist - if set to an array of strings or regexp, and the domain matches none of the strings
defined in it, the validation fails.
host_blacklist - if set to an array of strings or regexp, and the domain matches any of the strings
defined in it, the validation fails.
allow_trailing_dot - if set to true, the validator will allow the domain to end with
a `.` character.
allow_protocol_relative_urls - if set to true protocol relative URLs will be allowed.
allow_fragments - if set to false isURL will return false if fragments are present.
allow_query_components - if set to false isURL will return false if query components are present.
disallow_auth - if set to true, the validator will fail if the URL contains an authentication
component, e.g. `http://username:password@example.com`
validate_length - if set to false isURL will skip string length validation. `max_allowed_length`
will be ignored if this is set as `false`.
max_allowed_length - if set, isURL will not allow URLs longer than the specified value (default is
2084, which is IE's maximum URL length).
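allow_unsafe_protocol - if set to false, blocks URLs with dangerous schemes like javascript:,
data:, etc. The documented default is true (to preserve backward compatibility); note, however,
that isURL currently reads this option from the caller's options before the defaults are merged,
so omitting it has the same effect as passing false.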
*/
// Default option values for isURL. isURL combines the caller's options with this object via
// `merge(options, default_url_options)` before running most of its checks.
// NOTE(review): `allow_unsafe_protocol` is read by isURL from the caller's raw options *before*
// that merge, so the `true` listed here does not influence the dangerous-scheme check.
const default_url_options = {
protocols: ['http', 'https', 'ftp'],
require_tld: true,
require_protocol: false,
require_host: true,
require_port: false,
require_valid_protocol: true,
allow_underscores: false,
allow_trailing_dot: false,
allow_protocol_relative_urls: false,
allow_fragments: true,
allow_query_components: true,
validate_length: true,
max_allowed_length: 2084,
allow_unsafe_protocol: true,
};
// Lowercase scheme prefixes (each including its trailing colon) that isURL rejects up front
// unless the caller passes a truthy `allow_unsafe_protocol`. They are compared with
// `startsWith` against the lowercased URL.
// Some literals are deliberately built with `+`; presumably this keeps the full scheme text
// out of the source verbatim (e.g. to avoid lint rules such as no-script-url) - confirm before
// collapsing them into single string literals.
/* eslint-disable no-useless-concat */
const DANGEROUS_SCHEMES = [
'java' + 'script:',
'data:',
'vbs' + 'cript:',
'file:',
'blob:',
'mail' + 'to:',
];
/* eslint-enable no-useless-concat */
// Matches a bracket-wrapped host, optionally followed by a port, e.g. `[::1]` or `[::1]:8080`.
// Capture group 1 is the text between the brackets; capture group 2 (optional) is the run of
// digits after the `:` that follows the closing bracket.
const wrapped_ipv6 = /^\[([^\]]+)\](?::([0-9]+))?$/;
/**
 * Checks whether a string looks like a URL, subject to the supplied options.
 *
 * The input is reduced step by step: the fragment and query are dropped, a leading
 * `scheme:` and `//` are stripped, the path is cut off, and an optional `auth@` prefix is
 * removed. What is left is split into host and port, which are then validated.
 *
 * @param {string} url - candidate string; handed to `assertString` before anything else
 *   (presumably throws for non-string input - confirm in ./util/assertString).
 * @param {object} [options] - caller overrides; combined with `default_url_options` via `merge`.
 * @returns {boolean} `true` when every enabled check passes, `false` otherwise.
 */
export default function isURL(url, options) {
assertString(url);
// Reject empty strings and anything containing whitespace, `<` or `>`.
if (!url || /[\s<>]/.test(url)) {
return false;
}
// NOTE(review): this reads the caller's raw `options`, before the merge with
// `default_url_options` below. The scheme check therefore runs whenever the caller did not
// pass a truthy `allow_unsafe_protocol` (including when `options` is undefined), regardless
// of the `allow_unsafe_protocol: true` entry in the defaults.
if (!options?.allow_unsafe_protocol) {
const lowerUrl = url.trim().toLowerCase();
if (DANGEROUS_SCHEMES.some(scheme => lowerUrl.startsWith(scheme))) {
return false;
}
}
options = merge(options, default_url_options);
// Length is checked on the full input, before any component is stripped.
if (options.validate_length && url.length > options.max_allowed_length) {
return false;
}
if (!options.allow_fragments && includes(url, '#')) {
return false;
}
// Query components are detected by the presence of either `?` or `&` anywhere in the string.
if (!options.allow_query_components && (includes(url, '?') || includes(url, '&'))) {
return false;
}
let protocol, auth, host, hostname, port, port_str, split, ipv6;
// Keep only the text before the first `#`, then only the text before the first `?`.
split = url.split('#');
url = split.shift();
split = url.split('?');
url = split.shift();
// Replaced the 'split("://")' logic with a regex to match the protocol.
// This correctly identifies schemes like `javascript:` which don't use `//`.
// However, we need to be careful not to confuse authentication credentials (user:password@host)
// with protocols. A colon before an @ symbol might be part of auth, not a protocol separator.
const protocol_match = url.match(/^([a-z][a-z0-9+\-.]*):/i);
let had_explicit_protocol = false;
// Records that an explicit protocol was seen and stores it lowercased in `protocol`.
// Returns `false` when `require_valid_protocol` is on and the protocol is not listed in
// `options.protocols`; otherwise returns `url` with the matched `scheme:` prefix removed.
const cleanUpProtocol = (potential_protocol) => {
had_explicit_protocol = true;
protocol = potential_protocol.toLowerCase();
if (options.require_valid_protocol && options.protocols.indexOf(protocol) === -1) {
// The identified protocol is not in the allowed list.
return false;
}
// Remove the protocol from the URL string.
return url.substring(protocol_match[0].length);
};
if (protocol_match) {
const potential_protocol = protocol_match[1];
const after_colon = url.substring(protocol_match[0].length);
// Check if what follows looks like authentication credentials (user:password@host)
// rather than a protocol. This happens when:
// 1. There's no `//` after the colon (protocols like `http://` have this)
// 2. There's an `@` symbol before any `/`
// 3. The part before `@` contains only valid auth characters (alphanumeric, -, _, ., %, :)
const starts_with_slashes = after_colon.slice(0, 2) === '//';
if (!starts_with_slashes) {
// Only the text up to the first `/` (or the whole remainder if there is none) is examined.
const first_slash_position = after_colon.indexOf('/');
const before_slash = first_slash_position === -1
? after_colon
: after_colon.substring(0, first_slash_position);
const at_position = before_slash.indexOf('@');
if (at_position !== -1) {
const before_at = before_slash.substring(0, at_position);
const valid_auth_regex = /^[a-zA-Z0-9\-_.%:]*$/;
const is_valid_auth = valid_auth_regex.test(before_at);
if (is_valid_auth) {
// This looks like authentication (e.g., user:password@host), not a protocol
if (options.require_protocol) {
return false;
}
// Don't consume the colon; let the auth parsing handle it later
} else {
// This looks like a malicious protocol (e.g., javascript:alert();@host)
url = cleanUpProtocol(potential_protocol);
if (url === false) {
return false;
}
}
} else {
// No @ symbol, this is definitely a protocol
url = cleanUpProtocol(potential_protocol);
if (url === false) {
return false;
}
}
} else {
// Starts with '//', this is definitely a protocol like http://
url = cleanUpProtocol(potential_protocol);
if (url === false) {
return false;
}
}
} else if (options.require_protocol) {
return false;
}
// Handle leading '//' only as protocol-relative when there was NO explicit protocol.
// If there was an explicit protocol, '//' is the normal separator
// and should be stripped unconditionally.
if (url.slice(0, 2) === '//') {
if (!had_explicit_protocol && !options.allow_protocol_relative_urls) {
return false;
}
url = url.slice(2);
}
// Nothing is left once the protocol and leading slashes are removed.
if (url === '') {
return false;
}
// Keep only the text before the first `/`; the path is not inspected further.
split = url.split('/');
url = split.shift();
if (url === '' && !options.require_host) {
return true;
}
split = url.split('@');
if (split.length > 1) {
if (options.disallow_auth) {
return false;
}
// An `@` with nothing before it is rejected.
if (split[0] === '') {
return false;
}
auth = split.shift();
// The auth part may contain at most one `:`.
if (auth.indexOf(':') >= 0 && auth.split(':').length > 2) {
return false;
}
const [user, password] = auth.split(':');
// An auth part consisting of just `:` (empty user and empty password) is rejected.
if (user === '' && password === '') {
return false;
}
}
// Any further `@` characters end up inside `hostname`.
hostname = split.join('@');
port_str = null;
ipv6 = null;
const ipv6_match = hostname.match(wrapped_ipv6);
if (ipv6_match) {
// Bracketed host: `host` is left empty and the bracket contents are carried in `ipv6`.
host = '';
ipv6 = ipv6_match[1];
port_str = ipv6_match[2] || null;
} else {
// Host is the text before the first `:`; everything after it is treated as the port string.
split = hostname.split(':');
host = split.shift();
if (split.length) {
port_str = split.join(':');
}
}
// A non-empty port must be digits only and within 1..65535. A missing or empty port is
// accepted unless `require_port` is set.
if (port_str !== null && port_str.length > 0) {
port = parseInt(port_str, 10);
if (!/^[0-9]+$/.test(port_str) || port <= 0 || port > 65535) {
return false;
}
} else if (options.require_port) {
return false;
}
// With a whitelist, the result is decided by `checkHost` alone: the host validation and
// blacklist checks below are skipped. For a bracketed host, `host` is still '' here.
if (options.host_whitelist) {
return checkHost(host, options.host_whitelist);
}
// `host` is also '' for a bracketed host, so when `require_host` is false such input is
// accepted here without the bracket contents being validated.
if (host === '' && !options.require_host) {
return true;
}
// The host must be an IP address or an FQDN, or the bracket contents must be a valid IPv6 address.
if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) {
return false;
}
// For a bracketed host, compare the bracket contents against the blacklist.
host = host || ipv6;
if (options.host_blacklist && checkHost(host, options.host_blacklist)) {
return false;
}
return true;
}