Skip to content

Commit 816c04a

Browse files
committed
url: optimize path resolution with single-pass algorithm
1 parent cab20f2 commit 816c04a

File tree

1 file changed

+183
-72
lines changed

1 file changed

+183
-72
lines changed

lib/url.js

Lines changed: 183 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323

2424
const {
2525
ArrayPrototypeJoin,
26+
ArrayPrototypePush,
2627
Boolean,
2728
Int8Array,
2829
ObjectAssign,
2930
ObjectKeys,
3031
StringPrototypeAt,
3132
StringPrototypeCharCodeAt,
3233
StringPrototypeIndexOf,
34+
StringPrototypeLastIndexOf,
3335
StringPrototypeReplaceAll,
3436
StringPrototypeSlice,
3537
decodeURIComponent,
@@ -52,7 +54,6 @@ const {
5254

5355
// This ensures setURLConstructor() is called before the native
5456
// URL::ToObject() method is used.
55-
const { spliceOne } = require('internal/util');
5657
const { isInsideNodeModules } = internalBinding('util');
5758

5859
// WHATWG URL implementation provided by internal/url
@@ -91,6 +92,66 @@ function Url() {
9192

9293
// Reference: RFC 3986, RFC 1808, RFC 2396
9394

95+
/**
 * Normalize URL path segments by resolving `.` and `..` in a single
 * left-to-right pass over `path`, instead of repeatedly splicing entries
 * out of a pre-split array.
 *
 * Empty segments (a leading `/`, a trailing `/`, or a `//` run) and `.`
 * segments are never emitted. A `..` segment removes the most recently kept
 * segment when there is one and it is not itself `..`; otherwise it is
 * either kept as a literal `..` (when `allowAboveRoot` is true) or counted
 * in `up` (when `allowAboveRoot` is false). Consequently `up` is always 0
 * when `allowAboveRoot` is true.
 * @param {string} path - Combined pathname to normalize
 * @param {boolean} allowAboveRoot - Whether .. can go above root
 * @returns {{ segments: string[], up: number, trailingSlash: boolean }}
 *   `segments` holds the kept segments in order, `up` counts the `..`
 *   segments that could not be resolved or kept, and `trailingSlash` is true
 *   when the last raw segment of `path` was empty, `.` or `..`.
 */
function normalizePathSegments(path, allowAboveRoot) {
  if (!path) return { segments: [], up: 0, trailingSlash: false };

  const len = path.length;
  const segments = [];
  let up = 0;
  // Index of the first character of the segment currently being scanned.
  let segStart = 0;
  // Raw text of the most recently terminated segment ('' when it was empty).
  let lastSeg = '';

  // Walk one position past the end and treat that position as a virtual '/'
  // so the final segment is flushed by the same code path as the others.
  for (let i = 0; i <= len; i++) {
    const code = i < len ?
      StringPrototypeCharCodeAt(path, i) : CHAR_FORWARD_SLASH;
    if (code !== CHAR_FORWARD_SLASH) continue;

    if (segStart === i) {
      // Empty segment: leading '/', '//' or the position right after a
      // trailing '/'. Nothing is emitted for it.
      lastSeg = '';
    } else {
      const segment = StringPrototypeSlice(path, segStart, i);
      lastSeg = segment;

      if (segment === '..') {
        if (segments.length > 0 && segments[segments.length - 1] !== '..') {
          // Parent directory cancels the previously kept segment.
          segments.pop();
        } else if (allowAboveRoot) {
          // Nothing left to cancel: keep the '..' in place.
          ArrayPrototypePush(segments, '..');
        } else {
          // Nothing left to cancel and climbing is not allowed: only count.
          up++;
        }
      } else if (segment !== '.') {
        // Regular segment ('.' refers to the current directory and is
        // dropped).
        ArrayPrototypePush(segments, segment);
      }
    }
    segStart = i + 1;
  }

  // The trailing slash is decided by the last raw segment, before
  // normalization: a path ending in '/', '.' or '..' gets one.
  const trailingSlash = lastSeg === '' || lastSeg === '.' || lastSeg === '..';

  return { segments, up, trailingSlash };
}
154+
94155
// define these here so at least they only have to be
95156
// compiled once on the first module load.
96157
const protocolPattern = /^[a-z0-9.+-]+:/i;
@@ -127,6 +188,7 @@ const {
127188
CHAR_VERTICAL_LINE,
128189
CHAR_AT,
129190
CHAR_COLON,
191+
CHAR_DOT,
130192
} = require('internal/constants');
131193

132194
let urlParseWarned = false;
@@ -824,11 +886,14 @@ Url.prototype.resolveObject = function resolveObject(relative) {
824886
let mustEndAbs = (isRelAbs || isSourceAbs ||
825887
(result.host && relative.pathname));
826888
const removeAllDots = mustEndAbs;
827-
let srcPath = (result.pathname && result.pathname.split('/')) || [];
828-
const relPath = (relative.pathname && relative.pathname.split('/')) || [];
829889
const noLeadingSlashes = result.protocol &&
830890
!slashedProtocol.has(result.protocol);
831891

892+
// Build the combined path string for normalization
893+
let combinedPath = '';
894+
let srcHost = ''; // For noLeadingSlashes protocols
895+
let relHost = ''; // For noLeadingSlashes protocols
896+
832897
// If the url is a non-slashed url, then relative
833898
// links like ../.. should be able
834899
// to crawl up to the hostname, as well. This is strange.
@@ -837,22 +902,15 @@ Url.prototype.resolveObject = function resolveObject(relative) {
837902
if (noLeadingSlashes) {
838903
result.hostname = '';
839904
result.port = null;
840-
if (result.host) {
841-
if (srcPath[0] === '') srcPath[0] = result.host;
842-
else srcPath.unshift(result.host);
843-
}
905+
srcHost = result.host || '';
844906
result.host = '';
845907
if (relative.protocol) {
846908
relative.hostname = null;
847909
relative.port = null;
848910
result.auth = null;
849-
if (relative.host) {
850-
if (relPath[0] === '') relPath[0] = relative.host;
851-
else relPath.unshift(relative.host);
852-
}
911+
relHost = relative.host || '';
853912
relative.host = null;
854913
}
855-
mustEndAbs &&= (relPath[0] === '' || srcPath[0] === '');
856914
}
857915

858916
if (isRelAbs) {
@@ -868,30 +926,65 @@ Url.prototype.resolveObject = function resolveObject(relative) {
868926
}
869927
result.search = relative.search;
870928
result.query = relative.query;
871-
srcPath = relPath;
872-
// Fall through to the dot-handling below.
873-
} else if (relPath.length) {
929+
// Use relative path directly
930+
if (noLeadingSlashes && relHost) {
931+
combinedPath = (relative.pathname && relative.pathname.charAt(0) === '/' ?
932+
relHost + relative.pathname : relHost + '/' + (relative.pathname || ''));
933+
} else {
934+
combinedPath = relative.pathname || '';
935+
}
936+
} else if (relative.pathname) {
874937
// it's relative
875938
// throw away the existing file, and take the new path instead.
876-
srcPath ||= [];
877-
srcPath.pop();
878-
srcPath = srcPath.concat(relPath);
879939
result.search = relative.search;
880940
result.query = relative.query;
941+
942+
// Build combined path: source path (minus last segment) + relative path
943+
let srcPathname = result.pathname || '';
944+
if (noLeadingSlashes && srcHost) {
945+
srcPathname = (srcPathname && srcPathname.charAt(0) === '/' ?
946+
srcHost + srcPathname : srcHost + '/' + srcPathname);
947+
}
948+
949+
// Remove the last segment from source (the "file" part)
950+
const lastSlashIndex = StringPrototypeLastIndexOf(srcPathname, '/');
951+
if (lastSlashIndex >= 0) {
952+
srcPathname = StringPrototypeSlice(srcPathname, 0, lastSlashIndex + 1);
953+
} else {
954+
srcPathname = '';
955+
}
956+
957+
// Append relative pathname
958+
let relPathname = relative.pathname;
959+
if (noLeadingSlashes && relHost) {
960+
relPathname = (relPathname && relPathname.charAt(0) === '/' ?
961+
relHost + relPathname : relHost + '/' + relPathname);
962+
}
963+
combinedPath = srcPathname + relPathname;
881964
} else if (relative.search !== null && relative.search !== undefined) {
882965
// Just pull out the search.
883966
// like href='?foo'.
884967
// Put this after the other two cases because it simplifies the booleans
885968
if (noLeadingSlashes) {
886-
result.hostname = result.host = srcPath.shift();
969+
// Extract host from first segment of source path
970+
const srcPathname = result.pathname || '';
971+
const firstSlashIdx = StringPrototypeIndexOf(srcPathname, '/');
972+
if (firstSlashIdx > 0) {
973+
result.hostname = result.host = StringPrototypeSlice(srcPathname, 0, firstSlashIdx);
974+
} else if (firstSlashIdx === -1 && srcPathname) {
975+
result.hostname = result.host = srcPathname;
976+
} else if (srcHost) {
977+
result.hostname = result.host = srcHost;
978+
} else {
979+
result.hostname = result.host = '';
980+
}
887981
// Occasionally the auth can get stuck only in host.
888-
// This especially happens in cases like
889-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
890982
const authInHost =
891-
result.host && result.host.indexOf('@') > 0 && result.host.split('@');
983+
result.host && StringPrototypeIndexOf(result.host, '@') > 0;
892984
if (authInHost) {
893-
result.auth = authInHost.shift();
894-
result.host = result.hostname = authInHost.shift();
985+
const atIdx = StringPrototypeIndexOf(result.host, '@');
986+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
987+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
895988
}
896989
}
897990
result.search = relative.search;
@@ -903,9 +996,24 @@ Url.prototype.resolveObject = function resolveObject(relative) {
903996
}
904997
result.href = result.format();
905998
return result;
999+
} else {
1000+
// No relative path at all, use source path
1001+
if (noLeadingSlashes && srcHost) {
1002+
const srcPathname = result.pathname || '';
1003+
combinedPath = (srcPathname && srcPathname.charAt(0) === '/' ?
1004+
srcHost + srcPathname : srcHost + '/' + srcPathname);
1005+
} else {
1006+
combinedPath = result.pathname || '';
1007+
}
9061008
}
9071009

908-
if (!srcPath.length) {
1010+
// Check if we need to handle noLeadingSlashes mustEndAbs
1011+
if (noLeadingSlashes) {
1012+
const startsWithSlash = combinedPath && combinedPath.charAt(0) === '/';
1013+
mustEndAbs &&= startsWithSlash;
1014+
}
1015+
1016+
if (!combinedPath) {
9091017
// No path at all. All other things were already handled above.
9101018
result.pathname = null;
9111019
// To support http.request
@@ -918,75 +1026,78 @@ Url.prototype.resolveObject = function resolveObject(relative) {
9181026
return result;
9191027
}
9201028

921-
// If a url ENDs in . or .., then it must get a trailing slash.
922-
// however, if it ends in anything else non-slashy,
923-
// then it must NOT get a trailing slash.
924-
let last = srcPath[srcPath.length - 1];
925-
const hasTrailingSlash = (
926-
((result.host || relative.host || srcPath.length > 1) &&
927-
(last === '.' || last === '..')) || last === '');
928-
929-
// Strip single dots, resolve double dots to parent dir
930-
// if the path tries to go above the root, `up` ends up > 0
931-
let up = 0;
932-
for (let i = srcPath.length - 1; i >= 0; i--) {
933-
last = srcPath[i];
934-
if (last === '.') {
935-
spliceOne(srcPath, i);
936-
} else if (last === '..') {
937-
spliceOne(srcPath, i);
938-
up++;
939-
} else if (up) {
940-
spliceOne(srcPath, i);
941-
up--;
1029+
// Use optimized single-pass normalization (O(n) instead of O(n²))
1030+
const allowAboveRoot = !mustEndAbs && !removeAllDots;
1031+
const { segments, up, trailingSlash } = normalizePathSegments(combinedPath, allowAboveRoot);
1032+
1033+
const pathHadMultipleSegments = combinedPath && StringPrototypeIndexOf(combinedPath, '/') !== -1;
1034+
const hasTrailingSlash = trailingSlash &&
1035+
(result.host || relative.host || pathHadMultipleSegments);
1036+
1037+
// Handle remaining 'up' count - add leading .. if allowed
1038+
let srcPath = segments;
1039+
if (up > 0 && allowAboveRoot) {
1040+
// Prepend '..' segments for remaining up count
1041+
const newPath = [];
1042+
for (let i = 0; i < up; i++) {
1043+
ArrayPrototypePush(newPath, '..');
9421044
}
943-
}
944-
945-
// If the path is allowed to go above the root, restore leading ..s
946-
if (!mustEndAbs && !removeAllDots) {
947-
while (up--) {
948-
srcPath.unshift('..');
1045+
for (let i = 0; i < srcPath.length; i++) {
1046+
ArrayPrototypePush(newPath, srcPath[i]);
9491047
}
1048+
srcPath = newPath;
9501049
}
9511050

952-
if (mustEndAbs && srcPath[0] !== '' &&
953-
(!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
954-
srcPath.unshift('');
1051+
// Handle mustEndAbs - ensure path starts with /
1052+
let isAbsolute = srcPath.length > 0 && srcPath[0] === '';
1053+
if (!isAbsolute && srcPath.length > 0 && srcPath[0] &&
1054+
srcPath[0].charAt(0) === '/') {
1055+
isAbsolute = true;
9551056
}
9561057

957-
if (hasTrailingSlash && StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/') {
958-
srcPath.push('');
959-
}
960-
961-
const isAbsolute = srcPath[0] === '' ||
962-
(srcPath[0] && srcPath[0].charAt(0) === '/');
963-
964-
// put the host back
1058+
// put the host back for noLeadingSlashes protocols
9651059
if (noLeadingSlashes) {
9661060
result.hostname =
967-
result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : '';
1061+
result.host = isAbsolute ? '' : srcPath.length ? srcPath[0] : '';
1062+
if (result.host) {
1063+
// Remove the host from srcPath (first element)
1064+
srcPath = srcPath.length > 1 ?
1065+
ArrayPrototypeJoin(srcPath, '/').slice(result.host.length + 1).split('/') :
1066+
[];
1067+
if (srcPath.length === 1 && srcPath[0] === '') srcPath = [];
1068+
}
9681069
// Occasionally the auth can get stuck only in host.
969-
// This especially happens in cases like
970-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
971-
const authInHost = result.host && result.host.indexOf('@') > 0 ?
972-
result.host.split('@') : false;
1070+
const authInHost = result.host && StringPrototypeIndexOf(result.host, '@') > 0;
9731071
if (authInHost) {
974-
result.auth = authInHost.shift();
975-
result.host = result.hostname = authInHost.shift();
1072+
const atIdx = StringPrototypeIndexOf(result.host, '@');
1073+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
1074+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
9761075
}
9771076
}
9781077

9791078
mustEndAbs ||= (result.host && srcPath.length);
9801079

9811080
if (mustEndAbs && !isAbsolute) {
982-
srcPath.unshift('');
1081+
// Need to add leading empty string for absolute path
1082+
const newPath = [''];
1083+
for (let i = 0; i < srcPath.length; i++) {
1084+
ArrayPrototypePush(newPath, srcPath[i]);
1085+
}
1086+
srcPath = newPath;
1087+
isAbsolute = true;
1088+
}
1089+
1090+
if (hasTrailingSlash &&
1091+
(srcPath.length === 0 ||
1092+
StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/')) {
1093+
ArrayPrototypePush(srcPath, '');
9831094
}
9841095

9851096
if (!srcPath.length) {
9861097
result.pathname = null;
9871098
result.path = null;
9881099
} else {
989-
result.pathname = srcPath.join('/');
1100+
result.pathname = ArrayPrototypeJoin(srcPath, '/');
9901101
}
9911102

9921103
// To support http.request

0 commit comments

Comments
 (0)