Skip to content

Commit ad0cf95

Browse files
committed
url: optimize path resolution with single-pass algorithm
1 parent cab20f2 commit ad0cf95

File tree

1 file changed

+187
-72
lines changed

1 file changed

+187
-72
lines changed

lib/url.js

Lines changed: 187 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323

2424
const {
2525
ArrayPrototypeJoin,
26+
ArrayPrototypePush,
2627
Boolean,
2728
Int8Array,
2829
ObjectAssign,
2930
ObjectKeys,
3031
StringPrototypeAt,
3132
StringPrototypeCharCodeAt,
3233
StringPrototypeIndexOf,
34+
StringPrototypeLastIndexOf,
3335
StringPrototypeReplaceAll,
3436
StringPrototypeSlice,
3537
decodeURIComponent,
@@ -52,7 +54,6 @@ const {
5254

5355
// This ensures setURLConstructor() is called before the native
5456
// URL::ToObject() method is used.
55-
const { spliceOne } = require('internal/util');
5657
const { isInsideNodeModules } = internalBinding('util');
5758

5859
// WHATWG URL implementation provided by internal/url
@@ -91,6 +92,69 @@ function Url() {
9192

9293
// Reference: RFC 3986, RFC 1808, RFC 2396
9394

95+
const CHAR_FORWARD_SLASH = 47; /* / */
96+
const CHAR_DOT = 46; /* . */
97+
98+
/**
99+
* Normalize URL path segments by resolving . and .. in a single pass.
100+
* This is O(n) compared to the O(n²) spliceOne approach.
101+
*
102+
* @param {string} path - Combined pathname to normalize
103+
* @param {boolean} allowAboveRoot - Whether .. can go above root
104+
* @returns {{ segments: string[], up: number, trailingSlash: boolean }}
105+
*/
106+
function normalizePathSegments(path, allowAboveRoot) {
107+
if (!path) return { segments: [], up: 0, trailingSlash: false };
108+
109+
const len = path.length;
110+
const segments = [];
111+
let up = 0;
112+
let lastSlash = -1;
113+
let segStart = 0;
114+
let lastSeg = '';
115+
let trailingSlash = false;
116+
117+
// Iterate through the path, processing segments
118+
for (let i = 0; i <= len; i++) {
119+
const code = i < len ? StringPrototypeCharCodeAt(path, i) : CHAR_FORWARD_SLASH;
120+
121+
if (code === CHAR_FORWARD_SLASH) {
122+
if (lastSlash === i - 1 || segStart === i) {
123+
// Empty segment (// or leading /)
124+
// Track that the last segment was empty for trailing slash detection
125+
lastSeg = '';
126+
} else {
127+
const segment = StringPrototypeSlice(path, segStart, i);
128+
lastSeg = segment;
129+
130+
if (segment === '.') {
131+
// Current directory - skip
132+
} else if (segment === '..') {
133+
// Parent directory
134+
if (segments.length > 0 && segments[segments.length - 1] !== '..') {
135+
segments.pop();
136+
} else if (allowAboveRoot) {
137+
ArrayPrototypePush(segments, '..');
138+
} else {
139+
up++;
140+
}
141+
} else {
142+
// Regular segment
143+
ArrayPrototypePush(segments, segment);
144+
}
145+
}
146+
lastSlash = i;
147+
segStart = i + 1;
148+
}
149+
}
150+
151+
// Determine trailing slash based on what the last segment was before normalization
152+
// If path ends with /, ., or .., we need a trailing slash
153+
trailingSlash = lastSeg === '' || lastSeg === '.' || lastSeg === '..';
154+
155+
return { segments, up, trailingSlash };
156+
}
157+
94158
// define these here so at least they only have to be
95159
// compiled once on the first module load.
96160
const protocolPattern = /^[a-z0-9.+-]+:/i;
@@ -824,11 +888,14 @@ Url.prototype.resolveObject = function resolveObject(relative) {
824888
let mustEndAbs = (isRelAbs || isSourceAbs ||
825889
(result.host && relative.pathname));
826890
const removeAllDots = mustEndAbs;
827-
let srcPath = (result.pathname && result.pathname.split('/')) || [];
828-
const relPath = (relative.pathname && relative.pathname.split('/')) || [];
829891
const noLeadingSlashes = result.protocol &&
830892
!slashedProtocol.has(result.protocol);
831893

894+
// Build the combined path string for normalization
895+
let combinedPath = '';
896+
let srcHost = ''; // For noLeadingSlashes protocols
897+
let relHost = ''; // For noLeadingSlashes protocols
898+
832899
// If the url is a non-slashed url, then relative
833900
// links like ../.. should be able
834901
// to crawl up to the hostname, as well. This is strange.
@@ -837,22 +904,15 @@ Url.prototype.resolveObject = function resolveObject(relative) {
837904
if (noLeadingSlashes) {
838905
result.hostname = '';
839906
result.port = null;
840-
if (result.host) {
841-
if (srcPath[0] === '') srcPath[0] = result.host;
842-
else srcPath.unshift(result.host);
843-
}
907+
srcHost = result.host || '';
844908
result.host = '';
845909
if (relative.protocol) {
846910
relative.hostname = null;
847911
relative.port = null;
848912
result.auth = null;
849-
if (relative.host) {
850-
if (relPath[0] === '') relPath[0] = relative.host;
851-
else relPath.unshift(relative.host);
852-
}
913+
relHost = relative.host || '';
853914
relative.host = null;
854915
}
855-
mustEndAbs &&= (relPath[0] === '' || srcPath[0] === '');
856916
}
857917

858918
if (isRelAbs) {
@@ -868,30 +928,65 @@ Url.prototype.resolveObject = function resolveObject(relative) {
868928
}
869929
result.search = relative.search;
870930
result.query = relative.query;
871-
srcPath = relPath;
872-
// Fall through to the dot-handling below.
873-
} else if (relPath.length) {
931+
// Use relative path directly
932+
if (noLeadingSlashes && relHost) {
933+
combinedPath = (relative.pathname && relative.pathname.charAt(0) === '/' ?
934+
relHost + relative.pathname : relHost + '/' + (relative.pathname || ''));
935+
} else {
936+
combinedPath = relative.pathname || '';
937+
}
938+
} else if (relative.pathname) {
874939
// it's relative
875940
// throw away the existing file, and take the new path instead.
876-
srcPath ||= [];
877-
srcPath.pop();
878-
srcPath = srcPath.concat(relPath);
879941
result.search = relative.search;
880942
result.query = relative.query;
943+
944+
// Build combined path: source path (minus last segment) + relative path
945+
let srcPathname = result.pathname || '';
946+
if (noLeadingSlashes && srcHost) {
947+
srcPathname = (srcPathname && srcPathname.charAt(0) === '/' ?
948+
srcHost + srcPathname : srcHost + '/' + srcPathname);
949+
}
950+
951+
// Remove the last segment from source (the "file" part)
952+
const lastSlashIndex = StringPrototypeLastIndexOf(srcPathname, '/');
953+
if (lastSlashIndex >= 0) {
954+
srcPathname = StringPrototypeSlice(srcPathname, 0, lastSlashIndex + 1);
955+
} else {
956+
srcPathname = '';
957+
}
958+
959+
// Append relative pathname
960+
let relPathname = relative.pathname;
961+
if (noLeadingSlashes && relHost) {
962+
relPathname = (relPathname && relPathname.charAt(0) === '/' ?
963+
relHost + relPathname : relHost + '/' + relPathname);
964+
}
965+
combinedPath = srcPathname + relPathname;
881966
} else if (relative.search !== null && relative.search !== undefined) {
882967
// Just pull out the search.
883968
// like href='?foo'.
884969
// Put this after the other two cases because it simplifies the booleans
885970
if (noLeadingSlashes) {
886-
result.hostname = result.host = srcPath.shift();
971+
// Extract host from first segment of source path
972+
const srcPathname = result.pathname || '';
973+
const firstSlashIdx = StringPrototypeIndexOf(srcPathname, '/');
974+
if (firstSlashIdx > 0) {
975+
result.hostname = result.host = StringPrototypeSlice(srcPathname, 0, firstSlashIdx);
976+
} else if (firstSlashIdx === -1 && srcPathname) {
977+
result.hostname = result.host = srcPathname;
978+
} else if (srcHost) {
979+
result.hostname = result.host = srcHost;
980+
} else {
981+
result.hostname = result.host = '';
982+
}
887983
// Occasionally the auth can get stuck only in host.
888-
// This especially happens in cases like
889-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
890984
const authInHost =
891-
result.host && result.host.indexOf('@') > 0 && result.host.split('@');
985+
result.host && StringPrototypeIndexOf(result.host, '@') > 0;
892986
if (authInHost) {
893-
result.auth = authInHost.shift();
894-
result.host = result.hostname = authInHost.shift();
987+
const atIdx = StringPrototypeIndexOf(result.host, '@');
988+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
989+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
895990
}
896991
}
897992
result.search = relative.search;
@@ -903,9 +998,24 @@ Url.prototype.resolveObject = function resolveObject(relative) {
903998
}
904999
result.href = result.format();
9051000
return result;
1001+
} else {
1002+
// No relative path at all, use source path
1003+
if (noLeadingSlashes && srcHost) {
1004+
const srcPathname = result.pathname || '';
1005+
combinedPath = (srcPathname && srcPathname.charAt(0) === '/' ?
1006+
srcHost + srcPathname : srcHost + '/' + srcPathname);
1007+
} else {
1008+
combinedPath = result.pathname || '';
1009+
}
9061010
}
9071011

908-
if (!srcPath.length) {
1012+
// Check if we need to handle noLeadingSlashes mustEndAbs
1013+
if (noLeadingSlashes) {
1014+
const startsWithSlash = combinedPath && combinedPath.charAt(0) === '/';
1015+
mustEndAbs &&= startsWithSlash;
1016+
}
1017+
1018+
if (!combinedPath) {
9091019
// No path at all. All other things were already handled above.
9101020
result.pathname = null;
9111021
// To support http.request
@@ -918,75 +1028,80 @@ Url.prototype.resolveObject = function resolveObject(relative) {
9181028
return result;
9191029
}
9201030

921-
// If a url ENDs in . or .., then it must get a trailing slash.
922-
// however, if it ends in anything else non-slashy,
923-
// then it must NOT get a trailing slash.
924-
let last = srcPath[srcPath.length - 1];
925-
const hasTrailingSlash = (
926-
((result.host || relative.host || srcPath.length > 1) &&
927-
(last === '.' || last === '..')) || last === '');
928-
929-
// Strip single dots, resolve double dots to parent dir
930-
// if the path tries to go above the root, `up` ends up > 0
931-
let up = 0;
932-
for (let i = srcPath.length - 1; i >= 0; i--) {
933-
last = srcPath[i];
934-
if (last === '.') {
935-
spliceOne(srcPath, i);
936-
} else if (last === '..') {
937-
spliceOne(srcPath, i);
938-
up++;
939-
} else if (up) {
940-
spliceOne(srcPath, i);
941-
up--;
1031+
// Use optimized single-pass normalization (O(n) instead of O(n²))
1032+
const allowAboveRoot = !mustEndAbs && !removeAllDots;
1033+
const { segments, up, trailingSlash } = normalizePathSegments(combinedPath, allowAboveRoot);
1034+
1035+
// Determine if result needs trailing slash
1036+
// hasTrailingSlash is true if path ended with /, ., or ..
1037+
const hasTrailingSlash = trailingSlash &&
1038+
(result.host || relative.host || segments.length > 0);
1039+
1040+
// Handle remaining 'up' count - add leading .. if allowed
1041+
let srcPath = segments;
1042+
if (up > 0 && allowAboveRoot) {
1043+
// Prepend '..' segments for remaining up count
1044+
const newPath = [];
1045+
for (let i = 0; i < up; i++) {
1046+
ArrayPrototypePush(newPath, '..');
9421047
}
943-
}
944-
945-
// If the path is allowed to go above the root, restore leading ..s
946-
if (!mustEndAbs && !removeAllDots) {
947-
while (up--) {
948-
srcPath.unshift('..');
1048+
for (let i = 0; i < srcPath.length; i++) {
1049+
ArrayPrototypePush(newPath, srcPath[i]);
9491050
}
1051+
srcPath = newPath;
9501052
}
9511053

952-
if (mustEndAbs && srcPath[0] !== '' &&
953-
(!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
954-
srcPath.unshift('');
955-
}
956-
957-
if (hasTrailingSlash && StringPrototypeAt(ArrayPrototypeJoin(srcPath, '/'), -1) !== '/') {
958-
srcPath.push('');
1054+
// Handle mustEndAbs - ensure path starts with /
1055+
let isAbsolute = srcPath.length > 0 && srcPath[0] === '';
1056+
if (!isAbsolute && srcPath.length > 0 && srcPath[0] &&
1057+
srcPath[0].charAt(0) === '/') {
1058+
isAbsolute = true;
9591059
}
9601060

961-
const isAbsolute = srcPath[0] === '' ||
962-
(srcPath[0] && srcPath[0].charAt(0) === '/');
963-
964-
// put the host back
1061+
// put the host back for noLeadingSlashes protocols
9651062
if (noLeadingSlashes) {
9661063
result.hostname =
967-
result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : '';
1064+
result.host = isAbsolute ? '' : srcPath.length ? srcPath[0] : '';
1065+
if (result.host) {
1066+
// Remove the host from srcPath (first element)
1067+
srcPath = srcPath.length > 1 ?
1068+
ArrayPrototypeJoin(srcPath, '/').slice(result.host.length + 1).split('/') :
1069+
[];
1070+
if (srcPath.length === 1 && srcPath[0] === '') srcPath = [];
1071+
}
9681072
// Occasionally the auth can get stuck only in host.
969-
// This especially happens in cases like
970-
// url.resolveObject('mailto:local1@domain1', 'local2@domain2')
971-
const authInHost = result.host && result.host.indexOf('@') > 0 ?
972-
result.host.split('@') : false;
1073+
const authInHost = result.host && StringPrototypeIndexOf(result.host, '@') > 0;
9731074
if (authInHost) {
974-
result.auth = authInHost.shift();
975-
result.host = result.hostname = authInHost.shift();
1075+
const atIdx = StringPrototypeIndexOf(result.host, '@');
1076+
result.auth = StringPrototypeSlice(result.host, 0, atIdx);
1077+
result.host = result.hostname = StringPrototypeSlice(result.host, atIdx + 1);
9761078
}
9771079
}
9781080

9791081
mustEndAbs ||= (result.host && srcPath.length);
9801082

9811083
if (mustEndAbs && !isAbsolute) {
982-
srcPath.unshift('');
1084+
// Need to add leading empty string for absolute path
1085+
const newPath = [''];
1086+
for (let i = 0; i < srcPath.length; i++) {
1087+
ArrayPrototypePush(newPath, srcPath[i]);
1088+
}
1089+
srcPath = newPath;
1090+
isAbsolute = true;
1091+
}
1092+
1093+
// Handle trailing slash
1094+
if (hasTrailingSlash) {
1095+
if (srcPath.length === 0 || srcPath[srcPath.length - 1] !== '') {
1096+
ArrayPrototypePush(srcPath, '');
1097+
}
9831098
}
9841099

9851100
if (!srcPath.length) {
9861101
result.pathname = null;
9871102
result.path = null;
9881103
} else {
989-
result.pathname = srcPath.join('/');
1104+
result.pathname = ArrayPrototypeJoin(srcPath, '/');
9901105
}
9911106

9921107
// To support request.http

0 commit comments

Comments
 (0)