Skip to content

Commit 8af4f1e

Browse files
dunglas and claude committed
perf: cache special-scheme flag and switch scheme checks to map lookups
- Call protocolComponentMatchesSpecialScheme() once per New() and reuse the result for both the hostname and pathname branches instead of matching the component regex against the five special schemes twice.
- Replace specialSchemeList with a single map[string]struct{} (renamed to specialSchemeSet since the value is now a set, not an ordered list) so the per-component lookups in processHostnameForInit and processPathnameForInit are O(1). protocolComponentMatchesSpecialScheme just iterates the map keys, since order is irrelevant when the loop returns on the first hit.
- Collapse the port-defaulting loop into a direct DefaultPorts lookup gated on specialSchemeSet membership, so user-added DefaultPorts entries cannot silently trigger special-scheme behaviour. The processedInit protocol is lowercased before the check because in "pattern" mode processProtocolForInit does not canonicalize, and the protocol component is later compiled with canonicalizeProtocol which lowercases — so a mixed-case literal like "HTTP" should behave as "http" for port defaulting.

~5-10% reduction in ns/op on New() across pattern benchmarks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 92fee3d commit 8af4f1e

2 files changed

Lines changed: 28 additions & 16 deletions

File tree

parser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const fullWildcardRegexpValue = ".*"
1616

1717
// Experimental: this symbol is exported to allow users to add new values, but may be removed in the future.
1818
// TODO: there is nothing in the Go stdlib to find the default port associated with a protocol.
19-
// Let's just replace values for protocols in specialSchemeList for now.
19+
// Let's just replace values for protocols in specialSchemeSet for now.
2020
// This list could be completed using https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers
2121
var DefaultPorts = map[string]string{
2222
"http": "80",

urlpattern.go

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@ var (
1717
)
1818

1919
// https://url.spec.whatwg.org/#special-scheme
20-
var specialSchemeList = []string{"ftp", "http", "https", "ws", "wss"}
20+
var specialSchemeSet = map[string]struct{}{
21+
"ftp": {},
22+
"http": {},
23+
"https": {},
24+
"ws": {},
25+
"wss": {},
26+
}
2127

2228
type URLPatternResult struct {
2329
Inputs []string
@@ -100,7 +106,7 @@ type component struct {
100106

101107
// https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme
102108
func (c *component) protocolComponentMatchesSpecialScheme() bool {
103-
for _, scheme := range specialSchemeList {
109+
for scheme := range specialSchemeSet {
104110
if c.regularExpression.MatchString(scheme) {
105111
return true
106112
}
@@ -165,10 +171,18 @@ func (init *URLPatternInit) New(opt *Options) (*URLPattern, error) {
165171
}
166172

167173
var emptyString string
168-
for _, s := range specialSchemeList {
169-
if *processedInit.Protocol == s && *processedInit.Port == DefaultPorts[s] {
174+
// Only clear the port when the protocol is a WHATWG special scheme; the
175+
// exported DefaultPorts map is user-extendable, so keying off it alone
176+
// would quietly apply the behaviour to arbitrary user-added protocols.
177+
//
178+
// In "pattern" mode processedInit.Protocol is not canonicalized, so
179+
// lowercase it for the comparison: the protocol component is compiled
180+
// with canonicalizeProtocol (which lowercases), so the effective pattern
181+
// is the lowercase form.
182+
canonicalProtocol := strings.ToLower(*processedInit.Protocol)
183+
if _, isSpecial := specialSchemeSet[canonicalProtocol]; isSpecial {
184+
if dp, ok := DefaultPorts[canonicalProtocol]; ok && *processedInit.Port == dp {
170185
processedInit.Port = &emptyString
171-
break
172186
}
173187
}
174188

@@ -191,13 +205,15 @@ func (init *URLPatternInit) New(opt *Options) (*URLPattern, error) {
191205

192206
// If the result of running hostname pattern is an IPv6 address given processedInit["hostname"] is true, then set urlPattern’s hostname component to the result of compiling a component given processedInit["hostname"], canonicalize an IPv6 hostname, and hostname options.
193207

208+
protocolMatchesSpecialScheme := urlPattern.protocol.protocolComponentMatchesSpecialScheme()
209+
194210
hostnameOptions := options{delimiterCodePoint: '.'}
195211
if hostnamePatternIsIPv6Address(*processedInit.Hostname) {
196212
urlPattern.hostname, err = compileComponent(*processedInit.Hostname, canonicalizeIPv6Hostname, hostnameOptions)
197213
if err != nil {
198214
return nil, err
199215
}
200-
} else if urlPattern.protocol.protocolComponentMatchesSpecialScheme() || *processedInit.Protocol == "*" {
216+
} else if protocolMatchesSpecialScheme || *processedInit.Protocol == "*" {
201217
urlPattern.hostname, err = compileComponent(*processedInit.Hostname, canonicalizeDomainName, hostnameOptions)
202218
if err != nil {
203219
return nil, err
@@ -219,7 +235,7 @@ func (init *URLPatternInit) New(opt *Options) (*URLPattern, error) {
219235

220236
pathnameOptions := options{'/', '/', false}
221237

222-
if urlPattern.protocol.protocolComponentMatchesSpecialScheme() {
238+
if protocolMatchesSpecialScheme {
223239
pathCompileOptions := pathnameOptions
224240
pathCompileOptions.ignoreCase = opt.IgnoreCase
225241

@@ -629,10 +645,8 @@ func processHostnameForInit(value, protocolValue, uType string) (string, error)
629645
return canonicalizeDomainName(value)
630646
}
631647

632-
for _, s := range specialSchemeList {
633-
if protocolValue == s {
634-
return canonicalizeDomainName(value)
635-
}
648+
if _, ok := specialSchemeSet[protocolValue]; ok {
649+
return canonicalizeDomainName(value)
636650
}
637651

638652
return canonicalizeHostname(value, protocolValue)
@@ -657,10 +671,8 @@ func processPathnameForInit(pathnameValue, protocolValue, ptype string) (string,
657671
return canonicalizePathname(pathnameValue)
658672
}
659673

660-
for _, ss := range specialSchemeList {
661-
if protocolValue == ss {
662-
return canonicalizePathname(pathnameValue)
663-
}
674+
if _, ok := specialSchemeSet[protocolValue]; ok {
675+
return canonicalizePathname(pathnameValue)
664676
}
665677

666678
return canonicalizeOpaquePathname(pathnameValue)

0 commit comments

Comments
 (0)