Skip to content

Commit 7d6ee23

Browse files
committed
use lazyregexp for various regular expressions
Using regexp.MustCompile consumes a significant amount of memory when importing the package, even if those regular expressions are not used. This changes compiling the regular expressions to use a lazyregexp package so that they're only compiled the first time they're used. There are various regular expressions remaining that are still compiled on import, but these are exported, so changing them to a sync.OnceValue would be a breaking change; we can still decide to do so, but leaving that for a follow-up. To verify, compile a basic binary importing the package; package main import _ "github.com/distribution/reference" func main() {} Before: for i in $(seq 1 5); do GODEBUG=inittrace=1 ./before 2>&1 | grep distribution/reference; done init github.com/distribution/reference @0.94 ms, 0.22 ms clock, 415712 bytes, 3599 allocs init github.com/distribution/reference @0.39 ms, 0.22 ms clock, 415712 bytes, 3599 allocs init github.com/distribution/reference @0.39 ms, 0.23 ms clock, 415712 bytes, 3599 allocs init github.com/distribution/reference @0.45 ms, 0.27 ms clock, 415712 bytes, 3599 allocs init github.com/distribution/reference @0.38 ms, 0.24 ms clock, 415712 bytes, 3599 allocs After: for i in $(seq 1 5); do GODEBUG=inittrace=1 ./after 2>&1 | grep distribution/reference; done init github.com/distribution/reference/internal/lazyregexp @0.85 ms, 0 ms clock, 0 bytes, 0 allocs init github.com/distribution/reference @1.0 ms, 0.16 ms clock, 238680 bytes, 1383 allocs init github.com/distribution/reference/internal/lazyregexp @0.33 ms, 0 ms clock, 0 bytes, 0 allocs init github.com/distribution/reference @0.42 ms, 0.16 ms clock, 238680 bytes, 1383 allocs init github.com/distribution/reference/internal/lazyregexp @0.39 ms, 0 ms clock, 0 bytes, 0 allocs init github.com/distribution/reference @0.47 ms, 0.19 ms clock, 238680 bytes, 1383 allocs init github.com/distribution/reference/internal/lazyregexp @0.36 ms, 0 ms clock, 0 bytes, 0 allocs init github.com/distribution/reference 
@0.47 ms, 0.14 ms clock, 238680 bytes, 1383 allocs init github.com/distribution/reference/internal/lazyregexp @0.29 ms, 0 ms clock, 0 bytes, 0 allocs init github.com/distribution/reference @0.38 ms, 0.15 ms clock, 238680 bytes, 1383 allocs Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
1 parent f189e68 commit 7d6ee23

7 files changed

Lines changed: 164 additions & 7 deletions

File tree

NOTICE

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
lazyregexp implementation (internal/lazyregexp)
2+
3+
The internal/lazyregexp directory contains code derived from the Go project.
4+
5+
Copyright 2009-2018 The Go Authors.
6+
Licensed under the BSD 3-Clause License.
7+
8+
Modifications Copyright 2026 The CNCF distribution authors.
9+
10+
The BSD license text and Go patent grant are included in
11+
internal/lazyregexp/LICENSE and internal/lazyregexp/PATENTS.

internal/lazyregexp/LICENSE

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Copyright 2009 The Go Authors.
2+
3+
Redistribution and use in source and binary forms, with or without
4+
modification, are permitted provided that the following conditions are
5+
met:
6+
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above
10+
copyright notice, this list of conditions and the following disclaimer
11+
in the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of Google LLC nor the names of its
14+
contributors may be used to endorse or promote products derived from
15+
this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

internal/lazyregexp/PATENTS

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Additional IP Rights Grant (Patents)
2+
3+
"This implementation" means the copyrightable works distributed by
4+
Google as part of the Go project.
5+
6+
Google hereby grants to You a perpetual, worldwide, non-exclusive,
7+
no-charge, royalty-free, irrevocable (except as stated in this section)
8+
patent license to make, have made, use, offer to sell, sell, import,
9+
transfer and otherwise run, modify and propagate the contents of this
10+
implementation of Go, where such license applies only to those patent
11+
claims, both currently owned or controlled by Google and acquired in
12+
the future, licensable by Google that are necessarily infringed by this
13+
implementation of Go. This grant does not include claims that would be
14+
infringed only as a consequence of further modification of this
15+
implementation. If you or your agent or exclusive licensee institute or
16+
order or agree to the institution of patent litigation against any
17+
entity (including a cross-claim or counterclaim in a lawsuit) alleging
18+
that this implementation of Go or any code incorporated within this
19+
implementation of Go constitutes direct or contributory patent
20+
infringement, or inducement of patent infringement, then any patent
21+
rights granted to you under this License for this implementation of Go
22+
shall terminate as of the date such litigation is filed.

internal/lazyregexp/lazyregexp.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Code below was copied from:
6+
// https://github.com/golang/go/blob/go1.13/src/internal/lazyregexp/lazyre.go
7+
8+
// Package lazyregexp is a thin wrapper over regexp, allowing the use of global
9+
// regexp variables without forcing them to be compiled at init.
10+
package lazyregexp
11+
12+
import (
13+
"os"
14+
"regexp"
15+
"strings"
16+
"sync"
17+
)
18+
19+
// Regexp is a wrapper around regexp.Regexp, where the underlying regexp will be
// compiled the first time it is needed.
//
// A Regexp contains a sync.Once, so it should be used through a pointer and
// not copied.
type Regexp struct {
	str  string         // source pattern; never modified after construction
	once sync.Once      // guards the one-time compilation performed by build
	rx   *regexp.Regexp // compiled pattern; nil until build has run
}

// re returns the compiled regexp, compiling the stored pattern on first use.
func (r *Regexp) re() *regexp.Regexp {
	r.once.Do(r.build)
	return r.rx
}

// build compiles the stored pattern with regexp.MustCompile, which panics if
// the pattern is not a valid regular expression.
//
// The source pattern is deliberately kept: String reports it, and leaving it
// untouched means String never reads a field that build writes.
func (r *Regexp) build() {
	r.rx = regexp.MustCompile(r.str)
}

// FindStringSubmatch compiles the regexp if needed and returns the result of
// regexp.Regexp.FindStringSubmatch for s.
func (r *Regexp) FindStringSubmatch(s string) []string {
	return r.re().FindStringSubmatch(s)
}

// MatchString compiles the regexp if needed and reports whether s contains
// any match of it.
func (r *Regexp) MatchString(s string) bool {
	return r.re().MatchString(s)
}

// SubexpNames compiles the regexp if needed and returns the names of its
// parenthesized subexpressions.
func (r *Regexp) SubexpNames() []string {
	return r.re().SubexpNames()
}

// NumSubexp compiles the regexp if needed and returns the number of
// parenthesized subexpressions in it.
func (r *Regexp) NumSubexp() int {
	return r.re().NumSubexp()
}

// String returns the source pattern the Regexp was created with. It does not
// trigger compilation and returns the same value before and after the regexp
// has been compiled.
func (r *Regexp) String() string { return r.str }
54+
55+
var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
56+
57+
// New creates a new lazy regexp, delaying the compiling work until it is first
58+
// needed. If the code is being run as part of tests, the regexp compiling will
59+
// happen immediately.
60+
func New(str string) *Regexp {
61+
lr := &Regexp{str: str}
62+
if inTest {
63+
// In tests, always compile the regexps early.
64+
lr.re()
65+
}
66+
return lr
67+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package lazyregexp
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestCompileOnce(t *testing.T) {
8+
t.Run("invalid regexp", func(t *testing.T) {
9+
defer func() {
10+
if r := recover(); r == nil {
11+
t.Errorf("expected a panic")
12+
}
13+
}()
14+
_ = New("[")
15+
})
16+
t.Run("valid regexp", func(t *testing.T) {
17+
re := New("[a-z]")
18+
ok := re.MatchString("hello")
19+
if !ok {
20+
t.Errorf("expected a match")
21+
}
22+
})
23+
}

regexp.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package reference
33
import (
44
"regexp"
55
"strings"
6+
7+
"github.com/distribution/reference/internal/lazyregexp"
68
)
79

810
// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
@@ -31,7 +33,7 @@ var NameRegexp = regexp.MustCompile(namePat)
3133
// ReferenceRegexp is the full supported format of a reference. The regexp
3234
// is anchored and has capturing groups for name, tag, and digest
3335
// components.
34-
var ReferenceRegexp = referenceRegexp
36+
var ReferenceRegexp = regexp.MustCompile(referencePat)
3537

3638
// TagRegexp matches valid tag names. From [docker/docker:graph/tags.go].
3739
//
@@ -112,15 +114,15 @@ var (
112114
// referenceRegexp is the full supported format of a reference. The regexp
113115
// is anchored and has capturing groups for name, tag, and digest
114116
// components.
115-
referenceRegexp = regexp.MustCompile(referencePat)
117+
referenceRegexp = lazyregexp.New(referencePat)
116118

117119
// anchoredTagRegexp matches valid tag names, anchored at the start and
118120
// end of the matched string.
119-
anchoredTagRegexp = regexp.MustCompile(anchored(tag))
121+
anchoredTagRegexp = lazyregexp.New(anchored(tag))
120122

121123
// anchoredDigestRegexp matches valid digests, anchored at the start and
122124
// end of the matched string.
123-
anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
125+
anchoredDigestRegexp = lazyregexp.New(anchored(digestPat))
124126

125127
// pathComponent restricts path-components to start with an alphanumeric
126128
// character, with following parts able to be separated by a separator
@@ -136,14 +138,14 @@ var (
136138

137139
// anchoredNameRegexp is used to parse a name value, capturing the
138140
// domain and trailing components.
139-
anchoredNameRegexp = regexp.MustCompile(anchoredNamePat)
141+
anchoredNameRegexp = lazyregexp.New(anchoredNamePat)
140142
anchoredNamePat = anchored(optional(capture(domainAndPort), `/`), capture(remoteName))
141143

142144
referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
143145

144146
// anchoredIdentifierRegexp is used to check or match an
145147
// identifier value, anchored at start and end of string.
146-
anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
148+
anchoredIdentifierRegexp = lazyregexp.New(anchored(identifier))
147149
)
148150

149151
// optional wraps the expression in a non-capturing group and makes the

regexp_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@ import (
66
"testing"
77
)
88

9+
// regExper is the set of regular-expression methods made available to
// checkRegexp, which takes this interface rather than a concrete
// *regexp.Regexp.
type regExper interface {
10+
FindStringSubmatch(s string) []string
11+
NumSubexp() int
12+
}
13+
914
type regexpMatch struct {
1015
input string
1116
match bool
1217
subs []string
1318
}
1419

15-
func checkRegexp(t *testing.T, r *regexp.Regexp, m regexpMatch) {
20+
func checkRegexp(t *testing.T, r regExper, m regexpMatch) {
1621
t.Helper()
1722
matches := r.FindStringSubmatch(m.input)
1823
if m.match && matches != nil {

0 commit comments

Comments
 (0)