Skip to content

Commit 3795e75

Browse files
authored
perf(vulnfeeds): Improve regex performance (#4842)
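Speed up fuzzy matching of normalized tags: compile the version regexes once at package scope and precompute, for each normalized tag, whether it contains pre-release text (rc/alpha/beta/preview), instead of re-running the regex inside the matching loop.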
1 parent 3930e59 commit 3795e75

2 files changed

Lines changed: 18 additions & 14 deletions

File tree

vulnfeeds/git/repository.go

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,9 @@ func (t Tags) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
5454

5555
// NormalizedTag holds a normalized (as by NormalizeRepoTags) tag and corresponding commit hash.
5656
type NormalizedTag struct {
57-
OriginalTag string
58-
Commit string
57+
OriginalTag string
58+
Commit string
59+
MatchesVersionText bool
5960
}
6061

6162
// RepoTagsMap holds all of the tags (naturally occurring and normalized) for a Git repo.
@@ -261,7 +262,11 @@ func NormalizeRepoTags(repoURL string, repoTagsCache *RepoTagsCache) (normalized
261262
// It's conceivable that not all tags are normalizable or potentially versions.
262263
continue
263264
}
264-
normalizedTags[normalizedTag] = NormalizedTag{OriginalTag: t.Tag, Commit: t.Commit}
265+
normalizedTags[normalizedTag] = NormalizedTag{
266+
OriginalTag: t.Tag,
267+
Commit: t.Commit,
268+
MatchesVersionText: validVersionText.MatchString(normalizedTag),
269+
}
265270
}
266271
if repoTagsCache != nil {
267272
// The RepoTags() call above will have cached the Tag map already

vulnfeeds/git/versions.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,23 @@ import (
2323
"github.com/google/osv/vulnfeeds/models"
2424
)
2525

26-
var versionRangeRegex = regexp.MustCompile(`^(>=|<=|~|\^|>|<|=)\s*([0-9a-zA-Z\.\-]+)(?:,\s*(>=|<=|~|\^|>|<|=)\s*([0-9a-zA-Z\.\-]+))?$`) // Used to parse version strings from the GitHub CNA.
26+
var (
27+
versionRangeRegex = regexp.MustCompile(`^(>=|<=|~|\^|>|<|=)\s*([0-9a-zA-Z\.\-]+)(?:,\s*(>=|<=|~|\^|>|<|=)\s*([0-9a-zA-Z\.\-]+))?$`) // Used to parse version strings from the GitHub CNA.
28+
// Keep in sync with the intent of https://github.com/google/osv.dev/blob/26050deb42785bc5a4dc7d802eac8e7f95135509/osv/bug.py#L31
29+
validVersion = regexp.MustCompile(`(?i)(\d+|(?:rc|alpha|beta|preview)\d*)`)
30+
validVersionText = regexp.MustCompile(`(?i)(?:rc|alpha|beta|preview)\d*`)
31+
)
2732

2833
// findFuzzyCommit takes an already normalized version and the mapping of repo tags to
2934
// normalized tags and commits, and performs fuzzy matching to find a commit hash.
3035
func findFuzzyCommit(normalizedVersion string, normalizedTags map[string]NormalizedTag) (string, bool) {
3136
candidateTags := []string{} // the subset of normalizedTags tags that might be appropriate to use as a fuzzy match for normalizedVersion.
32-
// Keep in sync with the regex in models.NormalizeVersion()
33-
var validVersionText = regexp.MustCompile(`(?i)(?:rc|alpha|beta|preview)\d*`)
3437

35-
for k := range normalizedTags {
38+
normalizedVersionMatchesText := validVersionText.MatchString(normalizedVersion)
39+
40+
for k, v := range normalizedTags {
3641
// "1-8-0-RC0" (normalized from "1.8.0-RC0") shouldn't be considered a fuzzy match for "1-8-0" (normalized from "1.8.0")
37-
if (validVersionText.MatchString(k) && validVersionText.MatchString(normalizedVersion)) && strings.HasPrefix(k, normalizedVersion) {
38-
candidateTags = append(candidateTags, k)
39-
}
40-
if (!validVersionText.MatchString(k) && !validVersionText.MatchString(normalizedVersion)) && strings.HasPrefix(k, normalizedVersion) {
42+
if v.MatchesVersionText == normalizedVersionMatchesText && strings.HasPrefix(k, normalizedVersion) {
4143
candidateTags = append(candidateTags, k)
4244
}
4345
}
@@ -103,9 +105,6 @@ func NormalizeVersion(version string) (normalizedVersion string, e error) {
103105
if strings.HasPrefix(version, ".") {
104106
version = "0" + version
105107
}
106-
// Keep in sync with the intent of https://github.com/google/osv.dev/blob/26050deb42785bc5a4dc7d802eac8e7f95135509/osv/bug.py#L31
107-
var validVersion = regexp.MustCompile(`(?i)(\d+|(?:rc|alpha|beta|preview)\d*)`)
108-
var validVersionText = regexp.MustCompile(`(?i)(?:rc|alpha|beta|preview)\d*`)
109108
components := validVersion.FindAllString(version, -1)
110109
if components == nil {
111110
return "", fmt.Errorf("%q is not a supported version", version)

0 commit comments

Comments
 (0)