Skip to content

Commit bf23eae

Browse files
authored
fix(nvd): use canonical link in cases of linkrot (#4773)
The actual root cause of what #4770 was trying to solve is linkrot. This may involve more requests, but also fewer failures due to linkrot. ~Also refactored FindRepos to do less repetitive work.~ If I make it less repetitive, the vp cache doesn't work ):
1 parent 0f74857 commit bf23eae

4 files changed

Lines changed: 31 additions & 36 deletions

File tree

vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"flag"
88
"fmt"
99
"log/slog"
10+
"net/http"
1011
"os"
1112
"slices"
1213
"sync"
@@ -94,7 +95,7 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache
9495
CVEID: cve.ID,
9596
CNA: "nvd",
9697
}
97-
repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics)
98+
repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics, http.DefaultClient)
9899
metrics.Repos = repos
99100

100101
var err error

vulnfeeds/conversion/nvd/converter.go

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach
229229
}
230230

231231
// FindRepos attempts to find the source code repositories for a given CVE.
232-
func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) []string {
232+
func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) []string {
233233
// Find repos
234234
refs := cve.References
235235
CPEs := cves.CPEs(cve)
@@ -244,47 +244,34 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *
244244
return nil
245245
}
246246

247-
// Edge case: No CPEs, but perhaps usable references.
248247
if len(refs) > 0 && len(CPEs) == 0 {
249-
repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics)
248+
repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics, httpClient)
250249
if len(repos) == 0 {
251250
metrics.AddNote("Failed to derive any repos and there were no CPEs")
252251
return nil
253252
}
254253
metrics.AddNote("Derived repos for CVE with no CPEs: %v", repos)
255254
reposForCVE = repos
256255
}
257-
appCPECount := 0
256+
258257
vendorProductCombinations := make(map[cves.VendorProduct]bool)
259258
for _, CPEstr := range CPEs {
260259
CPE, err := cves.ParseCPE(CPEstr)
261260
if err != nil {
262261
metrics.AddNote("Failed to parse CPE: %v", CPEstr)
263262
continue
264263
}
265-
if CPE.Part != "a" {
266-
continue
267-
}
268-
appCPECount += 1
269264
vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true
270265
}
271266

272-
if len(CPEs) > 0 && appCPECount == 0 {
273-
// This CVE is not for software (based on there being CPEs but not any application ones), skip.
274-
metrics.Outcome = models.NoSoftware
275-
return nil
276-
}
277-
278267
// If there wasn't a repo from the CPE Dictionary, try and derive one from the CVE references.
279268
for vendorProductKey := range vendorProductCombinations {
280-
// Does it have any application CPEs? Look for pre-computed repos based on VendorProduct.
281269
if repos, ok := vpRepoCache.Get(vendorProductKey); ok {
282270
metrics.AddNote("Pre-references, derived repos using cache: %v", repos)
283271
if len(reposForCVE) == 0 {
284272
reposForCVE = repos
285273
continue
286274
}
287-
// Don't append duplicates.
288275
for _, repo := range repos {
289276
if !slices.Contains(reposForCVE, repo) {
290277
reposForCVE = append(reposForCVE, repo)
@@ -295,7 +282,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *
295282
if slices.Contains(cves.VendorProductDenyList, vendorProductKey) {
296283
continue
297284
}
298-
repos := cves.ReposFromReferences(vpRepoCache, &vendorProductKey, refs, cves.RefTagDenyList, repoTagsCache, metrics)
285+
repos := cves.ReposFromReferences(vpRepoCache, &vendorProductKey, refs, cves.RefTagDenyList, repoTagsCache, metrics, httpClient)
299286
if len(repos) == 0 {
300287
metrics.AddNote("Failed to derive any repos for %s/%s", vendorProductKey.Vendor, vendorProductKey.Product)
301288
continue
@@ -305,22 +292,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *
305292
}
306293
}
307294

308-
logger.Info("Finished processing "+string(CVEID),
309-
slog.String("cve", string(CVEID)),
310-
slog.Int("cpes", len(CPEs)),
311-
slog.Int("app_cpes", appCPECount),
312-
slog.Int("derived_repos", len(reposForCVE)))
313-
314-
// If we've made it to here, we may have a CVE:
315-
// * that has Application-related CPEs (so applies to software)
316-
// * has a reference that is a known repository URL
317-
// OR
318-
// * a derived repository for the software package
319-
//
320-
// We do not yet have:
321-
// * any knowledge of the language used
322-
// * definitive version information
323-
324295
if len(reposForCVE) == 0 {
325296
// We have nothing useful to work with, so we'll assume it's out of scope
326297
metrics.AddNote("Passing due to lack of viable repository")

vulnfeeds/cves/versions.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,7 @@ func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *gi
10831083

10841084
// Examines the CVE references for a CVE and derives repos for it, optionally caching it.
10851085
// TODO (jesslowe): refactor with below
1086-
func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) (repos []string) {
1086+
func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) (repos []string) {
10871087
for _, ref := range refs {
10881088
// If any of the denylist tags are in the ref's tag set, it's out of consideration.
10891089
if !RefAcceptable(ref, tagDenyList) {
@@ -1097,6 +1097,13 @@ func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Re
10971097
// Failed to parse as a valid repo.
10981098
continue
10991099
}
1100+
1101+
// Check if the repo URL has changed (e.g. via redirect)
1102+
canonicalRepo, err := ValidateAndCanonicalizeLink(repo, httpClient)
1103+
if err == nil {
1104+
repo = canonicalRepo
1105+
}
1106+
11001107
if slices.Contains(repos, repo) {
11011108
continue
11021109
}

vulnfeeds/cves/versions_test.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"encoding/json"
55
"fmt"
66
"log"
7+
"net/http"
78
"os"
89
"reflect"
910
"slices"
@@ -1355,14 +1356,29 @@ func TestReposFromReferences(t *testing.T) {
13551356
},
13561357
wantRepos: []string{"https://github.com/dwyl/hapi-auth-jwt2"},
13571358
},
1359+
{
1360+
name: "A CVE with a repo that redirects (docker/docker -> moby/moby)",
1361+
args: args{
1362+
CVE: "CVE-2017-12345", // Dummy CVE
1363+
refs: []models.Reference{
1364+
{
1365+
Source: "cna@docker.com",
1366+
Tags: []string{"Third Party Advisory"},
1367+
URL: "https://github.com/docker/docker",
1368+
},
1369+
},
1370+
tagDenyList: RefTagDenyList,
1371+
},
1372+
wantRepos: []string{"https://github.com/moby/moby"},
1373+
},
13581374
}
13591375

13601376
for _, tt := range tests {
13611377
t.Run(tt.name, func(t *testing.T) {
13621378
testutils.SetupGitVCR(t)
13631379
metrics := &models.ConversionMetrics{}
13641380
repoTagsCache := &git.RepoTagsCache{}
1365-
if gotRepos := ReposFromReferences(tt.args.cache, tt.args.vp, tt.args.refs, tt.args.tagDenyList, repoTagsCache, metrics); !reflect.DeepEqual(gotRepos, tt.wantRepos) {
1381+
if gotRepos := ReposFromReferences(tt.args.cache, tt.args.vp, tt.args.refs, tt.args.tagDenyList, repoTagsCache, metrics, http.DefaultClient); !reflect.DeepEqual(gotRepos, tt.wantRepos) {
13661382
t.Errorf("ReposFromReferences() = %#v, want %#v", gotRepos, tt.wantRepos)
13671383
}
13681384
})

0 commit comments

Comments
 (0)