From 93087d4cb42d73813d20be65054a1b2cf9097d51 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 3 Feb 2026 00:27:55 +0000 Subject: [PATCH 01/45] Rewrite and refactor nvd to output version information in db_specific --- vulnfeeds/conversion/nvd/converter.go | 150 ++++++++++++-------------- vulnfeeds/cves/versions.go | 12 +-- vulnfeeds/vulns/vulns.go | 2 +- 3 files changed, 71 insertions(+), 93 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 54018585182..5dc743e1a2c 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -15,8 +15,10 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) var ErrNoRanges = errors.New("no ranges") @@ -32,55 +34,31 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc maybeProductName := "ENOCPE" if len(CPEs) > 0 { - CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. + CPE, _ := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. maybeVendorName = CPE.Vendor maybeProductName = CPE.Product - if err != nil { - return errors.New("can't generate an OSV record without valid CPE data") - } } + // Create basic OSV record v := vulns.FromNVDCVE(cve.ID, cve) - versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - - if len(versions.AffectedVersions) != 0 { - var err error - // There are some AffectedVersions to try and resolve to AffectedCommits. - if len(repos) == 0 { - metrics.AddNote("No affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - } - metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - versions, err = cves.GitVersionsToCommits(versions, repos, cache, metrics) - if err != nil { - metrics.AddNote("Failed to convert version tags to commits: %+v", err) - return fmt.Errorf("failed to convert version tags to commits: %+v %w", versions, err) - } - hasAnyFixedCommits := false - for _, repo := range repos { - if versions.HasFixedCommits(repo) { - hasAnyFixedCommits = true - break - } - } - - if versions.HasFixedVersions() && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } - hasAnyLastAffectedCommits := false - for _, repo := range repos { - if versions.HasLastAffectedCommits(repo) { - hasAnyLastAffectedCommits = true - break - } - } + versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) + // turn AffectedVersions into Ranges + ranges := []*osvschema.Range{} + for _, version := range versions.AffectedVersions { + vr := cves.BuildVersionRange(version.Introduced, version.LastAffected, version.Fixed) + ranges = append(ranges, vr) + } + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": ranges}) + if err != nil { + logger.Error("failed to create database specific struct", slog.Any("err", err)) + } else { + v.DatabaseSpecific = databaseSpecific + } - if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } + err = resolveVersionsToCommits(&versions, repos, cache, metrics) + if err != nil { + return err } slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) @@ -103,7 +81,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) } - err := v.ToJSON(osvFile) + err = v.ToJSON(osvFile) if err != nil { logger.Info("Failed to write", slog.Any("err", err)) return err @@ -138,45 +116,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - if len(versions.AffectedVersions) != 0 { - var err error - // There are some AffectedVersions to try and resolve to AffectedCommits. - if len(repos) == 0 { - metrics.AddNote("No affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - } - logger.Info("Trying to convert version tags to commits", slog.String("cve", string(cve.ID)), slog.Any("versions", versions), slog.Any("repos", repos)) - versions, err = cves.GitVersionsToCommits(versions, repos, cache, metrics) - if err != nil { - metrics.AddNote("Failed to convert version tags to commits: %+v", err) - return fmt.Errorf("failed to convert version tags to commits: %+v %w", versions, err) - } - } - - hasAnyFixedCommits := false - for _, repo := range repos { - if versions.HasFixedCommits(repo) { - hasAnyFixedCommits = true - } - } - - if versions.HasFixedVersions() && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } - - hasAnyLastAffectedCommits := false - for _, repo := range repos { - if versions.HasLastAffectedCommits(repo) { - hasAnyLastAffectedCommits = true - } - } - - if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } - + err := resolveVersionsToCommits(&versions, repos, cache, metrics) if len(versions.AffectedCommits) == 0 { metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) return fmt.Errorf("no affected commit ranges determined for %q %w", maybeProductName, ErrNoRanges) @@ -191,7 +131,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) - err := os.MkdirAll(vulnDir, 0755) + err = os.MkdirAll(vulnDir, 0755) if err != nil { logger.Warn("Failed to create dir", slog.Any("err", err)) return fmt.Errorf("failed to create dir: %w", err) @@ -333,3 +273,47 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } + +func resolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) error { + if len(repos) == 0 && len(versions.AffectedCommits) == 0 { + return fmt.Errorf("no repos to try and convert %+v to tags with", versions.AffectedVersions) + } + + // There are some AffectedVersions to try and resolve to AffectedCommits. + metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) + if len(versions.AffectedVersions) != 0 { + // There are some AffectedVersions to try and resolve to AffectedCommits. + if len(repos) == 0 { + metrics.AddNote("No affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) + return fmt.Errorf("no affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) + } + cves.GitVersionsToCommits(versions, repos, cache, metrics) + } + hasAnyFixedCommits := false + for _, repo := range repos { + if versions.HasFixedCommits(repo) { + hasAnyFixedCommits = true + break + } + } + + if versions.HasFixedVersions() && !hasAnyFixedCommits { + metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) + // return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) + } + + hasAnyLastAffectedCommits := false + for _, repo := range repos { + if versions.HasLastAffectedCommits(repo) { + hasAnyLastAffectedCommits = true + break + } + } + + if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { + metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) + return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) + } + + return nil +} diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index b75b2d382f2..aaebda4a288 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -993,20 +993,16 @@ func (c *VPRepoCache) Initialize(vpMap VendorProductToRepoMap) { // Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and // typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. // Refuses to add the same commit to AffectedCommits more than once. -func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (v models.VersionInfo, e error) { +func GitVersionsToCommits(v *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics){ // versions is a VersionInfo with AffectedVersions and typically no AffectedCommits // v is a VersionInfo with AffectedCommits (containing Fixed commits) included - v = versions for _, repo := range repos { - if cache.IsInvalid(repo) { - continue - } normalizedTags, err := git.NormalizeRepoTags(repo, cache) if err != nil { metrics.AddNote("Failed to normalize tags %s %s", repo, err) continue } - for _, av := range versions.AffectedVersions { + for _, av := range v.AffectedVersions { metrics.AddNote("Attempting version resolution for %s in %s", av, repo) introducedEquivalentCommit := "" if av.Introduced != "" { @@ -1023,7 +1019,7 @@ func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *gi // AffectedCommits (with Fixed commits) when the CVE has appropriate references, and assuming these references are indeed // Fixed commits, they're also assumed to be more precise than what may be derived from tag to commit mapping. fixedEquivalentCommit := "" - if v.HasFixedCommits(repo) && av.Fixed != "" && len(versions.AffectedVersions) == 1 { + if v.HasFixedCommits(repo) && av.Fixed != "" && len(v.AffectedVersions) == 1 { fixedEquivalentCommit = v.FixedCommits(repo)[0] metrics.AddNote("Using preassumed fixed commits instead of deriving from fixed version %s", av.Fixed) } else if av.Fixed != "" { @@ -1077,8 +1073,6 @@ func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *gi v.AffectedCommits = append(v.AffectedCommits, ac) } } - - return v, nil } // Examines the CVE references for a CVE and derives repos for it, optionally caching it. diff --git a/vulnfeeds/vulns/vulns.go b/vulnfeeds/vulns/vulns.go index 0fbdfccb174..554abb89450 100644 --- a/vulnfeeds/vulns/vulns.go +++ b/vulnfeeds/vulns/vulns.go @@ -113,7 +113,7 @@ func AttachExtractedVersionInfo(v *Vulnerability, version models.VersionInfo) { commits := repoToCommits[repo] gitRange := osvschema.Range{ Type: osvschema.Range_GIT, - Repo: repo, + Repo: repo, } // We're not always able to determine when a vulnerability is introduced, and may need to default to the dawn of time. addedIntroduced := false From 343d7bbb6458ec301bb46e12b4164e25d405aecb Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 3 Feb 2026 03:26:25 +0000 Subject: [PATCH 02/45] Refactor conversion to return conversion output and always create a NVD OSV record --- .../cmd/converters/cve/nvd-cve-osv/main.go | 24 +--- vulnfeeds/conversion/nvd/converter.go | 115 ++++++++---------- 2 files changed, 58 insertions(+), 81 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 14673a58712..d913b5f03d7 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" - "errors" "flag" "fmt" "log/slog" @@ -97,30 +96,15 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics) metrics.Repos = repos - var err error + var outcome models.ConversionOutcome switch *outFormat { case "OSV": - err = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics) case "PackageInfo": - err = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) } - // Parse this error to determine which failure mode it was - if err != nil { - if errors.Is(err, nvd.ErrNoRanges) { - metrics.Outcome = models.NoRanges - return models.NoRanges - } - if errors.Is(err, nvd.ErrUnresolvedFix) { - metrics.Outcome = models.FixUnresolvable - return models.FixUnresolvable - } - metrics.Outcome = models.ConversionUnknown - - return models.ConversionUnknown - } - metrics.Outcome = models.Successful - return models.Successful + return outcome } func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, _ string, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache) { diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 5dc743e1a2c..27b5973f428 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -4,7 +4,6 @@ package nvd import ( "encoding/json" "errors" - "fmt" "log/slog" "net/http" "os" @@ -26,7 +25,7 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") // CVEToOSV Takes an NVD CVE record and outputs an OSV file in the specified directory. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) error { +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { CPEs := cves.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -56,18 +55,15 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc v.DatabaseSpecific = databaseSpecific } - err = resolveVersionsToCommits(&versions, repos, cache, metrics) - if err != nil { - return err - } - - slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) - - vulns.AttachExtractedVersionInfo(v, versions) + metrics.Outcome = resolveVersionsToCommits(&versions, repos, cache, metrics) - if len(v.Affected) == 0 { - metrics.AddNote("No affected ranges detected for %q", maybeProductName) - return fmt.Errorf("no affected ranges detected for %q %w", maybeProductName, ErrNoRanges) + if metrics.Outcome == models.Successful { + slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) + vulns.AttachExtractedVersionInfo(v, versions) + if len(v.Affected) == 0 { + metrics.AddNote("No affected ranges detected for %q", maybeProductName) + metrics.Outcome = models.NoCommitRanges + } } vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) @@ -83,22 +79,21 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc err = v.ToJSON(osvFile) if err != nil { - logger.Info("Failed to write", slog.Any("err", err)) - return err + logger.Error("Failed to write", slog.Any("err", err)) } osvFile.Close() err = conversion.WriteMetricsFile(metrics, metricsFile) if err != nil { - return err + logger.Error("Failed to write metrics", slog.Any("err", err)) } - return nil + return metrics.Outcome } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) error { +func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { CPEs := cves.CPEs(cve) // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. maybeVendorName := "ENOCPE" @@ -109,17 +104,18 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach maybeVendorName = CPE.Vendor maybeProductName = CPE.Product if err != nil { - return errors.New("can't generate an OSV record without valid CPE data") + return models.NoRanges } } // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - err := resolveVersionsToCommits(&versions, repos, cache, metrics) + metrics.Outcome = resolveVersionsToCommits(&versions, repos, cache, metrics) + if len(versions.AffectedCommits) == 0 { metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) - return fmt.Errorf("no affected commit ranges determined for %q %w", maybeProductName, ErrNoRanges) + metrics.Outcome = models.NoCommitRanges } versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. @@ -131,17 +127,15 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) - err = os.MkdirAll(vulnDir, 0755) + err := os.MkdirAll(vulnDir, 0755) if err != nil { logger.Warn("Failed to create dir", slog.Any("err", err)) - return fmt.Errorf("failed to create dir: %w", err) } outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) f, err := os.Create(outputFile) if err != nil { logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) - return fmt.Errorf("failed to open %s for writing: %w", outputFile, err) } defer f.Close() @@ -151,27 +145,27 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach if err != nil { logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) - return fmt.Errorf("failed to encode PackageInfo to %s: %w", outputFile, err) } logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) if err != nil { - return err + logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } err = conversion.WriteMetricsFile(metrics, metricsFile) if err != nil { - return err + logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } - return nil + return metrics.Outcome } // FindRepos attempts to find the source code repositories for a given CVE. func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) []string { // Find repos refs := cve.References + conversion.DeduplicateRefs(refs) CPEs := cves.CPEs(cve) CVEID := cve.ID var reposForCVE []string @@ -274,46 +268,45 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } -func resolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) error { - if len(repos) == 0 && len(versions.AffectedCommits) == 0 { - return fmt.Errorf("no repos to try and convert %+v to tags with", versions.AffectedVersions) - } +func resolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) models.ConversionOutcome { + if len(repos) == 0 && len(versions.AffectedCommits) == 0 { + return models.NoRepos + } + // There are some AffectedVersions to try and resolve to AffectedCommits. + metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) + if len(versions.AffectedVersions) != 0 { // There are some AffectedVersions to try and resolve to AffectedCommits. - metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - if len(versions.AffectedVersions) != 0 { - // There are some AffectedVersions to try and resolve to AffectedCommits. - if len(repos) == 0 { - metrics.AddNote("No affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) - return fmt.Errorf("no affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) - } - cves.GitVersionsToCommits(versions, repos, cache, metrics) + if len(repos) == 0 { + metrics.AddNote("No affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) + return models.NoRanges } - hasAnyFixedCommits := false - for _, repo := range repos { - if versions.HasFixedCommits(repo) { - hasAnyFixedCommits = true - break - } + cves.GitVersionsToCommits(versions, repos, cache, metrics) + } + hasAnyFixedCommits := false + for _, repo := range repos { + if versions.HasFixedCommits(repo) { + hasAnyFixedCommits = true + break } + } - if versions.HasFixedVersions() && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - // return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } + if versions.HasFixedVersions() && !hasAnyFixedCommits { + metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) + } - hasAnyLastAffectedCommits := false - for _, repo := range repos { - if versions.HasLastAffectedCommits(repo) { - hasAnyLastAffectedCommits = true - break - } + hasAnyLastAffectedCommits := false + for _, repo := range repos { + if versions.HasLastAffectedCommits(repo) { + hasAnyLastAffectedCommits = true + break } + } - if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } + if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { + metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) + return models.FixUnresolvable + } - return nil + return models.Successful } From 3cae05932f68a3c81a85559a2f199b3c20d8faa0 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 4 Feb 2026 02:03:43 +0000 Subject: [PATCH 03/45] Add csv analysis generation --- .../cmd/converters/cve/nvd-cve-osv/main.go | 14 +++++ vulnfeeds/conversion/common.go | 57 +++++++++++++++++++ vulnfeeds/cves/versions.go | 5 +- vulnfeeds/git/repository.go | 8 ++- 4 files changed, 81 insertions(+), 3 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index d913b5f03d7..5dfdbef7b09 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -7,9 +7,12 @@ import ( "fmt" "log/slog" "os" + "path/filepath" + "regexp" "slices" "sync" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/nvd" "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" @@ -86,6 +89,17 @@ func main() { close(jobs) wg.Wait() logger.Info("NVD Conversion run complete") + + // Try to extract year from path, otherwise use "xxxx" filler + filename := filepath.Base(*jsonPath) + re := regexp.MustCompile(`nvdcve-2\.0-([0-9]{4})\.json`) + matches := re.FindStringSubmatch(filename) + if len(matches) >= 2 { + year := matches[1] + conversion.ConductAnalysis(year, *outDir) + } else { + conversion.ConductAnalysis("xxxx", *outDir) + } } func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache) models.ConversionOutcome { diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 22639351f4f..44c02c89ca1 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -3,13 +3,16 @@ package conversion import ( + "encoding/csv" "encoding/json" "fmt" + "io/fs" "log/slog" "os" "path/filepath" "slices" "strings" + "time" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" @@ -69,6 +72,60 @@ func DeduplicateRefs(refs []models.Reference) []models.Reference { return refs } +// ConductAnalysis conducts an analysis of the conversion results after completion by reading +// all of the .metrics.json files and extracting conversion outcomes. +func ConductAnalysis(year string, dir string) { + // get the current time in minutes + currentTime := time.Now().Format("2006-01-02T15:04") + outcomesCSV := "nvd-conversion-outcomes-" + year + "-" + currentTime + ".csv" + csvFile, err := os.Create(filepath.Join(dir, outcomesCSV)) + if err != nil { + logger.Fatal("Failed to create analysis CSV file", slog.Any("err", err)) + } + defer csvFile.Close() + + csvWriter := csv.NewWriter(csvFile) + defer csvWriter.Flush() + + header := []string{"CVEID", "Outcome"} + if err := csvWriter.Write(header); err != nil { + logger.Fatal("Failed to write header to CSV", slog.Any("err", err)) + } + + err = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() && strings.HasSuffix(d.Name(), ".metrics.json") { + data, err := os.ReadFile(path) + if err != nil { + logger.Warn("Failed to read metrics file", slog.String("path", path), slog.Any("err", err)) + return nil // Continue + } + + var metrics models.ConversionMetrics + if err := json.Unmarshal(data, &metrics); err != nil { + logger.Warn("Failed to unmarshal metrics JSON", slog.String("path", path), slog.Any("err", err)) + return nil // Continue + } + + record := []string{ + string(metrics.CVEID), + metrics.Outcome.String(), + } + if err := csvWriter.Write(record); err != nil { + logger.Warn("Failed to write record to CSV", slog.String("cve", string(metrics.CVEID)), slog.Any("err", err)) + } + } + + return nil + }) + + if err != nil { + logger.Error("Failed to walk directory for analysis", slog.Any("err", err)) + } +} + // CreateMetricsFile creates the initial file for the metrics record. func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { metricsFile := filepath.Join(vulnDir, string(id)+".metrics"+models.Extension) diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index aaebda4a288..0b88c071868 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -937,6 +937,7 @@ func RefAcceptable(ref models.Reference, tagDenyList []string) bool { } // Adds the repo to the cache for the Vendor/Product combination if not already present. +// *** Does external calls to verify repos *** func (c *VPRepoCache) MaybeUpdate(vp *VendorProduct, repo string) { if vp == nil { return @@ -993,7 +994,7 @@ func (c *VPRepoCache) Initialize(vpMap VendorProductToRepoMap) { // Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and // typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. // Refuses to add the same commit to AffectedCommits more than once. -func GitVersionsToCommits(v *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics){ +func GitVersionsToCommits(v *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) { // versions is a VersionInfo with AffectedVersions and typically no AffectedCommits // v is a VersionInfo with AffectedCommits (containing Fixed commits) included for _, repo := range repos { @@ -1076,7 +1077,7 @@ func GitVersionsToCommits(v *models.VersionInfo, repos []string, cache *git.Repo } // Examines the CVE references for a CVE and derives repos for it, optionally caching it. -// TODO (jesslowe): refactor with below +// *** Does external calls to verify repos *** func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics) (repos []string) { for _, ref := range refs { // If any of the denylist tags are in the ref's tag set, it's out of consideration. diff --git a/vulnfeeds/git/repository.go b/vulnfeeds/git/repository.go index 2163d0c9664..34a33e523ce 100644 --- a/vulnfeeds/git/repository.go +++ b/vulnfeeds/git/repository.go @@ -18,6 +18,7 @@ package git import ( "context" "errors" + "log/slog" "maps" "net/url" "path" @@ -33,6 +34,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/storage/memory" + "github.com/google/osv/vulnfeeds/utility/logger" "github.com/sethvargo/go-retry" ) @@ -132,12 +134,13 @@ func RemoteRepoRefsWithRetry(repoURL string, retries uint64) (refs []*plumbing.R if errors.Is(err, context.DeadlineExceeded) { return retry.RetryableError(err) } - + logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) return err } return nil }); err != nil { + logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) return refs, err } @@ -158,6 +161,7 @@ func RepoName(repoURL string) (name string, e error) { // RepoTags returns an array of Tag being the (unpeeled, if annotated) tags and associated commits in repoURL. // An optional repoTagsCache can be supplied to reduce repeated remote connections to the same repo. +// *** Does external calls to verify repos *** func RepoTags(repoURL string, repoTagsCache *RepoTagsCache) (tags Tags, e error) { if repoTagsCache != nil { tagsRepoMap, ok := repoTagsCache.Get(repoURL) @@ -295,6 +299,7 @@ func RefBranches(refs []*plumbing.Reference) (branches []*plumbing.Reference) { } // Validate the repo by attempting to query it's references. +// *** Does external calls to verify repos *** func ValidRepo(repoURL string) (valid bool) { _, err := RemoteRepoRefsWithRetry(repoURL, 3) if err != nil && errors.Is(err, transport.ErrAuthenticationRequired) { @@ -309,6 +314,7 @@ func ValidRepo(repoURL string) (valid bool) { } // Otherwise functional repos that don't have any tags are not valid. +// *** Does external calls to verify repos *** func ValidRepoAndHasUsableRefs(repoURL string) (valid bool) { refs, err := RemoteRepoRefsWithRetry(repoURL, 3) if err != nil && errors.Is(err, transport.ErrAuthenticationRequired) { From c34b70f0c972cb291d2211be2066d11e0a71ca9f Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 4 Feb 2026 02:16:21 +0000 Subject: [PATCH 04/45] fix lint --- vulnfeeds/git/repository.go | 1 + vulnfeeds/vulns/vulns.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/git/repository.go b/vulnfeeds/git/repository.go index 34a33e523ce..4c27bc4e7a2 100644 --- a/vulnfeeds/git/repository.go +++ b/vulnfeeds/git/repository.go @@ -135,6 +135,7 @@ func RemoteRepoRefsWithRetry(repoURL string, retries uint64) (refs []*plumbing.R return retry.RetryableError(err) } logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) + return err } diff --git a/vulnfeeds/vulns/vulns.go b/vulnfeeds/vulns/vulns.go index 554abb89450..0fbdfccb174 100644 --- a/vulnfeeds/vulns/vulns.go +++ b/vulnfeeds/vulns/vulns.go @@ -113,7 +113,7 @@ func AttachExtractedVersionInfo(v *Vulnerability, version models.VersionInfo) { commits := repoToCommits[repo] gitRange := osvschema.Range{ Type: osvschema.Range_GIT, - Repo: repo, + Repo: repo, } // We're not always able to determine when a vulnerability is introduced, and may need to default to the dawn of time. addedIntroduced := false From 11493cb18ed118055ec5754af97da0f276edc3cc Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 4 Feb 2026 03:05:21 +0000 Subject: [PATCH 05/45] l --- vulnfeeds/git/repository.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/git/repository.go b/vulnfeeds/git/repository.go index 4c27bc4e7a2..f86ef1a708e 100644 --- a/vulnfeeds/git/repository.go +++ b/vulnfeeds/git/repository.go @@ -135,7 +135,7 @@ func RemoteRepoRefsWithRetry(repoURL string, retries uint64) (refs []*plumbing.R return retry.RetryableError(err) } logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) - + return err } From f6e4b772f659f453a36b4b4101a275ff3fa43fe4 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Feb 2026 04:31:59 +0000 Subject: [PATCH 06/45] Add flag for only outputting successful conversions --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 5 +++-- vulnfeeds/conversion/nvd/converter.go | 12 ++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 5dfdbef7b09..a6e394206a6 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -26,6 +26,7 @@ var ( outDir = flag.String("out-dir", "", "Path to output results.") outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") + rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") ) func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { @@ -113,9 +114,9 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache var outcome models.ConversionOutcome switch *outFormat { case "OSV": - outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) case "PackageInfo": - outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) } return outcome diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 27b5973f428..0110844d280 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -25,7 +25,7 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") // CVEToOSV Takes an NVD CVE record and outputs an OSV file in the specified directory. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { CPEs := cves.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -66,6 +66,10 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } } + if rejectFailed && metrics.Outcome != models.Successful { + return metrics.Outcome + } + vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) if err := os.MkdirAll(vulnDir, 0755); err != nil { @@ -93,7 +97,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { +func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { CPEs := cves.CPEs(cve) // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. maybeVendorName := "ENOCPE" @@ -118,6 +122,10 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach metrics.Outcome = models.NoCommitRanges } + if rejectFailed && metrics.Outcome != models.Successful { + return metrics.Outcome + } + versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) From 2fbd16b119c2dc1ee139c4ff46b22a86143156d8 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Feb 2026 04:33:44 +0000 Subject: [PATCH 07/45] Use all extractable repositories for commit finding. --- vulnfeeds/conversion/nvd/converter.go | 106 +++++++++----------------- 1 file changed, 34 insertions(+), 72 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 0110844d280..49dc703b124 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -55,15 +55,17 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc v.DatabaseSpecific = databaseSpecific } - metrics.Outcome = resolveVersionsToCommits(&versions, repos, cache, metrics) - - if metrics.Outcome == models.Successful { - slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) - vulns.AttachExtractedVersionInfo(v, versions) - if len(v.Affected) == 0 { - metrics.AddNote("No affected ranges detected for %q", maybeProductName) - metrics.Outcome = models.NoCommitRanges - } + if outcome := ResolveVersionsToCommits(&versions, repos, cache, metrics); outcome == models.FixUnresolvable { + return models.FixUnresolvable + } else { + metrics.Outcome = outcome + } + slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) + vulns.AttachExtractedVersionInfo(v, versions) + + if len(v.Affected) == 0 { + metrics.AddNote("No affected ranges detected for %q", maybeProductName) + metrics.Outcome = models.NoCommitRanges } if rejectFailed && metrics.Outcome != models.Successful { @@ -115,7 +117,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - metrics.Outcome = resolveVersionsToCommits(&versions, repos, cache, metrics) + metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) if len(versions.AffectedCommits) == 0 { metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) @@ -176,7 +178,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * conversion.DeduplicateRefs(refs) CPEs := cves.CPEs(cve) CVEID := cve.ID - var reposForCVE []string if len(refs) == 0 && len(CPEs) == 0 { metrics.AddNote("Skipping due to lack of CPEs and lack of references") @@ -186,17 +187,8 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return nil } - // Edge case: No CPEs, but perhaps usable references. - if len(refs) > 0 && len(CPEs) == 0 { - repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics) - if len(repos) == 0 { - metrics.AddNote("Failed to derive any repos and there were no CPEs") - return nil - } - metrics.AddNote("Derived repos for CVE with no CPEs: %v", repos) - reposForCVE = repos - } - appCPECount := 0 + repos := cves.ReposFromReferences(nil, nil, refs, cves.RefTagDenyList, repoTagsCache, metrics) + vendorProductCombinations := make(map[cves.VendorProduct]bool) for _, CPEstr := range CPEs { CPE, err := cves.ParseCPE(CPEstr) @@ -204,66 +196,28 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } - if CPE.Part != "a" { - continue - } - appCPECount += 1 + // if CPE.Part != "a" { + // continue + // } vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true } - if len(CPEs) > 0 && appCPECount == 0 { - // This CVE is not for software (based on there being CPEs but not any application ones), skip. - metrics.Outcome = models.NoSoftware - return nil - } - // If there wasn't a repo from the CPE Dictionary, try and derive one from the CVE references. + for vendorProductKey := range vendorProductCombinations { // Does it have any application CPEs? Look for pre-computed repos based on VendorProduct. if repos, ok := vpRepoCache.Get(vendorProductKey); ok { - metrics.AddNote("Pre-references, derived repos using cache: %v", repos) - if len(reposForCVE) == 0 { - reposForCVE = repos - continue - } + metrics.AddNote("Pre-references, derived repos for %s/%s using cache: %v", vendorProductKey.Vendor, vendorProductKey.Product, repos) // Don't append duplicates. for _, repo := range repos { - if !slices.Contains(reposForCVE, repo) { - reposForCVE = append(reposForCVE, repo) + if !slices.Contains(repos, repo) { + repos = append(repos, repo) } } } - if len(reposForCVE) == 0 && len(refs) > 0 { - if slices.Contains(cves.VendorProductDenyList, vendorProductKey) { - continue - } - repos := cves.ReposFromReferences(vpRepoCache, &vendorProductKey, refs, cves.RefTagDenyList, repoTagsCache, metrics) - if len(repos) == 0 { - metrics.AddNote("Failed to derive any repos for %s/%s", vendorProductKey.Vendor, vendorProductKey.Product) - continue - } - metrics.AddNote("Derived repos: %v", repos) - reposForCVE = append(reposForCVE, repos...) - } } - - logger.Info("Finished processing "+string(CVEID), - slog.String("cve", string(CVEID)), - slog.Int("cpes", len(CPEs)), - slog.Int("app_cpes", appCPECount), - slog.Int("derived_repos", len(reposForCVE))) - - // If we've made it to here, we may have a CVE: - // * that has Application-related CPEs (so applies to software) - // * has a reference that is a known repository URL - // OR - // * a derived repository for the software package - // - // We do not yet have: - // * any knowledge of the language used - // * definitive version information - - if len(reposForCVE) == 0 { + + if len(repos) == 0 { // We have nothing useful to work with, so we'll assume it's out of scope metrics.AddNote("Passing due to lack of viable repository") metrics.Outcome = models.NoRepos @@ -271,12 +225,12 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return nil } - metrics.AddNote("Found Repos for CVE %s: %v", string(CVEID), reposForCVE) + metrics.AddNote("Found Repos for CVE %s: %v", string(CVEID), repos) - return reposForCVE + return repos } -func resolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) models.ConversionOutcome { +func ResolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) models.ConversionOutcome { if len(repos) == 0 && len(versions.AffectedCommits) == 0 { return models.NoRepos } @@ -298,6 +252,14 @@ func resolveVersionsToCommits(versions *models.VersionInfo, repos []string, cach break } } + if !hasAnyFixedCommits { + for _, ac := range versions.AffectedCommits { + if ac.Fixed != "" { + hasAnyFixedCommits = true + break + } + } + } if versions.HasFixedVersions() && !hasAnyFixedCommits { metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) From 24828895b3cbe219b173838794acb7087fd09cdc Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 10 Feb 2026 23:23:28 +0000 Subject: [PATCH 08/45] fix unnecessary logic --- vulnfeeds/conversion/nvd/converter.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index e599f14e9af..4201dcd21bf 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -55,11 +55,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc v.DatabaseSpecific = databaseSpecific } - if outcome := ResolveVersionsToCommits(&versions, repos, cache, metrics); outcome == models.FixUnresolvable { - return models.FixUnresolvable - } else { - metrics.Outcome = outcome - } + metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) if metrics.Outcome == models.Successful { versions.AffectedCommits = cves.DeduplicateAffectedCommits(versions.AffectedCommits) From 1fc7354062dee1e8a138d570b32bdd7708e057ca Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 10 Feb 2026 23:32:00 +0000 Subject: [PATCH 09/45] Fix some bad FindRepo logic --- vulnfeeds/conversion/nvd/converter.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 4201dcd21bf..462f9b5e07b 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -201,9 +201,9 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } - // if CPE.Part != "a" { - // continue - // } + if CPE.Part != "a" || CPE.Part != "o" { + continue + } vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true } @@ -217,8 +217,8 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * continue } for _, repo := range repos { - if !slices.Contains(repos, repo) { - repos = append(repos, repo) + if !slices.Contains(reposForCVE, repo) { + reposForCVE = append(reposForCVE, repo) } } } From b915cd98c819da89cb68abd12f6e0bd175c6c281 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 10 Feb 2026 23:39:04 +0000 Subject: [PATCH 10/45] fix or/and logic --- vulnfeeds/conversion/nvd/converter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 462f9b5e07b..fc456177f68 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -201,7 +201,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } - if CPE.Part != "a" || CPE.Part != "o" { + if CPE.Part != "a" && CPE.Part != "o" { // only care about application and operating system CPEs continue } vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true From db3481e0bf59c080e976a79be8ebc1fcfbf97daf Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 11 Feb 2026 22:45:31 +0000 Subject: [PATCH 11/45] begin to deprecate VersionInfo struct for NVD conversion --- vulnfeeds/cmd/combine-to-osv/main.go | 4 +- .../cmd/converters/cve/nvd-cve-osv/main.go | 4 +- vulnfeeds/conversion/common.go | 137 +++ vulnfeeds/conversion/nvd/converter.go | 266 +++--- vulnfeeds/cvelist2osv/common.go | 121 +-- vulnfeeds/cvelist2osv/default_extractor.go | 12 +- vulnfeeds/cvelist2osv/linux_extractor.go | 6 +- vulnfeeds/cvelist2osv/strategies.go | 5 +- vulnfeeds/cves/versions.go | 146 ++- vulnfeeds/cves/versions_test.go | 877 +++++++++--------- 10 files changed, 778 insertions(+), 800 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index ceae14c63a8..1a994dbdf5f 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -15,7 +15,7 @@ import ( "strings" "cloud.google.com/go/storage" - "github.com/google/osv/vulnfeeds/cves" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/upload" "github.com/google/osv/vulnfeeds/utility/logger" @@ -312,7 +312,7 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* } if c5Intro != "" || c5Fixed != "" { - newRange := cves.BuildVersionRange(c5Intro, "", c5Fixed) + newRange := conversion.BuildVersionRange(c5Intro, "", c5Fixed) newRange.Repo = repo newRange.Type = osvschema.Range_GIT // Preserve the repo newAffectedRanges = append(newAffectedRanges, newRange) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index ec3f8694e22..1ead910a395 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -116,8 +116,8 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache switch *outFormat { case "OSV": outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) - case "PackageInfo": - outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) + // case "PackageInfo": + // outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) } return outcome diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 44c02c89ca1..6aca53eb4c5 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -5,6 +5,7 @@ package conversion import ( "encoding/csv" "encoding/json" + "errors" "fmt" "io/fs" "log/slog" @@ -14,7 +15,9 @@ import ( "strings" "time" + "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -168,3 +171,137 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e return nil } + + +// Examines repos and tries to convert versions to commits by treating them as Git tags. +func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { + var newAff osvschema.Affected + var newVersionRanges []*osvschema.Range + unresolvedRanges := versionRanges + + for _, repo := range repos { + if len(unresolvedRanges) == 0 { + break // All ranges have been resolved. + } + + normalizedTags, err := git.NormalizeRepoTags(repo, cache) + if err != nil { + metrics.AddNote("Failed to normalize tags - %s", repo) + continue + } + + var stillUnresolvedRanges []*osvschema.Range + for _, vr := range unresolvedRanges { + var introduced, fixed, lastAffected string + for _, e := range vr.GetEvents() { + if e.GetIntroduced() != "" { + introduced = e.GetIntroduced() + } + if e.GetFixed() != "" { + fixed = e.GetFixed() + } + if e.GetLastAffected() != "" { + lastAffected = e.GetLastAffected() + } + } + + var introducedCommit string + if introduced == "0" { + introducedCommit = "0" + } else { + introducedCommit = resolveVersionToCommit(metrics.CVEID, introduced, "introduced", repo, normalizedTags) + } + fixedCommit := resolveVersionToCommit(metrics.CVEID, fixed, "fixed", repo, normalizedTags) + lastAffectedCommit := resolveVersionToCommit(metrics.CVEID, lastAffected, "last_affected", repo, normalizedTags) + + if fixedCommit != "" || lastAffectedCommit != "" { + var newVR *osvschema.Range + + if fixedCommit != "" { + newVR = BuildVersionRange(introducedCommit, "", fixedCommit) + } else { + newVR = BuildVersionRange(introducedCommit, lastAffectedCommit, "") + } + + newVR.Repo = repo + newVR.Type = osvschema.Range_GIT + if len(vr.GetEvents()) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + newVR.DatabaseSpecific = databaseSpecific + } + } + + newVersionRanges = append(newVersionRanges, newVR) + } else { + stillUnresolvedRanges = append(stillUnresolvedRanges, vr) + } + } + unresolvedRanges = stillUnresolvedRanges + } + + var err error + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + newAff.DatabaseSpecific = databaseSpecific + } + + metrics.UnresolvedRangesCount += len(unresolvedRanges) + } + + if len(newVersionRanges) > 0 { + newAff.Ranges = newVersionRanges + metrics.ResolvedRangesCount += len(newVersionRanges) + } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. + err = errors.New("was not able to get git version ranges") + } + + return &newAff, err +} + +// resolveVersionToCommit is a helper to convert a version string to a commit hash. +// It logs the outcome of the conversion attempt and returns an empty string on failure. +func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string { + if version == "" { + return "" + } + logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) + commit, err := git.VersionToCommit(version, normalizedTags) + if err != nil { + logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) + return "" + } + logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) + + return commit +} + +// BuildVersionRange is a helper function that adds 'introduced', 'fixed', or 'last_affected' +// events to an OSV version range. If 'intro' is empty, it defaults to "0". +func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Range { + var versionRange osvschema.Range + var i string + if intro == "" { + i = "0" + } else { + i = intro + } + versionRange.Events = append(versionRange.Events, &osvschema.Event{ + Introduced: i}) + + if fixed != "" { + versionRange.Events = append(versionRange.Events, &osvschema.Event{ + Fixed: fixed}) + } else if lastAff != "" { + versionRange.Events = append(versionRange.Events, &osvschema.Event{ + LastAffected: lastAff, + }) + } + + return &versionRange +} diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index fc456177f68..25976382737 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -2,7 +2,6 @@ package nvd import ( - "encoding/json" "errors" "log/slog" "net/http" @@ -14,7 +13,6 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -41,34 +39,64 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // Create basic OSV record v := vulns.FromNVDCVE(cve.ID, cve) - versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - // turn AffectedVersions into Ranges - ranges := []*osvschema.Range{} - for _, version := range versions.AffectedVersions { - vr := cves.BuildVersionRange(version.Introduced, version.LastAffected, version.Fixed) - ranges = append(ranges, vr) + cpeRanges, commits, textRanges := cves.ExtractVersions(v, cve, nil, http.DefaultClient, metrics) + + if cpeRanges == nil && commits == nil && textRanges == nil { + metrics.AddNote("No ranges detected for %q", maybeProductName) + metrics.Outcome = models.NoRanges + if rejectFailed { + return metrics.Outcome + } } - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": ranges}) - if err != nil { - logger.Error("failed to create database specific struct", slog.Any("err", err)) - } else { - v.DatabaseSpecific = databaseSpecific + + var resolutionOutcome models.ConversionOutcome + if len(cpeRanges) > 0 { + outcome, affected := ResolveVersionsToCommits(cpeRanges, repos, cache, metrics) + resolutionOutcome = outcome + if outcome == models.Successful { + conversion.AddAffected(v, affected, metrics) + } } - metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) + if len(commits) > 0 { + var versionRanges []*osvschema.Range + // handle commits as version ranges + for _, commit := range commits { + // if commit doesn't already + vr := conversion.BuildVersionRange(commit.Introduced,commit.LastAffected, commit.Fixed) + vr.Repo = commit.Repo + versionRanges = append(versionRanges, vr) + } + + if len(versionRanges) > 0 { + affected := osvschema.Affected{ + Ranges: versionRanges, + } + conversion.AddAffected(v, &affected, metrics) + resolutionOutcome = models.Successful + } + } - if metrics.Outcome == models.Successful { - versions.AffectedCommits = cves.DeduplicateAffectedCommits(versions.AffectedCommits) - vulns.AttachExtractedVersionInfo(v, versions) - if len(v.Affected) == 0 { - metrics.AddNote("No affected ranges detected for %q", maybeProductName) - metrics.Outcome = models.NoCommitRanges + if len(textRanges) > 0 && resolutionOutcome != models.Successful { + // handle text ranges as version ranges + outcome, affected := ResolveVersionsToCommits(textRanges, repos, cache, metrics) + resolutionOutcome = outcome + if outcome == models.Successful { + conversion.AddAffected(v, affected, metrics) } } - if rejectFailed && metrics.Outcome != models.Successful { - return metrics.Outcome + for _, affected := range v.Affected { + if len(affected.Ranges) != 0 { + break + } + resolutionOutcome = models.NoCommitRanges } + + if rejectFailed && resolutionOutcome != models.Successful { + return resolutionOutcome + } + metrics.Outcome = resolutionOutcome vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) if err := os.MkdirAll(vulnDir, 0755); err != nil { @@ -80,7 +108,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) } - err = v.ToJSON(osvFile) + err := v.ToJSON(osvFile) if err != nil { logger.Error("Failed to write", slog.Any("err", err)) } @@ -96,77 +124,77 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { - CPEs := cves.CPEs(cve) - // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. - maybeVendorName := "ENOCPE" - maybeProductName := "ENOCPE" - - if len(CPEs) > 0 { - CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. - maybeVendorName = CPE.Vendor - maybeProductName = CPE.Product - if err != nil { - return models.NoRanges - } - } - - // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. - versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - - metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) - - if len(versions.AffectedCommits) == 0 { - metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) - metrics.Outcome = models.NoCommitRanges - } - - if rejectFailed && metrics.Outcome != models.Successful { - return metrics.Outcome - } - - versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. - - slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) - - var pkgInfos []vulns.PackageInfo - pi := vulns.PackageInfo{VersionInfo: versions} - pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo - - vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) - err := os.MkdirAll(vulnDir, 0755) - if err != nil { - logger.Warn("Failed to create dir", slog.Any("err", err)) - } - - outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) - f, err := os.Create(outputFile) - if err != nil { - logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) - } - defer f.Close() - - encoder := json.NewEncoder(f) - encoder.SetIndent("", " ") - err = encoder.Encode(&pkgInfos) - - if err != nil { - logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) - } - - logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) - - metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) - if err != nil { - logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) - } - err = conversion.WriteMetricsFile(metrics, metricsFile) - if err != nil { - logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) - } - - return metrics.Outcome -} +// func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { +// CPEs := cves.CPEs(cve) +// // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. +// maybeVendorName := "ENOCPE" +// maybeProductName := "ENOCPE" + +// if len(CPEs) > 0 { +// CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. +// maybeVendorName = CPE.Vendor +// maybeProductName = CPE.Product +// if err != nil { +// return models.NoRanges +// } +// } + +// // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. +// // versions := cves.ExtractVersions(cve, nil, http.DefaultClient, metrics) + +// // metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) + +// // if len(versions.AffectedCommits) == 0 { +// // metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) +// // metrics.Outcome = models.NoCommitRanges +// // } + +// // if rejectFailed && metrics.Outcome != models.Successful { +// // return metrics.Outcome +// // } + +// // versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. + +// // slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) + +// var pkgInfos []vulns.PackageInfo +// pi := vulns.PackageInfo{VersionInfo: versions} +// pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo + +// vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) +// err := os.MkdirAll(vulnDir, 0755) +// if err != nil { +// logger.Warn("Failed to create dir", slog.Any("err", err)) +// } + +// outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) +// f, err := os.Create(outputFile) +// if err != nil { +// logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) +// } +// defer f.Close() + +// encoder := json.NewEncoder(f) +// encoder.SetIndent("", " ") +// err = encoder.Encode(&pkgInfos) + +// if err != nil { +// logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) +// } + +// logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) + +// metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) +// if err != nil { +// logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) +// } +// err = conversion.WriteMetricsFile(metrics, metricsFile) +// if err != nil { +// logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) +// } + +// return metrics.Outcome +// } // FindRepos attempts to find the source code repositories for a given CVE. func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) []string { @@ -249,53 +277,17 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } -func ResolveVersionsToCommits(versions *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) models.ConversionOutcome { - if len(repos) == 0 && len(versions.AffectedCommits) == 0 { - return models.NoRepos +func ResolveVersionsToCommits(versions []*osvschema.Range, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (models.ConversionOutcome, *osvschema.Affected) { + if len(repos) == 0 && len(versions) == 0 { + return models.NoRepos, nil } // There are some AffectedVersions to try and resolve to AffectedCommits. metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - if len(versions.AffectedVersions) != 0 { - // There are some AffectedVersions to try and resolve to AffectedCommits. - if len(repos) == 0 { - metrics.AddNote("No affected ranges and no repos to try and convert %+v to tags with", versions.AffectedVersions) - return models.NoRanges - } - cves.GitVersionsToCommits(versions, repos, cache, metrics) - } - hasAnyFixedCommits := false - for _, repo := range repos { - if versions.HasFixedCommits(repo) { - hasAnyFixedCommits = true - break - } - } - if !hasAnyFixedCommits { - for _, ac := range versions.AffectedCommits { - if ac.Fixed != "" { - hasAnyFixedCommits = true - break - } - } - } - - if versions.HasFixedVersions() && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - } - - hasAnyLastAffectedCommits := false - for _, repo := range repos { - if versions.HasLastAffectedCommits(repo) { - hasAnyLastAffectedCommits = true - break - } - } - - if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return models.FixUnresolvable + affected, err := conversion.GitVersionsToCommits(versions, repos, metrics, cache) + if err != nil { + return models.FixUnresolvable, nil } - return models.Successful + return models.Successful, affected } diff --git a/vulnfeeds/cvelist2osv/common.go b/vulnfeeds/cvelist2osv/common.go index 19361b1bd7b..53e5ae4c1ca 100644 --- a/vulnfeeds/cvelist2osv/common.go +++ b/vulnfeeds/cvelist2osv/common.go @@ -3,15 +3,11 @@ package cvelist2osv import ( "cmp" "errors" - "log/slog" "strconv" "strings" - "github.com/google/osv/vulnfeeds/cves" - "github.com/google/osv/vulnfeeds/git" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/utility" - "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" ) @@ -55,117 +51,6 @@ func toVersionRangeType(s string) VersionRangeType { } } -// resolveVersionToCommit is a helper to convert a version string to a commit hash. -// It logs the outcome of the conversion attempt and returns an empty string on failure. -func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string { - if version == "" { - return "" - } - logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) - commit, err := git.VersionToCommit(version, normalizedTags) - if err != nil { - logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) - return "" - } - logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) - - return commit -} - -// Examines repos and tries to convert versions to commits by treating them as Git tags. -// Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and -// typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. -// Refuses to add the same commit to AffectedCommits more than once. -func gitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { - var newAff osvschema.Affected - var newVersionRanges []*osvschema.Range - unresolvedRanges := versionRanges - - for _, repo := range repos { - if len(unresolvedRanges) == 0 { - break // All ranges have been resolved. - } - - normalizedTags, err := git.NormalizeRepoTags(repo, cache) - if err != nil { - metrics.AddNote("Failed to normalize tags - %s", repo) - continue - } - - var stillUnresolvedRanges []*osvschema.Range - for _, vr := range unresolvedRanges { - var introduced, fixed, lastAffected string - for _, e := range vr.GetEvents() { - if e.GetIntroduced() != "" { - introduced = e.GetIntroduced() - } - if e.GetFixed() != "" { - fixed = e.GetFixed() - } - if e.GetLastAffected() != "" { - lastAffected = e.GetLastAffected() - } - } - - var introducedCommit string - if introduced == "0" { - introducedCommit = "0" - } else { - introducedCommit = resolveVersionToCommit(cveID, introduced, "introduced", repo, normalizedTags) - } - fixedCommit := resolveVersionToCommit(cveID, fixed, "fixed", repo, normalizedTags) - lastAffectedCommit := resolveVersionToCommit(cveID, lastAffected, "last_affected", repo, normalizedTags) - - if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { - var newVR *osvschema.Range - - if fixedCommit != "" { - newVR = cves.BuildVersionRange(introducedCommit, "", fixedCommit) - } else { - newVR = cves.BuildVersionRange(introducedCommit, lastAffectedCommit, "") - } - - newVR.Repo = repo - newVR.Type = osvschema.Range_GIT - if len(vr.GetEvents()) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newVR.DatabaseSpecific = databaseSpecific - } - } - - newVersionRanges = append(newVersionRanges, newVR) - } else { - stillUnresolvedRanges = append(stillUnresolvedRanges, vr) - } - } - unresolvedRanges = stillUnresolvedRanges - } - - var err error - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newAff.DatabaseSpecific = databaseSpecific - } - - metrics.UnresolvedRangesCount += len(unresolvedRanges) - } - - if len(newVersionRanges) > 0 { - newAff.Ranges = newVersionRanges - metrics.ResolvedRangesCount += len(newVersionRanges) - } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. - err = errors.New("was not able to get git version ranges") - } - - return &newAff, err -} - // findCPEVersionRanges extracts version ranges and CPE strings from the CNA's // CPE applicability statements in a CVE record. func findCPEVersionRanges(cve models.CVE5) (versionRanges []*osvschema.Range, cpes []string, err error) { @@ -187,9 +72,9 @@ func findCPEVersionRanges(cve models.CVE5) (versionRanges []*osvschema.Range, cp } if match.VersionEndExcluding != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(match.VersionStartIncluding, "", match.VersionEndExcluding)) + versionRanges = append(versionRanges, conversion.BuildVersionRange(match.VersionStartIncluding, "", match.VersionEndExcluding)) } else if match.VersionEndIncluding != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(match.VersionStartIncluding, match.VersionEndIncluding, "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange(match.VersionStartIncluding, match.VersionEndIncluding, "")) } } } diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index a9ac0801f6d..a3cea4f0578 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -39,7 +39,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, ranges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -53,7 +53,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -68,7 +68,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } @@ -100,10 +100,10 @@ func (d *DefaultVersionExtractor) FindNormalAffectedRanges(affected models.Affec continue } if av.Fixed != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(av.Introduced, "", av.Fixed)) + versionRanges = append(versionRanges, conversion.BuildVersionRange(av.Introduced, "", av.Fixed)) continue } else if av.LastAffected != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(av.Introduced, av.LastAffected, "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange(av.Introduced, av.LastAffected, "")) continue } } @@ -118,7 +118,7 @@ func (d *DefaultVersionExtractor) FindNormalAffectedRanges(affected models.Affec // As a fallback, assume a single version means it's the last affected version. if vulns.CheckQuality(vers.Version).AtLeast(acceptableQuality) { - versionRanges = append(versionRanges, cves.BuildVersionRange("0", vers.Version, "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange("0", vers.Version, "")) metrics.AddNote("Single version found %v - Assuming introduced = 0 and last affected = %v", vers.Version, vers.Version) } } diff --git a/vulnfeeds/cvelist2osv/linux_extractor.go b/vulnfeeds/cvelist2osv/linux_extractor.go index 7c4679bcd59..f46ea20a577 100644 --- a/vulnfeeds/cvelist2osv/linux_extractor.go +++ b/vulnfeeds/cvelist2osv/linux_extractor.go @@ -137,7 +137,7 @@ func findInverseAffectedRanges(cveAff models.Affected, metrics *models.Conversio // Create ranges by pairing sorted introduced and fixed versions. for index, f := range fixed { if index < len(introduced) { - ranges = append(ranges, cves.BuildVersionRange(introduced[index], "", f)) + ranges = append(ranges, conversion.BuildVersionRange(introduced[index], "", f)) metrics.AddNote("Introduced from version value - %s", introduced[index]) metrics.AddNote("Fixed from version value - %s", f) } @@ -166,13 +166,13 @@ func (l *LinuxVersionExtractor) FindNormalAffectedRanges(affected models.Affecte metrics.AddNote("Only version exists") if currentVersionType == VersionRangeTypeGit { - versionRanges = append(versionRanges, cves.BuildVersionRange(vers.Version, "", "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange(vers.Version, "", "")) continue } // As a fallback, assume a single version means it's the last affected version. if vulns.CheckQuality(vers.Version).AtLeast(acceptableQuality) { - versionRanges = append(versionRanges, cves.BuildVersionRange("0", vers.Version, "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange("0", vers.Version, "")) metrics.AddNote("Single version found %v - Assuming introduced = 0 and last affected = %v", vers.Version, vers.Version) } } diff --git a/vulnfeeds/cvelist2osv/strategies.go b/vulnfeeds/cvelist2osv/strategies.go index 51cb4d59adb..38f0824e82b 100644 --- a/vulnfeeds/cvelist2osv/strategies.go +++ b/vulnfeeds/cvelist2osv/strategies.go @@ -1,6 +1,7 @@ package cvelist2osv import ( + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/vulns" @@ -75,9 +76,9 @@ func initialNormalExtraction(vers models.Versions, metrics *models.ConversionMet } var versionRanges []*osvschema.Range if fixed != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(introduced, "", fixed)) + versionRanges = append(versionRanges, conversion.BuildVersionRange(introduced, "", fixed)) } else if lastaffected != "" { - versionRanges = append(versionRanges, cves.BuildVersionRange(introduced, lastaffected, "")) + versionRanges = append(versionRanges, conversion.BuildVersionRange(introduced, lastaffected, "")) } return versionRanges, currentVersionType, true diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 124b779f6bf..70a368baeb0 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -32,8 +32,11 @@ import ( "github.com/ossf/osv-schema/bindings/go/osvschema" "github.com/sethvargo/go-retry" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility/logger" + "github.com/google/osv/vulnfeeds/vulns" ) // References with these tags have been found to contain completely unrelated @@ -547,19 +550,27 @@ func ValidateAndCanonicalizeLink(link string, httpClient *http.Client) (canonica } // For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. -func extractGitAffectedCommit(link string, commitType models.CommitType, httpClient *http.Client) (models.AffectedCommit, error) { - var ac models.AffectedCommit - c, r, err := ExtractGitCommit(link, httpClient, 0) +func extractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { + var commits []models.AffectedCommit - if err != nil { - return ac, err - } + for _, ref := range references { + // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. + var ac models.AffectedCommit + c, r, err := ExtractGitCommit(ref.URL, httpClient, 0) + + if err != nil { + logger.Error("Failed to extract commit from ref: %v", err) + continue + } - ac.SetRepo(r) + ac.SetRepo(r) - models.SetCommitByType(&ac, commitType, c) + models.SetCommitByType(&ac, models.Fixed, c) - return ac, nil + commits = append(commits, ac) + } + + return commits, nil } func ExtractGitCommit(link string, httpClient *http.Client, depth int) (string, string, error) { @@ -638,7 +649,7 @@ func processExtractedVersion(version string) string { return version } -func ExtractVersionsFromText(validVersions []string, text string, metrics *models.ConversionMetrics) []models.AffectedVersion { +func ExtractVersionsFromText(validVersions []string, text string, metrics *models.ConversionMetrics) []*osvschema.Range { // Match: // - x.x.x before x.x.x // - x.x.x through x.x.x @@ -651,7 +662,7 @@ func ExtractVersionsFromText(validVersions []string, text string, metrics *model return nil } - versions := make([]models.AffectedVersion, 0, len(matches)) + versions := make([]*osvschema.Range, 0, len(matches)) for _, match := range matches { // Trim periods that are part of sentences. @@ -689,11 +700,8 @@ func ExtractVersionsFromText(validVersions []string, text string, metrics *model lastaffected = "" } - versions = append(versions, models.AffectedVersion{ - Introduced: introduced, - Fixed: fixed, - LastAffected: lastaffected, - }) + vr := conversion.BuildVersionRange(introduced, lastaffected, fixed) + versions = append(versions, vr) } return versions @@ -719,9 +727,8 @@ func DeduplicateAffectedCommits(commits []models.AffectedCommit) []models.Affect return uniqueCommits } -func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics *models.ConversionMetrics) []models.AffectedVersion { - versions := []models.AffectedVersion{} - seen := make(map[models.AffectedVersion]bool) +func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics *models.ConversionMetrics) []*osvschema.Range { + versions := []*osvschema.Range{} for _, config := range cve.Configurations { for _, node := range config.Nodes { @@ -768,8 +775,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics if err != nil { continue } - if CPE.Part != "a" { - // Skip operating system CPEs. + if CPE.Part != "a" && CPE.Part != "o" { continue } if slices.Contains([]string{"NA", "ANY"}, CPE.Version) { @@ -782,8 +788,11 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics } } - if introduced == "" && fixed == "" && lastaffected == "" { - continue + if introduced == "" { + if fixed == "" && lastaffected == "" { + continue + } + introduced = "0" } if introduced != "" && !HasVersion(validVersions, introduced) { @@ -797,19 +806,8 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics if fixed != "" && !HasVersion(validVersions, fixed) { metrics.AddNote("Warning: %s is not a valid fixed version", fixed) } - - possibleNewAffectedVersion := models.AffectedVersion{ - Introduced: introduced, - Fixed: fixed, - LastAffected: lastaffected, - } - - if seen[possibleNewAffectedVersion] { - continue - } - seen[possibleNewAffectedVersion] = true - versions = append(versions, possibleNewAffectedVersion) - metrics.AddNote("Extracted version %+v", possibleNewAffectedVersion) + vr := conversion.BuildVersionRange(introduced, lastaffected, fixed) + versions = append(versions, vr) } } } @@ -817,29 +815,30 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics return versions } -func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { - for _, reference := range cve.References { - // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. - if commit, err := extractGitAffectedCommit(reference.URL, models.Fixed, httpClient); err == nil { - v.AffectedCommits = append(v.AffectedCommits, commit) - } +func ExtractVersions(v *vulns.Vulnerability, cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (cpeRanges []*osvschema.Range, commits []models.AffectedCommit, textRanges []*osvschema.Range) { + // Extract Versions From CPEs + cpeRanges = ExtractVersionsFromCPEs(cve, validVersions, metrics) + if len(cpeRanges) > 0 { + metrics.AddNote("Extracted versions from CPEs: %v", cpeRanges) + } + + // Extract Commits + commits, err := extractCommitsFromRefs(cve.References, httpClient) + if err != nil { + metrics.AddNote("Failed to extract commits from refs: %v", err) } - if len(v.AffectedCommits) > 0 { - v.AffectedCommits = DeduplicateAffectedCommits(v.AffectedCommits) - metrics.AddNote("Extracted %d commits", len(v.AffectedCommits)) + if len(commits) > 0 { + metrics.AddNote("Extracted commits from refs: %v", commits) } - v.AffectedVersions = ExtractVersionsFromCPEs(cve, validVersions, metrics) - if len(v.AffectedVersions) > 0 { - metrics.AddNote("Extracted versions from CPEs: %v", v.AffectedVersions) - } else { - v.AffectedVersions = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) - if len(v.AffectedVersions) > 0 { - metrics.AddNote("Extracted versions from description: %v", v.AffectedVersions) - } + // Extract Versions From Text + textRanges = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) + if len(textRanges) > 0 { + metrics.AddNote("Extracted versions from description: %v", textRanges) } - if len(v.AffectedVersions) == 0 { + // If no versions were detected, add a note + if len(cpeRanges) == 0 && len(commits) == 0 && len(textRanges) == 0 { metrics.AddNote("No versions detected.") } @@ -850,19 +849,7 @@ func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *h } } - // Remove any lastaffected versions in favour of fixed versions. - if v.HasFixedVersions() { - affectedVersionsWithoutLastAffected := []models.AffectedVersion{} - for _, av := range v.AffectedVersions { - if av.LastAffected != "" { - continue - } - affectedVersionsWithoutLastAffected = append(affectedVersionsWithoutLastAffected, av) - } - v.AffectedVersions = affectedVersionsWithoutLastAffected - } - - return v + return cpeRanges, commits, textRanges } func CPEs(cve models.NVDCVE) []string { @@ -1174,28 +1161,3 @@ func ReposFromReferencesCVEList(refs []models.Reference, tagDenyList []string, m return repos } - -// BuildVersionRange is a helper function that adds 'introduced', 'fixed', or 'last_affected' -// events to an OSV version range. If 'intro' is empty, it defaults to "0". -func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Range { - var versionRange osvschema.Range - var i string - if intro == "" { - i = "0" - } else { - i = intro - } - versionRange.Events = append(versionRange.Events, &osvschema.Event{ - Introduced: i}) - - if fixed != "" { - versionRange.Events = append(versionRange.Events, &osvschema.Event{ - Fixed: fixed}) - } else if lastAff != "" { - versionRange.Events = append(versionRange.Events, &osvschema.Event{ - LastAffected: lastAff, - }) - } - - return &versionRange -} diff --git a/vulnfeeds/cves/versions_test.go b/vulnfeeds/cves/versions_test.go index 9f5a69cc06d..f93c60a9916 100644 --- a/vulnfeeds/cves/versions_test.go +++ b/vulnfeeds/cves/versions_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/internal/testutils" "github.com/google/osv/vulnfeeds/models" @@ -493,449 +494,449 @@ func TestRepo(t *testing.T) { } } -func TestExtractGitCommit(t *testing.T) { - tests := []struct { - description string - inputLink string - inputCommitType models.CommitType - expectedAffectedCommit models.AffectedCommit - expectFailure bool - skipOnCloudBuild bool - disableExpiryDate time.Time // If test needs to be disabled due to known outage. - }{ - { - description: "Valid GitHub commit URL", - inputLink: "https://github.com/google/osv/commit/cd4e934d0527e5010e373e7fed54ef5daefba2f5", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/google/osv.dev", - Fixed: "cd4e934d0527e5010e373e7fed54ef5daefba2f5", - }, - }, - { - description: "Undesired GitHub commit URL", // TODO(apollock): be able to parse this a LastAffected commit - inputLink: "https://github.com/Budibase/budibase/commits/develop?after=93d6939466aec192043d8ac842e754f65fdf2e8a+594\u0026branch=develop\u0026qualified_name=refs%2Fheads%2Fdevelop", - inputCommitType: models.Fixed, - expectFailure: true, - }, - { - description: "Valid GitHub commit URL with .patch extension", - inputLink: "https://github.com/pimcore/customer-data-framework/commit/e3f333391582d9309115e6b94e875367d0ea7163.patch", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/pimcore/customer-data-framework", - Fixed: "e3f333391582d9309115e6b94e875367d0ea7163", - }, - }, - { - description: "Undesired GitHub PR commit URL", - inputLink: "https://github.com/OpenZeppelin/cairo-contracts/pull/542/commits/6d4cb750478fca2fd916f73297632f899aca9299", - inputCommitType: models.Fixed, - expectFailure: true, - }, - { - description: "Valid GitLab commit URL", - inputLink: "https://gitlab.freedesktop.org/virgl/virglrenderer/-/commit/b05bb61f454eeb8a85164c8a31510aeb9d79129c", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://gitlab.freedesktop.org/virgl/virglrenderer", - Fixed: "b05bb61f454eeb8a85164c8a31510aeb9d79129c", - }, - }, - { - description: "Valid GitLab commit URL with .patch extension", - inputLink: "https://gitlab.com/muttmua/mutt/-/commit/452ee330e094bfc7c9a68555e5152b1826534555.patch", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://gitlab.com/muttmua/mutt", - Fixed: "452ee330e094bfc7c9a68555e5152b1826534555", - }, - }, - { - description: "Valid GitLab.com commit URL", - inputLink: "https://gitlab.com/mayan-edms/mayan-edms/commit/9ebe80595afe4fdd1e2c74358d6a9421f4ce130e", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://gitlab.com/mayan-edms/mayan-edms", - Fixed: "9ebe80595afe4fdd1e2c74358d6a9421f4ce130e", - }, - }, - { - description: "Valid bitbucket.org commit URL", - inputLink: "https://bitbucket.org/openpyxl/openpyxl/commits/3b4905f428e1", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://bitbucket.org/openpyxl/openpyxl", - Fixed: "3b4905f428e1", - }, - }, - { - description: "Valid bitbucket.org commit URL with trailing slash", - inputLink: "https://bitbucket.org/utmandrew/pcrs/commits/5f18bcb/", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://bitbucket.org/utmandrew/pcrs", - Fixed: "5f18bcb", - }, - }, - { - description: "Valid cGit commit URL", - inputLink: "https://git.dpkg.org/cgit/dpkg/dpkg.git/commit/?id=faa4c92debe45412bfcf8a44f26e827800bb24be", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://git.dpkg.org/cgit/dpkg/dpkg.git", - Fixed: "faa4c92debe45412bfcf8a44f26e827800bb24be", - }, - }, - { - description: "Valid GitWeb commit URL", - // inputLink: "https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libksba.git;a=commit;h=f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", - // go-vcr / go's url parser does not support ';' in query strings. - // This does actually successfully parse outside of the tests, but there's no way to have go-vcr skip the URL validation. - inputLink: "https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libksba.git&a=commit&h=f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "git://git.gnupg.org/libksba.git", - Fixed: "f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", - }, - }, - { - description: "Unsupported GitHub PR URL", - inputLink: "https://github.com/google/osv/pull/123", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{}, - expectFailure: true, - }, - { - description: "Supported GitHub tag URL", - inputLink: "https://github.com/google/osv.dev/releases/tag/v0.0.14", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/google/osv.dev", - Fixed: "8de7697b3b8a73e79a73ec34f17ef0fa842cfbb2", - }, - expectFailure: false, - }, - { - description: "Completely invalid input", - inputLink: "", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{}, - expectFailure: true, - }, - { - description: "cGit reference from CVE-2022-30594, remapped to be cloneable", - inputLink: "https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ee1fee900537b5d9560e9f937402de5ddc8412f3", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git", - Fixed: "ee1fee900537b5d9560e9f937402de5ddc8412f3", - }, - skipOnCloudBuild: true, // observing indications of IP denylisting as at 2025-02-13 - }, - { - description: "Valid GitWeb commit URL", - inputLink: "https://git.ffmpeg.org/gitweb/ffmpeg.git/commitdiff/c94875471e3ba3dc396c6919ff3ec9b14539cd71", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://git.ffmpeg.org/ffmpeg.git", - Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71", - }, - }, - { - description: "A GitHub repo that has been renamed (as seen on CVE-2016-10544)", - inputLink: "https://github.com/uWebSockets/uWebSockets/commit/37deefd01f0875e133ea967122e3a5e421b8fcd9", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/unetworking/uwebsockets", - Fixed: "37deefd01f0875e133ea967122e3a5e421b8fcd9", - }, - }, - { - description: "A GitHub repo that should be working (as seen on CVE-2021-23568)", - inputLink: "https://github.com/eggjs/extend2/commit/aa332a59116c8398976434b57ea477c6823054f8", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/eggjs/extend2", - Fixed: "aa332a59116c8398976434b57ea477c6823054f8", - }, - }, - { - description: "A GitHub commit link that is 404'ing (as seen on CVE-2019-8375)", - inputLink: "https://github.com/WebKit/webkit/commit/6f9b511a115311b13c06eb58038ddc2c78da5531", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{}, - expectFailure: true, - }, - { - description: "A GitHub link with tags", - inputLink: "https://github.com/redis/redis/releases/tag/6.2.17", - inputCommitType: models.Fixed, - expectedAffectedCommit: models.AffectedCommit{ - Repo: "https://github.com/redis/redis", - Fixed: "441001a4e5e37a7a450c0929d2a94ba489941874", - }, - expectFailure: false, - }, - } +// func TestExtractGitCommit(t *testing.T) { +// tests := []struct { +// description string +// inputLink string +// inputCommitType models.CommitType +// expectedAffectedCommit models.AffectedCommit +// expectFailure bool +// skipOnCloudBuild bool +// disableExpiryDate time.Time // If test needs to be disabled due to known outage. +// }{ +// { +// description: "Valid GitHub commit URL", +// inputLink: "https://github.com/google/osv/commit/cd4e934d0527e5010e373e7fed54ef5daefba2f5", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/google/osv.dev", +// Fixed: "cd4e934d0527e5010e373e7fed54ef5daefba2f5", +// }, +// }, +// { +// description: "Undesired GitHub commit URL", // TODO(apollock): be able to parse this a LastAffected commit +// inputLink: "https://github.com/Budibase/budibase/commits/develop?after=93d6939466aec192043d8ac842e754f65fdf2e8a+594\u0026branch=develop\u0026qualified_name=refs%2Fheads%2Fdevelop", +// inputCommitType: models.Fixed, +// expectFailure: true, +// }, +// { +// description: "Valid GitHub commit URL with .patch extension", +// inputLink: "https://github.com/pimcore/customer-data-framework/commit/e3f333391582d9309115e6b94e875367d0ea7163.patch", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/pimcore/customer-data-framework", +// Fixed: "e3f333391582d9309115e6b94e875367d0ea7163", +// }, +// }, +// { +// description: "Undesired GitHub PR commit URL", +// inputLink: "https://github.com/OpenZeppelin/cairo-contracts/pull/542/commits/6d4cb750478fca2fd916f73297632f899aca9299", +// inputCommitType: models.Fixed, +// expectFailure: true, +// }, +// { +// description: "Valid GitLab commit URL", +// inputLink: "https://gitlab.freedesktop.org/virgl/virglrenderer/-/commit/b05bb61f454eeb8a85164c8a31510aeb9d79129c", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://gitlab.freedesktop.org/virgl/virglrenderer", +// Fixed: "b05bb61f454eeb8a85164c8a31510aeb9d79129c", +// }, +// }, +// { +// description: "Valid GitLab commit URL with .patch extension", +// inputLink: "https://gitlab.com/muttmua/mutt/-/commit/452ee330e094bfc7c9a68555e5152b1826534555.patch", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://gitlab.com/muttmua/mutt", +// Fixed: "452ee330e094bfc7c9a68555e5152b1826534555", +// }, +// }, +// { +// description: "Valid GitLab.com commit URL", +// inputLink: "https://gitlab.com/mayan-edms/mayan-edms/commit/9ebe80595afe4fdd1e2c74358d6a9421f4ce130e", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://gitlab.com/mayan-edms/mayan-edms", +// Fixed: "9ebe80595afe4fdd1e2c74358d6a9421f4ce130e", +// }, +// }, +// { +// description: "Valid bitbucket.org commit URL", +// inputLink: "https://bitbucket.org/openpyxl/openpyxl/commits/3b4905f428e1", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://bitbucket.org/openpyxl/openpyxl", +// Fixed: "3b4905f428e1", +// }, +// }, +// { +// description: "Valid bitbucket.org commit URL with trailing slash", +// inputLink: "https://bitbucket.org/utmandrew/pcrs/commits/5f18bcb/", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://bitbucket.org/utmandrew/pcrs", +// Fixed: "5f18bcb", +// }, +// }, +// { +// description: "Valid cGit commit URL", +// inputLink: "https://git.dpkg.org/cgit/dpkg/dpkg.git/commit/?id=faa4c92debe45412bfcf8a44f26e827800bb24be", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://git.dpkg.org/cgit/dpkg/dpkg.git", +// Fixed: "faa4c92debe45412bfcf8a44f26e827800bb24be", +// }, +// }, +// { +// description: "Valid GitWeb commit URL", +// // inputLink: "https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libksba.git;a=commit;h=f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", +// // go-vcr / go's url parser does not support ';' in query strings. +// // This does actually successfully parse outside of the tests, but there's no way to have go-vcr skip the URL validation. +// inputLink: "https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libksba.git&a=commit&h=f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "git://git.gnupg.org/libksba.git", +// Fixed: "f61a5ea4e0f6a80fd4b28ef0174bee77793cf070", +// }, +// }, +// { +// description: "Unsupported GitHub PR URL", +// inputLink: "https://github.com/google/osv/pull/123", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{}, +// expectFailure: true, +// }, +// { +// description: "Supported GitHub tag URL", +// inputLink: "https://github.com/google/osv.dev/releases/tag/v0.0.14", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/google/osv.dev", +// Fixed: "8de7697b3b8a73e79a73ec34f17ef0fa842cfbb2", +// }, +// expectFailure: false, +// }, +// { +// description: "Completely invalid input", +// inputLink: "", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{}, +// expectFailure: true, +// }, +// { +// description: "cGit reference from CVE-2022-30594, remapped to be cloneable", +// inputLink: "https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ee1fee900537b5d9560e9f937402de5ddc8412f3", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git", +// Fixed: "ee1fee900537b5d9560e9f937402de5ddc8412f3", +// }, +// skipOnCloudBuild: true, // observing indications of IP denylisting as at 2025-02-13 +// }, +// { +// description: "Valid GitWeb commit URL", +// inputLink: "https://git.ffmpeg.org/gitweb/ffmpeg.git/commitdiff/c94875471e3ba3dc396c6919ff3ec9b14539cd71", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://git.ffmpeg.org/ffmpeg.git", +// Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71", +// }, +// }, +// { +// description: "A GitHub repo that has been renamed (as seen on CVE-2016-10544)", +// inputLink: "https://github.com/uWebSockets/uWebSockets/commit/37deefd01f0875e133ea967122e3a5e421b8fcd9", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/unetworking/uwebsockets", +// Fixed: "37deefd01f0875e133ea967122e3a5e421b8fcd9", +// }, +// }, +// { +// description: "A GitHub repo that should be working (as seen on CVE-2021-23568)", +// inputLink: "https://github.com/eggjs/extend2/commit/aa332a59116c8398976434b57ea477c6823054f8", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/eggjs/extend2", +// Fixed: "aa332a59116c8398976434b57ea477c6823054f8", +// }, +// }, +// { +// description: "A GitHub commit link that is 404'ing (as seen on CVE-2019-8375)", +// inputLink: "https://github.com/WebKit/webkit/commit/6f9b511a115311b13c06eb58038ddc2c78da5531", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{}, +// expectFailure: true, +// }, +// { +// description: "A GitHub link with tags", +// inputLink: "https://github.com/redis/redis/releases/tag/6.2.17", +// inputCommitType: models.Fixed, +// expectedAffectedCommit: models.AffectedCommit{ +// Repo: "https://github.com/redis/redis", +// Fixed: "441001a4e5e37a7a450c0929d2a94ba489941874", +// }, +// expectFailure: false, +// }, +// } - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - t.Parallel() - r := testutils.SetupVCR(t) - client := r.GetDefaultClient() +// for _, tc := range tests { +// t.Run(tc.description, func(t *testing.T) { +// t.Parallel() +// r := testutils.SetupVCR(t) +// client := r.GetDefaultClient() - if _, ok := os.LookupEnv("BUILD_ID"); ok && tc.skipOnCloudBuild { - t.Skipf("test %q: running on Cloud Build", tc.description) - } - if time.Now().Before(tc.disableExpiryDate) { - t.Skipf("test %q: extractGitAffectedCommit for %q (%q) has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.inputLink, tc.inputCommitType, tc.disableExpiryDate) - } - if !tc.disableExpiryDate.IsZero() && time.Now().After(tc.disableExpiryDate) { - t.Logf("test %q: extractGitAffectedCommit(%q, %q) has been enabled on %s.", tc.description, tc.inputLink, tc.inputCommitType, tc.disableExpiryDate) - } - got, err := extractGitAffectedCommit(tc.inputLink, tc.inputCommitType, client) - if err != nil && !tc.expectFailure { - t.Errorf("test %q: extractGitAffectedCommit for %q (%q) errored unexpectedly: %#v", tc.description, tc.inputLink, tc.inputCommitType, err) - } - if err == nil && tc.expectFailure { - t.Errorf("test %q: extractGitAffectedCommit for %q (%q) did not error as unexpected!", tc.description, tc.inputLink, tc.inputCommitType) - } - if !reflect.DeepEqual(got, tc.expectedAffectedCommit) { - t.Errorf("test %q: extractGitAffectedCommit for %q was incorrect, got: %#v, expected: %#v", tc.description, tc.inputLink, got, tc.expectedAffectedCommit) - } - }) - } -} +// if _, ok := os.LookupEnv("BUILD_ID"); ok && tc.skipOnCloudBuild { +// t.Skipf("test %q: running on Cloud Build", tc.description) +// } +// if time.Now().Before(tc.disableExpiryDate) { +// t.Skipf("test %q: extractGitAffectedCommit for %q (%q) has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.inputLink, tc.inputCommitType, tc.disableExpiryDate) +// } +// if !tc.disableExpiryDate.IsZero() && time.Now().After(tc.disableExpiryDate) { +// t.Logf("test %q: extractGitAffectedCommit(%q, %q) has been enabled on %s.", tc.description, tc.inputLink, tc.inputCommitType, tc.disableExpiryDate) +// } +// got, err := extractGitAffectedCommit(tc.inputLink, tc.inputCommitType, client) +// if err != nil && !tc.expectFailure { +// t.Errorf("test %q: extractGitAffectedCommit for %q (%q) errored unexpectedly: %#v", tc.description, tc.inputLink, tc.inputCommitType, err) +// } +// if err == nil && tc.expectFailure { +// t.Errorf("test %q: extractGitAffectedCommit for %q (%q) did not error as unexpected!", tc.description, tc.inputLink, tc.inputCommitType) +// } +// if !reflect.DeepEqual(got, tc.expectedAffectedCommit) { +// t.Errorf("test %q: extractGitAffectedCommit for %q was incorrect, got: %#v, expected: %#v", tc.description, tc.inputLink, got, tc.expectedAffectedCommit) +// } +// }) +// } +// } -func TestExtractVersionInfo(t *testing.T) { - tests := []struct { - description string - inputCVEItem models.Vulnerability - inputValidVersions []string - expectedVersionInfo models.VersionInfo - expectedNotes []string - disableExpiryDate time.Time // If test needs to be disabled due to known outage. - }{ - { - description: "A CVE with multiple affected versions", - inputCVEItem: loadTestData2("CVE-2022-32746"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit(nil), - AffectedVersions: []models.AffectedVersion{ - { - Introduced: "4.3.0", - Fixed: "4.14.14", - LastAffected: "", - }, - { - Introduced: "4.15.0", - Fixed: "4.15.9", - LastAffected: "", - }, - { - Introduced: "4.16.0", - Fixed: "4.16.4", - LastAffected: "", - }, - }, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with duplicate affected versions squashed", - inputCVEItem: loadTestData2("CVE-2022-0090"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit(nil), - AffectedVersions: []models.AffectedVersion{ - { - Introduced: "0", - Fixed: "14.4.5", - LastAffected: "", - }, - { - Introduced: "14.5.0", - Fixed: "14.5.3", - LastAffected: "", - }, - { - Introduced: "14.6.0", - Fixed: "14.6.1", - LastAffected: "", - }, - }, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with no explicit versions", - inputCVEItem: loadTestData2("CVE-2022-1122"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit(nil), - AffectedVersions: []models.AffectedVersion{ - { - Introduced: "0", - Fixed: "", - LastAffected: "2.4.0", - }, - }, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with fix commits in references and CPE match info", - inputCVEItem: loadTestData2("CVE-2022-25929"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit{ - { - Repo: "https://github.com/joewalnes/smoothie", - Introduced: "0", - Fixed: "8e0920d50da82f4b6e605d56f41b69fbb9606a98", - }, - }, - AffectedVersions: []models.AffectedVersion{ - { - Introduced: "1.31.0", - Fixed: "1.36.1", - LastAffected: "", - }, - }, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with fix commits in references and (more complex) CPE match info", - inputCVEItem: loadTestData2("CVE-2022-29194"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit{ - { - Repo: "https://github.com/tensorflow/tensorflow", - Introduced: "0", - Fixed: "0516d4d8bced506cae97dc3cb45dbd2fe4311f26", - }, - { - Repo: "https://github.com/tensorflow/tensorflow", - Introduced: "0", - Fixed: "33ed2b11cb8e879d86c371700e6573db1814a69e", - }, - { - Repo: "https://github.com/tensorflow/tensorflow", - Introduced: "0", - Fixed: "8a20d54a3c1bfa38c03ea99a2ad3c1b0a45dfa95", - }, - { - Repo: "https://github.com/tensorflow/tensorflow", - Introduced: "0", - Fixed: "cff267650c6a1b266e4b4500f69fbc49cdd773c5", - }, - { - Repo: "https://github.com/tensorflow/tensorflow", - Introduced: "0", - Fixed: "dd7b8a3c1714d0052ce4b4a2fd8dcef927439a24", - }, - }, - AffectedVersions: []models.AffectedVersion{ - { - Introduced: "0", - Fixed: "2.6.4", - LastAffected: "", - }, - { - Introduced: "2.7.0", - Fixed: "2.7.2", - LastAffected: "", - }, - { - Introduced: "2.8.0", - Fixed: "2.8.1", - LastAffected: "", - }, - }, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with undesired wildcards and no versions", - inputCVEItem: loadTestData2("CVE-2022-2956"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit(nil), - AffectedVersions: []models.AffectedVersion(nil), - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with a weird GitLab reference that breaks version enumeration in the worker", - inputCVEItem: loadTestData2("CVE-2022-46285"), - inputValidVersions: []string{}, - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit{{Repo: "https://gitlab.freedesktop.org/xorg/lib/libxpm", Introduced: "0", Fixed: "a3a7c6dcc3b629d7650148"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "3.5.15"}}, - }, - expectedNotes: []string{}, - }, - { - description: "A CVE with a different GitWeb reference URL that was not previously being extracted successfully", - inputCVEItem: loadTestData2("CVE-2021-28429"), - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit{{Repo: "https://git.ffmpeg.org/ffmpeg.git", Introduced: "0", Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", LastAffected: "4.3.2"}}, - }, - }, - { - description: "A CVE with a configuration unsupported by ExtractVersionInfo and a limit version in the description", - inputCVEItem: loadTestData2("CVE-2020-13595"), - expectedVersionInfo: models.VersionInfo{ - AffectedVersions: []models.AffectedVersion{{Introduced: "4.0.0", LastAffected: "4.2"}}, - }, - }, - { - description: "CVE with duplicate hashes", - inputCVEItem: loadTestData2("CVE-2022-25761"), - expectedVersionInfo: models.VersionInfo{ - AffectedCommits: []models.AffectedCommit{ - { - Repo: "https://github.com/open62541/open62541", - Introduced: "0", - Fixed: "3010bc67fbfd8de0921fc38c9efa146cd2e02c7f", - }, - { - Repo: "https://github.com/open62541/open62541", - Introduced: "0", - Fixed: "b79db1ac78146fc06b0b8435773d3967de2d659c", - }, - }, +// func TestExtractVersionInfo(t *testing.T) { +// tests := []struct { +// description string +// inputCVEItem models.Vulnerability +// inputValidVersions []string +// expectedVersionInfo models.VersionInfo +// expectedNotes []string +// disableExpiryDate time.Time // If test needs to be disabled due to known outage. +// }{ +// { +// description: "A CVE with multiple affected versions", +// inputCVEItem: loadTestData2("CVE-2022-32746"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit(nil), +// AffectedVersions: []models.AffectedVersion{ +// { +// Introduced: "4.3.0", +// Fixed: "4.14.14", +// LastAffected: "", +// }, +// { +// Introduced: "4.15.0", +// Fixed: "4.15.9", +// LastAffected: "", +// }, +// { +// Introduced: "4.16.0", +// Fixed: "4.16.4", +// LastAffected: "", +// }, +// }, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with duplicate affected versions squashed", +// inputCVEItem: loadTestData2("CVE-2022-0090"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit(nil), +// AffectedVersions: []models.AffectedVersion{ +// { +// Introduced: "0", +// Fixed: "14.4.5", +// LastAffected: "", +// }, +// { +// Introduced: "14.5.0", +// Fixed: "14.5.3", +// LastAffected: "", +// }, +// { +// Introduced: "14.6.0", +// Fixed: "14.6.1", +// LastAffected: "", +// }, +// }, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with no explicit versions", +// inputCVEItem: loadTestData2("CVE-2022-1122"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit(nil), +// AffectedVersions: []models.AffectedVersion{ +// { +// Introduced: "0", +// Fixed: "", +// LastAffected: "2.4.0", +// }, +// }, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with fix commits in references and CPE match info", +// inputCVEItem: loadTestData2("CVE-2022-25929"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit{ +// { +// Repo: "https://github.com/joewalnes/smoothie", +// Introduced: "0", +// Fixed: "8e0920d50da82f4b6e605d56f41b69fbb9606a98", +// }, +// }, +// AffectedVersions: []models.AffectedVersion{ +// { +// Introduced: "1.31.0", +// Fixed: "1.36.1", +// LastAffected: "", +// }, +// }, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with fix commits in references and (more complex) CPE match info", +// inputCVEItem: loadTestData2("CVE-2022-29194"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit{ +// { +// Repo: "https://github.com/tensorflow/tensorflow", +// Introduced: "0", +// Fixed: "0516d4d8bced506cae97dc3cb45dbd2fe4311f26", +// }, +// { +// Repo: "https://github.com/tensorflow/tensorflow", +// Introduced: "0", +// Fixed: "33ed2b11cb8e879d86c371700e6573db1814a69e", +// }, +// { +// Repo: "https://github.com/tensorflow/tensorflow", +// Introduced: "0", +// Fixed: "8a20d54a3c1bfa38c03ea99a2ad3c1b0a45dfa95", +// }, +// { +// Repo: "https://github.com/tensorflow/tensorflow", +// Introduced: "0", +// Fixed: "cff267650c6a1b266e4b4500f69fbc49cdd773c5", +// }, +// { +// Repo: "https://github.com/tensorflow/tensorflow", +// Introduced: "0", +// Fixed: "dd7b8a3c1714d0052ce4b4a2fd8dcef927439a24", +// }, +// }, +// AffectedVersions: []models.AffectedVersion{ +// { +// Introduced: "0", +// Fixed: "2.6.4", +// LastAffected: "", +// }, +// { +// Introduced: "2.7.0", +// Fixed: "2.7.2", +// LastAffected: "", +// }, +// { +// Introduced: "2.8.0", +// Fixed: "2.8.1", +// LastAffected: "", +// }, +// }, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with undesired wildcards and no versions", +// inputCVEItem: loadTestData2("CVE-2022-2956"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit(nil), +// AffectedVersions: []models.AffectedVersion(nil), +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with a weird GitLab reference that breaks version enumeration in the worker", +// inputCVEItem: loadTestData2("CVE-2022-46285"), +// inputValidVersions: []string{}, +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit{{Repo: "https://gitlab.freedesktop.org/xorg/lib/libxpm", Introduced: "0", Fixed: "a3a7c6dcc3b629d7650148"}}, +// AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "3.5.15"}}, +// }, +// expectedNotes: []string{}, +// }, +// { +// description: "A CVE with a different GitWeb reference URL that was not previously being extracted successfully", +// inputCVEItem: loadTestData2("CVE-2021-28429"), +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit{{Repo: "https://git.ffmpeg.org/ffmpeg.git", Introduced: "0", Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71"}}, +// AffectedVersions: []models.AffectedVersion{{Introduced: "0", LastAffected: "4.3.2"}}, +// }, +// }, +// { +// description: "A CVE with a configuration unsupported by ExtractVersionInfo and a limit version in the description", +// inputCVEItem: loadTestData2("CVE-2020-13595"), +// expectedVersionInfo: models.VersionInfo{ +// AffectedVersions: []models.AffectedVersion{{Introduced: "4.0.0", LastAffected: "4.2"}}, +// }, +// }, +// { +// description: "CVE with duplicate hashes", +// inputCVEItem: loadTestData2("CVE-2022-25761"), +// expectedVersionInfo: models.VersionInfo{ +// AffectedCommits: []models.AffectedCommit{ +// { +// Repo: "https://github.com/open62541/open62541", +// Introduced: "0", +// Fixed: "3010bc67fbfd8de0921fc38c9efa146cd2e02c7f", +// }, +// { +// Repo: "https://github.com/open62541/open62541", +// Introduced: "0", +// Fixed: "b79db1ac78146fc06b0b8435773d3967de2d659c", +// }, +// }, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "1.2.5"}}, - }, - }, - } +// AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "1.2.5"}}, +// }, +// }, +// } - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - t.Parallel() - r := testutils.SetupVCR(t) - client := r.GetDefaultClient() +// for _, tc := range tests { +// t.Run(tc.description, func(t *testing.T) { +// t.Parallel() +// r := testutils.SetupVCR(t) +// client := r.GetDefaultClient() - if time.Now().Before(tc.disableExpiryDate) { - t.Skipf("test %q: VersionInfo for %#v has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.inputCVEItem, tc.disableExpiryDate) - } - if !tc.disableExpiryDate.IsZero() && time.Now().After(tc.disableExpiryDate) { - t.Logf("test %q: VersionInfo for %#v has been enabled on %s.", tc.description, tc.inputCVEItem, tc.disableExpiryDate) - } - metrics := &models.ConversionMetrics{} - gotVersionInfo := ExtractVersionInfo(tc.inputCVEItem.CVE, tc.inputValidVersions, client, metrics) - if diff := cmp.Diff(tc.expectedVersionInfo, gotVersionInfo); diff != "" { - t.Errorf("test %q: VersionInfo for %#v was incorrect: %s", tc.description, tc.inputCVEItem, diff) - } - }) - } -} +// if time.Now().Before(tc.disableExpiryDate) { +// t.Skipf("test %q: VersionInfo for %#v has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.inputCVEItem, tc.disableExpiryDate) +// } +// if !tc.disableExpiryDate.IsZero() && time.Now().After(tc.disableExpiryDate) { +// t.Logf("test %q: VersionInfo for %#v has been enabled on %s.", tc.description, tc.inputCVEItem, tc.disableExpiryDate) +// } +// metrics := &models.ConversionMetrics{} +// gotVersionInfo := ExtractVersions(tc.inputCVEItem.CVE, tc.inputValidVersions, client, metrics) +// if diff := cmp.Diff(tc.expectedVersionInfo, gotVersionInfo); diff != "" { +// t.Errorf("test %q: VersionInfo for %#v was incorrect: %s", tc.description, tc.inputCVEItem, diff) +// } +// }) +// } +// } func TestCPEs(t *testing.T) { tests := []struct { @@ -1696,7 +1697,7 @@ func TestBuildVersionRange(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := BuildVersionRange(tt.intro, tt.lastAff, tt.fixed) + got := conversion.BuildVersionRange(tt.intro, tt.lastAff, tt.fixed) if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { t.Errorf("cves.BuildVersionRange() mismatch (-want +got):\n%s", diff) } From 340cf1172a627cf2eb2c5430241794ec79ccbb0f Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 12 Feb 2026 03:15:01 +0000 Subject: [PATCH 12/45] Return ranges not affected --- vulnfeeds/conversion/common.go | 20 +-- vulnfeeds/conversion/nvd/converter.go | 16 ++- vulnfeeds/cvelist2osv/default_extractor.go | 54 +++++++- vulnfeeds/cvelist2osv/linux_extractor.go | 1 - .../cvelist2osv/version_extraction_test.go | 14 +- vulnfeeds/cves/versions.go | 131 ++++++++++++++++++ 6 files changed, 207 insertions(+), 29 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 6aca53eb4c5..ed5f18b29af 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -174,8 +174,8 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e // Examines repos and tries to convert versions to commits by treating them as Git tags. -func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { - var newAff osvschema.Affected +// Examines repos and tries to convert versions to commits by treating them as Git tags. +func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, error) { var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges @@ -243,25 +243,17 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr } var err error - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newAff.DatabaseSpecific = databaseSpecific - } - - metrics.UnresolvedRangesCount += len(unresolvedRanges) - } if len(newVersionRanges) > 0 { - newAff.Ranges = newVersionRanges metrics.ResolvedRangesCount += len(newVersionRanges) } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. err = errors.New("was not able to get git version ranges") } + if len(unresolvedRanges) > 0 { + metrics.UnresolvedRangesCount += len(unresolvedRanges) + } - return &newAff, err + return newVersionRanges, unresolvedRanges, err } // resolveVersionToCommit is a helper to convert a version string to a commit hash. diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 25976382737..a4ad96dc52e 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -13,6 +13,7 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -284,10 +285,23 @@ func ResolveVersionsToCommits(versions []*osvschema.Range, repos []string, cache // There are some AffectedVersions to try and resolve to AffectedCommits. metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - affected, err := conversion.GitVersionsToCommits(versions, repos, metrics, cache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versions, repos, metrics, cache) if err != nil { return models.FixUnresolvable, nil } + affected := &osvschema.Affected{ + Ranges: resolvedRanges, + } + + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + affected.DatabaseSpecific = databaseSpecific + } + } + return models.Successful, affected } diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index a3cea4f0578..0f16ed60f09 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -7,6 +7,7 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -39,13 +40,27 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - aff, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { gotVersions = true } - conversion.AddAffected(v, aff, metrics) + + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } if !gotVersions { @@ -53,14 +68,27 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { gotVersions = true } - conversion.AddAffected(v, aff, metrics) + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } } @@ -68,11 +96,25 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } - conversion.AddAffected(v, aff, metrics) + + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } } } diff --git a/vulnfeeds/cvelist2osv/linux_extractor.go b/vulnfeeds/cvelist2osv/linux_extractor.go index f46ea20a577..7505074c176 100644 --- a/vulnfeeds/cvelist2osv/linux_extractor.go +++ b/vulnfeeds/cvelist2osv/linux_extractor.go @@ -7,7 +7,6 @@ import ( "strings" "github.com/google/osv/vulnfeeds/conversion" - "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvconstants" diff --git a/vulnfeeds/cvelist2osv/version_extraction_test.go b/vulnfeeds/cvelist2osv/version_extraction_test.go index e1080681a0e..04b2635240c 100644 --- a/vulnfeeds/cvelist2osv/version_extraction_test.go +++ b/vulnfeeds/cvelist2osv/version_extraction_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "github.com/google/osv/vulnfeeds/cves" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -58,7 +58,7 @@ func TestFindNormalAffectedRanges(t *testing.T) { }, }, wantRanges: []*osvschema.Range{ - cves.BuildVersionRange("1.0", "", "1.5"), + conversion.BuildVersionRange("1.0", "", "1.5"), }, wantRangeType: VersionRangeTypeSemver, }, @@ -74,7 +74,7 @@ func TestFindNormalAffectedRanges(t *testing.T) { }, }, wantRanges: []*osvschema.Range{ - cves.BuildVersionRange("0", "2.0", ""), + conversion.BuildVersionRange("0", "2.0", ""), }, wantRangeType: VersionRangeTypeSemver, }, @@ -89,7 +89,7 @@ func TestFindNormalAffectedRanges(t *testing.T) { }, }, wantRanges: []*osvschema.Range{ - cves.BuildVersionRange("2.0", "", "2.5"), + conversion.BuildVersionRange("2.0", "", "2.5"), }, wantRangeType: VersionRangeTypeEcosystem, }, @@ -105,7 +105,7 @@ func TestFindNormalAffectedRanges(t *testing.T) { }, }, wantRanges: []*osvschema.Range{ - cves.BuildVersionRange("", "deadbeef", ""), + conversion.BuildVersionRange("", "deadbeef", ""), }, wantRangeType: VersionRangeTypeGit, }, @@ -175,7 +175,7 @@ func TestFindInverseAffectedRanges(t *testing.T) { versionType: VersionRangeTypeSemver, cnaAssigner: "Linux", want: []*osvschema.Range{ - cves.BuildVersionRange("5.0.0", "", "5.10.1"), + conversion.BuildVersionRange("5.0.0", "", "5.10.1"), }, }, { @@ -214,7 +214,7 @@ func TestFindInverseAffectedRanges(t *testing.T) { versionType: VersionRangeTypeSemver, cnaAssigner: "Linux", want: []*osvschema.Range{ - cves.BuildVersionRange("4.0.0", "", "4.5.2"), + conversion.BuildVersionRange("4.0.0", "", "4.5.2"), }, }, } diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 70a368baeb0..75662ad8fc8 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -852,6 +852,137 @@ func ExtractVersions(v *vulns.Vulnerability, cve models.NVDCVE, validVersions [] return cpeRanges, commits, textRanges } +// ExtractVersionInfo extracts version information from a CVE. +// Deprecated: Use ExtractVersions instead. +func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { + if commit, err := extractCommitsFromRefs(cve.References, httpClient); err == nil { + v.AffectedCommits = append(v.AffectedCommits, commit...) + } + + if v.AffectedCommits != nil { + v.AffectedCommits = DeduplicateAffectedCommits(v.AffectedCommits) + metrics.AddNote("Extracted %d commits", len(v.AffectedCommits)) + } + + // gotVersions := false + for _, config := range cve.Configurations { + for _, node := range config.Nodes { + if node.Operator != "OR" { + continue + } + + for _, match := range node.CPEMatch { + if !match.Vulnerable { + continue + } + + introduced := "" + fixed := "" + lastaffected := "" + if match.VersionStartIncluding != nil { + introduced = cleanVersion(*match.VersionStartIncluding) + } else if match.VersionStartExcluding != nil { + var err error + introduced, err = nextVersion(validVersions, cleanVersion(*match.VersionStartExcluding)) + if err != nil { + metrics.AddNote("%v", err.Error()) + } + } + + if match.VersionEndExcluding != nil { + fixed = cleanVersion(*match.VersionEndExcluding) + } else if match.VersionEndIncluding != nil { + var err error + // Infer the fixed version from the next version after. + fixed, err = nextVersion(validVersions, cleanVersion(*match.VersionEndIncluding)) + if err != nil { + metrics.AddNote("%v", err.Error()) + // if that inference failed, we know this version was definitely still vulnerable. + lastaffected = cleanVersion(*match.VersionEndIncluding) + metrics.AddNote("Using %s as last_affected version instead", cleanVersion(*match.VersionEndIncluding)) + } + } + + if introduced == "" && fixed == "" && lastaffected == "" { + // See if a last affected version is inferable from the CPE string. + // In this situation there is no known introduced version. + CPE, err := ParseCPE(match.Criteria) + if err != nil { + continue + } + if CPE.Part != "a" { + // Skip operating system CPEs. + continue + } + if slices.Contains([]string{"NA", "ANY"}, CPE.Version) { + // These are meaningless converting to commits. + continue + } + lastaffected = CPE.Version + if CPE.Update != "ANY" { + lastaffected += "-" + CPE.Update + } + } + + if introduced == "" && fixed == "" && lastaffected == "" { + continue + } + + if introduced != "" && !HasVersion(validVersions, introduced) { + metrics.AddNote("Warning: %s is not a valid introduced version", introduced) + } + + if fixed != "" && !HasVersion(validVersions, fixed) { + metrics.AddNote("Warning: %s is not a valid fixed version", fixed) + } + + // gotVersions = true + possibleNewAffectedVersion := models.AffectedVersion{ + Introduced: introduced, + Fixed: fixed, + LastAffected: lastaffected, + } + if slices.Contains(v.AffectedVersions, possibleNewAffectedVersion) { + // Avoid appending duplicates + continue + } + v.AffectedVersions = append(v.AffectedVersions, possibleNewAffectedVersion) + } + } + } + // if !gotVersions { + // v.AffectedVersions = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) + // if len(v.AffectedVersions) > 0 { + // metrics.AddNote("Extracted versions from description: %v", v.AffectedVersions) + // } + // } + + if len(v.AffectedVersions) == 0 { + metrics.AddNote("No versions detected.") + } + + if len(validVersions) > 0 { + metrics.AddNote("Valid versions:") + for _, version := range validVersions { + metrics.AddNote(" - %v", version) + } + } + + // Remove any lastaffected versions in favour of fixed versions. + if v.HasFixedVersions() { + affectedVersionsWithoutLastAffected := []models.AffectedVersion{} + for _, av := range v.AffectedVersions { + if av.LastAffected != "" { + continue + } + affectedVersionsWithoutLastAffected = append(affectedVersionsWithoutLastAffected, av) + } + v.AffectedVersions = affectedVersionsWithoutLastAffected + } + + return v +} + func CPEs(cve models.NVDCVE) []string { var cpes []string for _, config := range cve.Configurations { From b26d82781938b7087d79e14f5140e2114dbb1d5e Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 13 Feb 2026 00:37:04 +0000 Subject: [PATCH 13/45] Output commit ranges collected --- vulnfeeds/conversion/common.go | 69 ++++++- vulnfeeds/conversion/nvd/converter.go | 202 ++++++++++++++------- vulnfeeds/conversion/nvd/converter_test.go | 148 +++++++++++++++ 3 files changed, 340 insertions(+), 79 deletions(-) create mode 100644 vulnfeeds/conversion/nvd/converter_test.go diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index ed5f18b29af..93865bd9385 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -5,7 +5,6 @@ package conversion import ( "encoding/csv" "encoding/json" - "errors" "fmt" "io/fs" "log/slog" @@ -175,9 +174,10 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e // Examines repos and tries to convert versions to commits by treating them as Git tags. // Examines repos and tries to convert versions to commits by treating them as Git tags. -func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, error) { +func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) { var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges + var successfulRepos []string for _, repo := range repos { if len(unresolvedRanges) == 0 { @@ -222,7 +222,7 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr } else { newVR = BuildVersionRange(introducedCommit, lastAffectedCommit, "") } - + successfulRepos = append(successfulRepos, repo) newVR.Repo = repo newVR.Type = osvschema.Range_GIT if len(vr.GetEvents()) > 0 { @@ -242,18 +242,15 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr unresolvedRanges = stillUnresolvedRanges } - var err error - if len(newVersionRanges) > 0 { metrics.ResolvedRangesCount += len(newVersionRanges) - } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. - err = errors.New("was not able to get git version ranges") } + if len(unresolvedRanges) > 0 { metrics.UnresolvedRangesCount += len(unresolvedRanges) } - return newVersionRanges, unresolvedRanges, err + return newVersionRanges, unresolvedRanges, successfulRepos } // resolveVersionToCommit is a helper to convert a version string to a commit hash. @@ -262,7 +259,6 @@ func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo strin if version == "" { return "" } - logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) commit, err := git.VersionToCommit(version, normalizedTags) if err != nil { logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) @@ -297,3 +293,58 @@ func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Ra return &versionRange } + + +func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { + // check if the ranges are the same + if range1.Repo != range2.Repo || range1.Type != range2.Type { + return nil + } + + mergedRange := &osvschema.Range{ + Repo: range1.Repo, + Type: range1.Type, + Events: append(range1.Events, range2.Events...), + } + + db1 := range1.GetDatabaseSpecific() + db2 := range2.GetDatabaseSpecific() + + if db1 == nil && db2 == nil { + return mergedRange + } + + mergedMap := make(map[string]any) + + if db1 != nil { + for k, v := range db1.GetFields() { + mergedMap[k] = v.AsInterface() + } + } + + if db2 != nil { + for k, v := range db2.GetFields() { + if existing, ok := mergedMap[k]; ok { + // If both are lists, append them + if list1, ok := existing.([]any); ok { + if list2, ok := v.AsInterface().([]any); ok { + mergedMap[k] = append(list1, list2...) + continue + } + } + } + // Otherwise overwrite or add new + mergedMap[k] = v.AsInterface() + } + } + + if len(mergedMap) > 0 { + if ds, err := utility.NewStructpbFromMap(mergedMap); err == nil { + mergedRange.DatabaseSpecific = ds + } else { + logger.Warn("Failed to create DatabaseSpecific for merged range: %v", err) + } + } + + return mergedRange +} \ No newline at end of file diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index a4ad96dc52e..c2df639a221 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -41,7 +41,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc v := vulns.FromNVDCVE(cve.ID, cve) cpeRanges, commits, textRanges := cves.ExtractVersions(v, cve, nil, http.DefaultClient, metrics) - + if cpeRanges == nil && commits == nil && textRanges == nil { metrics.AddNote("No ranges detected for %q", maybeProductName) metrics.Outcome = models.NoRanges @@ -49,55 +49,11 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc return metrics.Outcome } } - - var resolutionOutcome models.ConversionOutcome - if len(cpeRanges) > 0 { - outcome, affected := ResolveVersionsToCommits(cpeRanges, repos, cache, metrics) - resolutionOutcome = outcome - if outcome == models.Successful { - conversion.AddAffected(v, affected, metrics) - } - } + var affected *osvschema.Affected + metrics.Outcome, affected = ResolveVersionsToCommits(cpeRanges, textRanges, commits, repos, cache, metrics) - if len(commits) > 0 { - var versionRanges []*osvschema.Range - // handle commits as version ranges - for _, commit := range commits { - // if commit doesn't already - vr := conversion.BuildVersionRange(commit.Introduced,commit.LastAffected, commit.Fixed) - vr.Repo = commit.Repo - versionRanges = append(versionRanges, vr) - } - - if len(versionRanges) > 0 { - affected := osvschema.Affected{ - Ranges: versionRanges, - } - conversion.AddAffected(v, &affected, metrics) - resolutionOutcome = models.Successful - } - } + v.Affected = append(v.Affected, affected) - if len(textRanges) > 0 && resolutionOutcome != models.Successful { - // handle text ranges as version ranges - outcome, affected := ResolveVersionsToCommits(textRanges, repos, cache, metrics) - resolutionOutcome = outcome - if outcome == models.Successful { - conversion.AddAffected(v, affected, metrics) - } - } - - for _, affected := range v.Affected { - if len(affected.Ranges) != 0 { - break - } - resolutionOutcome = models.NoCommitRanges - } - - if rejectFailed && resolutionOutcome != models.Successful { - return resolutionOutcome - } - metrics.Outcome = resolutionOutcome vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) if err := os.MkdirAll(vulnDir, 0755); err != nil { @@ -108,19 +64,19 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc if errCVE != nil || errMetrics != nil { logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) } + if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics", slog.Any("err", err)) + } + if rejectFailed && metrics.Outcome != models.Successful { + return metrics.Outcome + } - err := v.ToJSON(osvFile) - if err != nil { + if err := v.ToJSON(osvFile); err != nil { logger.Error("Failed to write", slog.Any("err", err)) } osvFile.Close() - err = conversion.WriteMetricsFile(metrics, metricsFile) - if err != nil { - logger.Error("Failed to write metrics", slog.Any("err", err)) - } - return metrics.Outcome } @@ -237,7 +193,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * } // If there wasn't a repo from the CPE Dictionary, try and derive one from the CVE references. - for vendorProductKey := range vendorProductCombinations { if repos, ok := vpRepoCache.Get(vendorProductKey); ok { metrics.AddNote("Pre-references, derived repos using cache: %v", repos) @@ -278,30 +233,137 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } -func ResolveVersionsToCommits(versions []*osvschema.Range, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (models.ConversionOutcome, *osvschema.Affected) { - if len(repos) == 0 && len(versions) == 0 { - return models.NoRepos, nil +func ResolveVersionsToCommits(cpeRanges []*osvschema.Range, textRanges []*osvschema.Range, commits []models.AffectedCommit, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (models.ConversionOutcome, *osvschema.Affected) { + var cpeOutcome, textOutcome models.ConversionOutcome + resolvedRanges := []*osvschema.Range{} + unresolvedRanges := []*osvschema.Range{} + var successfulRepos []string + if len(cpeRanges) > 0 { + if len(repos) == 0 { + cpeOutcome = models.NoRepos + } else { + resolvedCPERanges, unresolvedCPERanges, successfulCPERepos := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) + successfulRepos = append(successfulRepos, successfulCPERepos...) + if len(resolvedCPERanges) > 0 { + cpeOutcome = models.Successful + resolvedRanges = append(resolvedRanges, resolvedCPERanges...) + } else if len(unresolvedCPERanges) > 0 { + cpeOutcome = models.NoCommitRanges + unresolvedRanges = append(unresolvedRanges, unresolvedCPERanges...) + } + } + } + + if len(textRanges) > 0 && cpeOutcome != models.Successful { + resolvedTextRanges, unresolvedTextRanges, _ := conversion.GitVersionsToCommits(textRanges, repos, metrics, cache) + if len(resolvedTextRanges) > 0 { + textOutcome = models.Successful + resolvedRanges = append(resolvedRanges, resolvedTextRanges...) + } else if len(unresolvedTextRanges) > 0 { + textOutcome = models.NoCommitRanges + unresolvedRanges = append(unresolvedRanges, unresolvedTextRanges...) + } } - // There are some AffectedVersions to try and resolve to AffectedCommits. - metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versions, repos, metrics, cache) - if err != nil { - return models.FixUnresolvable, nil + if len(commits) > 0 { + for _, commit := range commits { + successfulRepos = append(successfulRepos, commit.Repo) + } + } + var newResolvedRanges []*osvschema.Range + // Combine the ranges appropriately + if len(resolvedRanges) > 0 { + slices.Sort(successfulRepos) + successfulRepos = slices.Compact(successfulRepos) + for _, repo := range successfulRepos { + var mergedRange *osvschema.Range + for _, vr := range resolvedRanges { + if vr.Repo == repo { + if mergedRange == nil { + mergedRange = vr + } else { + mergedRange = conversion.MergeTwoRanges(mergedRange, vr) + } + } + } + if len(commits) > 0 { + for _, commit := range commits { + if commit.Repo == repo { + if mergedRange == nil { + mergedRange = conversion.BuildVersionRange(commit.Introduced, commit.LastAffected, commit.Fixed) + mergedRange.Repo = repo + } else { + event := convertCommitToEvent(commit) + if event != nil { + addEventToRange(mergedRange, event) + } + } + } + } + } + if mergedRange != nil { + newResolvedRanges = append(newResolvedRanges, mergedRange) + } + } } - affected := &osvschema.Affected{ - Ranges: resolvedRanges, + newAffected := &osvschema.Affected{ + Ranges: newResolvedRanges, } if len(unresolvedRanges) > 0 { databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - affected.DatabaseSpecific = databaseSpecific + metrics.AddNote("failed to make database specific: %v", err) + } + newAffected.DatabaseSpecific = databaseSpecific + } + + if cpeOutcome == models.Successful || textOutcome == models.Successful || len(commits) > 0 { + return models.Successful, newAffected + } + + return models.NoCommitRanges, newAffected +} + +func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { + // Handle duplicate events being added + for _, e := range versionRange.Events { + if e.Introduced != "" && e.Introduced == event.Introduced { + return + } + if e.Fixed != "" && e.Fixed == event.Fixed { + return + } + if e.LastAffected != "" && e.LastAffected == event.LastAffected { + return } } + //TODO: maybe handle if the fixed event appeards as an introduced event or similar. - return models.Successful, affected + if event.Introduced != "" { + versionRange.Events = append([]*osvschema.Event{&osvschema.Event{ + Introduced: event.Introduced}}, versionRange.Events...) + } else { + versionRange.Events = append(versionRange.Events, event) + } +} + +func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { + if commit.Introduced != "" { + return &osvschema.Event{ + Introduced: commit.Introduced, + } + } + if commit.Fixed != "" { + return &osvschema.Event{ + Fixed: commit.Fixed, + } + } + if commit.LastAffected != "" { + return &osvschema.Event{ + LastAffected: commit.LastAffected, + } + } + return nil } diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/nvd/converter_test.go new file mode 100644 index 00000000000..309dfe7a130 --- /dev/null +++ b/vulnfeeds/conversion/nvd/converter_test.go @@ -0,0 +1,148 @@ +package nvd + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/ossf/osv-schema/bindings/go/osvschema" + "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/structpb" +) + +func TestMergeTwoRanges(t *testing.T) { + tests := []struct { + name string + range1 *osvschema.Range + range2 *osvschema.Range + want *osvschema.Range + }{ + { + name: "Merge identical ranges", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Introduced: "0"}, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Fixed: "1.0.0"}, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.0"}, + }, + }, + }, + { + name: "Different repos should return nil", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo1", + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo2", + }, + want: nil, + }, + { + name: "Different types should return nil", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + }, + range2: &osvschema.Range{ + Type: osvschema.Range_ECOSYSTEM, + Repo: "https://github.com/example/repo", + }, + want: nil, + }, + { + name: "Merge with DatabaseSpecific", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key1": structpb.NewStringValue("value1"), + }, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key2": structpb.NewStringValue("value2"), + }, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key1": structpb.NewStringValue("value1"), + "key2": structpb.NewStringValue("value2"), + }, + }, + }, + }, + { + name: "Merge DatabaseSpecific lists", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{structpb.NewStringValue("item1")}, + }), + }, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{structpb.NewStringValue("item2")}, + }), + }, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("item1"), + structpb.NewStringValue("item2"), + }, + }), + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := mergeTwoRanges(tt.range1, tt.range2) + if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { + t.Errorf("mergeTwoRanges() mismatch (-want +got):\n%s", diff) + } + }) + } +} From 330e73f86e9d9d63169bda94910ddd07e8e9bd3d Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 13 Feb 2026 02:03:55 +0000 Subject: [PATCH 14/45] add qualcomm to deny list --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 14 ++++++++++++++ vulnfeeds/cves/versions.go | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 1ead910a395..f3d12d84e95 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "regexp" + "runtime/pprof" "slices" "sync" @@ -28,6 +29,7 @@ var ( outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") + cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file") ) func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { @@ -47,6 +49,18 @@ func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { func main() { flag.Parse() + if *cpuProfile != "" { + f, err := os.Create(*cpuProfile) + if err != nil { + logger.Fatal("could not create CPU profile: ", slog.Any("err", err)) + } + defer f.Close() + if err := pprof.StartCPUProfile(f); err != nil { + logger.Fatal("could not start CPU profile: ", slog.Any("err", err)) + } + defer pprof.StopCPUProfile() + } + if !slices.Contains([]string{"OSV", "PackageInfo"}, *outFormat) { fmt.Fprintf(os.Stderr, "Unsupported output format: %s\n", *outFormat) os.Exit(1) diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 75662ad8fc8..cf50190b4a0 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -35,7 +35,6 @@ import ( "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" ) @@ -63,6 +62,8 @@ var VendorProductDenyList = []VendorProduct{ // [CVE-2021-28957]: Incorrectly associates with github.com/lxml/lxml {"oracle", "zfs_storage_appliance_kit"}, {"gradle", "enterprise"}, // The OSS repo gets mis-attributed via CVE-2020-15767 + {"qualcomm", ""}, // firmware out of scope + {"linux", "linux_kernel"}, } type VendorProduct struct { @@ -559,7 +560,6 @@ func extractCommitsFromRefs(references []models.Reference, httpClient *http.Clie c, r, err := ExtractGitCommit(ref.URL, httpClient, 0) if err != nil { - logger.Error("Failed to extract commit from ref: %v", err) continue } From e7ec73c7c360615a8bd2f4ea1b528992c4bec16f Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 13 Feb 2026 02:44:23 +0000 Subject: [PATCH 15/45] Add a flag for whether to output metrics file --- .../cmd/converters/cve/nvd-cve-osv/main.go | 3 +- vulnfeeds/conversion/nvd/converter.go | 49 ++++++++++--------- vulnfeeds/cves/versions.go | 4 -- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index f3d12d84e95..5fa2426c7b4 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -29,6 +29,7 @@ var ( outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") + outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion") cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file") ) @@ -129,7 +130,7 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache var outcome models.ConversionOutcome switch *outFormat { case "OSV": - outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) + outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) // case "PackageInfo": // outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index c2df639a221..cf892b7ac93 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -24,7 +24,7 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") // CVEToOSV Takes an NVD CVE record and outputs an OSV file in the specified directory. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) models.ConversionOutcome { CPEs := cves.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -45,37 +45,42 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc if cpeRanges == nil && commits == nil && textRanges == nil { metrics.AddNote("No ranges detected for %q", maybeProductName) metrics.Outcome = models.NoRanges - if rejectFailed { - return metrics.Outcome - } + } else { + var affected *osvschema.Affected + metrics.Outcome, affected = ResolveVersionsToCommits(cpeRanges, textRanges, commits, repos, cache, metrics) + v.Affected = append(v.Affected, affected) } - var affected *osvschema.Affected - metrics.Outcome, affected = ResolveVersionsToCommits(cpeRanges, textRanges, commits, repos, cache, metrics) - v.Affected = append(v.Affected, affected) + if !outputMetrics && rejectFailed && metrics.Outcome != models.Successful { + return metrics.Outcome + } vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) if err := os.MkdirAll(vulnDir, 0755); err != nil { logger.Info("Failed to create directory "+vulnDir, slog.String("cve", string(cve.ID)), slog.String("path", vulnDir), slog.Any("err", err)) } - osvFile, errCVE := conversion.CreateOSVFile(cve.ID, vulnDir) - metricsFile, errMetrics := conversion.CreateMetricsFile(cve.ID, vulnDir) - if errCVE != nil || errMetrics != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) - } - if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { - logger.Error("Failed to write metrics", slog.Any("err", err)) - } - if rejectFailed && metrics.Outcome != models.Successful { - return metrics.Outcome - } - if err := v.ToJSON(osvFile); err != nil { - logger.Error("Failed to write", slog.Any("err", err)) + if !rejectFailed || metrics.Outcome == models.Successful { + osvFile, errCVE := conversion.CreateOSVFile(cve.ID, vulnDir) + if errCVE != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) + } + if err := v.ToJSON(osvFile); err != nil { + logger.Error("Failed to write", slog.Any("err", err)) + } + osvFile.Close() + } + if outputMetrics { + metricsFile, errMetrics := conversion.CreateMetricsFile(cve.ID, vulnDir) + if errMetrics != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) + } + if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics", slog.Any("err", err)) + } + metricsFile.Close() } - - osvFile.Close() return metrics.Outcome } diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index cf50190b4a0..9384c902669 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -1252,10 +1252,6 @@ func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Re if len(repos) == 0 { return repos } - if vp != nil { - metrics.AddNote("Derived repos using references %q for %q %q", repos, vp.Vendor, vp.Product) - } - metrics.AddNote("Derived repos (no CPEs) using references: %q", repos) return repos } From b01411573270af38f5f5fb4edec3d6c6e60293b5 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Sun, 15 Feb 2026 23:00:09 +0000 Subject: [PATCH 16/45] More refactoring to optimise order of operations --- vulnfeeds/conversion/common.go | 10 +- .../{nvd/converter_test.go => common_test.go} | 4 +- vulnfeeds/conversion/nvd/converter.go | 296 ++++++++++-------- vulnfeeds/cves/versions.go | 45 +-- vulnfeeds/models/metrics.go | 1 + 5 files changed, 172 insertions(+), 184 deletions(-) rename vulnfeeds/conversion/{nvd/converter_test.go => common_test.go} (98%) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 93865bd9385..567a765f937 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -228,7 +228,7 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if len(vr.GetEvents()) > 0 { databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) if err != nil { - logger.Warn("failed to make database specific: %v", err) + metrics.AddNote("failed to make database specific: %v", err) } else { newVR.DatabaseSpecific = databaseSpecific } @@ -244,10 +244,14 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if len(newVersionRanges) > 0 { metrics.ResolvedRangesCount += len(newVersionRanges) + metrics.Outcome = models.Successful } if len(unresolvedRanges) > 0 { metrics.UnresolvedRangesCount += len(unresolvedRanges) + if len(newVersionRanges) == 0 { + metrics.Outcome = models.NoCommitRanges + } } return newVersionRanges, unresolvedRanges, successfulRepos @@ -261,10 +265,10 @@ func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo strin } commit, err := git.VersionToCommit(version, normalizedTags) if err != nil { - logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) + // logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) return "" } - logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) + // logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) return commit } diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/common_test.go similarity index 98% rename from vulnfeeds/conversion/nvd/converter_test.go rename to vulnfeeds/conversion/common_test.go index 309dfe7a130..ee5469c1a0e 100644 --- a/vulnfeeds/conversion/nvd/converter_test.go +++ b/vulnfeeds/conversion/common_test.go @@ -1,4 +1,4 @@ -package nvd +package conversion import ( "testing" @@ -139,7 +139,7 @@ func TestMergeTwoRanges(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := mergeTwoRanges(tt.range1, tt.range2) + got := MergeTwoRanges(tt.range1, tt.range2) if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { t.Errorf("mergeTwoRanges() mismatch (-want +got):\n%s", diff) } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index cf892b7ac93..3fa1fb75dbc 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -2,8 +2,10 @@ package nvd import ( + "encoding/json" "errors" "log/slog" + "maps" "net/http" "os" "path/filepath" @@ -40,14 +42,65 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // Create basic OSV record v := vulns.FromNVDCVE(cve.ID, cve) - cpeRanges, commits, textRanges := cves.ExtractVersions(v, cve, nil, http.DefaultClient, metrics) + cpeRanges := cves.ExtractVersionsFromCPEs(cve, nil, metrics) + + // if there are no repos, there are no commits from the refs either + if len(cpeRanges) == 0 && len(repos) == 0{ + outputFiles(v,directory,maybeVendorName,maybeProductName,metrics,rejectFailed,outputMetrics) + return models.NoRepos + } - if cpeRanges == nil && commits == nil && textRanges == nil { + successfulRepos := make(map[string]bool) + var resolvedRanges, unresolvedRanges []*osvschema.Range + if len(cpeRanges) > 0 { + r, un, sR := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) + for _, s := range sR{ + successfulRepos[s] = true + } + metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceCPE) + } else if len(repos) == 0 { + affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, nil, nil, metrics) + v.Affected = append(v.Affected, affected) + outputFiles(v,directory,maybeVendorName,maybeProductName,metrics,rejectFailed,outputMetrics) + return models.NoRepos + } + + // Extract Commits + commits, err := cves.ExtractCommitsFromRefs(cve.References, http.DefaultClient) + if err != nil { + metrics.AddNote("Failed to extract commits from refs: %#v", err) + } + if len(commits) > 0 { + metrics.AddNote("Extracted commits from refs: %v", commits) + for _, commit := range commits { + successfulRepos[commit.Repo] = true + } + metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceRefs) + } + + // Extract Versions From Text if no CPE versions found + if len(resolvedRanges) == 0 { + textRanges := cves.ExtractVersionsFromText(nil, models.EnglishDescription(cve.Descriptions), metrics) + if len(textRanges) > 0 { + metrics.AddNote("Extracted versions from description: %v", textRanges) + } + r, un, sR := conversion.GitVersionsToCommits(textRanges, repos, metrics, cache) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) + for _, s := range sR{ + successfulRepos[s] = true + } + metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceDescription) + } + + if len(resolvedRanges) == 0 && len(commits) == 0{ metrics.AddNote("No ranges detected for %q", maybeProductName) metrics.Outcome = models.NoRanges } else { - var affected *osvschema.Affected - metrics.Outcome, affected = ResolveVersionsToCommits(cpeRanges, textRanges, commits, repos, cache, metrics) + keys := slices.Collect(maps.Keys(successfulRepos)) + affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) v.Affected = append(v.Affected, affected) } @@ -55,109 +108,84 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc return metrics.Outcome } + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + + return metrics.Outcome +} + +// CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. +func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { + CPEs := cves.CPEs(cve) + // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. + maybeVendorName := "ENOCPE" + maybeProductName := "ENOCPE" + + if len(CPEs) > 0 { + CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. + maybeVendorName = CPE.Vendor + maybeProductName = CPE.Product + if err != nil { + return models.NoRanges + } + } + + // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. + versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) + + // metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) + + // if len(versions.AffectedCommits) == 0 { + // metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) + // metrics.Outcome = models.NoCommitRanges + // } + + // if rejectFailed && metrics.Outcome != models.Successful { + // return metrics.Outcome + // } + + // versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. + + // slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) + + var pkgInfos []vulns.PackageInfo + pi := vulns.PackageInfo{VersionInfo: versions} + pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo + vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) + err := os.MkdirAll(vulnDir, 0755) + if err != nil { + logger.Warn("Failed to create dir", slog.Any("err", err)) + } - if err := os.MkdirAll(vulnDir, 0755); err != nil { - logger.Info("Failed to create directory "+vulnDir, slog.String("cve", string(cve.ID)), slog.String("path", vulnDir), slog.Any("err", err)) + outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) + f, err := os.Create(outputFile) + if err != nil { + logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) } + defer f.Close() - if !rejectFailed || metrics.Outcome == models.Successful { - osvFile, errCVE := conversion.CreateOSVFile(cve.ID, vulnDir) - if errCVE != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) - } - if err := v.ToJSON(osvFile); err != nil { - logger.Error("Failed to write", slog.Any("err", err)) - } - osvFile.Close() + encoder := json.NewEncoder(f) + encoder.SetIndent("", " ") + err = encoder.Encode(&pkgInfos) + + if err != nil { + logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) } - if outputMetrics { - metricsFile, errMetrics := conversion.CreateMetricsFile(cve.ID, vulnDir) - if errMetrics != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) - } - if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { - logger.Error("Failed to write metrics", slog.Any("err", err)) - } - metricsFile.Close() + + logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) + + metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) + if err != nil { + logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) + } + err = conversion.WriteMetricsFile(metrics, metricsFile) + if err != nil { + logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } return metrics.Outcome } -// CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -// func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { -// CPEs := cves.CPEs(cve) -// // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. -// maybeVendorName := "ENOCPE" -// maybeProductName := "ENOCPE" - -// if len(CPEs) > 0 { -// CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. -// maybeVendorName = CPE.Vendor -// maybeProductName = CPE.Product -// if err != nil { -// return models.NoRanges -// } -// } - -// // more often than not, this yields a VersionInfo with AffectedVersions and no AffectedCommits. -// // versions := cves.ExtractVersions(cve, nil, http.DefaultClient, metrics) - -// // metrics.Outcome = ResolveVersionsToCommits(&versions, repos, cache, metrics) - -// // if len(versions.AffectedCommits) == 0 { -// // metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) -// // metrics.Outcome = models.NoCommitRanges -// // } - -// // if rejectFailed && metrics.Outcome != models.Successful { -// // return metrics.Outcome -// // } - -// // versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. - -// // slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) - -// var pkgInfos []vulns.PackageInfo -// pi := vulns.PackageInfo{VersionInfo: versions} -// pkgInfos = append(pkgInfos, pi) // combine-to-osv expects a serialised *array* of PackageInfo - -// vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) -// err := os.MkdirAll(vulnDir, 0755) -// if err != nil { -// logger.Warn("Failed to create dir", slog.Any("err", err)) -// } - -// outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) -// f, err := os.Create(outputFile) -// if err != nil { -// logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) -// } -// defer f.Close() - -// encoder := json.NewEncoder(f) -// encoder.SetIndent("", " ") -// err = encoder.Encode(&pkgInfos) - -// if err != nil { -// logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) -// } - -// logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) - -// metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) -// if err != nil { -// logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) -// } -// err = conversion.WriteMetricsFile(metrics, metricsFile) -// if err != nil { -// logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) -// } - -// return metrics.Outcome -// } - // FindRepos attempts to find the source code repositories for a given CVE. func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) []string { // Find repos @@ -238,43 +266,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } -func ResolveVersionsToCommits(cpeRanges []*osvschema.Range, textRanges []*osvschema.Range, commits []models.AffectedCommit, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (models.ConversionOutcome, *osvschema.Affected) { - var cpeOutcome, textOutcome models.ConversionOutcome - resolvedRanges := []*osvschema.Range{} - unresolvedRanges := []*osvschema.Range{} - var successfulRepos []string - if len(cpeRanges) > 0 { - if len(repos) == 0 { - cpeOutcome = models.NoRepos - } else { - resolvedCPERanges, unresolvedCPERanges, successfulCPERepos := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) - successfulRepos = append(successfulRepos, successfulCPERepos...) - if len(resolvedCPERanges) > 0 { - cpeOutcome = models.Successful - resolvedRanges = append(resolvedRanges, resolvedCPERanges...) - } else if len(unresolvedCPERanges) > 0 { - cpeOutcome = models.NoCommitRanges - unresolvedRanges = append(unresolvedRanges, unresolvedCPERanges...) - } - } - } - - if len(textRanges) > 0 && cpeOutcome != models.Successful { - resolvedTextRanges, unresolvedTextRanges, _ := conversion.GitVersionsToCommits(textRanges, repos, metrics, cache) - if len(resolvedTextRanges) > 0 { - textOutcome = models.Successful - resolvedRanges = append(resolvedRanges, resolvedTextRanges...) - } else if len(unresolvedTextRanges) > 0 { - textOutcome = models.NoCommitRanges - unresolvedRanges = append(unresolvedRanges, unresolvedTextRanges...) - } - } - - if len(commits) > 0 { - for _, commit := range commits { - successfulRepos = append(successfulRepos, commit.Repo) - } - } +func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, commits []models.AffectedCommit, successfulRepos []string, metrics *models.ConversionMetrics) (*osvschema.Affected) { var newResolvedRanges []*osvschema.Range // Combine the ranges appropriately if len(resolvedRanges) > 0 { @@ -324,11 +316,7 @@ func ResolveVersionsToCommits(cpeRanges []*osvschema.Range, textRanges []*osvsch newAffected.DatabaseSpecific = databaseSpecific } - if cpeOutcome == models.Successful || textOutcome == models.Successful || len(commits) > 0 { - return models.Successful, newAffected - } - - return models.NoCommitRanges, newAffected + return newAffected } func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { @@ -372,3 +360,33 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { } return nil } + +func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) { + cveID := v.Id + vulnDir := filepath.Join(dir, vendor, product) + + if err := os.MkdirAll(vulnDir, 0755); err != nil { + logger.Info("Failed to create directory "+vulnDir, slog.String("cve", cveID), slog.String("path", vulnDir), slog.Any("err", err)) + } + + if !rejectFailed || metrics.Outcome == models.Successful { + osvFile, errCVE := conversion.CreateOSVFile(models.CVEID(cveID), vulnDir) + if errCVE != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) + } + if err := v.ToJSON(osvFile); err != nil { + logger.Error("Failed to write", slog.Any("err", err)) + } + osvFile.Close() + } + if outputMetrics { + metricsFile, errMetrics := conversion.CreateMetricsFile(models.CVEID(cveID), vulnDir) + if errMetrics != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) + } + if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics", slog.Any("err", err)) + } + metricsFile.Close() + } +} diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 9384c902669..cbd4d0a4d65 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -551,7 +551,7 @@ func ValidateAndCanonicalizeLink(link string, httpClient *http.Client) (canonica } // For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. -func extractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { +func ExtractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { var commits []models.AffectedCommit for _, ref := range references { @@ -811,51 +811,16 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics } } } - - return versions -} - -func ExtractVersions(v *vulns.Vulnerability, cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (cpeRanges []*osvschema.Range, commits []models.AffectedCommit, textRanges []*osvschema.Range) { - // Extract Versions From CPEs - cpeRanges = ExtractVersionsFromCPEs(cve, validVersions, metrics) - if len(cpeRanges) > 0 { - metrics.AddNote("Extracted versions from CPEs: %v", cpeRanges) - } - - // Extract Commits - commits, err := extractCommitsFromRefs(cve.References, httpClient) - if err != nil { - metrics.AddNote("Failed to extract commits from refs: %v", err) - } - if len(commits) > 0 { - metrics.AddNote("Extracted commits from refs: %v", commits) - } - - // Extract Versions From Text - textRanges = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) - if len(textRanges) > 0 { - metrics.AddNote("Extracted versions from description: %v", textRanges) - } - - // If no versions were detected, add a note - if len(cpeRanges) == 0 && len(commits) == 0 && len(textRanges) == 0 { - metrics.AddNote("No versions detected.") + if len(versions) > 0{ + metrics.AddNote("Extracted versions from CPEs: %v", versions) } - - if len(validVersions) > 0 { - metrics.AddNote("Valid versions:") - for _, version := range validVersions { - metrics.AddNote(" - %v", version) - } - } - - return cpeRanges, commits, textRanges + return versions } // ExtractVersionInfo extracts version information from a CVE. // Deprecated: Use ExtractVersions instead. func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { - if commit, err := extractCommitsFromRefs(cve.References, httpClient); err == nil { + if commit, err := ExtractCommitsFromRefs(cve.References, httpClient); err == nil { v.AffectedCommits = append(v.AffectedCommits, commit...) } diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index 85d7c1db4cd..8546e2aed02 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -73,6 +73,7 @@ const ( VersionSourceGit VersionSource = "GITVERS" VersionSourceCPE VersionSource = "CPEVERS" VersionSourceDescription VersionSource = "DESCRVERS" + VersionSourceRefs VersionSource = "REFS" ) func DetermineOutcome(metrics *ConversionMetrics) { From 551b686f0064d887933b456e112932470b7adbc5 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 02:49:14 +0000 Subject: [PATCH 17/45] Move GitVersionToCommit and resolveVersionToCommit to conversion common dir --- vulnfeeds/conversion/common.go | 116 ++++++++++++++++++++++++++++++++ vulnfeeds/cvelist2osv/common.go | 115 ------------------------------- 2 files changed, 116 insertions(+), 115 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 22639351f4f..9c045354c65 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -4,6 +4,7 @@ package conversion import ( "encoding/json" + "errors" "fmt" "log/slog" "os" @@ -11,7 +12,10 @@ import ( "slices" "strings" + "github.com/google/osv/vulnfeeds/cves" + "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -111,3 +115,115 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e return nil } + + +// resolveVersionToCommit is a helper to convert a version string to a commit hash. +// It logs the outcome of the conversion attempt and returns an empty string on failure. +func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string { + if version == "" { + return "" + } + logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) + commit, err := git.VersionToCommit(version, normalizedTags) + if err != nil { + logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) + return "" + } + logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) + + return commit +} + +// Examines repos and tries to convert versions to commits by treating them as Git tags. +// Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and +// typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. +// Refuses to add the same commit to AffectedCommits more than once. +func gitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { + var newAff osvschema.Affected + var newVersionRanges []*osvschema.Range + unresolvedRanges := versionRanges + + for _, repo := range repos { + if len(unresolvedRanges) == 0 { + break // All ranges have been resolved. + } + + normalizedTags, err := git.NormalizeRepoTags(repo, cache) + if err != nil { + metrics.AddNote("Failed to normalize tags - %s", repo) + continue + } + + var stillUnresolvedRanges []*osvschema.Range + for _, vr := range unresolvedRanges { + var introduced, fixed, lastAffected string + for _, e := range vr.GetEvents() { + if e.GetIntroduced() != "" { + introduced = e.GetIntroduced() + } + if e.GetFixed() != "" { + fixed = e.GetFixed() + } + if e.GetLastAffected() != "" { + lastAffected = e.GetLastAffected() + } + } + + var introducedCommit string + if introduced == "0" { + introducedCommit = "0" + } else { + introducedCommit = resolveVersionToCommit(cveID, introduced, "introduced", repo, normalizedTags) + } + fixedCommit := resolveVersionToCommit(cveID, fixed, "fixed", repo, normalizedTags) + lastAffectedCommit := resolveVersionToCommit(cveID, lastAffected, "last_affected", repo, normalizedTags) + + if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { + var newVR *osvschema.Range + + if fixedCommit != "" { + newVR = cves.BuildVersionRange(introducedCommit, "", fixedCommit) + } else { + newVR = cves.BuildVersionRange(introducedCommit, lastAffectedCommit, "") + } + + newVR.Repo = repo + newVR.Type = osvschema.Range_GIT + if len(vr.GetEvents()) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + newVR.DatabaseSpecific = databaseSpecific + } + } + + newVersionRanges = append(newVersionRanges, newVR) + } else { + stillUnresolvedRanges = append(stillUnresolvedRanges, vr) + } + } + unresolvedRanges = stillUnresolvedRanges + } + + var err error + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + newAff.DatabaseSpecific = databaseSpecific + } + + metrics.UnresolvedRangesCount += len(unresolvedRanges) + } + + if len(newVersionRanges) > 0 { + newAff.Ranges = newVersionRanges + metrics.ResolvedRangesCount += len(newVersionRanges) + } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. + err = errors.New("was not able to get git version ranges") + } + + return &newAff, err +} \ No newline at end of file diff --git a/vulnfeeds/cvelist2osv/common.go b/vulnfeeds/cvelist2osv/common.go index 19361b1bd7b..473279a3fba 100644 --- a/vulnfeeds/cvelist2osv/common.go +++ b/vulnfeeds/cvelist2osv/common.go @@ -3,15 +3,11 @@ package cvelist2osv import ( "cmp" "errors" - "log/slog" "strconv" "strings" "github.com/google/osv/vulnfeeds/cves" - "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/utility" - "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" ) @@ -55,117 +51,6 @@ func toVersionRangeType(s string) VersionRangeType { } } -// resolveVersionToCommit is a helper to convert a version string to a commit hash. -// It logs the outcome of the conversion attempt and returns an empty string on failure. -func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string { - if version == "" { - return "" - } - logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) - commit, err := git.VersionToCommit(version, normalizedTags) - if err != nil { - logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) - return "" - } - logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) - - return commit -} - -// Examines repos and tries to convert versions to commits by treating them as Git tags. -// Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and -// typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. -// Refuses to add the same commit to AffectedCommits more than once. -func gitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { - var newAff osvschema.Affected - var newVersionRanges []*osvschema.Range - unresolvedRanges := versionRanges - - for _, repo := range repos { - if len(unresolvedRanges) == 0 { - break // All ranges have been resolved. - } - - normalizedTags, err := git.NormalizeRepoTags(repo, cache) - if err != nil { - metrics.AddNote("Failed to normalize tags - %s", repo) - continue - } - - var stillUnresolvedRanges []*osvschema.Range - for _, vr := range unresolvedRanges { - var introduced, fixed, lastAffected string - for _, e := range vr.GetEvents() { - if e.GetIntroduced() != "" { - introduced = e.GetIntroduced() - } - if e.GetFixed() != "" { - fixed = e.GetFixed() - } - if e.GetLastAffected() != "" { - lastAffected = e.GetLastAffected() - } - } - - var introducedCommit string - if introduced == "0" { - introducedCommit = "0" - } else { - introducedCommit = resolveVersionToCommit(cveID, introduced, "introduced", repo, normalizedTags) - } - fixedCommit := resolveVersionToCommit(cveID, fixed, "fixed", repo, normalizedTags) - lastAffectedCommit := resolveVersionToCommit(cveID, lastAffected, "last_affected", repo, normalizedTags) - - if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { - var newVR *osvschema.Range - - if fixedCommit != "" { - newVR = cves.BuildVersionRange(introducedCommit, "", fixedCommit) - } else { - newVR = cves.BuildVersionRange(introducedCommit, lastAffectedCommit, "") - } - - newVR.Repo = repo - newVR.Type = osvschema.Range_GIT - if len(vr.GetEvents()) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newVR.DatabaseSpecific = databaseSpecific - } - } - - newVersionRanges = append(newVersionRanges, newVR) - } else { - stillUnresolvedRanges = append(stillUnresolvedRanges, vr) - } - } - unresolvedRanges = stillUnresolvedRanges - } - - var err error - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newAff.DatabaseSpecific = databaseSpecific - } - - metrics.UnresolvedRangesCount += len(unresolvedRanges) - } - - if len(newVersionRanges) > 0 { - newAff.Ranges = newVersionRanges - metrics.ResolvedRangesCount += len(newVersionRanges) - } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. - err = errors.New("was not able to get git version ranges") - } - - return &newAff, err -} - // findCPEVersionRanges extracts version ranges and CPE strings from the CNA's // CPE applicability statements in a CVE record. func findCPEVersionRanges(cve models.CVE5) (versionRanges []*osvschema.Range, cpes []string, err error) { From f5b5e77f35b3d5319b2f08197648c19da5390964 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 02:51:46 +0000 Subject: [PATCH 18/45] Fix names and make public --- vulnfeeds/conversion/common.go | 2 +- vulnfeeds/conversion/nvd/converter.go | 4 ++-- vulnfeeds/cvelist2osv/default_extractor.go | 6 +++--- vulnfeeds/cves/versions.go | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 9c045354c65..845aa051ae9 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -138,7 +138,7 @@ func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo strin // Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and // typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. // Refuses to add the same commit to AffectedCommits more than once. -func gitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { +func GitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { var newAff osvschema.Affected var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 01118b8c725..12a36232838 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -50,7 +50,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) } metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - versions = cves.GitVersionsToCommits(versions, repos, cache, metrics) + versions = cves.VersionInfoToCommits(versions, repos, cache, metrics) hasAnyFixedCommits := false for _, ac := range versions.AffectedCommits { if ac.Fixed != "" { @@ -140,7 +140,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) } logger.Info("Trying to convert version tags to commits", slog.String("cve", string(cve.ID)), slog.Any("versions", versions), slog.Any("repos", repos)) - versions = cves.GitVersionsToCommits(versions, repos, cache, metrics) + versions = cves.VersionInfoToCommits(versions, repos, cache, metrics) } hasAnyFixedCommits := false diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index a9ac0801f6d..02c6f250e5f 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -39,7 +39,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, ranges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, ranges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -53,7 +53,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -68,7 +68,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := gitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 6609111293a..bc59bb1534e 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -1002,7 +1002,7 @@ func (c *VPRepoCache) Initialize(vpMap VendorProductToRepoMap) { // Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and // typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. // Refuses to add the same commit to AffectedCommits more than once. -func GitVersionsToCommits(versions models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (v models.VersionInfo) { +func VersionInfoToCommits(versions models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (v models.VersionInfo) { // versions is a VersionInfo with AffectedVersions and typically no AffectedCommits // v is a VersionInfo with AffectedCommits (containing Fixed commits) included v = versions From 84e6610afe299784f66dcd13eb771c3233732902 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 02:57:19 +0000 Subject: [PATCH 19/45] remove unnecessary function and reliance on cveid --- vulnfeeds/conversion/common.go | 38 ++++++++-------------- vulnfeeds/cvelist2osv/default_extractor.go | 6 ++-- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 845aa051ae9..45abbe2a2ee 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -116,29 +116,8 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e return nil } - -// resolveVersionToCommit is a helper to convert a version string to a commit hash. -// It logs the outcome of the conversion attempt and returns an empty string on failure. -func resolveVersionToCommit(cveID models.CVEID, version, versionType, repo string, normalizedTags map[string]git.NormalizedTag) string { - if version == "" { - return "" - } - logger.Info("Attempting to resolve version to commit", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo)) - commit, err := git.VersionToCommit(version, normalizedTags) - if err != nil { - logger.Warn("Failed to get Git commit for version", slog.String("cve", string(cveID)), slog.String("version", version), slog.String("type", versionType), slog.String("repo", repo), slog.Any("err", err)) - return "" - } - logger.Info("Successfully derived commit for version", slog.String("cve", string(cveID)), slog.String("commit", commit), slog.String("version", version), slog.String("type", versionType)) - - return commit -} - // Examines repos and tries to convert versions to commits by treating them as Git tags. -// Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and -// typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. -// Refuses to add the same commit to AffectedCommits more than once. -func GitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { +func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { var newAff osvschema.Affected var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges @@ -173,10 +152,19 @@ func GitVersionsToCommits(cveID models.CVEID, versionRanges []*osvschema.Range, if introduced == "0" { introducedCommit = "0" } else { - introducedCommit = resolveVersionToCommit(cveID, introduced, "introduced", repo, normalizedTags) + introducedCommit, err = git.VersionToCommit(introduced, normalizedTags) + if err != nil { + metrics.AddNote("Failed to resolve version to commit - %s", introduced) + } + } + fixedCommit, err := git.VersionToCommit(fixed, normalizedTags) + if err != nil { + metrics.AddNote("Failed to resolve version to commit - %s", fixed) + } + lastAffectedCommit, err := git.VersionToCommit(lastAffected, normalizedTags) + if err != nil { + metrics.AddNote("Failed to resolve version to commit - %s", lastAffected) } - fixedCommit := resolveVersionToCommit(cveID, fixed, "fixed", repo, normalizedTags) - lastAffectedCommit := resolveVersionToCommit(cveID, lastAffected, "last_affected", repo, normalizedTags) if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { var newVR *osvschema.Range diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index 02c6f250e5f..95623b54764 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -39,7 +39,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, ranges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -53,7 +53,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { @@ -68,7 +68,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(cve.Metadata.CVEID, versionRanges, repos, metrics, repoTagsCache) + aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } From 63a35eba345ea35879fce92a580e30fe3dff59a3 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 03:11:31 +0000 Subject: [PATCH 20/45] Have GitVersionsToCommits output ranges instead of affected --- vulnfeeds/conversion/common.go | 34 ++++++-------- vulnfeeds/cvelist2osv/default_extractor.go | 54 +++++++++++++++++++--- 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 032f0dfe61d..c7c21e224f3 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -4,7 +4,6 @@ package conversion import ( "encoding/json" - "errors" "fmt" "log/slog" "os" @@ -116,10 +115,10 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e } // Examines repos and tries to convert versions to commits by treating them as Git tags. -func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) (*osvschema.Affected, error) { - var newAff osvschema.Affected +func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) { var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges + var successfulRepos []string for _, repo := range repos { if len(unresolvedRanges) == 0 { @@ -173,13 +172,13 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr } else { newVR = BuildVersionRange(introducedCommit, lastAffectedCommit, "") } - + successfulRepos = append(successfulRepos, repo) newVR.Repo = repo newVR.Type = osvschema.Range_GIT if len(vr.GetEvents()) > 0 { databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"versions": vr.GetEvents()}) if err != nil { - logger.Warn("failed to make database specific: %v", err) + metrics.AddNote("failed to make database specific: %v", err) } else { newVR.DatabaseSpecific = databaseSpecific } @@ -193,26 +192,19 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr unresolvedRanges = stillUnresolvedRanges } - var err error - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - newAff.DatabaseSpecific = databaseSpecific - } - - metrics.UnresolvedRangesCount += len(unresolvedRanges) - } - if len(newVersionRanges) > 0 { - newAff.Ranges = newVersionRanges metrics.ResolvedRangesCount += len(newVersionRanges) - } else if len(unresolvedRanges) > 0 { // Only error if there were ranges to resolve but none were. - err = errors.New("was not able to get git version ranges") + metrics.Outcome = models.Successful + } + + if len(unresolvedRanges) > 0 { + metrics.UnresolvedRangesCount += len(unresolvedRanges) + if len(newVersionRanges) == 0 { + metrics.Outcome = models.NoCommitRanges + } } - return &newAff, err + return newVersionRanges, unresolvedRanges, successfulRepos } // BuildVersionRange is a helper function that adds 'introduced', 'fixed', or 'last_affected' diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index a3cea4f0578..0f16ed60f09 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -7,6 +7,7 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -39,13 +40,27 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - aff, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { gotVersions = true } - conversion.AddAffected(v, aff, metrics) + + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } if !gotVersions { @@ -53,14 +68,27 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } else { gotVersions = true } - conversion.AddAffected(v, aff, metrics) + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } } @@ -68,11 +96,25 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - aff, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) if err != nil { logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) } - conversion.AddAffected(v, aff, metrics) + + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } } } } From 017eaf820d32026e8deefb3c236ce2e9cf1b45d9 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 03:13:58 +0000 Subject: [PATCH 21/45] fix formatting --- vulnfeeds/cvelist2osv/common.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/vulnfeeds/cvelist2osv/common.go b/vulnfeeds/cvelist2osv/common.go index 09e0a23e8b0..53e5ae4c1ca 100644 --- a/vulnfeeds/cvelist2osv/common.go +++ b/vulnfeeds/cvelist2osv/common.go @@ -7,8 +7,6 @@ import ( "strings" "github.com/google/osv/vulnfeeds/conversion" - "github.com/google/osv/vulnfeeds/git" - "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" From 0720af18c8fc6eec1eacf0c1e94b67a4a106201d Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 03:18:42 +0000 Subject: [PATCH 22/45] fix bad conversion assumptions --- vulnfeeds/cvelist2osv/default_extractor.go | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index 0f16ed60f09..52807f82856 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -1,8 +1,6 @@ package cvelist2osv import ( - "log/slog" - "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" @@ -40,9 +38,9 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln ranges := d.handleAffected(cve.Containers.CNA.Affected, metrics) if len(ranges) != 0 { - resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) - if err != nil { - logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) + resolvedRanges, unresolvedRanges, _ := conversion.GitVersionsToCommits(ranges, repos, metrics, repoTagsCache) + if len(resolvedRanges) == 0 { + metrics.AddNote("Failed to convert git versions to commits") } else { gotVersions = true } @@ -68,9 +66,9 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln versionRanges, _ := cpeVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) - if err != nil { - logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) + resolvedRanges, unresolvedRanges, _ := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + if len(resolvedRanges) == 0 { + metrics.AddNote("Failed to convert git versions to commits") } else { gotVersions = true } @@ -96,9 +94,9 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("No versions in CPEs so attempting extraction from description") versionRanges := textVersionExtraction(cve, metrics) if len(versionRanges) != 0 { - resolvedRanges, unresolvedRanges, err := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) - if err != nil { - logger.Error("Failed to convert git versions to commits", slog.Any("err", err)) + resolvedRanges, unresolvedRanges, _ := conversion.GitVersionsToCommits(versionRanges, repos, metrics, repoTagsCache) + if len(resolvedRanges) == 0 { + metrics.AddNote("Failed to convert git versions to commits") } if len(resolvedRanges) > 0 { From 1f07765d6d53283255a38a0fc5d8abc3619db2da Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 04:07:28 +0000 Subject: [PATCH 23/45] fixed some tests --- vulnfeeds/cves/versions.go | 26 ++++++++++++++++++-------- vulnfeeds/cves/versions_test.go | 12 ++++++------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index a44a06e0e2c..d74b882262e 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -29,12 +29,12 @@ import ( "time" "github.com/knqyf263/go-cpe/naming" + "github.com/ossf/osv-schema/bindings/go/osvschema" "github.com/sethvargo/go-retry" "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/vulns" ) // References with these tags have been found to contain completely unrelated @@ -555,23 +555,33 @@ func ExtractCommitsFromRefs(references []models.Reference, httpClient *http.Clie for _, ref := range references { // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. - var ac models.AffectedCommit - c, r, err := ExtractGitCommit(ref.URL, httpClient, 0) - + ac, err := extractGitAffectedCommit(ref.URL, models.Fixed, httpClient) if err != nil { continue } - ac.SetRepo(r) - - models.SetCommitByType(&ac, models.Fixed, c) - commits = append(commits, ac) } return commits, nil } +// For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. +func extractGitAffectedCommit(link string, commitType models.CommitType, httpClient *http.Client) (models.AffectedCommit, error) { + var ac models.AffectedCommit + c, r, err := ExtractGitCommit(link, httpClient, 0) + if err != nil { + return ac, err + } + + ac.SetRepo(r) + + models.SetCommitByType(&ac, commitType, c) + + return ac, nil +} + + func ExtractGitCommit(link string, httpClient *http.Client, depth int) (string, string, error) { if depth > 10 { return "", "", fmt.Errorf("max recursion depth exceeded for %s", link) diff --git a/vulnfeeds/cves/versions_test.go b/vulnfeeds/cves/versions_test.go index 281b0d72783..a1e293d0a60 100644 --- a/vulnfeeds/cves/versions_test.go +++ b/vulnfeeds/cves/versions_test.go @@ -749,7 +749,7 @@ func TestExtractVersionInfo(t *testing.T) { AffectedCommits: []models.AffectedCommit(nil), AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "14.4.5", LastAffected: "", }, @@ -775,7 +775,7 @@ func TestExtractVersionInfo(t *testing.T) { AffectedCommits: []models.AffectedCommit(nil), AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "", LastAffected: "2.4.0", }, @@ -839,7 +839,7 @@ func TestExtractVersionInfo(t *testing.T) { }, AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "2.6.4", LastAffected: "", }, @@ -873,7 +873,7 @@ func TestExtractVersionInfo(t *testing.T) { inputValidVersions: []string{}, expectedVersionInfo: models.VersionInfo{ AffectedCommits: []models.AffectedCommit{{Repo: "https://gitlab.freedesktop.org/xorg/lib/libxpm", Introduced: "0", Fixed: "a3a7c6dcc3b629d7650148"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "3.5.15"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", Fixed: "3.5.15"}}, }, expectedNotes: []string{}, }, @@ -882,7 +882,7 @@ func TestExtractVersionInfo(t *testing.T) { inputCVEItem: loadTestData2("CVE-2021-28429"), expectedVersionInfo: models.VersionInfo{ AffectedCommits: []models.AffectedCommit{{Repo: "https://git.ffmpeg.org/ffmpeg.git", Introduced: "0", Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", LastAffected: "4.3.2"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", LastAffected: "4.3.2"}}, }, }, { @@ -909,7 +909,7 @@ func TestExtractVersionInfo(t *testing.T) { }, }, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "1.2.5"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", Fixed: "1.2.5"}}, }, }, } From ccfb2a68415df43d963abf2ab6ecbf8f78fbf3cf Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 04:21:08 +0000 Subject: [PATCH 24/45] create a function for duplicate code --- vulnfeeds/cvelist2osv/default_extractor.go | 64 +++++++--------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index 52807f82856..3dfabcb6682 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -44,21 +44,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln } else { gotVersions = true } - - if len(resolvedRanges) > 0 { - aff := &osvschema.Affected{ - Ranges: resolvedRanges, - } - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - aff.DatabaseSpecific = databaseSpecific - } - } - conversion.AddAffected(v, aff, metrics) - } + addRangesToAffected(resolvedRanges, unresolvedRanges, v, metrics) } if !gotVersions { @@ -72,21 +58,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln } else { gotVersions = true } - - if len(resolvedRanges) > 0 { - aff := &osvschema.Affected{ - Ranges: resolvedRanges, - } - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - aff.DatabaseSpecific = databaseSpecific - } - } - conversion.AddAffected(v, aff, metrics) - } + addRangesToAffected(resolvedRanges, unresolvedRanges, v, metrics) } } @@ -99,20 +71,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln metrics.AddNote("Failed to convert git versions to commits") } - if len(resolvedRanges) > 0 { - aff := &osvschema.Affected{ - Ranges: resolvedRanges, - } - if len(unresolvedRanges) > 0 { - databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) - if err != nil { - logger.Warn("failed to make database specific: %v", err) - } else { - aff.DatabaseSpecific = databaseSpecific - } - } - conversion.AddAffected(v, aff, metrics) - } + addRangesToAffected(resolvedRanges, unresolvedRanges, v, metrics) } } } @@ -175,3 +134,20 @@ func (d *DefaultVersionExtractor) FindNormalAffectedRanges(affected models.Affec return versionRanges, mostFrequentVersionType } + +func addRangesToAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, v *vulns.Vulnerability, metrics *models.ConversionMetrics){ + if len(resolvedRanges) > 0 { + aff := &osvschema.Affected{ + Ranges: resolvedRanges, + } + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + logger.Warn("failed to make database specific: %v", err) + } else { + aff.DatabaseSpecific = databaseSpecific + } + } + conversion.AddAffected(v, aff, metrics) + } +} \ No newline at end of file From 29e5ceafc2315c0d6577a4cad688cd4cfbff73e7 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 04:29:59 +0000 Subject: [PATCH 25/45] Add ConductAnalysis and CpuProfile --- .../cmd/converters/cve/nvd-cve-osv/main.go | 32 +++++++++++ vulnfeeds/conversion/common.go | 57 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 91f814e5f85..45d4daba0b3 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -9,9 +9,13 @@ import ( "log/slog" "net/http" "os" + "path/filepath" + "regexp" + "runtime/pprof" "slices" "sync" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/nvd" "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" @@ -25,6 +29,8 @@ var ( outDir = flag.String("out-dir", "", "Path to output results.") outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") + outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion") + cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file") ) func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { @@ -44,6 +50,18 @@ func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { func main() { flag.Parse() + if *cpuProfile != "" { + f, err := os.Create(*cpuProfile) + if err != nil { + logger.Fatal("could not create CPU profile: ", slog.Any("err", err)) + } + defer f.Close() + if err := pprof.StartCPUProfile(f); err != nil { + logger.Fatal("could not start CPU profile: ", slog.Any("err", err)) + } + defer pprof.StopCPUProfile() + } + if !slices.Contains([]string{"OSV", "PackageInfo"}, *outFormat) { fmt.Fprintf(os.Stderr, "Unsupported output format: %s\n", *outFormat) os.Exit(1) @@ -88,6 +106,20 @@ func main() { close(jobs) wg.Wait() logger.Info("NVD Conversion run complete") + + // Conduct analysis on the outcome of the converted files and output to a csv + if *outputMetrics { + // Try to extract year from path, otherwise use "xxxx" filler + filename := filepath.Base(*jsonPath) + re := regexp.MustCompile(`nvdcve-2\.0-([0-9]{4})\.json`) + matches := re.FindStringSubmatch(filename) + if len(matches) >= 2 { + year := matches[1] + conversion.ConductAnalysis(year, *outDir) + } else { + conversion.ConductAnalysis("xxxx", *outDir) + } + } } func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache) models.ConversionOutcome { diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index fadf93ae819..9e0171726e0 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -3,13 +3,16 @@ package conversion import ( + "encoding/csv" "encoding/json" "fmt" + "io/fs" "log/slog" "os" "path/filepath" "slices" "strings" + "time" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" @@ -69,6 +72,60 @@ func DeduplicateRefs(refs []models.Reference) []models.Reference { return refs } +// ConductAnalysis conducts an analysis of the conversion results after completion by reading +// all of the .metrics.json files and extracting conversion outcomes. +func ConductAnalysis(year string, dir string) { + // get the current time in minutes + currentTime := time.Now().Format("2006-01-02T15:04") + outcomesCSV := "nvd-conversion-outcomes-" + year + "-" + currentTime + ".csv" + csvFile, err := os.Create(filepath.Join(dir, outcomesCSV)) + if err != nil { + logger.Fatal("Failed to create analysis CSV file", slog.Any("err", err)) + } + defer csvFile.Close() + + csvWriter := csv.NewWriter(csvFile) + defer csvWriter.Flush() + + header := []string{"CVEID", "Outcome"} + if err := csvWriter.Write(header); err != nil { + logger.Fatal("Failed to write header to CSV", slog.Any("err", err)) + } + + err = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() && strings.HasSuffix(d.Name(), ".metrics.json") { + data, err := os.ReadFile(path) + if err != nil { + logger.Warn("Failed to read metrics file", slog.String("path", path), slog.Any("err", err)) + return nil // Continue + } + + var metrics models.ConversionMetrics + if err := json.Unmarshal(data, &metrics); err != nil { + logger.Warn("Failed to unmarshal metrics JSON", slog.String("path", path), slog.Any("err", err)) + return nil // Continue + } + + record := []string{ + string(metrics.CVEID), + metrics.Outcome.String(), + } + if err := csvWriter.Write(record); err != nil { + logger.Warn("Failed to write record to CSV", slog.String("cve", string(metrics.CVEID)), slog.Any("err", err)) + } + } + + return nil + }) + + if err != nil { + logger.Error("Failed to walk directory for analysis", slog.Any("err", err)) + } +} + // CreateMetricsFile creates the initial file for the metrics record. func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { metricsFile := filepath.Join(vulnDir, string(id)+".metrics"+models.Extension) From 4c27138eb7347dd1c782d49bbd45381232d3b575 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 04:36:34 +0000 Subject: [PATCH 26/45] fix formatting --- vulnfeeds/cvelist2osv/default_extractor.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/cvelist2osv/default_extractor.go b/vulnfeeds/cvelist2osv/default_extractor.go index 3dfabcb6682..9d52f068945 100644 --- a/vulnfeeds/cvelist2osv/default_extractor.go +++ b/vulnfeeds/cvelist2osv/default_extractor.go @@ -135,7 +135,7 @@ func (d *DefaultVersionExtractor) FindNormalAffectedRanges(affected models.Affec return versionRanges, mostFrequentVersionType } -func addRangesToAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, v *vulns.Vulnerability, metrics *models.ConversionMetrics){ +func addRangesToAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, v *vulns.Vulnerability, metrics *models.ConversionMetrics) { if len(resolvedRanges) > 0 { aff := &osvschema.Affected{ Ranges: resolvedRanges, @@ -150,4 +150,4 @@ func addRangesToAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []* } conversion.AddAffected(v, aff, metrics) } -} \ No newline at end of file +} From 98fccb3c723ad5e243cffabc49f5118cde4cb110 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 04:42:58 +0000 Subject: [PATCH 27/45] fix gocritic complaints --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 45d4daba0b3..dda99adba1e 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -50,6 +50,11 @@ func loadCPEDictionary(productToRepo *cves.VPRepoCache, f string) error { func main() { flag.Parse() + if !slices.Contains([]string{"OSV", "PackageInfo"}, *outFormat) { + fmt.Fprintf(os.Stderr, "Unsupported output format: %s\n", *outFormat) + os.Exit(1) + } + if *cpuProfile != "" { f, err := os.Create(*cpuProfile) if err != nil { @@ -62,11 +67,6 @@ func main() { defer pprof.StopCPUProfile() } - if !slices.Contains([]string{"OSV", "PackageInfo"}, *outFormat) { - fmt.Fprintf(os.Stderr, "Unsupported output format: %s\n", *outFormat) - os.Exit(1) - } - logger.InitGlobalLogger() data, err := os.ReadFile(*jsonPath) From e9e6fba918770af851bedfa284dd7dfffb678f1b Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 16 Feb 2026 22:45:07 +0000 Subject: [PATCH 28/45] add check for invalid cache --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 4 ++-- vulnfeeds/conversion/common.go | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 4972d8c91c0..3aecf179b03 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -134,8 +134,8 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache switch *outFormat { case "OSV": outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) - // case "PackageInfo": - // outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) + case "PackageInfo": + outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) } return outcome diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 5286faba4d0..1802483ebd1 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -181,7 +181,9 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if len(unresolvedRanges) == 0 { break // All ranges have been resolved. } - + if cache.IsInvalid(repo) { + continue + } normalizedTags, err := git.NormalizeRepoTags(repo, cache) if err != nil { metrics.AddNote("Failed to normalize tags - %s", repo) From 6afecfa7f47985c178e8a35239b2b38db5d4d23d Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 17 Feb 2026 05:40:12 +0000 Subject: [PATCH 29/45] Add check for if there are only commits --- vulnfeeds/conversion/nvd/converter.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 05e6d19d691..ece8bb72929 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -4,7 +4,6 @@ package nvd import ( "encoding/json" "errors" - "fmt" "log/slog" "maps" "net/http" @@ -333,6 +332,15 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR } } + // if there are no resolved version but there are commits, we should create a range for each commit + if len(resolvedRanges) == 0 && len(commits) > 0 { + for _, commit := range commits { + newResolvedRanges = append(newResolvedRanges, conversion.BuildVersionRange(commit.Introduced, commit.LastAffected, commit.Fixed)) + metrics.ResolvedRangesCount++ + } + metrics.Outcome = models.Successful + } + newAffected := &osvschema.Affected{ Ranges: newResolvedRanges, } From 7e8dfd89df7f3af87c7bdc96fcff64471b29c8a9 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 19 Feb 2026 00:34:44 +0000 Subject: [PATCH 30/45] fix lint --- .../cmd/converters/cve/nvd-cve-osv/main.go | 2 +- vulnfeeds/conversion/common.go | 8 ++++---- vulnfeeds/conversion/nvd/converter.go | 18 +++++++++--------- vulnfeeds/cves/versions.go | 8 ++++---- vulnfeeds/models/metrics.go | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 7f63de35667..962ee70c8fa 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -135,7 +135,7 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache case "OSV": outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) case "PackageInfo": - outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed) + outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) } return outcome diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 1802483ebd1..c64fbee58f1 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -298,14 +298,14 @@ func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Ra func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { // check if the ranges are the same - if range1.Repo != range2.Repo || range1.Type != range2.Type { + if range1.GetRepo() != range2.GetRepo() || range1.GetType() != range2.GetType() { return nil } mergedRange := &osvschema.Range{ - Repo: range1.Repo, - Type: range1.Type, - Events: append(range1.Events, range2.Events...), + Repo: range1.GetRepo(), + Type: range1.GetType(), + Events: append(range1.Events, range2.GetEvents()...), } db1 := range1.GetDatabaseSpecific() diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index ece8bb72929..0da48cbb6b4 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -114,7 +114,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool) models.ConversionOutcome { +func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { CPEs := cves.CPEs(cve) // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. maybeVendorName := "ENOCPE" @@ -303,7 +303,7 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR for _, repo := range successfulRepos { var mergedRange *osvschema.Range for _, vr := range resolvedRanges { - if vr.Repo == repo { + if vr.GetRepo() == repo { if mergedRange == nil { mergedRange = vr } else { @@ -358,22 +358,22 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { // Handle duplicate events being added - for _, e := range versionRange.Events { - if e.Introduced != "" && e.Introduced == event.Introduced { + for _, e := range versionRange.GetEvents() { + if e.GetIntroduced() != "" && e.GetIntroduced() == event.GetIntroduced() { return } - if e.Fixed != "" && e.Fixed == event.Fixed { + if e.GetFixed() != "" && e.GetFixed() == event.GetFixed() { return } - if e.LastAffected != "" && e.LastAffected == event.LastAffected { + if e.GetLastAffected() != "" && e.GetLastAffected() == event.GetLastAffected() { return } } //TODO: maybe handle if the fixed event appeards as an introduced event or similar. - if event.Introduced != "" { - versionRange.Events = append([]*osvschema.Event{&osvschema.Event{ - Introduced: event.Introduced}}, versionRange.Events...) + if event.GetIntroduced() != "" { + versionRange.Events = append([]*osvschema.Event{{ + Introduced: event.GetIntroduced()}}, versionRange.GetEvents()...) } else { versionRange.Events = append(versionRange.Events, event) } diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index af3a4c8409b..e0c5acde4ca 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -61,7 +61,7 @@ var VendorProductDenyList = []VendorProduct{ // [CVE-2021-28957]: Incorrectly associates with github.com/lxml/lxml {"oracle", "zfs_storage_appliance_kit"}, {"gradle", "enterprise"}, // The OSS repo gets mis-attributed via CVE-2020-15767 - {"qualcomm", ""}, // firmware out of scope + {"qualcomm", ""}, // firmware out of scope {"linux", "linux_kernel"}, } @@ -551,7 +551,7 @@ func ValidateAndCanonicalizeLink(link string, httpClient *http.Client) (canonica // For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. func ExtractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { - var commits []models.AffectedCommit + var commits []models.AffectedCommit //nolint:prealloc for _, ref := range references { // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. @@ -581,7 +581,6 @@ func extractGitAffectedCommit(link string, commitType models.CommitType, httpCli return ac, nil } - func ExtractGitCommit(link string, httpClient *http.Client, depth int) (string, string, error) { if depth > 10 { return "", "", fmt.Errorf("max recursion depth exceeded for %s", link) @@ -820,9 +819,10 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics } } } - if len(versions) > 0{ + if len(versions) > 0 { metrics.AddNote("Extracted versions from CPEs: %v", versions) } + return versions } diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index 8546e2aed02..b5f9a2f42b7 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -73,7 +73,7 @@ const ( VersionSourceGit VersionSource = "GITVERS" VersionSourceCPE VersionSource = "CPEVERS" VersionSourceDescription VersionSource = "DESCRVERS" - VersionSourceRefs VersionSource = "REFS" + VersionSourceRefs VersionSource = "REFS" ) func DetermineOutcome(metrics *ConversionMetrics) { From 6ba77b1ceb920e8db18a89646d2861c94c36d412 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 19 Feb 2026 22:40:47 +0000 Subject: [PATCH 31/45] fix lint --- vulnfeeds/conversion/nvd/converter.go | 2 ++ vulnfeeds/cves/versions.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 0da48cbb6b4..cdb8d79b11c 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -64,6 +64,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, nil, nil, metrics) v.Affected = append(v.Affected, affected) outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + return models.NoRepos } @@ -395,6 +396,7 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { LastAffected: commit.LastAffected, } } + return nil } diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index e0c5acde4ca..e68838f4ce6 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -551,7 +551,7 @@ func ValidateAndCanonicalizeLink(link string, httpClient *http.Client) (canonica // For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. func ExtractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { - var commits []models.AffectedCommit //nolint:prealloc + var commits []models.AffectedCommit //nolint:prealloc for _, ref := range references { // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. From b9797457db12a1bb24e0d2b5c5e414148f107342 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Sun, 1 Mar 2026 22:49:36 +0000 Subject: [PATCH 32/45] clean up resolveVersionToCommit docstring --- vulnfeeds/conversion/common.go | 1 - 1 file changed, 1 deletion(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index c64fbee58f1..0688f65b3f5 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -258,7 +258,6 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr } // resolveVersionToCommit is a helper to convert a version string to a commit hash. -// It logs the outcome of the conversion attempt and returns an empty string on failure. func resolveVersionToCommit(version string, normalizedTags map[string]git.NormalizedTag) string { if version == "" { return "" From debeb960939ff7e4124f531b765354366f20390d Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Sun, 1 Mar 2026 23:47:29 +0000 Subject: [PATCH 33/45] Added docstrings and changed order of early exit --- vulnfeeds/conversion/nvd/converter.go | 56 ++++++++++++++++++++++----- vulnfeeds/cves/versions.go | 12 ++---- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index cdb8d79b11c..e49e42e9931 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -42,9 +42,11 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // Create basic OSV record v := vulns.FromNVDCVE(cve.ID, cve) + // At the bare minimum, we want to attempt to extract the raw version information + // from CPEs, whether or not they can resolve to commits. cpeRanges := cves.ExtractVersionsFromCPEs(cve, nil, metrics) - // if there are no repos, there are no commits from the refs either + // If there are no repos, there are no commits from the refs either if len(cpeRanges) == 0 && len(repos) == 0 { outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) return models.NoRepos @@ -52,6 +54,18 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc successfulRepos := make(map[string]bool) var resolvedRanges, unresolvedRanges []*osvschema.Range + + // Exit early if there are no repositories + if len(repos) == 0 { + affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, nil, nil, metrics) + v.Affected = append(v.Affected, affected) + // Exit early + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + + return models.NoRepos + } + + // If we have ranges, try to resolve them if len(cpeRanges) > 0 { r, un, sR := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) resolvedRanges = append(resolvedRanges, r...) @@ -60,12 +74,6 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc successfulRepos[s] = true } metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceCPE) - } else if len(repos) == 0 { - affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, nil, nil, metrics) - v.Affected = append(v.Affected, affected) - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) - - return models.NoRepos } // Extract Commits @@ -174,7 +182,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. - // slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) + slices.SortStableFunc(versions.AffectedCommits, models.AffectedCommitCompare) var pkgInfos []vulns.PackageInfo pi := vulns.PackageInfo{VersionInfo: versions} @@ -295,6 +303,15 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } +// MergeRangesAndCreateAffected combines resolved and unresolved ranges with commits to create an OSV Affected object. +// It merges ranges for the same repository and adds commit events to the appropriate ranges at the end. +// +// Arguments: +// - resolvedRanges: A slice of resolved OSV ranges to be merged. +// - unresolvedRanges: A slice of unresolved OSV ranges to be included in the database specific field. +// - commits: A slice of affected commits to be converted into events and added to ranges. +// - successfulRepos: A slice of repository URLs that were successfully processed. +// - metrics: A pointer to ConversionMetrics to track the outcome and notes. func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, commits []models.AffectedCommit, successfulRepos []string, metrics *models.ConversionMetrics) *osvschema.Affected { var newResolvedRanges []*osvschema.Range // Combine the ranges appropriately @@ -357,6 +374,12 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR return newAffected } +// addEventToRange adds an event to a version range, avoiding duplicates. +// Introduced events are prepended to the events list, while others are appended. +// +// Arguments: +// - versionRange: The OSV range to which the event will be added. +// - event: The OSV event (Introduced, Fixed, or LastAffected) to add. func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { // Handle duplicate events being added for _, e := range versionRange.GetEvents() { @@ -370,7 +393,7 @@ func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { return } } - //TODO: maybe handle if the fixed event appeards as an introduced event or similar. + //TODO: maybe handle if the fixed event appears as an introduced event or similar. if event.GetIntroduced() != "" { versionRange.Events = append([]*osvschema.Event{{ @@ -380,6 +403,8 @@ func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { } } +// convertCommitToEvent creates an OSV Event from an AffectedCommit. +// It returns an event with the Introduced, Fixed, or LastAffected value from the commit. func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { if commit.Introduced != "" { return &osvschema.Event{ @@ -400,6 +425,19 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { return nil } + +// outputFiles writes the OSV vulnerability record and conversion metrics to files in the specified directory. +// It creates the necessary subdirectories based on the vendor and product names and handles whether or not +// the files should be written based on the rejectFailed and outputMetrics flags. +// +// Arguments: +// - v: The OSV Vulnerability object to be written to a file. +// - dir: The base directory where the output files should be created. +// - vendor: The vendor name used to create the subdirectory. +// - product: The product name used to create the subdirectory. +// - metrics: A pointer to ConversionMetrics to be written to a metrics file. +// - rejectFailed: A boolean indicating whether to skip writing the OSV file if the conversion was not successful. +// - outputMetrics: A boolean indicating whether to write the metrics file. func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) { cveID := v.Id vulnDir := filepath.Join(dir, vendor, product) diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index e68838f4ce6..db7d6817a7d 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -826,8 +826,8 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics return versions } -// ExtractVersionInfo extracts version information from a CVE. -// Deprecated: Use ExtractVersions instead. +// ExtractVersionInfo extracts version information from a CVE and saves to a VersionInfo struct. +// This is mostly deprecated, but is still used by the Alpine, Debian, and PyPi converters. func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { if commit, err := ExtractCommitsFromRefs(cve.References, httpClient); err == nil { v.AffectedCommits = append(v.AffectedCommits, commit...) @@ -838,7 +838,7 @@ func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *h metrics.AddNote("Extracted %d commits", len(v.AffectedCommits)) } - // gotVersions := false + // Extract versions from CPEs. for _, config := range cve.Configurations { for _, node := range config.Nodes { if node.Operator != "OR" { @@ -924,12 +924,6 @@ func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *h } } } - // if !gotVersions { - // v.AffectedVersions = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) - // if len(v.AffectedVersions) > 0 { - // metrics.AddNote("Extracted versions from description: %v", v.AffectedVersions) - // } - // } if len(v.AffectedVersions) == 0 { metrics.AddNote("No versions detected.") From 68296cbc553478461188e6bda245a65515670e7a Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Sun, 1 Mar 2026 23:48:58 +0000 Subject: [PATCH 34/45] we dont want os vulns rn --- vulnfeeds/conversion/nvd/converter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index e49e42e9931..c385189b148 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -256,7 +256,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } - if CPE.Part != "a" && CPE.Part != "o" { // only care about application and operating system CPEs + if CPE.Part != "a" { // only care about application CPEs continue } vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true From 097f72016d5f0991be4a84d7629534ac5339f3dc Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Sun, 1 Mar 2026 23:51:37 +0000 Subject: [PATCH 35/45] update docstring --- vulnfeeds/conversion/common.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 0688f65b3f5..ef80876bddf 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -171,7 +171,8 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e return nil } -// Examines repos and tries to convert versions to commits by treating them as Git tags. +// GitVersionsToCommits examines repos and tries to convert versions to commits by treating them as Git tags. +// Returns the resolved ranges, unresolved ranges, and successful repos involved. func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) { var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges From 0b8caa5c6dc666dc2d2bca8b373b3e1cd53accca Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 00:04:14 +0000 Subject: [PATCH 36/45] Put the check back --- vulnfeeds/conversion/nvd/converter.go | 7 +++++-- vulnfeeds/cves/versions.go | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index c385189b148..6a6ee60b0a2 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -34,9 +34,13 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc maybeProductName := "ENOCPE" if len(CPEs) > 0 { - CPE, _ := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. + CPE, err := cves.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. maybeVendorName = CPE.Vendor maybeProductName = CPE.Product + if err != nil { + metrics.AddNote("Can't generate an OSV record without valid CPE data") + return models.ConversionUnknown + } } // Create basic OSV record @@ -425,7 +429,6 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { return nil } - // outputFiles writes the OSV vulnerability record and conversion metrics to files in the specified directory. // It creates the necessary subdirectories based on the vendor and product names and handles whether or not // the files should be written based on the rejectFailed and outputMetrics flags. diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index db7d6817a7d..fb26dce5d75 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -827,7 +827,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics } // ExtractVersionInfo extracts version information from a CVE and saves to a VersionInfo struct. -// This is mostly deprecated, but is still used by the Alpine, Debian, and PyPi converters. +// This is mostly deprecated, but is still used by the Alpine, Debian, and PyPi converters. func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { if commit, err := ExtractCommitsFromRefs(cve.References, httpClient); err == nil { v.AffectedCommits = append(v.AffectedCommits, commit...) From e6debf35ac9588cf3c37d7023907b1e47ccebe20 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 00:41:05 +0000 Subject: [PATCH 37/45] Create SetOutcome function to handle updating conversion status --- vulnfeeds/conversion/common.go | 12 ------- vulnfeeds/conversion/nvd/converter.go | 51 ++++++++++++++++++++------- vulnfeeds/models/metrics.go | 16 ++++++--- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index ef80876bddf..6408db0fd98 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -243,18 +243,6 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr unresolvedRanges = stillUnresolvedRanges } - if len(newVersionRanges) > 0 { - metrics.ResolvedRangesCount += len(newVersionRanges) - metrics.Outcome = models.Successful - } - - if len(unresolvedRanges) > 0 { - metrics.UnresolvedRangesCount += len(unresolvedRanges) - if len(newVersionRanges) == 0 { - metrics.Outcome = models.NoCommitRanges - } - } - return newVersionRanges, unresolvedRanges, successfulRepos } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 6a6ee60b0a2..af00302a37a 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -61,7 +61,9 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // Exit early if there are no repositories if len(repos) == 0 { - affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, nil, nil, metrics) + metrics.SetOutcome(models.NoRepos) + metrics.UnresolvedRangesCount += len(cpeRanges) + affected := MergeRangesAndCreateAffected(resolvedRanges, cpeRanges, nil, nil, metrics) v.Affected = append(v.Affected, affected) // Exit early outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) @@ -72,8 +74,20 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // If we have ranges, try to resolve them if len(cpeRanges) > 0 { r, un, sR := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) - resolvedRanges = append(resolvedRanges, r...) - unresolvedRanges = append(unresolvedRanges, un...) + if len(r) > 0 { + metrics.ResolvedRangesCount += len(r) + resolvedRanges = append(resolvedRanges, r...) + metrics.SetOutcome(models.Successful) + } + + if len(un) > 0 { + metrics.UnresolvedRangesCount += len(un) + unresolvedRanges = append(unresolvedRanges, un...) + if len(r) == 0 { + metrics.SetOutcome(models.NoCommitRanges) + } + } + for _, s := range sR { successfulRepos[s] = true } @@ -90,6 +104,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc for _, commit := range commits { successfulRepos[commit.Repo] = true } + metrics.SetOutcome(models.Successful) metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceRefs) } @@ -100,8 +115,19 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc metrics.AddNote("Extracted versions from description: %v", textRanges) } r, un, sR := conversion.GitVersionsToCommits(textRanges, repos, metrics, cache) - resolvedRanges = append(resolvedRanges, r...) - unresolvedRanges = append(unresolvedRanges, un...) + if len(r) > 0 { + metrics.ResolvedRangesCount += len(r) + resolvedRanges = append(resolvedRanges, r...) + metrics.SetOutcome(models.Successful) + } + + if len(un) > 0 { + metrics.UnresolvedRangesCount += len(un) + unresolvedRanges = append(unresolvedRanges, un...) + if len(r) == 0 { + metrics.SetOutcome(models.NoCommitRanges) + } + } for _, s := range sR { successfulRepos[s] = true } @@ -110,13 +136,14 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc if len(resolvedRanges) == 0 && len(commits) == 0 { metrics.AddNote("No ranges detected for %q", maybeProductName) - metrics.Outcome = models.NoRanges - } else { - keys := slices.Collect(maps.Keys(successfulRepos)) - affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) - v.Affected = append(v.Affected, affected) + metrics.SetOutcome(models.NoRanges) } + // Use the successful repos for more efficient merging. + keys := slices.Collect(maps.Keys(successfulRepos)) + affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) + v.Affected = append(v.Affected, affected) + if !outputMetrics && rejectFailed && metrics.Outcome != models.Successful { return metrics.Outcome } @@ -239,7 +266,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * if len(refs) == 0 && len(CPEs) == 0 { metrics.AddNote("Skipping due to lack of CPEs and lack of references") // 100% of these in 2022 were rejected CVEs - metrics.Outcome = models.Rejected + metrics.SetOutcome(models.Rejected) return nil } @@ -297,7 +324,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * if len(reposForCVE) == 0 { // We have nothing useful to work with, so we'll assume it's out of scope metrics.AddNote("Passing due to lack of viable repository") - metrics.Outcome = models.NoRepos return nil } @@ -360,7 +386,6 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR newResolvedRanges = append(newResolvedRanges, conversion.BuildVersionRange(commit.Introduced, commit.LastAffected, commit.Fixed)) metrics.ResolvedRangesCount++ } - metrics.Outcome = models.Successful } newAffected := &osvschema.Affected{ diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index b5f9a2f42b7..44beba9816e 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -59,6 +59,14 @@ func (m *ConversionMetrics) AddNote(format string, a ...any) { logger.Debug(fmt.Sprintf(format, a...), slog.String("cna", m.CNA), slog.String("cve", string(m.CVEID))) } +// SetOutcome sets the outcome of the conversion only if it's not already set, or has become successful. +func (m *ConversionMetrics) SetOutcome(outcome ConversionOutcome) { + if m.Outcome != ConversionUnknown || outcome != Successful { + return + } + m.Outcome = outcome +} + // AddSource appends a source to the ConversionMetrics func (m *ConversionMetrics) AddSource(source VersionSource) { m.VersionSources = append(m.VersionSources, source) @@ -80,15 +88,15 @@ func DetermineOutcome(metrics *ConversionMetrics) { // check if we have affected ranges/versions. if len(metrics.Repos) == 0 { // Fix unlikely, as no repos to resolve - metrics.Outcome = NoRepos + metrics.SetOutcome(NoRepos) return } if metrics.ResolvedRangesCount > 0 { - metrics.Outcome = Successful + metrics.SetOutcome(Successful) } else if metrics.UnresolvedRangesCount > 0 { - metrics.Outcome = NoCommitRanges + metrics.SetOutcome(NoCommitRanges) } else { - metrics.Outcome = NoRanges + metrics.SetOutcome(NoRanges) } } From 8f332974bd8cf167f74e99518b72d7aaa9c891cc Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 00:49:05 +0000 Subject: [PATCH 38/45] fixing missing outcomes --- vulnfeeds/conversion/nvd/converter.go | 1 + vulnfeeds/models/metrics.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index af00302a37a..4d39b982b44 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -52,6 +52,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // If there are no repos, there are no commits from the refs either if len(cpeRanges) == 0 && len(repos) == 0 { + metrics.SetOutcome(models.NoRepos) outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) return models.NoRepos } diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index 44beba9816e..0fb45656b72 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -61,7 +61,7 @@ func (m *ConversionMetrics) AddNote(format string, a ...any) { // SetOutcome sets the outcome of the conversion only if it's not already set, or has become successful. func (m *ConversionMetrics) SetOutcome(outcome ConversionOutcome) { - if m.Outcome != ConversionUnknown || outcome != Successful { + if m.Outcome != ConversionUnknown || m.Outcome == Successful { return } m.Outcome = outcome From f0cb44d415936580cb612540edbee3136374fc4a Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 01:55:01 +0000 Subject: [PATCH 39/45] lint --- vulnfeeds/conversion/nvd/converter.go | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 4d39b982b44..5219b7e4267 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -54,6 +54,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc if len(cpeRanges) == 0 && len(repos) == 0 { metrics.SetOutcome(models.NoRepos) outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + return models.NoRepos } From 17f32e1f1d973d66409b91efe95264f35935ca7b Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 02:57:39 +0000 Subject: [PATCH 40/45] handle edgecases for mergetworanges --- vulnfeeds/conversion/common.go | 79 +++++++++++++--- vulnfeeds/conversion/common_test.go | 128 ++++++++++++++++++++++++-- vulnfeeds/conversion/nvd/converter.go | 6 +- 3 files changed, 191 insertions(+), 22 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 6408db0fd98..080a1f409c1 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -284,10 +284,16 @@ func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Ra return &versionRange } -func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { +// MergeTwoRanges combines two osvschema.Range objects into a single range. +// It merges the events and the DatabaseSpecific fields. If the ranges are +// not for the same repository or are of different types, it returns an error. +// When merging DatabaseSpecific fields, it handles lists, maps, and simple +// strings. If there are mismatching types for the same key, it returns an error. +func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) { // check if the ranges are the same if range1.GetRepo() != range2.GetRepo() || range1.GetType() != range2.GetType() { - return nil + // return an error if not the case + return nil, fmt.Errorf("ranges are not the same repo or type") } mergedRange := &osvschema.Range{ @@ -300,7 +306,7 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { db2 := range2.GetDatabaseSpecific() if db1 == nil && db2 == nil { - return mergedRange + return mergedRange, nil } mergedMap := make(map[string]any) @@ -313,17 +319,16 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { if db2 != nil { for k, v := range db2.GetFields() { + val2 := v.AsInterface() if existing, ok := mergedMap[k]; ok { - // If both are lists, append them - if list1, ok := existing.([]any); ok { - if list2, ok := v.AsInterface().([]any); ok { - mergedMap[k] = append(list1, list2...) - continue - } + mergedVal, err := mergeDatabaseSpecificValues(existing, val2) + if err != nil { + return nil, fmt.Errorf("failed to merge database specific key %q: %w", k, err) } + mergedMap[k] = mergedVal + } else { + mergedMap[k] = val2 } - // Otherwise overwrite or add new - mergedMap[k] = v.AsInterface() } } @@ -335,5 +340,55 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) *osvschema.Range { } } - return mergedRange + return mergedRange, nil } + +// mergeDatabaseSpecificValues is a helper function that recursively merges two +// values from a DatabaseSpecific field. It handles lists (by appending), maps +// (by recursively merging keys), and simple strings (by creating a list if they +// differ). It returns an error if the types of the two values do not match. +func mergeDatabaseSpecificValues(val1, val2 any) (any, error) { + switch v1 := val1.(type) { + case []any: + if v2, ok := val2.([]any); ok { + return append(v1, v2...), nil + } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + case map[string]any: + if v2, ok := val2.(map[string]any); ok { + merged := make(map[string]any) + for k, v := range v1 { + merged[k] = v + } + for k, v := range v2 { + if existing, ok := merged[k]; ok { + mergedVal, err := mergeDatabaseSpecificValues(existing, v) + if err != nil { + return nil, err + } + merged[k] = mergedVal + } else { + merged[k] = v + } + } + return merged, nil + } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + case string: + if v2, ok := val2.(string); ok { + if v1 == v2 { + return v1, nil + } + return []any{v1, v2}, nil + } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + default: + if fmt.Sprintf("%T", val1) != fmt.Sprintf("%T", val2) { + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + } + if val1 == val2 { + return val1, nil + } + return []any{val1, val2}, nil + } +} \ No newline at end of file diff --git a/vulnfeeds/conversion/common_test.go b/vulnfeeds/conversion/common_test.go index 3dcb253543f..84ca6444c61 100644 --- a/vulnfeeds/conversion/common_test.go +++ b/vulnfeeds/conversion/common_test.go @@ -70,10 +70,11 @@ func TestBuildVersionRange(t *testing.T) { func TestMergeTwoRanges(t *testing.T) { tests := []struct { - name string - range1 *osvschema.Range - range2 *osvschema.Range - want *osvschema.Range + name string + range1 *osvschema.Range + range2 *osvschema.Range + want *osvschema.Range + wantErr bool }{ { name: "Merge identical ranges", @@ -101,7 +102,7 @@ func TestMergeTwoRanges(t *testing.T) { }, }, { - name: "Different repos should return nil", + name: "Different repos should return nil and error", range1: &osvschema.Range{ Type: osvschema.Range_GIT, Repo: "https://github.com/example/repo1", @@ -110,10 +111,11 @@ func TestMergeTwoRanges(t *testing.T) { Type: osvschema.Range_GIT, Repo: "https://github.com/example/repo2", }, - want: nil, + want: nil, + wantErr: true, }, { - name: "Different types should return nil", + name: "Different types should return nil and error", range1: &osvschema.Range{ Type: osvschema.Range_GIT, Repo: "https://github.com/example/repo", @@ -122,7 +124,8 @@ func TestMergeTwoRanges(t *testing.T) { Type: osvschema.Range_ECOSYSTEM, Repo: "https://github.com/example/repo", }, - want: nil, + want: nil, + wantErr: true, }, { name: "Merge with DatabaseSpecific", @@ -198,10 +201,117 @@ func TestMergeTwoRanges(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := MergeTwoRanges(tt.range1, tt.range2) + got, err := MergeTwoRanges(tt.range1, tt.range2) + if (err != nil) != tt.wantErr { + t.Errorf("MergeTwoRanges() error = %v, wantErr %v", err, tt.wantErr) + return + } if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { t.Errorf("mergeTwoRanges() mismatch (-want +got):\n%s", diff) } }) } } + +func TestMergeDatabaseSpecificValues(t *testing.T) { + tests := []struct { + name string + val1 any + val2 any + want any + wantErr bool + }{ + { + name: "Merge lists", + val1: []any{"a", "b"}, + val2: []any{"c", "d"}, + want: []any{"a", "b", "c", "d"}, + }, + { + name: "List and string mismatch", + val1: []any{"a", "b"}, + val2: "c", + wantErr: true, + }, + { + name: "Merge maps", + val1: map[string]any{"key1": "value1"}, + val2: map[string]any{"key2": "value2"}, + want: map[string]any{"key1": "value1", "key2": "value2"}, + }, + { + name: "Merge nested maps", + val1: map[string]any{ + "nested": map[string]any{ + "key1": "value1", + }, + }, + val2: map[string]any{ + "nested": map[string]any{ + "key2": "value2", + }, + }, + want: map[string]any{ + "nested": map[string]any{ + "key1": "value1", + "key2": "value2", + }, + }, + }, + { + name: "Map and string mismatch", + val1: map[string]any{"key1": "value1"}, + val2: "string", + wantErr: true, + }, + { + name: "Merge same strings", + val1: "value1", + val2: "value1", + want: "value1", + }, + { + name: "Merge different strings", + val1: "value1", + val2: "value2", + want: []any{"value1", "value2"}, + }, + { + name: "String and int mismatch", + val1: "value1", + val2: 123, + wantErr: true, + }, + { + name: "Merge same ints", + val1: 123, + val2: 123, + want: 123, + }, + { + name: "Merge different ints", + val1: 123, + val2: 456, + want: []any{123, 456}, + }, + { + name: "Int and float64 mismatch", + val1: 123, + val2: 456.0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := mergeDatabaseSpecificValues(tt.val1, tt.val2) + if (err != nil) != tt.wantErr { + t.Errorf("mergeDatabaseSpecificValues() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && !cmp.Equal(got, tt.want) { + t.Errorf("mergeDatabaseSpecificValues() mismatch (-want +got):\n%s", cmp.Diff(tt.want, got)) + } + }) + } +} diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 5219b7e4267..4ead4c2541c 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -357,7 +357,11 @@ func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedR if mergedRange == nil { mergedRange = vr } else { - mergedRange = conversion.MergeTwoRanges(mergedRange, vr) + var err error + mergedRange, err = conversion.MergeTwoRanges(mergedRange, vr) + if err != nil { + metrics.AddNote("Failed to merge ranges: %v", err) + } } } } From 2e7045a44dc034972560152723af7532e26d8aae Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 03:12:03 +0000 Subject: [PATCH 41/45] liiiint --- vulnfeeds/conversion/common.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 080a1f409c1..a539ce393ce 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -5,6 +5,7 @@ package conversion import ( "encoding/csv" "encoding/json" + "errors" "fmt" "io/fs" "log/slog" @@ -293,7 +294,7 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) { // check if the ranges are the same if range1.GetRepo() != range2.GetRepo() || range1.GetType() != range2.GetType() { // return an error if not the case - return nil, fmt.Errorf("ranges are not the same repo or type") + return nil, errors.New("ranges are not the same repo or type") } mergedRange := &osvschema.Range{ @@ -353,6 +354,7 @@ func mergeDatabaseSpecificValues(val1, val2 any) (any, error) { if v2, ok := val2.([]any); ok { return append(v1, v2...), nil } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) case map[string]any: if v2, ok := val2.(map[string]any); ok { @@ -371,16 +373,20 @@ func mergeDatabaseSpecificValues(val1, val2 any) (any, error) { merged[k] = v } } + return merged, nil } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) case string: if v2, ok := val2.(string); ok { if v1 == v2 { return v1, nil } + return []any{v1, v2}, nil } + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) default: if fmt.Sprintf("%T", val1) != fmt.Sprintf("%T", val2) { @@ -389,6 +395,7 @@ func mergeDatabaseSpecificValues(val1, val2 any) (any, error) { if val1 == val2 { return val1, nil } + return []any{val1, val2}, nil } -} \ No newline at end of file +} From 3db95a6b3f3cbcfaa075b8df26b10fed159de2a6 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 2 Mar 2026 04:17:43 +0000 Subject: [PATCH 42/45] reduce duplication --- vulnfeeds/conversion/nvd/converter.go | 67 +++++++++++++-------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 4ead4c2541c..0c1e176b14e 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -74,26 +74,11 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } // If we have ranges, try to resolve them - if len(cpeRanges) > 0 { - r, un, sR := conversion.GitVersionsToCommits(cpeRanges, repos, metrics, cache) - if len(r) > 0 { - metrics.ResolvedRangesCount += len(r) - resolvedRanges = append(resolvedRanges, r...) - metrics.SetOutcome(models.Successful) - } - - if len(un) > 0 { - metrics.UnresolvedRangesCount += len(un) - unresolvedRanges = append(unresolvedRanges, un...) - if len(r) == 0 { - metrics.SetOutcome(models.NoCommitRanges) - } - } - - for _, s := range sR { - successfulRepos[s] = true - } - metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceCPE) + r, un, sR := processRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) + for _, s := range sR { + successfulRepos[s] = true } // Extract Commits @@ -116,24 +101,12 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc if len(textRanges) > 0 { metrics.AddNote("Extracted versions from description: %v", textRanges) } - r, un, sR := conversion.GitVersionsToCommits(textRanges, repos, metrics, cache) - if len(r) > 0 { - metrics.ResolvedRangesCount += len(r) - resolvedRanges = append(resolvedRanges, r...) - metrics.SetOutcome(models.Successful) - } - - if len(un) > 0 { - metrics.UnresolvedRangesCount += len(un) - unresolvedRanges = append(unresolvedRanges, un...) - if len(r) == 0 { - metrics.SetOutcome(models.NoCommitRanges) - } - } + r, un, sR := processRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) for _, s := range sR { successfulRepos[s] = true } - metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceDescription) } if len(resolvedRanges) == 0 && len(commits) == 0 { @@ -501,3 +474,27 @@ func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product stri metricsFile.Close() } } + +// processRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. +func processRanges(ranges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache, source models.VersionSource) ([]*osvschema.Range, []*osvschema.Range, []string) { + if len(ranges) == 0 { + return nil, nil, nil + } + + r, un, sR := conversion.GitVersionsToCommits(ranges, repos, metrics, cache) + if len(r) > 0 { + metrics.ResolvedRangesCount += len(r) + metrics.SetOutcome(models.Successful) + } + + if len(un) > 0 { + metrics.UnresolvedRangesCount += len(un) + if len(r) == 0 { + metrics.SetOutcome(models.NoCommitRanges) + } + } + + metrics.VersionSources = append(metrics.VersionSources, source) + + return r, un, sR +} From d509aa720b89bc6f29850c2a503390eb00fa87d2 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 4 Mar 2026 05:01:05 +0000 Subject: [PATCH 43/45] don't needlessly return early --- vulnfeeds/conversion/common.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index a539ce393ce..b89a1a42a48 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -324,7 +324,7 @@ func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) { if existing, ok := mergedMap[k]; ok { mergedVal, err := mergeDatabaseSpecificValues(existing, val2) if err != nil { - return nil, fmt.Errorf("failed to merge database specific key %q: %w", k, err) + logger.Info("Failed to merge database specific key", "key", k, "err", err) } mergedMap[k] = mergedVal } else { From 78d307acf546dace82f33b54b726925380d3fca4 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Mar 2026 04:53:12 +0000 Subject: [PATCH 44/45] Removed function and inlined it --- vulnfeeds/conversion/common.go | 28 ++++++++++++---------------- vulnfeeds/git/versions.go | 3 +++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index b89a1a42a48..c5906bc917b 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -211,10 +211,19 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if introduced == "0" { introducedCommit = "0" } else { - introducedCommit = resolveVersionToCommit(introduced, normalizedTags) + introducedCommit, err = git.VersionToCommit(introduced, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", introduced, err) + } + } + fixedCommit, err := git.VersionToCommit(fixed, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", fixed, err) + } + lastAffectedCommit, err := git.VersionToCommit(lastAffected, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", lastAffected, err) } - fixedCommit := resolveVersionToCommit(fixed, normalizedTags) - lastAffectedCommit := resolveVersionToCommit(lastAffected, normalizedTags) if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { var newVR *osvschema.Range @@ -247,19 +256,6 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr return newVersionRanges, unresolvedRanges, successfulRepos } -// resolveVersionToCommit is a helper to convert a version string to a commit hash. -func resolveVersionToCommit(version string, normalizedTags map[string]git.NormalizedTag) string { - if version == "" { - return "" - } - commit, err := git.VersionToCommit(version, normalizedTags) - if err != nil { - return "" - } - - return commit -} - // BuildVersionRange is a helper function that adds 'introduced', 'fixed', or 'last_affected' // events to an OSV version range. If 'intro' is empty, it defaults to "0". func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Range { diff --git a/vulnfeeds/git/versions.go b/vulnfeeds/git/versions.go index ed8a0d32673..5a897cd944a 100644 --- a/vulnfeeds/git/versions.go +++ b/vulnfeeds/git/versions.go @@ -82,6 +82,9 @@ func VersionToAffectedCommit(version string, repo string, commitType models.Comm // Take an unnormalized version string, the pre-normalized mapping of tags to commits and return a commit hash. func VersionToCommit(version string, normalizedTags map[string]NormalizedTag) (string, error) { + if version == "" { + return "", fmt.Errorf("version cannot be empty") + } // TODO: try unnormalized version first. normalizedVersion, err := NormalizeVersion(version) if err != nil { From 3683aa4b3bfb72977209f3971ff11c1907de87db Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Mar 2026 05:05:50 +0000 Subject: [PATCH 45/45] lint --- vulnfeeds/git/versions.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/git/versions.go b/vulnfeeds/git/versions.go index 5a897cd944a..3983cc651c1 100644 --- a/vulnfeeds/git/versions.go +++ b/vulnfeeds/git/versions.go @@ -15,6 +15,7 @@ package git import ( + "errors" "fmt" "regexp" "slices" @@ -83,7 +84,7 @@ func VersionToAffectedCommit(version string, repo string, commitType models.Comm // Take an unnormalized version string, the pre-normalized mapping of tags to commits and return a commit hash. func VersionToCommit(version string, normalizedTags map[string]NormalizedTag) (string, error) { if version == "" { - return "", fmt.Errorf("version cannot be empty") + return "", errors.New("version cannot be empty") } // TODO: try unnormalized version first. normalizedVersion, err := NormalizeVersion(version)