Skip to content

Commit 848e687

Browse files
authored
Add proof of concept vulnfeeds code. (#100)
This is something which can find relevant vulnerabilities from existing feeds such as CVEs and output them in a format compatible with OSV.
1 parent cf24427 commit 848e687

10 files changed

Lines changed: 585 additions & 0 deletions

File tree

vulnfeeds/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pypi.json

vulnfeeds/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Vuln feeds
2+
This searches an NVD CVE JSON feed for relevant packages and creates
3+
vulnerability entries in the same easy-to-consume format that OSV uses.
4+
5+
## Package name matching
6+
CVE entries do not provide an easy mapping to the exact package name used in a
7+
package manager, so we dump and compare reference URLs for the package against
8+
reference URLs used in the CVE, in addition to some other heuristics to avoid
9+
false positives.
10+
11+
## PyPI
12+
For PyPI, we find package reference URLs by doing a BigQuery query on
13+
the public PyPI dataset:
14+
15+
```bash
16+
bq query --max_rows=10000000 --format=json --nouse_legacy_sql --flagfile=pypi.sql > pypi.json
17+
```
18+
19+
However, this includes packages that no longer exist or were deleted, so we check
20+
against the [pypi simple API](https://warehouse.pypa.io/api-reference/legacy.html)
21+
to make sure any matches actually exist.
22+
23+
## Extracting affected versions and commits
24+
Where possible, we try to extract affected version ranges from descriptions and
25+
other fields, and extract commit hashes from e.g. GitHub links.

vulnfeeds/cmd/pypi/main.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"encoding/json"
19+
"flag"
20+
"io/ioutil"
21+
"log"
22+
"os"
23+
"path/filepath"
24+
25+
"gopkg.in/yaml.v2"
26+
27+
"github.com/google/osv/vulnfeeds/cves"
28+
"github.com/google/osv/vulnfeeds/pypi"
29+
"github.com/google/osv/vulnfeeds/vulns"
30+
)
31+
32+
func main() {
33+
jsonPath := flag.String("nvd_json", "", "Path to NVD CVE JSON.")
34+
pypiJson := flag.String("pypi_json", "", "Path to pypi.json.")
35+
outDir := flag.String("out_dir", "", "Path to output results.")
36+
37+
flag.Parse()
38+
39+
data, err := ioutil.ReadFile(*jsonPath)
40+
if err != nil {
41+
log.Fatalf("Failed to open file: %v", err)
42+
}
43+
44+
var parsed cves.NVDCVE
45+
err = json.Unmarshal(data, &parsed)
46+
if err != nil {
47+
log.Fatalf("Failed to parse NVD CVE JSON: %v", err)
48+
}
49+
50+
ecosystem := pypi.NewPyPI(*pypiJson)
51+
for _, cve := range parsed.CVEItems {
52+
pkg := ""
53+
if pkg = ecosystem.Matches(cve); pkg == "" {
54+
continue
55+
}
56+
57+
v := vulns.FromCVE(cve, pkg, "PyPI", "ECOSYSTEM")
58+
data, err := yaml.Marshal(v)
59+
if err != nil {
60+
log.Fatalf("Failed to marshal YAML: %v", err)
61+
}
62+
63+
pkgDir := filepath.Join(*outDir, pkg)
64+
err = os.MkdirAll(pkgDir, 0755)
65+
if err != nil {
66+
log.Fatalf("Failed to create dir: %v", err)
67+
}
68+
69+
vulnPath := filepath.Join(pkgDir, v.ID+".yaml")
70+
err = ioutil.WriteFile(vulnPath, data, 0644)
71+
if err != nil {
72+
log.Fatalf("Failed to write %s: %v", vulnPath, err)
73+
}
74+
}
75+
}

vulnfeeds/cves/cve.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cves
16+
17+
import "time"
18+
19+
const (
20+
// CVETimeFormat is the time layout that ParseTimestamp hands to time.Parse:
// date and time to minute precision, followed by "Z" or a numeric UTC offset.
CVETimeFormat = "2006-01-02T15:04Z07:00"
21+
)
22+
23+
// CVE is the value decoded from the "cve" key of a feed item (see CVEItem).
// Only the identifier, reference URLs and descriptions are mapped.
type CVE struct {
24+
// CVEDataMeta is decoded from the "CVE_data_meta" JSON key.
CVEDataMeta struct {
25+
// ID has no explicit tag, so encoding/json matches it against the key
// "ID" case-insensitively. Presumably the CVE identifier; confirm against the feed.
ID string
26+
} `json:"CVE_data_meta"`
27+
// References holds the reference URLs; ExtractVersionInfo scans them for
// GitHub commit links.
References struct {
28+
ReferenceData []struct {
29+
URL string `json:"url"`
30+
} `json:"reference_data"`
31+
} `json:"references"`
32+
// Description holds per-language description texts; EnglishDescription
// returns the Value of the first entry whose Lang is "en".
Description struct {
33+
DescriptionData []struct {
34+
Lang string `json:"lang"`
35+
Value string `json:"value"`
36+
} `json:"description_data"`
37+
} `json:"description"`
38+
}
39+
40+
// CVEItem is one element of the "CVE_Items" array of an NVDCVE document.
type CVEItem struct {
41+
CVE CVE `json:"cve"`
42+
// Configurations carries the version-range data that ExtractVersionInfo
// reads.
Configurations struct {
43+
Nodes []struct {
44+
// Operator: ExtractVersionInfo only looks at nodes whose operator is "OR".
Operator string `json:"operator"`
45+
CPEMatch []struct {
46+
// Vulnerable: entries where this is false are skipped by
// ExtractVersionInfo.
Vulnerable bool `json:"vulnerable"`
47+
// The four bounds below are kept as raw strings; an empty string
// means the bound was not set in the feed.
VersionStartExcluding string `json:"versionStartExcluding"`
48+
VersionStartIncluding string `json:"versionStartIncluding"`
49+
VersionEndExcluding string `json:"versionEndExcluding"`
50+
VersionEndIncluding string `json:"versionEndIncluding"`
51+
} `json:"cpe_match"`
52+
} `json:"nodes"`
53+
} `json:"configurations"`
54+
Impact struct {
55+
BaseMetricV3 struct {
56+
CVSSV3 struct {
57+
BaseSeverity string `json:"baseSeverity"`
58+
} `json:"cvssV3"`
59+
} `json:"baseMetricV3"`
60+
} `json:"impact"`
61+
// Timestamps are kept as strings. NOTE(review): presumably in
// CVETimeFormat and parseable with ParseTimestamp; confirm against the feed.
PublishedDate string `json:"publishedDate"`
62+
LastModifiedDate string `json:"lastModifiedDate"`
63+
}
64+
65+
// NVDCVE is the top-level document of an NVD CVE JSON feed; only the item
// list and the feed timestamp are mapped.
type NVDCVE struct {
66+
CVEItems []CVEItem `json:"CVE_Items"`
67+
// CVEDataTimestamp is kept as the raw string from the feed.
CVEDataTimestamp string `json:"CVE_data_timestamp"`
68+
}
69+
70+
func EnglishDescription(cve CVE) string {
71+
for _, desc := range cve.Description.DescriptionData {
72+
if desc.Lang == "en" {
73+
return desc.Value
74+
}
75+
}
76+
return ""
77+
}
78+
79+
func ParseTimestamp(timestamp string) (time.Time, error) {
80+
return time.Parse(CVETimeFormat, timestamp)
81+
}

vulnfeeds/cves/versions.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cves
16+
17+
import (
18+
"net/url"
19+
"strings"
20+
)
21+
22+
// FixCommit identifies a commit referenced by a CVE: the repository URL and
// the commit component taken from the link.
type FixCommit struct {
	Repo   string
	Commit string
}

// AffectedVersion is a version range. Introduced is the inclusive start and
// Fixed is the exclusive end; either may be empty when the source data does
// not provide that bound.
type AffectedVersion struct {
	Introduced string
	Fixed      string
}

// VersionInfo groups the fix commits and affected version ranges extracted
// from a single CVE item.
type VersionInfo struct {
	FixCommits       []FixCommit
	AffectedVersions []AffectedVersion
}

// extractGitHubCommit returns the repository URL and commit component of a
// GitHub commit link, or nil when link is not one.
//
// Example: https://github.com/google/osv/commit/cd4e934d0527e5010e373e7fed54ef5daefba2f5
// yields Repo "https://github.com/google/osv" and that hash as Commit.
//
// A link is rejected (nil) when it does not parse, its host is not exactly
// "github.com", its path does not end in ".../commit/<hash>", or nothing
// precedes "/commit/" to name a repository. A trailing slash after the hash
// is tolerated. Query string and fragment (e.g. "?diff=split#diff-...") are
// not carried over into Repo.
func extractGitHubCommit(link string) *FixCommit {
	u, err := url.Parse(link)
	if err != nil {
		return nil
	}

	if u.Host != "github.com" {
		return nil
	}

	pathParts := strings.Split(strings.TrimSuffix(u.Path, "/"), "/")
	n := len(pathParts)
	// Guard the length before indexing: an empty path splits into a single
	// element, and indexing n-2 would panic.
	if n < 2 || pathParts[n-2] != "commit" {
		return nil
	}

	// Commit is the last component.
	commit := pathParts[n-1]
	if commit == "" {
		return nil
	}

	// Strip the /commit/... suffix to get the repo path.
	repoPath := strings.Join(pathParts[:n-2], "/")
	if repoPath == "" {
		return nil
	}

	// Build the repo URL from scheme, host and path only, so the query and
	// fragment of the original link cannot leak into it.
	repo := url.URL{Scheme: u.Scheme, Host: u.Host, Path: repoPath}

	return &FixCommit{
		Repo:   repo.String(),
		Commit: commit,
	}
}
64+
65+
func ExtractVersionInfo(cve CVEItem) VersionInfo {
66+
v := VersionInfo{}
67+
for _, reference := range cve.CVE.References.ReferenceData {
68+
// TODO(ochang): Support other common commit URLs.
69+
if commit := extractGitHubCommit(reference.URL); commit != nil {
70+
v.FixCommits = append(v.FixCommits, *commit)
71+
}
72+
}
73+
74+
for _, node := range cve.Configurations.Nodes {
75+
if node.Operator != "OR" {
76+
continue
77+
}
78+
79+
// TODO: Also try to parse description as these are not always reliably set.
80+
for _, match := range node.CPEMatch {
81+
if !match.Vulnerable {
82+
continue
83+
}
84+
85+
if match.VersionStartIncluding != "" || match.VersionEndExcluding != "" {
86+
if match.VersionStartExcluding != "" || match.VersionEndIncluding != "" {
87+
// TODO: handle these.
88+
continue
89+
}
90+
91+
v.AffectedVersions = append(v.AffectedVersions, AffectedVersion{
92+
// TODO: make sure these actually match PyPI numbers.
93+
Introduced: match.VersionStartIncluding,
94+
Fixed: match.VersionEndExcluding,
95+
})
96+
}
97+
}
98+
}
99+
return v
100+
}

vulnfeeds/go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
module github.com/google/osv/vulnfeeds
2+
3+
go 1.16
4+
5+
require gopkg.in/yaml.v2 v2.4.0 // indirect

vulnfeeds/go.sum

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
2+
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
3+
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

0 commit comments

Comments
 (0)