Skip to content

Commit 1d69158

Browse files
committed
➖ Don't collect reference ids in NVD importer
Signed-off-by: Shivam Sandbhor <shivam.sandbhor@gmail.com>
1 parent 81b5338 commit 1d69158

2 files changed

Lines changed: 25 additions & 93 deletions

File tree

vulnerabilities/importers/nvd.py

Lines changed: 9 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ class NVDDataSource(DataSource):
4949
def updated_advisories(self):
5050
current_year = date.today().year
5151
# NVD json feeds start from 2002.
52-
for year in range(2002, current_year + 1):
52+
for year in range(2002, current_year+1):
5353
download_url = BASE_URL.format(year)
5454
# Etags are like hashes of web responses. We maintain
5555
# (url, etag) mappings in the DB. `create_etag` creates
@@ -74,7 +74,8 @@ def to_advisories(self, nvd_data):
7474
continue
7575

7676
cve_id = cve_item["cve"]["CVE_data_meta"]["ID"]
77-
references = self.extract_references(cve_item)
77+
ref_urls = self.extract_reference_urls(cve_item)
78+
references = [Reference(url=url) for url in ref_urls]
7879
summary = self.extract_summary(cve_item)
7980
yield Advisory(
8081
cve_id=cve_id, summary=summary, vuln_references=references, impacted_package_urls=[]
@@ -88,31 +89,18 @@ def extract_summary(cve_item):
8889
summaries = [desc["value"] for desc in cve_item["cve"]["description"]["description_data"]]
8990
return max(summaries, key=len)
9091

91-
def extract_references(self, cve_item):
92-
refs = []
92+
def extract_reference_urls(self, cve_item):
93+
urls = set()
9394
for reference in cve_item["cve"]["references"]["reference_data"]:
94-
ref_id = self.find_ref_id(reference)
9595
ref_url = reference["url"]
9696

97-
# Skip references which exceed db constraints
98-
if ref_id and len(ref_id) > 50:
97+
if not ref_url:
9998
continue
10099

101-
refs.append(Reference(url=ref_url, reference_id=ref_id))
100+
if ref_url.startswith("http") or ref_url.startswith("ftp"):
101+
urls.add(ref_url)
102102

103-
return refs
104-
105-
@staticmethod
106-
def find_ref_id(reference):
107-
if "https://" in reference["name"] or "http://" in reference["name"]:
108-
if "bugzilla" in reference["url"]:
109-
_, _, bugzilla_id = reference["url"].partition("?id=")
110-
return bugzilla_id
111-
112-
return ""
113-
114-
else:
115-
return reference["name"]
103+
return urls
116104

117105
def is_outdated(self, cve_item):
118106
cve_last_modified_date = cve_item["lastModifiedDate"]

vulnerabilities/tests/test_nvd.py

Lines changed: 16 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -87,38 +87,6 @@ def test_extract_summary_with_multiple_summary(self):
8787
found_summary = NVDDataSource.extract_summary(cve_item)
8888
assert found_summary == expected_summary
8989

90-
def test_find_ref_id_when_has_refid(self):
91-
92-
reference = {
93-
"url": "http://www.securityfocus.com/bid/12577",
94-
"name": "12577",
95-
"refsource": "BID",
96-
"tags": [],
97-
}
98-
99-
assert NVDDataSource.find_ref_id(reference) == "12577"
100-
101-
def test_find_ref_id_when_no_refid(self):
102-
103-
reference = {
104-
"url": "http://www.securityfocus.com/bid/12577",
105-
"name": "http://www.securityfocus.com/bid/12577",
106-
"refsource": "BID",
107-
"tags": [],
108-
}
109-
110-
assert NVDDataSource.find_ref_id(reference) == ""
111-
112-
def test_find_ref_id_when_has_bugzilla(self):
113-
114-
reference = {
115-
"url": "https://bugzilla.mozilla.org/show_bug.cgi?id=12309",
116-
"name": "https://bugzilla.mozilla.org/show_bug.cgi?id=12309",
117-
"refsource": "BID",
118-
"tags": [],
119-
}
120-
assert NVDDataSource.find_ref_id(reference) == "12309"
121-
12290
def test_is_outdated(self):
12391
cve_item = self.nvd_data["CVE_Items"][0]
12492
assert self.data_src.is_outdated(cve_item) is False
@@ -136,46 +104,24 @@ def test_is_outdated(self):
136104
assert self.data_src.is_outdated(cve_item) is False
137105
self.data_src.config.last_run_date = None # cleanup
138106

139-
def test_extract_references(self):
140-
141-
expected_refs = sorted(
142-
[
143-
Reference(url="http://ia.cr/2007/474", reference_id="2007"),
144-
Reference(url="http://shattered.io/", reference_id=""),
145-
Reference(
146-
url="http://www.cwi.nl/news/2017/cwi-and-google-announce-first-collision-industry-security-standard-sha-1", # nopep8
147-
reference_id="",
148-
),
149-
Reference(
150-
url="https://arstechnica.com/security/2017/02/at-deaths-door-for-years-widely-used-sha1-function-is-now-dead/", # nopep8
151-
reference_id="",
152-
),
153-
Reference(
154-
url="https://security.googleblog.com/2015/12/an-update-on-sha-1-certificates-in.html", # nopep8
155-
reference_id="",
156-
),
157-
Reference(
158-
url="https://security.googleblog.com/2017/02/announcing-first-sha1-collision.html", # nopep8
159-
reference_id="",
160-
),
161-
Reference(url="https://sites.google.com/site/itstheshappening", reference_id="",),
162-
Reference(
163-
url="https://www.schneier.com/blog/archives/2005/02/sha1_broken.html",
164-
reference_id="",
165-
),
166-
Reference(
167-
url="https://www.schneier.com/blog/archives/2005/08/new_cryptanalyt.html",
168-
reference_id="",
169-
),
170-
],
171-
key=lambda x: x.url,
172-
)
173-
107+
def test_extract_reference_urls(self):
174108
cve_item = self.nvd_data["CVE_Items"][1]
175-
found_refs = self.data_src.extract_references(cve_item)
176-
found_refs.sort(key=lambda x: x.url)
109+
expected_urls = {
110+
"http://ia.cr/2007/474",
111+
"http://shattered.io/",
112+
"http://www.cwi.nl/news/2017/cwi-and-google-announce-first-collision-industry-security-standard-sha-1", # nopep8
113+
"http://www.securityfocus.com/bid/12577",
114+
"https://arstechnica.com/security/2017/02/at-deaths-door-for-years-widely-used-sha1-function-is-now-dead/", # nopep8
115+
"https://security.googleblog.com/2015/12/an-update-on-sha-1-certificates-in.html",
116+
"https://security.googleblog.com/2017/02/announcing-first-sha1-collision.html",
117+
"https://sites.google.com/site/itstheshappening",
118+
"https://www.schneier.com/blog/archives/2005/02/sha1_broken.html",
119+
"https://www.schneier.com/blog/archives/2005/08/new_cryptanalyt.html",
120+
}
121+
122+
found_urls = self.data_src.extract_reference_urls(cve_item)
177123

178-
assert found_refs == expected_refs
124+
assert found_urls == expected_urls
179125

180126
def test_to_advisories(self):
181127

@@ -193,11 +139,9 @@ def test_to_advisories(self):
193139
[
194140
Reference(
195141
url="http://code.google.com/p/gperftools/source/browse/tags/perftools-0.4/ChangeLog", # nopep8
196-
reference_id="",
197142
),
198143
Reference(
199144
url="http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/", # nopep8
200-
reference_id="",
201145
),
202146
],
203147
key=lambda x: x.url,

0 commit comments

Comments
 (0)