Skip to content

Commit 575a911

Browse files
committed
Add CloudVulnDB importer
- add CloudVulnDB v2 importer pipeline - register importer - add tests and fixtures - ignore setup.py in pytest collection
1 parent 2ff2906 commit 575a911

File tree

6 files changed

+285
-0
lines changed

6 files changed

+285
-0
lines changed

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ addopts = [
4444
"-rfExXw",
4545
"--strict-markers",
4646
"--doctest-modules",
47+
# setup.py imports setuptools which is not available in the Docker runtime
48+
# image. Without this, pytest (which uses python_files = "*.py") tries to
49+
# collect setup.py as a test module and crashes with exit code 2.
50+
"--ignore=setup.py",
51+
"--ignore-glob=*/setup.py",
4752
# Ignore the following doctests until these files are migrated to
4853
# import-improve structure
4954
"--ignore=vulnerabilities/importers/apache_httpd.py",

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from vulnerabilities.pipelines.v2_importers import apache_kafka_importer as apache_kafka_importer_v2
4848
from vulnerabilities.pipelines.v2_importers import apache_tomcat_importer as apache_tomcat_v2
4949
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
50+
from vulnerabilities.pipelines.v2_importers import cloudvulndb_importer as cloudvulndb_importer_v2
5051
from vulnerabilities.pipelines.v2_importers import collect_fix_commits as collect_fix_commits_v2
5152
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import debian_importer as debian_importer_v2
@@ -109,6 +110,7 @@
109110
project_kb_msr2019_importer_v2.ProjectKBMSR2019Pipeline,
110111
ruby_importer_v2.RubyImporterPipeline,
111112
epss_importer_v2.EPSSImporterPipeline,
113+
cloudvulndb_importer_v2.CloudVulnDBImporterPipeline,
112114
gentoo_importer_v2.GentooImporterPipeline,
113115
nginx_importer_v2.NginxImporterPipeline,
114116
debian_importer_v2.DebianImporterPipeline,
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import hashlib
11+
import json
12+
import logging
13+
from typing import Iterable
14+
from urllib.parse import urlparse
15+
from xml.etree import ElementTree
16+
17+
from dateutil import parser as dateutil_parser
18+
19+
from vulnerabilities.importer import AdvisoryDataV2
20+
from vulnerabilities.importer import ReferenceV2
21+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
22+
from vulnerabilities.utils import fetch_response
23+
from vulnerabilities.utils import find_all_cve
24+
25+
logger = logging.getLogger(__name__)
26+
27+
CLOUDVULNDB_RSS_URL = "https://www.cloudvulndb.org/rss/feed.xml"
28+
29+
30+
class CloudVulnDBImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import advisories published in the public CloudVulnDB RSS feed.

    The parsed feed items are cached on the instance after the first
    successful fetch and reused for both counting and collecting advisories.
    """

    pipeline_id = "cloudvulndb_importer"
    spdx_license_expression = "CC-BY-4.0"
    license_url = "https://github.com/wiz-sec/open-cvdb/blob/main/LICENSE.md"
    repo_url = "https://github.com/wiz-sec/open-cvdb"
    precedence = 200

    # Parsed feed items; stays None until get_feed_items() has fetched the feed.
    _cached_items = None

    @classmethod
    def steps(cls):
        """Return the pipeline steps as a tuple."""
        return (cls.collect_and_store_advisories,)

    def get_feed_items(self):
        """Return the parsed RSS items, downloading and parsing the feed on first use."""
        if self._cached_items is not None:
            return self._cached_items

        feed = fetch_response(CLOUDVULNDB_RSS_URL)
        self._cached_items = parse_rss_feed(feed.text)
        return self._cached_items

    def advisories_count(self) -> int:
        """Return the number of items found in the feed."""
        return len(self.get_feed_items())

    def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
        """Yield one AdvisoryDataV2 per feed item, skipping items that parse to a falsy result."""
        parsed = (parse_advisory_data(entry) for entry in self.get_feed_items())
        yield from (advisory for advisory in parsed if advisory)
59+
60+
61+
def parse_rss_feed(xml_text: str) -> list:
    """
    Return the ``<item>`` entries of the CloudVulnDB RSS ``xml_text`` as a list of dicts.

    Every dict maps ``title``, ``link``, ``description``, ``pub_date`` and ``guid``
    to the stripped text of the matching child element, or to an empty string
    when that element is missing or has no text.
    Malformed XML, or a document without a ``<channel>`` child under the root,
    is logged as an error and produces an empty list.
    """
    # NOTE(review): the Python docs state xml.etree is not secure against
    # maliciously constructed data -- confirm the feed can be treated as trusted.
    try:
        channel = ElementTree.fromstring(xml_text).find("channel")
    except ElementTree.ParseError as e:
        logger.error("Failed to parse CloudVulnDB RSS XML: %s", e)
        return []

    if channel is None:
        logger.error("CloudVulnDB RSS feed has no <channel> element")
        return []

    # Output dictionary key -> RSS child tag; insertion order sets the key order.
    tags_by_key = {
        "title": "title",
        "link": "link",
        "description": "description",
        "pub_date": "pubDate",
        "guid": "guid",
    }
    return [
        {key: (entry.findtext(tag) or "").strip() for key, tag in tags_by_key.items()}
        for entry in channel.findall("item")
    ]
90+
91+
92+
def parse_advisory_data(item: dict):
    """
    Build an AdvisoryDataV2 from one parsed CloudVulnDB RSS ``item``.

    ``item`` is read through the ``title``, ``link``, ``description``,
    ``pub_date`` and ``guid`` keys; missing or falsy values count as empty strings.
    Return None, after logging an error, when no advisory identifier can be
    derived from the item. ``affected_packages`` is always empty because the RSS
    feed does not provide package/version coordinates.
    """
    title, link, description, pub_date, guid = (
        item.get(key) or "" for key in ("title", "link", "description", "pub_date", "guid")
    )

    advisory_id = get_advisory_id(guid=guid, link=link, title=title, pub_date=pub_date)
    if not advisory_id:
        logger.error("Skipping advisory with no usable identifier: %r", item)
        return None

    # CVE ids found in the title and description, in first-seen order, without
    # duplicates and without the advisory's own identifier.
    aliases = []
    for cve in find_all_cve(f"{title}\n{description}"):
        if cve != advisory_id and cve not in aliases:
            aliases.append(cve)

    # An unparsable date is only logged; the advisory is kept without a date.
    date_published = None
    if pub_date:
        try:
            date_published = dateutil_parser.parse(pub_date)
        except Exception as e:
            logger.warning("Could not parse date %r for advisory %s: %s", pub_date, advisory_id, e)

    references = [ReferenceV2(url=link)] if link else []

    return AdvisoryDataV2(
        advisory_id=advisory_id,
        aliases=aliases,
        summary=title or description,
        affected_packages=[],
        references=references,
        date_published=date_published,
        # Fall back to the feed URL when the item carries no link of its own.
        url=link or CLOUDVULNDB_RSS_URL,
        original_advisory_text=json.dumps(item, indent=2, ensure_ascii=False),
    )
134+
135+
136+
def get_advisory_id(guid: str, link: str, title: str, pub_date: str) -> str:
    """
    Return a stable advisory identifier using the best available source.

    Preference order is the stripped GUID, then the last path segment of
    ``link``, then ``cloudvulndb-`` followed by the first 16 hex characters of
    the SHA-256 of ``"<title>|<pub_date>"``.
    Any argument may be None or empty. Return an empty string when none of them
    yields an identifier.
    """
    guid = (guid or "").strip()
    if guid:
        return guid

    slug = advisory_slug_from_link(link)
    if slug:
        return slug

    # Normalize like ``guid`` above so that a missing (None) title or date
    # cannot raise AttributeError on the hash fallback path.
    title = (title or "").strip()
    pub_date = (pub_date or "").strip()

    fingerprint_source = "|".join([title, pub_date])
    if not fingerprint_source.strip("|"):
        return ""

    digest = hashlib.sha256(fingerprint_source.encode("utf-8")).hexdigest()[:16]
    return f"cloudvulndb-{digest}"


def advisory_slug_from_link(link: str) -> str:
    """
    Return the last non-empty path segment of the ``link`` URL, stripped.

    Return an empty string when ``link`` is empty, is not a string, cannot be
    parsed as a URL, or has no path segment.
    """
    if not link or not isinstance(link, str):
        return ""

    try:
        path = urlparse(link).path
    except ValueError:
        # urlparse rejects malformed URLs such as an unbalanced IPv6 host.
        return ""

    parts = [part for part in path.split("/") if part]
    return parts[-1].strip() if parts else ""
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import os
11+
from unittest import TestCase
12+
13+
from vulnerabilities.pipelines.v2_importers.cloudvulndb_importer import advisory_slug_from_link
14+
from vulnerabilities.pipelines.v2_importers.cloudvulndb_importer import get_advisory_id
15+
from vulnerabilities.pipelines.v2_importers.cloudvulndb_importer import parse_advisory_data
16+
from vulnerabilities.pipelines.v2_importers.cloudvulndb_importer import parse_rss_feed
17+
from vulnerabilities.tests import util_tests
18+
19+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
20+
TEST_DATA = os.path.join(BASE_DIR, "test_data/cloudvulndb")
21+
22+
23+
def _load_rss(filename="cloudvulndb_rss_mock.xml"):
    """Return the UTF-8 text of the fixture ``filename`` located under TEST_DATA."""
    fixture_path = os.path.join(TEST_DATA, filename)
    with open(fixture_path, encoding="utf-8") as fixture:
        return fixture.read()
26+
27+
28+
class TestCloudVulnDBImporter(TestCase):
    """Unit tests for the CloudVulnDB RSS parsing and advisory identifier helpers."""

    def test_parse_rss_feed_returns_correct_item_count(self):
        # The mock feed holds exactly two <item> entries.
        self.assertEqual(len(parse_rss_feed(_load_rss())), 2)

    def test_parse_advisory_with_guid_and_cves(self):
        first_item = parse_rss_feed(_load_rss())[0]
        advisory = parse_advisory_data(first_item)
        self.assertIsNotNone(advisory)

        expected_file = os.path.join(TEST_DATA, "expected_cloudvulndb_advisory_output1.json")
        util_tests.check_results_against_json(advisory.to_dict(), expected_file)

    def test_parse_advisory_without_guid_falls_back_to_link_slug(self):
        # The second mock item has an empty <guid> and mentions no CVE.
        second_item = parse_rss_feed(_load_rss())[1]
        advisory = parse_advisory_data(second_item)
        self.assertIsNotNone(advisory)
        self.assertEqual(advisory.advisory_id, "azure-imds-ssrf")
        self.assertEqual(advisory.aliases, [])

    def test_get_advisory_id_hash_fallback(self):
        prefix = "cloudvulndb-"
        advisory_id = get_advisory_id(
            guid="",
            link="",
            title="Example advisory title",
            pub_date="Mon, 08 Jul 2024 00:00:00 GMT",
        )
        self.assertTrue(advisory_id.startswith(prefix))
        # The prefix is followed by a 16-character digest.
        self.assertEqual(len(advisory_id), len(prefix) + 16)

    def test_parse_rss_feed_invalid_xml_returns_empty(self):
        self.assertEqual(parse_rss_feed("not valid xml <>>>"), [])

    def test_advisory_slug_from_link(self):
        link = "https://www.cloudvulndb.org/vulnerabilities/aws-example/"
        self.assertEqual(advisory_slug_from_link(link), "aws-example")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<rss version="2.0">
3+
<channel>
4+
<title>CloudVulnDB RSS</title>
5+
<link>https://www.cloudvulndb.org</link>
6+
<description>Cloud vulnerabilities and security issues</description>
7+
<item>
8+
<title><![CDATA[AWS Example Privilege Escalation (CVE-2024-11111)]]></title>
9+
<link>https://www.cloudvulndb.org/vulnerabilities/aws-example-privilege-escalation</link>
10+
<guid isPermaLink="false">CLOUD-2024-0001</guid>
11+
<pubDate>Tue, 04 Jun 2024 12:30:00 GMT</pubDate>
12+
<description><![CDATA[An example cloud vulnerability. Additional tracking: CVE-2024-22222.]]></description>
13+
</item>
14+
<item>
15+
<title><![CDATA[Azure IMDS SSRF Exposure]]></title>
16+
<link>https://www.cloudvulndb.org/vulnerabilities/azure-imds-ssrf</link>
17+
<guid></guid>
18+
<pubDate>Fri, 05 Jul 2024 08:00:00 GMT</pubDate>
19+
<description><![CDATA[No CVE assigned.]]></description>
20+
</item>
21+
</channel>
22+
</rss>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"advisory_id": "CLOUD-2024-0001",
3+
"aliases": [
4+
"CVE-2024-11111",
5+
"CVE-2024-22222"
6+
],
7+
"summary": "AWS Example Privilege Escalation (CVE-2024-11111)",
8+
"affected_packages": [],
9+
"references": [
10+
{
11+
"reference_id": "",
12+
"reference_type": "",
13+
"url": "https://www.cloudvulndb.org/vulnerabilities/aws-example-privilege-escalation"
14+
}
15+
],
16+
"patches": [],
17+
"severities": [],
18+
"date_published": "2024-06-04T12:30:00+00:00",
19+
"weaknesses": [],
20+
"url": "https://www.cloudvulndb.org/vulnerabilities/aws-example-privilege-escalation"
21+
}

0 commit comments

Comments
 (0)