Skip to content

Commit 23c8369

Browse files
committed
Add initial migration to Gentoo importer v2
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent df91a2c commit 23c8369

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
elixir_security_importer as elixir_security_importer_v2,
5050
)
5151
from vulnerabilities.pipelines.v2_importers import epss_importer_v2
52+
from vulnerabilities.pipelines.v2_importers import gentoo_importer as gentoo_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2
5354
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
5455
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
@@ -89,6 +90,7 @@
8990
aosp_importer_v2.AospImporterPipeline,
9091
ruby_importer_v2.RubyImporterPipeline,
9192
epss_importer_v2.EPSSImporterPipeline,
93+
gentoo_importer_v2.GentooImporterPipeline,
9294
mattermost_importer_v2.MattermostImporterPipeline,
9395
nvd_importer.NVDImporterPipeline,
9496
github_importer.GitHubAPIImporterPipeline,
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import re
11+
import xml.etree.ElementTree as ET
12+
from pathlib import Path
13+
from typing import Iterable
14+
15+
from fetchcode.vcs import fetch_via_vcs
16+
from packageurl import PackageURL
17+
from univers.version_constraint import VersionConstraint
18+
from univers.version_range import EbuildVersionRange
19+
from univers.versions import GentooVersion
20+
21+
from vulnerabilities.importer import AdvisoryData
22+
from vulnerabilities.importer import AffectedPackageV2
23+
from vulnerabilities.importer import ReferenceV2
24+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
25+
26+
27+
class GentooImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import Gentoo Linux Security Advisories (GLSA) from the upstream git repository.

    The pipeline clones ``repo_url``, parses every ``*.xml`` file found in the
    checkout and yields one ``AdvisoryData`` per CVE referenced by each file.
    """

    repo_url = "git+https://anongit.gentoo.org/git/data/glsa.git"
    spdx_license_expression = "CC-BY-SA-4.0"
    # The license notice at https://anongit.gentoo.org/ says:
    # The contents of this document, unless otherwise expressly stated, are licensed
    # under the [CC-BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/) license.
    license_url = "https://creativecommons.org/licenses/by-sa/4.0/"
    pipeline_id = "gentoo_importer_v2"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, collect/store, clean up."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """Fetch the GLSA repository and keep the VCS response on ``self.vcs_response``."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self):
        """Return the number of XML files found (recursively) in the cloned repository."""
        advisory_dir = Path(self.vcs_response.dest_dir)
        return sum(1 for _ in advisory_dir.rglob("*.xml"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield the advisories parsed from every XML file in the cloned repository."""
        base_path = Path(self.vcs_response.dest_dir)
        for file_path in base_path.rglob("*.xml"):
            yield from self.process_file(file_path)

    def process_file(self, file) -> Iterable[AdvisoryData]:
        """
        Parse the GLSA XML ``file`` and yield one ``AdvisoryData`` per CVE it references.

        Nothing is yielded when the file references no CVE.
        """
        cves = []
        summary = ""
        vuln_references = []
        # Must be initialized: an advisory without an <affected> element would
        # otherwise hit an unbound local when the AdvisoryData is built below.
        affected_packages = []

        xml_root = ET.parse(file).getroot()
        glsa_id = xml_root.attrib.get("id")
        if glsa_id:
            advisory_url = f"https://security.gentoo.org/glsa/{glsa_id}"
            vuln_references = [
                ReferenceV2(
                    reference_id="GLSA-" + glsa_id,
                    url=advisory_url,
                )
            ]
        else:
            advisory_url = "https://security.gentoo.org/glsa"

        for child in xml_root:
            if child.tag == "references":
                cves = self.cves_from_reference(child)
            elif child.tag == "synopsis":
                # An empty <synopsis/> has no text; keep the summary a string.
                summary = child.text or ""
            elif child.tag == "affected":
                affected_packages = list(affected_and_safe_purls(child))

        # It is very inefficient to create a new Advisory for each CVE
        # this way, but there seems to be no alternative.
        for cve in cves:
            yield AdvisoryData(
                advisory_id=cve,
                aliases=[cve],
                summary=summary,
                references=vuln_references,
                affected_packages=affected_packages,
                url=advisory_url,
            )

    def clean_downloads(self):
        """Delete the cloned repository, if there is one."""
        # `on_failure` also calls this, possibly after `clone` failed before
        # `vcs_response` was assigned, so do not assume the attribute exists.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            self.log("Removing cloned repository")
            vcs_response.delete()

    def on_failure(self):
        """Remove the cloned repository when the pipeline fails."""
        self.clean_downloads()

    @staticmethod
    def cves_from_reference(reference):
        """
        Return the list of CVE ids found in the children of the ``reference`` element.

        Only children whose stripped text starts with a ``CVE-YYYY-NNNN`` id are kept.
        """
        cves = []
        for child in reference:
            # Element.text is None for an empty element such as <uri/>.
            txt = (child.text or "").strip()
            match = re.match(r"CVE-\d{4}-\d{4,}", txt)
            if match:
                cves.append(match.group())
        return cves
113+
114+
115+
def affected_and_safe_purls(affected_elem):
    """
    Yield an ``AffectedPackageV2`` for each named child of the ``affected_elem`` element.

    Children without a ``name`` attribute, and children for which no version
    constraint could be derived, are skipped. The name is split on its last
    ``/`` into the purl namespace and name.
    """
    for pkg in affected_elem:
        name = pkg.attrib.get("name")
        if not name:
            continue
        pkg_ns, _, pkg_name = name.rpartition("/")
        purl = PackageURL(type="ebuild", name=pkg_name, namespace=pkg_ns)
        safe_versions, affected_versions = get_safe_and_affected_versions(pkg)

        # Build the constraints afresh for every package: a list shared across
        # iterations would leak the constraints of earlier packages into the
        # version range of every later package of the same advisory.
        constraints = [
            VersionConstraint(version=GentooVersion(version), comparator="=").invert()
            for version in safe_versions
        ]
        constraints.extend(
            VersionConstraint(version=GentooVersion(version), comparator="=")
            for version in affected_versions
        )

        if not constraints:
            continue

        yield AffectedPackageV2(
            package=purl,
            affected_version_range=EbuildVersionRange(constraints=constraints),
            fixed_version_range=None,
        )
141+
142+
143+
def get_safe_and_affected_versions(pkg):
    """
    Return a ``(safe_versions, affected_versions)`` tuple of sets of version
    strings collected from the ``unaffected`` and ``vulnerable`` children of
    the ``pkg`` element.

    Children are skipped when they have no text, when their text is one of the
    hard-coded ``skip_versions``, or when their ``range`` attribute is missing
    or longer than two characters.
    """
    # TODO : Revisit why we are skipping some versions in gentoo importer
    skip_versions = {"1.3*", "7.3*", "7.4*"}
    safe_versions = set()
    affected_versions = set()
    for info in pkg:
        version = info.text
        # An empty element has no version to record; adding None to the result
        # would only fail later when the version is parsed.
        if not version or version in skip_versions:
            continue

        # A missing range cannot be classified (and used to raise KeyError);
        # three-letter ranges ('rle', 'rge', 'rgt') are ignored because they
        # compare the 'release', not the 'version'.
        version_range = info.attrib.get("range")
        if not version_range or len(version_range) > 2:
            continue

        # Quick hack to know whether this version lies in the range: 'e' stands
        # for equal, which is paired with 'greater' or 'less'.
        # All possible values of the range attribute are
        # {'gt', 'lt', 'rle', 'rge', 'rgt', 'le', 'ge', 'eq'}.
        includes_version = "e" in version_range

        if info.tag == "unaffected":
            if includes_version:
                safe_versions.add(version)
            else:
                affected_versions.add(version)
        elif info.tag == "vulnerable":
            if includes_version:
                affected_versions.add(version)
            else:
                safe_versions.add(version)

    return safe_versions, affected_versions

0 commit comments

Comments
 (0)