Skip to content

Commit fe1d9ea

Browse files
committed
Split the project-kb into two separate pipelines
Update Project-KB importer Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent de2028a commit fe1d9ea

File tree

13 files changed

+9952
-2739
lines changed

13 files changed

+9952
-2739
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from vulnerabilities.importers import openssl
2525
from vulnerabilities.importers import oss_fuzz
2626
from vulnerabilities.importers import postgresql
27+
from vulnerabilities.importers import project_kb_msr2019
2728
from vulnerabilities.importers import redhat
2829
from vulnerabilities.importers import retiredotnet
2930
from vulnerabilities.importers import ruby
@@ -58,7 +59,12 @@
5859
from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
5960
from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
6061
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
61-
from vulnerabilities.pipelines.v2_importers import project_kb_importer as project_kb_importer_v2
62+
from vulnerabilities.pipelines.v2_importers import (
63+
project_kb_msr2019_importer as project_kb_msr2019_importer_v2,
64+
)
65+
from vulnerabilities.pipelines.v2_importers import (
66+
project_kb_statements_importer as project_kb_statements_importer_v2,
67+
)
6268
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
6369
from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
6470
from vulnerabilities.pipelines.v2_importers import redhat_importer as redhat_importer_v2
@@ -87,7 +93,8 @@
8793
github_osv_importer_v2.GithubOSVImporterPipeline,
8894
redhat_importer_v2.RedHatImporterPipeline,
8995
aosp_importer_v2.AospImporterPipeline,
90-
project_kb_importer_v2.ProjectKBPipeline,
96+
project_kb_statements_importer_v2.ProjectKBStatementsPipeline,
97+
project_kb_msr2019_importer_v2.ProjectKBMSR2019Pipeline,
9198
ruby_importer_v2.RubyImporterPipeline,
9299
epss_importer_v2.EPSSImporterPipeline,
93100
mattermost_importer_v2.MattermostImporterPipeline,
@@ -119,6 +126,7 @@
119126
mozilla.MozillaImporter,
120127
gentoo.GentooImporter,
121128
istio.IstioImporter,
129+
project_kb_msr2019.ProjectKBMSRImporter,
122130
suse_scores.SUSESeverityScoreImporter,
123131
elixir_security.ElixirSecurityImporter,
124132
xen.XenImporter,
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from vulnerabilities.importer import AdvisoryData
11+
from vulnerabilities.importer import Importer
12+
from vulnerabilities.importer import Reference
13+
from vulnerabilities.utils import fetch_and_read_from_csv
14+
from vulnerabilities.utils import is_cve
15+
16+
# Reading a CSV file from a URL using `requests` is a bit too complicated.
17+
# Use `urllib.request` for that purpose.
18+
19+
20+
class ProjectKBMSRImporter(Importer):
    """
    Import fix-commit references from the ProjectKB MSR 2019 CSV dataset.

    Every usable CSV row is turned into one ``AdvisoryData`` whose single
    alias is the CVE id from the row and whose single reference is the
    fix-commit URL built from the row's repository URL and commit hash.
    """

    url = "https://raw.githubusercontent.com/SAP/project-kb/master/MSR2019/dataset/vulas_db_msr2019_release.csv"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    importer_name = "ProjectKB MSRImporter"

    def advisory_data(self):
        """
        Fetch the CSV at ``self.url`` and yield the advisories built from it.
        """
        # NOTE(review): presumably returns an iterable of row sequences
        # (csv.reader-like) -- confirm against vulnerabilities.utils.
        raw_data = fetch_and_read_from_csv(self.url)
        yield from self.to_advisories(raw_data)

    def to_advisories(self, csv_reader):
        """
        Yield one ``AdvisoryData`` per well-formed row of ``csv_reader``.

        A row is used only when it has exactly four columns
        (vulnerability id, project home URL, fix commit, and a fourth
        ignored column), the vulnerability id passes ``is_cve`` and both
        the project home URL and the fix commit are non-empty. Any other
        row is skipped instead of aborting the whole import.
        """
        # Project KB MSR csv file has no header row
        for row in csv_reader:
            # Blank or truncated rows cannot be unpacked into four fields;
            # skip them rather than raising ValueError mid-import.
            if len(row) != 4:
                continue

            vuln_id, proj_home, fix_commit, _ = row

            if not is_cve(vuln_id):
                continue

            # Without both parts the commit link would be meaningless.
            if not proj_home or not fix_commit:
                continue

            commit_link = f"{proj_home}/commit/{fix_commit}"
            reference = Reference(url=commit_link)
            yield AdvisoryData(
                aliases=[vuln_id],
                summary="",
                references=[reference],
                url=self.url,
            )
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import csv
11+
from pathlib import Path
12+
from typing import Iterable
13+
14+
from fetchcode.vcs import fetch_via_vcs
15+
16+
from vulnerabilities.importer import AdvisoryData
17+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
18+
from vulnerabilities.pipes.advisory import append_patch_classifications
19+
20+
21+
class ProjectKBMSR2019Pipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    ProjectKB Importer Pipeline
    Collect advisory from ProjectKB data:
    - CSV database https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv

    The pipeline clones the ProjectKB repository, reads the MSR 2019 CSV
    from the clone, yields one ``AdvisoryData`` per usable row and finally
    removes the clone from disk.
    """

    pipeline_id = "project-kb-MSR-2019_v2"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    repo_url = "git+https://github.com/SAP/project-kb"

    # Location of the dataset inside the cloned repository.
    _CSV_RELATIVE_PATH = "MSR2019/dataset/vulas_db_msr2019_release.csv"
    # Browsable URL of the dataset, recorded on every advisory.
    _ADVISORY_URL = "https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, collect/store, clean up."""
        return (cls.clone_repo, cls.collect_and_store_advisories, cls.clean_downloads)

    def clone_repo(self):
        """Clone ``repo_url`` and keep the result on ``self.vcs_response``."""
        self.log("Cloning ProjectKB advisory data...")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def _csv_path(self):
        """Return the path of the MSR 2019 CSV inside the cloned repository."""
        return Path(self.vcs_response.dest_dir) / self._CSV_RELATIVE_PATH

    def advisories_count(self):
        """
        Return the number of CSV rows after the first one.

        This is only an estimate: ``collect_advisories`` additionally drops
        rows that are malformed or have empty fields.
        """
        with open(self._csv_path(), newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            # Kept in step with collect_advisories, which skips the first row.
            next(reader, None)
            count = sum(1 for _ in reader)

        self.log(f"Estimated advisories to process: {count}")
        return count

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield one ``AdvisoryData`` per usable row of the MSR 2019 CSV.

        A row is usable when it has exactly four columns
        (vuln_id, vcs_url, commit_hash, poc) and the first three are
        non-empty; the fourth column is ignored.
        """
        self.log("Collecting fix commits from ProjectKB ( vulas_db_msr2019_release )...")

        # Rows are read fully before yielding so the file is closed
        # before any advisory reaches the caller.
        with open(self._csv_path(), newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            # NOTE(review): the first row is skipped as a header, but the
            # legacy ProjectKBMSRImporter states this CSV has no header row.
            # Confirm that this does not drop a real record.
            next(reader, None)  # skip header
            rows = [r for r in reader if len(r) == 4 and all(r[:3])]

        for vuln_id, vcs_url, commit_hash, _ in rows:
            patches = []
            affected_packages = []
            references = []
            # NOTE(review): presumably fills the three lists in place based
            # on how the commit URL is classified -- confirm against
            # vulnerabilities.pipes.advisory.
            append_patch_classifications(
                url=vcs_url,
                commit_hash=commit_hash,
                patch_text=None,
                affected_packages=affected_packages,
                references=references,
                patches=patches,
            )

            yield AdvisoryData(
                advisory_id=vuln_id,
                affected_packages=affected_packages,
                patches=patches,
                references_v2=references,
                url=self._ADVISORY_URL,
            )

    def clean_downloads(self):
        """Remove the cloned repository from disk."""
        self.log("Removing cloned repository...")
        # on_failure may call this before clone_repo ever assigned
        # vcs_response (e.g. when the clone itself failed); a plain
        # attribute access would then raise AttributeError and mask the
        # original error.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()

    def on_failure(self):
        """Ensure cleanup happens on pipeline failure."""
        self.clean_downloads()

0 commit comments

Comments
 (0)