Skip to content

Commit 6318895

Browse files
committed
Refactor the pipeline for collecting GitHub/GitLab issues and PRs
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 575fedc commit 6318895

File tree

12 files changed

+258
-234
lines changed

12 files changed

+258
-234
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,4 @@ websocket-client==0.59.0
127127
yarl==1.7.2
128128
zipp==3.19.1
129129
PyGithub==2.6.1
130+
python-gitlab~=7.1.0

vulnerabilities/importers/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@
4444
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
4545
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4646
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
47+
from vulnerabilities.pipelines.v2_importers import collect_issue_pr as collect_issue_pr_v2
4748
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
4849
from vulnerabilities.pipelines.v2_importers import (
4950
elixir_security_importer as elixir_security_importer_v2,
5051
)
5152
from vulnerabilities.pipelines.v2_importers import epss_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2
53-
from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2
5454
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
5555
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
5656
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
@@ -101,7 +101,7 @@
101101
epss_importer_v2.EPSSImporterPipeline,
102102
nginx_importer_v2.NginxImporterPipeline,
103103
mattermost_importer_v2.MattermostImporterPipeline,
104-
github_issue_pr_v2.GithubPipelineIssuePRPipeline,
104+
collect_issue_pr_v2.CollectIssuePRPipeline,
105105
nvd_importer.NVDImporterPipeline,
106106
github_importer.GitHubAPIImporterPipeline,
107107
gitlab_importer.GitLabImporterPipeline,
@@ -137,5 +137,7 @@
137137
ubuntu_usn.UbuntuUSNImporter,
138138
fireeye.FireyeImporter,
139139
oss_fuzz.OSSFuzzImporter,
140+
collect_issue_pr_v2.CollectKubernetesPRSIssues,
141+
collect_issue_pr_v2.CollectWiresharkPRSIssues,
140142
]
141143
)

vulnerabilities/pipelines/__init__.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,33 @@
88
#
99

1010
import logging
11+
import re
1112
import traceback
13+
from abc import abstractmethod
14+
from collections import defaultdict
1215
from datetime import datetime
1316
from datetime import timezone
1417
from timeit import default_timer as timer
1518
from traceback import format_exc as traceback_format_exc
1619
from typing import Iterable
1720
from typing import List
21+
from urllib.parse import urlparse
1822

23+
import gitlab
1924
from aboutcode.pipeline import LoopProgress
2025
from aboutcode.pipeline import PipelineDefinition
2126
from aboutcode.pipeline import humanize_time
27+
from github import Github
2228

2329
from vulnerabilities.importer import AdvisoryData
30+
from vulnerabilities.importer import ReferenceV2
2431
from vulnerabilities.improver import MAX_CONFIDENCE
2532
from vulnerabilities.models import Advisory
2633
from vulnerabilities.models import PipelineRun
2734
from vulnerabilities.pipes.advisory import import_advisory
2835
from vulnerabilities.pipes.advisory import insert_advisory
2936
from vulnerabilities.pipes.advisory import insert_advisory_v2
37+
from vulnerablecode.settings import env
3038

3139
module_logger = logging.getLogger(__name__)
3240

@@ -321,3 +329,99 @@ def collect_and_store_advisories(self):
321329
continue
322330

323331
self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
332+
333+
334+
class VCSCollector(VulnerableCodeBaseImporterPipeline):
    """
    Pipeline to collect GitHub/GitLab issues and PRs related to vulnerabilities.

    Subclasses provide ``repo_url`` and implement ``fetch_entries`` and
    ``collect_items``. ``collect_items`` is expected to fill ``collected_items``
    with a mapping of vulnerability id to a list of ``(reference_type, url)``
    tuples, which ``collect_advisories`` turns into ``AdvisoryData`` objects.
    """

    # NOTE(review): declared but not read anywhere in this class; kept so
    # existing references to the annotation keep working.
    vcs_url: str
    # Repository URL read by ``configure_target``; its path must start with
    # ``<owner>/<name>``. Concrete subclasses assign it.
    repo_url: str
    CVE_PATTERN = re.compile(r"(CVE-\d{4}-\d+)", re.IGNORECASE)
    SUPPORTED_IDENTIFIERS = ["CVE-"]

    # Class-level fallback only: ``configure_target`` rebinds a fresh mapping on
    # the instance so that instances never share collected state.
    collected_items: dict = {}

    def advisories_count(self) -> int:
        """
        Return the number of advisories ``collect_advisories`` will yield.

        One advisory is produced per collected vulnerability id, so this is the
        size of ``collected_items`` (0 until items have been collected).
        """
        return len(self.collected_items)

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they are listed here."""
        return (
            cls.configure_target,
            cls.fetch_entries,
            cls.collect_items,
            cls.collect_and_store_advisories,
        )

    def configure_target(self):
        """
        Derive ``repo_name`` (``<owner>/<name>``) from ``repo_url``.

        Raises ValueError when the URL path has fewer than two segments.
        """
        parsed_url = urlparse(self.repo_url)
        parts = parsed_url.path.strip("/").split("/")
        if len(parts) < 2:
            raise ValueError(f"Invalid URL: {self.repo_url}")

        self.repo_name = f"{parts[0]}/{parts[1]}"
        # Start every run from an empty, per-instance mapping.
        self.collected_items = defaultdict(list)

    @abstractmethod
    def fetch_entries(self):
        """Fetch the raw issue and PR entries from the hosting service."""
        raise NotImplementedError

    @abstractmethod
    def collect_items(self):
        """Populate ``collected_items`` from the fetched entries."""
        raise NotImplementedError

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its related GitHub/Gitlab issues and PRs.
        """
        self.log("Generating AdvisoryData objects from GitHub/Gitlab issues and PRs.")
        for vuln_id, refs in self.collected_items.items():
            # Each entry of ``refs`` is a ``(reference_type, url)`` tuple.
            references = [ReferenceV2(reference_type=ref_id, url=url) for ref_id, url in refs]
            yield AdvisoryData(
                advisory_id=vuln_id,
                aliases=[],
                references_v2=references,
                url=self.repo_url,
            )
387+
388+
389+
class GitHubCollector(VCSCollector):
    """Collect vulnerability id mentions from a GitHub repository's issues and PRs."""

    def fetch_entries(self):
        """
        Fetch GitHub Data Entries

        Reads the ``GITHUB_TOKEN`` setting and stores the issue and PR search
        results for ``repo_name`` on ``self.issues`` and ``self.prs``.
        """
        github_token = env.str("GITHUB_TOKEN")
        g = Github(login_or_token=github_token)
        base_query = f"repo:{self.repo_name} ({' OR '.join(self.SUPPORTED_IDENTIFIERS)})"
        self.issues = g.search_issues(f"{base_query} is:issue")
        self.prs = g.search_issues(f"{base_query} is:pr")

    def collect_items(self):
        """
        Group the fetched issues and PRs by the vulnerability ids they mention.

        Fills ``collected_items`` with ``{vuln_id: [(reference_type, url), ...]}``
        where ``reference_type`` is ``"Issue"`` or ``"PR"``.
        """
        self.collected_items = defaultdict(list)

        for i_type, items in [("Issue", self.issues), ("PR", self.prs)]:
            for item in items:
                text = (item.title or "") + " " + (item.body or "")
                matches = self.CVE_PATTERN.findall(text)
                # The pattern is case-insensitive: normalize to upper case so
                # "cve-..." and "CVE-..." land on the same advisory, and drop
                # repeats so one item yields at most one reference per id.
                for vuln_id in dict.fromkeys(match.upper() for match in matches):
                    # Record the actual entry type; PRs must not be labelled "Issue".
                    self.collected_items[vuln_id].append((i_type, item.html_url))
406+
407+
408+
class GitLabCollector(VCSCollector):
    """Collect vulnerability id mentions from a GitLab project's issues and merge requests."""

    def fetch_entries(self):
        """
        Fetch GitLab Data Entries

        Reads the ``GITLAB_TOKEN`` setting and stores the issue and merge
        request search results for ``repo_name`` on ``self.issues`` and
        ``self.prs``.
        """
        gitlab_token = env.str("GITLAB_TOKEN")
        gl = gitlab.Gitlab("https://gitlab.com/", private_token=gitlab_token)
        project = gl.projects.get(self.repo_name)
        base_query = " ".join(self.SUPPORTED_IDENTIFIERS)
        # iterator=True makes python-gitlab follow pagination lazily; without it
        # only the first page of search results is returned.
        self.issues = project.search(scope="issues", search=base_query, iterator=True)
        self.prs = project.search(scope="merge_requests", search=base_query, iterator=True)

    def collect_items(self):
        """
        Group the fetched issues and merge requests by the vulnerability ids they mention.

        Fills ``collected_items`` with ``{vuln_id: [(reference_type, url), ...]}``
        where ``reference_type`` is ``"Issue"`` or ``"PR"``.
        """
        self.collected_items = defaultdict(list)
        for i_type, items in [("Issue", self.issues), ("PR", self.prs)]:
            for item in items:
                title = item.get("title") or ""
                description = item.get("description") or ""
                matches = self.CVE_PATTERN.findall(title + " " + description)
                url = item.get("web_url")
                # The pattern is case-insensitive: normalize to upper case so
                # "cve-..." and "CVE-..." land on the same advisory, and drop
                # repeats so one item yields at most one reference per id.
                for vuln_id in dict.fromkeys(match.upper() for match in matches):
                    self.collected_items[vuln_id].append((i_type, url))
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
from vulnerabilities.pipelines import GitHubCollector
10+
from vulnerabilities.pipelines import GitLabCollector
11+
12+
13+
class CollectKubernetesPRSIssues(GitHubCollector):
    """GitHubCollector pipeline bound to the kubernetes/kubernetes repository."""

    repo_url = "https://github.com/kubernetes/kubernetes"
    pipeline_id = "collect-kubernetes-prs-issues"
16+
17+
18+
class CollectWiresharkPRSIssues(GitLabCollector):
    """GitLabCollector pipeline bound to the wireshark/wireshark project."""

    repo_url = "https://gitlab.com/wireshark/wireshark"
    pipeline_id = "collect-wireshark-prs-issues"

vulnerabilities/pipelines/v2_importers/github_issue_pr.py

Lines changed: 0 additions & 92 deletions
This file was deleted.

0 commit comments

Comments
 (0)