Skip to content

Commit 7975a38

Browse files
authored
Merge pull request #1992 from ziadhany/parsing-commit
Add support for parsing Git commit messages
2 parents 376a4d5 + 7261a6b commit 7975a38

File tree

6 files changed

+594
-0
lines changed

6 files changed

+594
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from vulnerabilities.pipelines.v2_importers import apache_kafka_importer as apache_kafka_importer_v2
4848
from vulnerabilities.pipelines.v2_importers import apache_tomcat_importer as apache_tomcat_v2
4949
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
50+
from vulnerabilities.pipelines.v2_importers import collect_fix_commits as collect_fix_commits_v2
5051
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
5152
from vulnerabilities.pipelines.v2_importers import debian_importer as debian_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import (
@@ -149,5 +150,42 @@
149150
ubuntu_usn.UbuntuUSNImporter,
150151
fireeye.FireyeImporter,
151152
oss_fuzz.OSSFuzzImporter,
153+
collect_fix_commits_v2.CollectLinuxFixCommitsPipeline,
154+
collect_fix_commits_v2.CollectBusyBoxFixCommitsPipeline,
155+
collect_fix_commits_v2.CollectNginxFixCommitsPipeline,
156+
collect_fix_commits_v2.CollectApacheTomcatFixCommitsPipeline,
157+
collect_fix_commits_v2.CollectMysqlServerFixCommitsPipeline,
158+
collect_fix_commits_v2.CollectPostgresqlFixCommitsPipeline,
159+
collect_fix_commits_v2.CollectMongodbFixCommitsPipeline,
160+
collect_fix_commits_v2.CollectRedisFixCommitsPipeline,
161+
collect_fix_commits_v2.CollectSqliteFixCommitsPipeline,
162+
collect_fix_commits_v2.CollectPhpFixCommitsPipeline,
163+
collect_fix_commits_v2.CollectPythonCpythonFixCommitsPipeline,
164+
collect_fix_commits_v2.CollectRubyFixCommitsPipeline,
165+
collect_fix_commits_v2.CollectGoFixCommitsPipeline,
166+
collect_fix_commits_v2.CollectNodeJsFixCommitsPipeline,
167+
collect_fix_commits_v2.CollectRustFixCommitsPipeline,
168+
collect_fix_commits_v2.CollectOpenjdkFixCommitsPipeline,
169+
collect_fix_commits_v2.CollectSwiftFixCommitsPipeline,
170+
collect_fix_commits_v2.CollectDjangoFixCommitsPipeline,
171+
collect_fix_commits_v2.CollectRailsFixCommitsPipeline,
172+
collect_fix_commits_v2.CollectLaravelFixCommitsPipeline,
173+
collect_fix_commits_v2.CollectSpringFrameworkFixCommitsPipeline,
174+
collect_fix_commits_v2.CollectReactFixCommitsPipeline,
175+
collect_fix_commits_v2.CollectAngularFixCommitsPipeline,
176+
collect_fix_commits_v2.CollectWordpressFixCommitsPipeline,
177+
collect_fix_commits_v2.CollectDockerMobyFixCommitsPipeline,
178+
collect_fix_commits_v2.CollectKubernetesFixCommitsPipeline,
179+
collect_fix_commits_v2.CollectQemuFixCommitsPipeline,
180+
collect_fix_commits_v2.CollectXenProjectFixCommitsPipeline,
181+
collect_fix_commits_v2.CollectVirtualboxFixCommitsPipeline,
182+
collect_fix_commits_v2.CollectContainerdFixCommitsPipeline,
183+
collect_fix_commits_v2.CollectAnsibleFixCommitsPipeline,
184+
collect_fix_commits_v2.CollectTerraformFixCommitsPipeline,
185+
collect_fix_commits_v2.CollectWiresharkFixCommitsPipeline,
186+
collect_fix_commits_v2.CollectTcpdumpFixCommitsPipeline,
187+
collect_fix_commits_v2.CollectGitFixCommitsPipeline,
188+
collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline,
189+
collect_fix_commits_v2.CollectGitlabFixCommitsPipeline,
152190
]
153191
)
vulnerabilities/pipelines/v2_importers/collect_fix_commits.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
from vulnerabilities.pipes.vcs_collector_utils import CollectVCSFixCommitPipeline
2+
3+
4+
class CollectLinuxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Linux kernel repository (github.com/torvalds/linux)."""

    pipeline_id = "collect_linux_fix_commits"
    repo_url = "https://github.com/torvalds/linux"
7+
8+
9+
class CollectBusyBoxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the BusyBox repository (github.com/mirror/busybox)."""

    pipeline_id = "collect_busybox_fix_commits"
    repo_url = "https://github.com/mirror/busybox"
12+
13+
14+
class CollectNginxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the nginx repository (github.com/nginx/nginx)."""

    pipeline_id = "collect_nginx_fix_commits"
    repo_url = "https://github.com/nginx/nginx"
17+
18+
19+
class CollectApacheTomcatFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Apache Tomcat repository (github.com/apache/tomcat)."""

    pipeline_id = "collect_apache_tomcat_fix_commits"
    repo_url = "https://github.com/apache/tomcat"
22+
23+
24+
class CollectMysqlServerFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the MySQL Server repository (github.com/mysql/mysql-server)."""

    pipeline_id = "collect_mysql_server_fix_commits"
    repo_url = "https://github.com/mysql/mysql-server"
27+
28+
29+
class CollectPostgresqlFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the PostgreSQL repository (github.com/postgres/postgres)."""

    pipeline_id = "collect_postgresql_fix_commits"
    repo_url = "https://github.com/postgres/postgres"
32+
33+
34+
class CollectMongodbFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the MongoDB repository (github.com/mongodb/mongo)."""

    pipeline_id = "collect_mongodb_fix_commits"
    repo_url = "https://github.com/mongodb/mongo"
37+
38+
39+
class CollectRedisFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Redis repository (github.com/redis/redis)."""

    pipeline_id = "collect_redis_fix_commits"
    repo_url = "https://github.com/redis/redis"
42+
43+
44+
class CollectSqliteFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the SQLite repository (github.com/sqlite/sqlite)."""

    pipeline_id = "collect_sqlite_fix_commits"
    repo_url = "https://github.com/sqlite/sqlite"
47+
48+
49+
class CollectPhpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the PHP interpreter repository (github.com/php/php-src)."""

    pipeline_id = "collect_php_fix_commits"
    repo_url = "https://github.com/php/php-src"
52+
53+
54+
class CollectPythonCpythonFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the CPython repository (github.com/python/cpython)."""

    pipeline_id = "collect_python_cpython_fix_commits"
    repo_url = "https://github.com/python/cpython"
57+
58+
59+
class CollectRubyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Ruby repository (github.com/ruby/ruby)."""

    pipeline_id = "collect_ruby_fix_commits"
    repo_url = "https://github.com/ruby/ruby"
62+
63+
64+
class CollectGoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Go repository (github.com/golang/go)."""

    pipeline_id = "collect_go_fix_commits"
    repo_url = "https://github.com/golang/go"
67+
68+
69+
class CollectNodeJsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Node.js repository (github.com/nodejs/node)."""

    pipeline_id = "collect_node_js_fix_commits"
    repo_url = "https://github.com/nodejs/node"
72+
73+
74+
class CollectRustFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Rust repository (github.com/rust-lang/rust)."""

    pipeline_id = "collect_rust_fix_commits"
    repo_url = "https://github.com/rust-lang/rust"
77+
78+
79+
class CollectOpenjdkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the OpenJDK repository (github.com/openjdk/jdk)."""

    pipeline_id = "collect_openjdk_fix_commits"
    repo_url = "https://github.com/openjdk/jdk"
82+
83+
84+
class CollectSwiftFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Swift repository (github.com/swiftlang/swift)."""

    pipeline_id = "collect_swift_fix_commits"
    repo_url = "https://github.com/swiftlang/swift"
87+
88+
89+
class CollectDjangoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Django repository (github.com/django/django)."""

    pipeline_id = "collect_django_fix_commits"
    repo_url = "https://github.com/django/django"
92+
93+
94+
class CollectRailsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Ruby on Rails repository (github.com/rails/rails)."""

    pipeline_id = "collect_rails_fix_commits"
    repo_url = "https://github.com/rails/rails"
97+
98+
99+
class CollectLaravelFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Laravel framework repository (github.com/laravel/framework)."""

    pipeline_id = "collect_laravel_fix_commits"
    repo_url = "https://github.com/laravel/framework"
102+
103+
104+
class CollectSpringFrameworkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """
    Collect fix commits from the Spring Framework repository
    (github.com/spring-projects/spring-framework).
    """

    pipeline_id = "collect_spring_framework_fix_commits"
    repo_url = "https://github.com/spring-projects/spring-framework"
107+
108+
109+
class CollectReactFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the React repository (github.com/facebook/react)."""

    pipeline_id = "collect_react_fix_commits"
    repo_url = "https://github.com/facebook/react"
112+
113+
114+
class CollectAngularFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Angular repository (github.com/angular/angular)."""

    pipeline_id = "collect_angular_fix_commits"
    repo_url = "https://github.com/angular/angular"
117+
118+
119+
class CollectWordpressFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the WordPress repository (github.com/WordPress/WordPress)."""

    pipeline_id = "collect_wordpress_fix_commits"
    repo_url = "https://github.com/WordPress/WordPress"
122+
123+
124+
class CollectDockerMobyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Moby (Docker) repository (github.com/moby/moby)."""

    pipeline_id = "collect_docker_moby_fix_commits"
    repo_url = "https://github.com/moby/moby"
127+
128+
129+
class CollectKubernetesFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Kubernetes repository (github.com/kubernetes/kubernetes)."""

    pipeline_id = "collect_kubernetes_fix_commits"
    repo_url = "https://github.com/kubernetes/kubernetes"
132+
133+
134+
class CollectQemuFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the QEMU repository (gitlab.com/qemu-project/qemu)."""

    pipeline_id = "collect_qemu_fix_commits"
    repo_url = "https://gitlab.com/qemu-project/qemu"
137+
138+
139+
class CollectXenProjectFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Xen Project repository (github.com/xen-project/xen)."""

    pipeline_id = "collect_xen_project_fix_commits"
    repo_url = "https://github.com/xen-project/xen"
142+
143+
144+
class CollectVirtualboxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the VirtualBox repository (github.com/mirror/vbox)."""

    pipeline_id = "collect_virtualbox_fix_commits"
    repo_url = "https://github.com/mirror/vbox"
147+
148+
149+
class CollectContainerdFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the containerd repository (github.com/containerd/containerd)."""

    pipeline_id = "collect_containerd_fix_commits"
    repo_url = "https://github.com/containerd/containerd"
152+
153+
154+
class CollectAnsibleFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Ansible repository (github.com/ansible/ansible)."""

    pipeline_id = "collect_ansible_fix_commits"
    repo_url = "https://github.com/ansible/ansible"
157+
158+
159+
class CollectTerraformFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Terraform repository (github.com/hashicorp/terraform)."""

    pipeline_id = "collect_terraform_fix_commits"
    repo_url = "https://github.com/hashicorp/terraform"
162+
163+
164+
class CollectWiresharkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Wireshark repository (gitlab.com/wireshark/wireshark)."""

    pipeline_id = "collect_wireshark_fix_commits"
    repo_url = "https://gitlab.com/wireshark/wireshark"
167+
168+
169+
class CollectTcpdumpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the tcpdump repository (github.com/the-tcpdump-group/tcpdump)."""

    pipeline_id = "collect_tcpdump_fix_commits"
    repo_url = "https://github.com/the-tcpdump-group/tcpdump"
172+
173+
174+
class CollectGitFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Git repository (github.com/git/git)."""

    pipeline_id = "collect_git_fix_commits"
    repo_url = "https://github.com/git/git"
177+
178+
179+
class CollectJenkinsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the Jenkins repository (github.com/jenkinsci/jenkins)."""

    pipeline_id = "collect_jenkins_fix_commits"
    repo_url = "https://github.com/jenkinsci/jenkins"
182+
183+
184+
class CollectGitlabFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the GitLab FOSS repository (gitlab.com/gitlab-org/gitlab-foss)."""

    pipeline_id = "collect_gitlab_fix_commits"
    repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"
vulnerabilities/pipes/vcs_collector_utils.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import re
11+
import shutil
12+
import tempfile
13+
from collections import defaultdict
14+
15+
from git import Repo
16+
from packageurl import PackageURL
17+
from packageurl.contrib.purl2url import purl2url
18+
from packageurl.contrib.url2purl import url2purl
19+
20+
from vulnerabilities.importer import AdvisoryDataV2
21+
from vulnerabilities.importer import AffectedPackageV2
22+
from vulnerabilities.importer import PackageCommitPatchData
23+
from vulnerabilities.importer import ReferenceV2
24+
from vulnerabilities.models import AdvisoryReference
25+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
26+
27+
28+
class CollectVCSFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect fix commits from any git repository.

    Subclasses provide ``pipeline_id`` and ``repo_url``. The repository is
    cloned into a temporary directory, every commit message is scanned for
    identifiers matching ``patterns``, and one advisory is yielded per
    identifier together with the commits that mention it.
    """

    # URL of the git repository to clone; set by each subclass.
    repo_url: str
    # Regular expressions, applied case-insensitively, for the vulnerability
    # identifiers searched for in commit messages.
    patterns: list[str] = [
        r"\bCVE-\d{4}-\d{4,19}\b",
        r"\bGHSA-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}\b",
    ]

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, collect and store, clean up."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """
        Clone the repository as a bare, blob-less clone in a temporary directory.

        The resulting repository object is stored on ``self.repo``. If cloning
        fails the temporary directory is removed and the error is re-raised.
        """
        clone_dir = tempfile.mkdtemp()
        try:
            self.repo = Repo.clone_from(
                url=self.repo_url,
                to_path=clone_dir,
                bare=True,
                no_checkout=True,
                multi_options=["--filter=blob:none"],
            )
        except Exception:
            # ``self.repo`` was never assigned, so clean_downloads() cannot
            # locate this directory; remove it here to avoid leaking it.
            shutil.rmtree(clone_dir, ignore_errors=True)
            raise

    def advisories_count(self) -> int:
        """Return 0: the number of advisories is not known before the commits are scanned."""
        return 0

    @staticmethod
    def _normalize_vulnerability_id(vuln_id):
        """Return ``vuln_id`` in canonical case so case variants group together."""
        if not isinstance(vuln_id, str):
            # A subclass pattern with capture groups makes re.findall return
            # tuples; pass those through untouched.
            return vuln_id

        prefix, _, rest = vuln_id.partition("-")
        prefix = prefix.upper()
        if prefix == "CVE":
            return vuln_id.upper()
        if prefix == "GHSA":
            return f"GHSA-{rest.lower()}"
        return vuln_id

    def extract_vulnerability_id(self, commit) -> list[str]:
        """
        Extract vulnerability ids from a commit message.

        Return the matched vulnerability IDs in order of first appearance,
        normalized in case and without duplicates.
        """
        # A dict is used as an ordered set: each id is kept once, in the
        # order it was first seen.
        matches = {}
        for pattern in self.patterns:
            for found in re.findall(pattern, commit.message, flags=re.IGNORECASE):
                matches[self._normalize_vulnerability_id(found)] = None
        return list(matches)

    def collect_fix_commits(self):
        """
        Iterate through repository commits and group them by vulnerability identifiers.

        Return a mapping of vuln_id -> [(commit_id, commit_message), ...].
        """
        self.log("Processing git repository fix commits (grouped by vulnerability IDs).")

        grouped_commits = defaultdict(list)
        for commit in self.repo.iter_commits("--all"):
            matched_ids = self.extract_vulnerability_id(commit)
            if not matched_ids:
                continue

            commit_id = commit.hexsha
            commit_message = commit.message.strip()

            for vuln_id in matched_ids:
                grouped_commits[vuln_id].append((commit_id, commit_message))

        self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
        self.log("Finished processing all commits.")
        return grouped_commits

    def collect_advisories(self):
        """
        Yield one AdvisoryDataV2 per vulnerability ID grouped with its related commits.

        The summary lists every ``commit_hash:commit_message`` pair. Each
        distinct commit contributes one affected package entry and, when a
        URL can be derived for it, one commit reference.
        """
        self.log("Generating AdvisoryData objects from grouped commits.")
        grouped_commits = self.collect_fix_commits()
        purl = url2purl(self.repo_url)
        for vuln_id, commits_data in grouped_commits.items():
            if not commits_data or not vuln_id:
                continue

            summary = "".join(
                f"{commit_hash}:{commit_message}\n" for commit_hash, commit_message in commits_data
            )
            # Deduplicate hashes while keeping commit order, so the output is
            # the same from run to run (iterating a set of strings is not).
            commit_hashes = list(dict.fromkeys(commit_hash for commit_hash, _ in commits_data))

            affected_packages = []
            references = []
            for commit_hash in commit_hashes:
                affected_package = AffectedPackageV2(
                    package=purl,
                    fixed_by_commit_patches=[
                        PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
                    ],
                )
                affected_packages.append(affected_package)

                purl_with_commit_hash = PackageURL(
                    type=purl.type, namespace=purl.namespace, name=purl.name, version=commit_hash
                )
                ref_url = purl2url(purl=str(purl_with_commit_hash))
                if not ref_url:
                    # No URL could be derived for this commit; the affected
                    # package entry is kept, only the reference is skipped.
                    continue

                references.append(
                    ReferenceV2(
                        reference_id=commit_hash,
                        reference_type=AdvisoryReference.COMMIT,
                        url=ref_url,
                    )
                )

            yield AdvisoryDataV2(
                advisory_id=vuln_id,
                summary=summary,
                affected_packages=affected_packages,
                references=references,
                url=self.repo_url,
            )

    def clean_downloads(self):
        """
        Cleanup any temporary repository data.

        Safe to call when no clone exists and when the directory has already
        been removed.
        """
        self.log("Cleaning up local repository resources.")
        repo = getattr(self, "repo", None)
        if repo is None or not repo.working_dir:
            return

        working_dir = repo.working_dir
        # Release resources held by the repository object before deleting
        # the files underneath it.
        repo.close()
        try:
            shutil.rmtree(path=working_dir)
        except FileNotFoundError:
            # Already removed by an earlier cleanup.
            pass

    def on_failure(self):
        """Ensure cleanup is always performed on failure."""
        self.clean_downloads()

0 commit comments

Comments
 (0)