Skip to content

Commit f1d5ae1

Browse files
committed
Move the CollectVCSFixCommitPipeline base pipeline to vulnerabilities.pipes.vcs_collector_utils
Remove the CollectOpensslFixCommitsPipeline pipeline

Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 4885821 commit f1d5ae1

File tree

6 files changed: +175 additions, -162 deletions

vulnerabilities/importers/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@
153153
collect_fix_commits_v2.CollectRustFixCommitsPipeline,
154154
collect_fix_commits_v2.CollectOpenjdkFixCommitsPipeline,
155155
collect_fix_commits_v2.CollectSwiftFixCommitsPipeline,
156-
collect_fix_commits_v2.CollectOpensslFixCommitsPipeline,
157156
collect_fix_commits_v2.CollectDjangoFixCommitsPipeline,
158157
collect_fix_commits_v2.CollectRailsFixCommitsPipeline,
159158
collect_fix_commits_v2.CollectLaravelFixCommitsPipeline,

vulnerabilities/pipelines/__init__.py

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@
88
#
99

1010
import logging
11-
import re
12-
import shutil
13-
import tempfile
1411
import traceback
15-
from collections import defaultdict
1612
from datetime import datetime
1713
from datetime import timezone
1814
from timeit import default_timer as timer
@@ -23,12 +19,8 @@
2319
from aboutcode.pipeline import LoopProgress
2420
from aboutcode.pipeline import PipelineDefinition
2521
from aboutcode.pipeline import humanize_time
26-
from git import Repo
27-
from packageurl.contrib.url2purl import url2purl
2822

2923
from vulnerabilities.importer import AdvisoryData
30-
from vulnerabilities.importer import AffectedPackageV2
31-
from vulnerabilities.importer import PackageCommitPatchData
3224
from vulnerabilities.improver import MAX_CONFIDENCE
3325
from vulnerabilities.models import Advisory
3426
from vulnerabilities.models import PipelineRun
@@ -329,109 +321,3 @@ def collect_and_store_advisories(self):
329321
continue
330322

331323
self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
332-
333-
334-
class CollectVCSFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
335-
"""
336-
Pipeline to collect fix commits from any git repository.
337-
"""
338-
339-
repo_url: str
340-
patterns: list[str] = [
341-
r"\bCVE-\d{4}-\d{4,19}\b",
342-
r"GHSA-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}",
343-
]
344-
345-
@classmethod
346-
def steps(cls):
347-
return (
348-
cls.clone,
349-
cls.collect_and_store_advisories,
350-
cls.clean_downloads,
351-
)
352-
353-
def clone(self):
354-
"""Clone the repository."""
355-
self.repo = Repo.clone_from(
356-
url=self.repo_url,
357-
to_path=tempfile.mkdtemp(),
358-
bare=True,
359-
no_checkout=True,
360-
multi_options=["--filter=blob:none"],
361-
)
362-
363-
def advisories_count(self) -> int:
364-
return 0
365-
366-
def extract_vulnerability_id(self, commit) -> list[str]:
367-
"""
368-
Extract vulnerability id from a commit message.
369-
Returns a list of matched vulnerability IDs
370-
"""
371-
matches = []
372-
for pattern in self.patterns:
373-
found = re.findall(pattern, commit.message, flags=re.IGNORECASE)
374-
matches.extend(found)
375-
return matches
376-
377-
def collect_fix_commits(self):
378-
"""
379-
Iterate through repository commits and group them by vulnerability identifiers.
380-
return a list with (vuln_id, [(commit_id, commit_message)]).
381-
"""
382-
self.log("Processing git repository fix commits (grouped by vulnerability IDs).")
383-
384-
grouped_commits = defaultdict(list)
385-
for commit in self.repo.iter_commits("--all"):
386-
matched_ids = self.extract_vulnerability_id(commit)
387-
if not matched_ids:
388-
continue
389-
390-
commit_id = commit.hexsha
391-
commit_message = commit.message.strip()
392-
393-
for vuln_id in matched_ids:
394-
grouped_commits[vuln_id].append((commit_id, commit_message))
395-
396-
self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
397-
self.log("Finished processing all commits.")
398-
return grouped_commits
399-
400-
def collect_advisories(self):
401-
"""
402-
Generate AdvisoryData objects for each vulnerability ID grouped with its related commits.
403-
"""
404-
self.log("Generating AdvisoryData objects from grouped commits.")
405-
grouped_commits = self.collect_fix_commits()
406-
purl = url2purl(self.repo_url)
407-
408-
for vuln_id, commits_data in grouped_commits.items():
409-
if not commits_data or not vuln_id:
410-
continue
411-
412-
commit_hash_set = {commit_hash for commit_hash, _ in commits_data}
413-
affected_packages = [
414-
AffectedPackageV2(
415-
package=purl,
416-
fixed_by_commit_patches=[
417-
PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
418-
for commit_hash in commit_hash_set
419-
],
420-
)
421-
]
422-
423-
yield AdvisoryData(
424-
advisory_id=vuln_id,
425-
affected_packages=affected_packages,
426-
url=self.repo_url,
427-
)
428-
429-
def clean_downloads(self):
430-
"""Cleanup any temporary repository data."""
431-
self.log("Cleaning up local repository resources.")
432-
if hasattr(self, "repo") and self.repo.working_dir:
433-
shutil.rmtree(path=self.repo.working_dir)
434-
435-
def on_failure(self):
436-
"""Ensure cleanup is always performed on failure."""
437-
self.clean_downloads()
Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,191 +1,186 @@
1-
from vulnerabilities.pipelines import CollectVCSFixCommitPipeline
1+
from vulnerabilities.pipes.vcs_collector_utils import CollectVCSFixCommitPipeline
22

33

44
class CollectLinuxFixCommitsPipeline(CollectVCSFixCommitPipeline):
55
pipeline_id = "collect_linux_fix_commits"
6-
repo_url = "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
6+
repo_url = "https://github.com/torvalds/linux"
77

88

99
class CollectBusyBoxFixCommitsPipeline(CollectVCSFixCommitPipeline):
1010
pipeline_id = "collect_busybox_fix_commits"
11-
repo_url = "https://github.com/mirror/busybox.git"
11+
repo_url = "https://github.com/mirror/busybox"
1212

1313

1414
class CollectNginxFixCommitsPipeline(CollectVCSFixCommitPipeline):
1515
pipeline_id = "collect_nginx_fix_commits"
16-
repo_url = "https://github.com/nginx/nginx.git"
16+
repo_url = "https://github.com/nginx/nginx"
1717

1818

1919
class CollectApacheTomcatFixCommitsPipeline(CollectVCSFixCommitPipeline):
2020
pipeline_id = "collect_apache_tomcat_fix_commits"
21-
repo_url = "https://github.com/apache/tomcat.git"
21+
repo_url = "https://github.com/apache/tomcat"
2222

2323

2424
class CollectMysqlServerFixCommitsPipeline(CollectVCSFixCommitPipeline):
2525
pipeline_id = "collect_mysql_server_fix_commits"
26-
repo_url = "https://github.com/mysql/mysql-server.git"
26+
repo_url = "https://github.com/mysql/mysql-server"
2727

2828

2929
class CollectPostgresqlFixCommitsPipeline(CollectVCSFixCommitPipeline):
3030
pipeline_id = "collect_postgresql_fix_commits"
31-
repo_url = "https://github.com/postgres/postgres.git"
31+
repo_url = "https://github.com/postgres/postgres"
3232

3333

3434
class CollectMongodbFixCommitsPipeline(CollectVCSFixCommitPipeline):
3535
pipeline_id = "collect_mongodb_fix_commits"
36-
repo_url = "https://github.com/mongodb/mongo.git"
36+
repo_url = "https://github.com/mongodb/mongo"
3737

3838

3939
class CollectRedisFixCommitsPipeline(CollectVCSFixCommitPipeline):
4040
pipeline_id = "collect_redis_fix_commits"
41-
repo_url = "https://github.com/redis/redis.git"
41+
repo_url = "https://github.com/redis/redis"
4242

4343

4444
class CollectSqliteFixCommitsPipeline(CollectVCSFixCommitPipeline):
4545
pipeline_id = "collect_sqlite_fix_commits"
46-
repo_url = "https://github.com/sqlite/sqlite.git"
46+
repo_url = "https://github.com/sqlite/sqlite"
4747

4848

4949
class CollectPhpFixCommitsPipeline(CollectVCSFixCommitPipeline):
5050
pipeline_id = "collect_php_fix_commits"
51-
repo_url = "https://github.com/php/php-src.git"
51+
repo_url = "https://github.com/php/php-src"
5252

5353

5454
class CollectPythonCpythonFixCommitsPipeline(CollectVCSFixCommitPipeline):
5555
pipeline_id = "collect_python_cpython_fix_commits"
56-
repo_url = "https://github.com/python/cpython.git"
56+
repo_url = "https://github.com/python/cpython"
5757

5858

5959
class CollectRubyFixCommitsPipeline(CollectVCSFixCommitPipeline):
6060
pipeline_id = "collect_ruby_fix_commits"
61-
repo_url = "https://github.com/ruby/ruby.git"
61+
repo_url = "https://github.com/ruby/ruby"
6262

6363

6464
class CollectGoFixCommitsPipeline(CollectVCSFixCommitPipeline):
6565
pipeline_id = "collect_go_fix_commits"
66-
repo_url = "https://github.com/golang/go.git"
66+
repo_url = "https://github.com/golang/go"
6767

6868

6969
class CollectNodeJsFixCommitsPipeline(CollectVCSFixCommitPipeline):
7070
pipeline_id = "collect_node_js_fix_commits"
71-
repo_url = "https://github.com/nodejs/node.git"
71+
repo_url = "https://github.com/nodejs/node"
7272

7373

7474
class CollectRustFixCommitsPipeline(CollectVCSFixCommitPipeline):
7575
pipeline_id = "collect_rust_fix_commits"
76-
repo_url = "https://github.com/rust-lang/rust.git"
76+
repo_url = "https://github.com/rust-lang/rust"
7777

7878

7979
class CollectOpenjdkFixCommitsPipeline(CollectVCSFixCommitPipeline):
8080
pipeline_id = "collect_openjdk_fix_commits"
81-
repo_url = "https://github.com/openjdk/jdk.git"
81+
repo_url = "https://github.com/openjdk/jdk"
8282

8383

8484
class CollectSwiftFixCommitsPipeline(CollectVCSFixCommitPipeline):
8585
pipeline_id = "collect_swift_fix_commits"
86-
repo_url = "https://github.com/swiftlang/swift.git"
87-
88-
89-
class CollectOpensslFixCommitsPipeline(CollectVCSFixCommitPipeline):
90-
pipeline_id = "collect_openssl_fix_commits"
91-
repo_url = "https://github.com/openssl/openssl.git"
86+
repo_url = "https://github.com/swiftlang/swift"
9287

9388

9489
class CollectDjangoFixCommitsPipeline(CollectVCSFixCommitPipeline):
9590
pipeline_id = "collect_django_fix_commits"
96-
repo_url = "https://github.com/django/django.git"
91+
repo_url = "https://github.com/django/django"
9792

9893

9994
class CollectRailsFixCommitsPipeline(CollectVCSFixCommitPipeline):
10095
pipeline_id = "collect_rails_fix_commits"
101-
repo_url = "https://github.com/rails/rails.git"
96+
repo_url = "https://github.com/rails/rails"
10297

10398

10499
class CollectLaravelFixCommitsPipeline(CollectVCSFixCommitPipeline):
105100
pipeline_id = "collect_laravel_fix_commits"
106-
repo_url = "https://github.com/laravel/framework.git"
101+
repo_url = "https://github.com/laravel/framework"
107102

108103

109104
class CollectSpringFrameworkFixCommitsPipeline(CollectVCSFixCommitPipeline):
110105
pipeline_id = "collect_spring_framework_fix_commits"
111-
repo_url = "https://github.com/spring-projects/spring-framework.git"
106+
repo_url = "https://github.com/spring-projects/spring-framework"
112107

113108

114109
class CollectReactFixCommitsPipeline(CollectVCSFixCommitPipeline):
115110
pipeline_id = "collect_react_fix_commits"
116-
repo_url = "https://github.com/facebook/react.git"
111+
repo_url = "https://github.com/facebook/react"
117112

118113

119114
class CollectAngularFixCommitsPipeline(CollectVCSFixCommitPipeline):
120115
pipeline_id = "collect_angular_fix_commits"
121-
repo_url = "https://github.com/angular/angular.git"
116+
repo_url = "https://github.com/angular/angular"
122117

123118

124119
class CollectWordpressFixCommitsPipeline(CollectVCSFixCommitPipeline):
125120
pipeline_id = "collect_wordpress_fix_commits"
126-
repo_url = "https://github.com/WordPress/WordPress.git"
121+
repo_url = "https://github.com/WordPress/WordPress"
127122

128123

129124
class CollectDockerMobyFixCommitsPipeline(CollectVCSFixCommitPipeline):
130125
pipeline_id = "collect_docker_moby_fix_commits"
131-
repo_url = "https://github.com/moby/moby.git"
126+
repo_url = "https://github.com/moby/moby"
132127

133128

134129
class CollectKubernetesFixCommitsPipeline(CollectVCSFixCommitPipeline):
135130
pipeline_id = "collect_kubernetes_fix_commits"
136-
repo_url = "https://github.com/kubernetes/kubernetes.git"
131+
repo_url = "https://github.com/kubernetes/kubernetes"
137132

138133

139134
class CollectQemuFixCommitsPipeline(CollectVCSFixCommitPipeline):
140135
pipeline_id = "collect_qemu_fix_commits"
141-
repo_url = "https://gitlab.com/qemu-project/qemu.git"
136+
repo_url = "https://gitlab.com/qemu-project/qemu"
142137

143138

144139
class CollectXenProjectFixCommitsPipeline(CollectVCSFixCommitPipeline):
145140
pipeline_id = "collect_xen_project_fix_commits"
146-
repo_url = "https://github.com/xen-project/xen.git"
141+
repo_url = "https://github.com/xen-project/xen"
147142

148143

149144
class CollectVirtualboxFixCommitsPipeline(CollectVCSFixCommitPipeline):
150145
pipeline_id = "collect_virtualbox_fix_commits"
151-
repo_url = "https://github.com/mirror/vbox.git"
146+
repo_url = "https://github.com/mirror/vbox"
152147

153148

154149
class CollectContainerdFixCommitsPipeline(CollectVCSFixCommitPipeline):
155150
pipeline_id = "collect_containerd_fix_commits"
156-
repo_url = "https://github.com/containerd/containerd.git"
151+
repo_url = "https://github.com/containerd/containerd"
157152

158153

159154
class CollectAnsibleFixCommitsPipeline(CollectVCSFixCommitPipeline):
160155
pipeline_id = "collect_ansible_fix_commits"
161-
repo_url = "https://github.com/ansible/ansible.git"
156+
repo_url = "https://github.com/ansible/ansible"
162157

163158

164159
class CollectTerraformFixCommitsPipeline(CollectVCSFixCommitPipeline):
165160
pipeline_id = "collect_terraform_fix_commits"
166-
repo_url = "https://github.com/hashicorp/terraform.git"
161+
repo_url = "https://github.com/hashicorp/terraform"
167162

168163

169164
class CollectWiresharkFixCommitsPipeline(CollectVCSFixCommitPipeline):
170165
pipeline_id = "collect_wireshark_fix_commits"
171-
repo_url = "https://gitlab.com/wireshark/wireshark.git"
166+
repo_url = "https://gitlab.com/wireshark/wireshark"
172167

173168

174169
class CollectTcpdumpFixCommitsPipeline(CollectVCSFixCommitPipeline):
175170
pipeline_id = "collect_tcpdump_fix_commits"
176-
repo_url = "https://github.com/the-tcpdump-group/tcpdump.git"
171+
repo_url = "https://github.com/the-tcpdump-group/tcpdump"
177172

178173

179174
class CollectGitFixCommitsPipeline(CollectVCSFixCommitPipeline):
180175
pipeline_id = "collect_git_fix_commits"
181-
repo_url = "https://github.com/git/git.git"
176+
repo_url = "https://github.com/git/git"
182177

183178

184179
class CollectJenkinsFixCommitsPipeline(CollectVCSFixCommitPipeline):
185180
pipeline_id = "collect_jenkins_fix_commits"
186-
repo_url = "https://github.com/jenkinsci/jenkins.git"
181+
repo_url = "https://github.com/jenkinsci/jenkins"
187182

188183

189184
class CollectGitlabFixCommitsPipeline(CollectVCSFixCommitPipeline):
190185
pipeline_id = "collect_gitlab_fix_commits"
191-
repo_url = "https://gitlab.com/gitlab-org/gitlab-foss.git"
186+
repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"

0 commit comments

Comments
 (0)