Skip to content

Commit 082134e

Browse files
committed
Add CollectRepoFixCommitPipeline
Add a test for CollectRepoFixCommitPipeline Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 976c5ad commit 082134e

File tree

7 files changed

+293
-189
lines changed

7 files changed

+293
-189
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@
4343
from vulnerabilities.pipelines import pysec_importer
4444
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4545
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
46+
from vulnerabilities.pipelines.v2_importers import (
47+
collect_repo_fix_commits as collect_repo_fix_commits,
48+
)
4649
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
4750
from vulnerabilities.pipelines.v2_importers import (
4851
elixir_security_importer as elixir_security_importer_v2,
@@ -115,5 +118,6 @@
115118
ubuntu_usn.UbuntuUSNImporter,
116119
fireeye.FireyeImporter,
117120
oss_fuzz.OSSFuzzImporter,
121+
collect_repo_fix_commits.CollectRepoFixCommitPipeline,
118122
]
119123
)

vulnerabilities/improvers/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
from vulnerabilities.pipelines import flag_ghost_packages
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
22-
from vulnerabilities.pipelines.v2_improvers import (
23-
collect_repo_fix_commits as collect_repo_fix_commits_v2,
24-
)
2522
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2623
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2724
from vulnerabilities.pipelines.v2_improvers import (
@@ -70,7 +67,6 @@
7067
compute_package_risk_v2.ComputePackageRiskPipeline,
7168
compute_version_rank_v2.ComputeVersionRankPipeline,
7269
compute_advisory_todo_v2.ComputeToDo,
73-
collect_repo_fix_commits_v2.CollectRepoFixCommitPipeline,
7470
compute_advisory_todo.ComputeToDo,
7571
]
7672
)
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import os
2+
import re
3+
import shutil
4+
import subprocess
5+
import tempfile
6+
from collections import defaultdict
7+
8+
from git import Repo
9+
10+
from vulnerabilities.importer import AdvisoryData
11+
from vulnerabilities.importer import ReferenceV2
12+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
13+
14+
# Regular expressions (one per identifier scheme) used to spot vulnerability
# identifiers inside commit messages. Each pattern is anchored on word
# boundaries so that an identifier embedded in a longer token is not matched.
SECURITY_PATTERNS = [
    # CVE identifiers: "CVE-", a four digit year, then 4 to 19 digits.
    r"\bCVE-\d{4}-\d{4,19}\b",
    # GHSA identifiers: three dash-separated groups of four alphanumerics.
    r"\bGHSA-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}\b",
    # PYSEC identifiers: "PYSEC-", a four digit year, then 1 to 6 digits.
    r"\bPYSEC-\d{4}-\d{1,6}\b",
    # XSA identifiers: "XSA-" followed by 1 to 4 digits.
    r"\bXSA-\d{1,4}\b",
]
20+
21+
22+
class CollectRepoFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect fix commits from any git repository.

    The repository is cloned into a temporary directory, every commit message
    is scanned for the identifiers described by ``SECURITY_PATTERNS``, and one
    ``AdvisoryData`` is produced per identifier, referencing all the commits
    that mention it. The temporary clone is removed at the end of the run and
    on failure.
    """

    pipeline_id = "repo_fix_commit"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, import, then cleanup."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """
        Clone the repository as a bare, blob-less clone in a temporary directory.

        Sets ``self.repo_url``, ``self.repo_path`` and ``self.repo``.
        Raises ``subprocess.CalledProcessError`` when ``git clone`` fails.
        """
        self.repo_url = "https://github.com/torvalds/linux"
        # Record the directory *before* cloning so that ``clean_downloads`` can
        # still remove it when ``git clone`` fails and ``self.repo`` is never set.
        self.repo_path = tempfile.mkdtemp()
        cmd = [
            "git",
            "clone",
            "--bare",
            "--filter=blob:none",
            "--no-checkout",
            self.repo_url,
            self.repo_path,
        ]
        subprocess.run(cmd, check=True)
        self.repo = Repo(self.repo_path)

    def advisories_count(self) -> int:
        """
        Return the number of commits reachable from ``HEAD``.

        NOTE(review): this counts commits, not advisories, and only those
        reachable from HEAD while ``collect_fix_commits`` walks ``--all``;
        it looks like it is meant as a rough estimate — confirm with callers.
        """
        return int(self.repo.git.rev_list("--count", "HEAD"))

    @staticmethod
    def _message_text(commit) -> str:
        """Return the commit message as ``str``, decoding it leniently if it is bytes."""
        message = commit.message
        if isinstance(message, bytes):
            # GitPython may hand back raw bytes when a message cannot be decoded
            # with its declared encoding; never let one such commit abort the run.
            return message.decode("utf-8", errors="replace")
        return message

    def classify_commit_type(self, commit) -> list[str]:
        """
        Extract vulnerability identifiers from a commit message.

        Returns a list of matched vulnerability IDs (normalized to uppercase),
        without duplicates and in order of first match per pattern.
        """
        message = self._message_text(commit)
        # A dict keeps insertion order and drops repeats, so a commit quoting the
        # same identifier several times (or with mixed case) is reported once.
        # NOTE(review): GHSA ids are commonly written with a lowercase suffix —
        # confirm that uppercase is the desired canonical form for aliases.
        matches = {}
        for pattern in SECURITY_PATTERNS:
            for found in re.findall(pattern, message, flags=re.IGNORECASE):
                matches[found.upper()] = None
        return list(matches)

    def collect_fix_commits(self):
        """
        Iterate through repository commits and group them by vulnerability identifiers.

        Return a mapping of ``vuln_id`` to a list of ``(commit_id, commit_message)``
        tuples, one tuple per commit whose message mentions that identifier.
        """
        self.log("Processing git repository fix commits (grouped by vulnerability IDs).")

        grouped_commits = defaultdict(list)
        for commit in self.repo.iter_commits("--all"):
            matched_ids = self.classify_commit_type(commit)
            if not matched_ids:
                continue

            commit_id = commit.hexsha
            commit_message = self._message_text(commit).strip()

            for vuln_id in matched_ids:
                grouped_commits[vuln_id].append((commit_id, commit_message))

        self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
        self.log("Finished processing all commits.")
        return grouped_commits

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its related commits.

        Each advisory uses the vulnerability ID as both its ``advisory_id`` and
        its single alias, lists one commit URL reference per related commit and
        carries a summary enumerating the commit ids and messages.
        """
        self.log("Generating AdvisoryData objects from grouped commits.")
        grouped_commits = self.collect_fix_commits()
        for vuln_id, commits in grouped_commits.items():
            references = [ReferenceV2(url=f"{self.repo_url}/commit/{cid}") for cid, _ in commits]

            summary_lines = [f"- {cid}: {msg}" for cid, msg in commits]
            summary = f"Commits fixing {vuln_id}:\n" + "\n".join(summary_lines)
            yield AdvisoryData(
                advisory_id=vuln_id,
                aliases=[vuln_id],
                summary=summary,
                references_v2=references,
                url=self.repo_url,
            )

    def clean_downloads(self):
        """
        Cleanup any temporary repository data.

        Safe to call when ``clone`` failed part-way: missing attributes and
        already-removed directories are skipped instead of raising.
        """
        self.log("Cleaning up local repository resources.")
        candidates = [getattr(self, "repo_path", None)]
        repo = getattr(self, "repo", None)
        if repo is not None:
            # ``working_tree_dir`` is None for the bare clone made by ``clone``;
            # ``working_dir`` then points at the git directory itself.
            candidates.append(repo.working_tree_dir or repo.working_dir)

        for path in candidates:
            if path and os.path.isdir(path):
                shutil.rmtree(path=path)

    def on_failure(self):
        """Ensure cleanup is always performed on failure."""
        self.clean_downloads()

vulnerabilities/pipelines/v2_improvers/collect_repo_fix_commits.py

Lines changed: 0 additions & 185 deletions
This file was deleted.

0 commit comments

Comments
 (0)