#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import json
import shutil
from datetime import timezone
from pathlib import Path

import dateparser
from fetchcode.vcs import fetch_via_vcs

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import CodeCommitData
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.severity_systems import GENERIC
| 23 | + |
| 24 | + |
class AospImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect fix commits for Android (AOSP) CVEs from the
    Quarkslab aosp_dataset repository.
    """

    pipeline_id = "aosp_dataset_fix_commits"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/quarkslab/aosp_dataset/blob/master/LICENSE"
    importer_name = "aosp_dataset"
    qualified_name = "aosp_dataset_fix_commits"

    @classmethod
    def steps(cls):
        # Order matters: clone first, then parse, then always clean up.
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """Clone the aosp_dataset repository into a temporary checkout."""
        self.repo_url = "git+https://github.com/quarkslab/aosp_dataset"
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self):
        """Return the number of advisory files collect_advisories will process."""
        # Count only CVE-*.json under cves/ so the count matches what
        # collect_advisories actually yields (the original counted every
        # *.json in the whole repository).
        base_path = Path(self.vcs_response.dest_dir) / "cves"
        return sum(1 for _ in base_path.rglob("CVE-*.json"))

    def collect_advisories(self):
        """Yield one AdvisoryData per single-CVE JSON file in the dataset."""
        self.log("Processing aosp_dataset fix commits.")
        base_path = Path(self.vcs_response.dest_dir) / "cves"
        for file_path in base_path.rglob("*.json"):
            if not file_path.name.startswith("CVE-"):
                continue

            with open(file_path) as f:
                vulnerability_data = json.load(f)

            vulnerability_id = vulnerability_data.get("cveId")
            # Skip records that pack several CVEs into one field, e.g.
            # "CVE-2017-13077, CVE-2017-13078" — not a valid single id.
            if not vulnerability_id or "," in vulnerability_id:
                continue

            summary = vulnerability_data.get("vulnerabilityType")
            date_reported = vulnerability_data.get("dateReported")
            date_published = dateparser.parse(date_reported) if date_reported else None
            if date_published and not date_published.tzinfo:
                # Normalize naive dates to UTC so stored dates are comparable.
                date_published = date_published.replace(tzinfo=timezone.utc)

            severities = []
            severity_value = vulnerability_data.get("severity")
            if severity_value:
                severities.append(
                    VulnerabilitySeverity(
                        system=GENERIC,
                        value=severity_value,
                    )
                )

            references = []
            # One list per advisory: the original re-created this inside the
            # per-fix loop, keeping only the last commit.
            fixed_by_commits = []
            for commit_data in vulnerability_data.get("fixes", []):
                vcs_url = commit_data.get("patchUrl")
                commit_id = commit_data.get("commitId")

                if not vcs_url:
                    continue

                # Patch URLs come in two shapes:
                #   .../commit/?id=<hash>  (cgit: kernel.org, codeaurora)
                #   .../+/<hash>           (gitiles: android.googlesource.com)
                # When the record has no explicit commitId, fall back to
                # extracting the hash from whichever URL shape matches.
                if not commit_id:
                    if "?id=" in vcs_url:
                        commit_id = vcs_url.rsplit("?id=", 1)[-1]
                    elif "/+/" in vcs_url:
                        commit_id = vcs_url.rsplit("/+/", 1)[-1]

                references.append(ReferenceV2(url=vcs_url))
                fixed_by_commits.append(
                    CodeCommitData(
                        commit_hash=commit_id,
                        url=vcs_url,
                    )
                )

            yield AdvisoryData(
                advisory_id=vulnerability_id,
                summary=summary,
                references_v2=references,
                severities=severities,
                fixed_by_commits=fixed_by_commits,
                date_published=date_published,
                url=f"https://raw.githubusercontent.com/quarkslab/aosp_dataset/refs/heads/master/cves/{file_path.name}",
            )

    def clean_downloads(self):
        """Remove the cloned repository checkout, if any."""
        self.log("Cleaning up local repository resources.")
        # clone() stores the checkout in self.vcs_response — there is no
        # self.repo attribute (the original's check never fired, leaking the
        # checkout). Guard with getattr in case clone() never ran.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response and vcs_response.dest_dir:
            shutil.rmtree(path=vcs_response.dest_dir)

    def on_failure(self):
        """Ensure cleanup is always performed on failure."""
        self.clean_downloads()