Skip to content

Commit edaecd0

Browse files
committed
Update aosp importer
Fix patch_checksum constraint. Remove unused imports. Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 4bad019 commit edaecd0

7 files changed

Lines changed: 33 additions & 18 deletions

File tree

vulnerabilities/importer.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import dataclasses
1111
import datetime
1212
import functools
13-
import hashlib
1413
import logging
1514
import traceback
1615
import xml.etree.ElementTree as ET
@@ -37,6 +36,7 @@
3736
from vulnerabilities.severity_systems import SCORING_SYSTEMS
3837
from vulnerabilities.severity_systems import ScoringSystem
3938
from vulnerabilities.utils import classproperty
39+
from vulnerabilities.utils import compute_patch_checksum
4040
from vulnerabilities.utils import get_reference_id
4141
from vulnerabilities.utils import is_commit
4242
from vulnerabilities.utils import is_cve
@@ -202,6 +202,7 @@ class PackageCommitPatchData:
202202
vcs_url: str
203203
commit_hash: str
204204
patch_text: Optional[str] = None
205+
patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)
205206

206207
def __post_init__(self):
207208
if not self.commit_hash:
@@ -213,6 +214,9 @@ def __post_init__(self):
213214
if not self.vcs_url:
214215
raise ValueError("Commit must have a non-empty vcs_url.")
215216

217+
if self.patch_text:
218+
self.patch_checksum = compute_patch_checksum(self.patch_text)
219+
216220
def __lt__(self, other):
217221
if not isinstance(other, PackageCommitPatchData):
218222
return NotImplemented
@@ -224,6 +228,7 @@ def _cmp_key(self):
224228
self.vcs_url,
225229
self.commit_hash,
226230
self.patch_text,
231+
self.patch_checksum,
227232
)
228233

229234
def to_dict(self) -> dict:
@@ -232,6 +237,7 @@ def to_dict(self) -> dict:
232237
"vcs_url": self.vcs_url,
233238
"commit_hash": self.commit_hash,
234239
"patch_text": self.patch_text,
240+
"patch_checksum": self.patch_checksum,
235241
}
236242

237243
@classmethod
@@ -256,7 +262,7 @@ def __post_init__(self):
256262
raise ValueError("A patch must include either patch_url or patch_text")
257263

258264
if self.patch_text:
259-
self.patch_checksum = hashlib.sha512(self.patch_text.encode()).hexdigest()
265+
self.patch_checksum = compute_patch_checksum(self.patch_text)
260266

261267
def __lt__(self, other):
262268
if not isinstance(other, PatchData):

vulnerabilities/migrations/0104_packagecommitpatch_patch_and_more.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Generated by Django 4.2.25 on 2025-12-01 14:42
1+
# Generated by Django 4.2.25 on 2025-12-02 00:23
22

33
from django.db import migrations, models
44

@@ -65,7 +65,10 @@ class Migration(migrations.Migration):
6565
(
6666
"patch_checksum",
6767
models.CharField(
68-
help_text="SHA512 checksum of the patch content.", max_length=128
68+
blank=True,
69+
help_text="SHA512 checksum of the patch content.",
70+
max_length=128,
71+
null=True,
6972
),
7073
),
7174
],

vulnerabilities/models.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
from vulnerabilities import utils
6666
from vulnerabilities.severity_systems import EPSS
6767
from vulnerabilities.severity_systems import SCORING_SYSTEMS
68+
from vulnerabilities.utils import compute_patch_checksum
6869
from vulnerabilities.utils import normalize_purl
6970
from vulnerabilities.utils import purl_to_dict
7071
from vulnerablecode import __version__ as VULNERABLECODE_VERSION
@@ -2765,17 +2766,23 @@ class Patch(models.Model):
27652766

27662767
patch_checksum = models.CharField(
27672768
max_length=128,
2769+
blank=True,
2770+
null=True,
27682771
help_text="SHA512 checksum of the patch content.",
27692772
)
27702773

2774+
def save(self, *args, **kwargs):
    """
    Save this patch after refreshing ``patch_checksum`` from ``patch_text``.

    The checksum is recomputed only when ``patch_text`` is truthy; otherwise
    the current ``patch_checksum`` value is saved unchanged.
    """
    text = self.patch_text
    if text:
        self.patch_checksum = compute_patch_checksum(text)
    super().save(*args, **kwargs)
2778+
27712779
class Meta:
27722780
unique_together = ["patch_checksum", "patch_url"]
27732781

27742782

27752783
class PackageCommitPatch(models.Model):
27762784
"""
2777-
A specific patch implementation for structured Package/VCS data.
2778-
Inherits from Patch.
2785+
A specific patch implementation for structured Package/VCS data
27792786
"""
27802787

27812788
commit_hash = models.CharField(

vulnerabilities/pipelines/v2_importers/aosp_importer.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,11 @@
1414

1515
import dateparser
1616
from fetchcode.vcs import fetch_via_vcs
17-
from packageurl.contrib.purl2url import get_repo_url
18-
from packageurl.contrib.url2purl import url2purl
1917

20-
from aboutcode.hashid import get_core_purl
2118
from vulnerabilities.importer import AdvisoryData
2219
from vulnerabilities.importer import AffectedPackageV2
2320
from vulnerabilities.importer import PackageCommitPatchData
2421
from vulnerabilities.importer import PatchData
25-
from vulnerabilities.importer import ReferenceV2
2622
from vulnerabilities.importer import VulnerabilitySeverity
2723
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
2824
from vulnerabilities.pipes.advisory import classify_patch_source
@@ -37,8 +33,6 @@ class AospImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
3733
pipeline_id = "aosp_dataset_fix_commits"
3834
spdx_license_expression = "Apache-2.0"
3935
license_url = "https://github.com/quarkslab/aosp_dataset/blob/master/LICENSE"
40-
importer_name = "aosp_dataset"
41-
qualified_name = "aosp_dataset_fix_commits"
4236

4337
@classmethod
4438
def steps(cls):

vulnerabilities/pipes/advisory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ def get_or_create_advisory_patches(
147147
pairs = [(c.patch_url, c.patch_checksum) for c in base_patches_data]
148148

149149
query = Q()
150-
for patch_checksum, patch_text in pairs:
151-
query |= Q(patch_checksum=patch_checksum, patch_text=patch_text)
150+
# ``pairs`` holds (patch_url, patch_checksum) tuples, so unpack in that order;
# unpacking them reversed would filter each column on the other's value and
# never match an existing Patch row.
for patch_url, patch_checksum in pairs:
    query |= Q(patch_checksum=patch_checksum, patch_url=patch_url)
152152

153153
existing_commits_qs = Patch.objects.filter(query)
154154
existing_pairs = set(existing_commits_qs.values_list("patch_url", "patch_checksum"))

vulnerabilities/tests/test_data/aosp/CVE-aosp_test3-expected.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
{
2121
"vcs_url": "https://github.com/torvalds/linux",
2222
"commit_hash": "0048b4837affd153897ed1222283492070027aa9",
23-
"patch_text": null
23+
"patch_text": null,
24+
"patch_checksum": null
2425
}
2526
]
2627
}

vulnerabilities/utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,12 @@
3434
from cwe2.database import InvalidCWEError
3535
from packageurl import PackageURL
3636
from packageurl.contrib.django.utils import without_empty_values
37-
from packageurl.contrib.purl2url import get_repo_url
38-
from packageurl.contrib.url2purl import url2purl
3937
from univers.version_range import RANGE_CLASS_BY_SCHEMES
4038
from univers.version_range import AlpineLinuxVersionRange
4139
from univers.version_range import NginxVersionRange
4240
from univers.version_range import VersionRange
4341

4442
from aboutcode.hashid import build_vcid
45-
from aboutcode.hashid import get_core_purl
4643

4744
logger = logging.getLogger(__name__)
4845

@@ -684,3 +681,10 @@ def create_registry(pipelines):
684681
registry[key] = pipeline
685682

686683
return registry
684+
685+
686+
def compute_patch_checksum(patch_text: str | None) -> str | None:
687+
"""
688+
Compute SHA-512 checksum for patch text.
689+
"""
690+
return hashlib.sha512(patch_text.encode("utf-8")).hexdigest()

0 commit comments

Comments
 (0)