Skip to content

Commit 0f6ab4d

Browse files
committed
Add new content ID function
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent fabe035 commit 0f6ab4d

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

vulnerabilities/models.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
from vulnerabilities import utils
5454
from vulnerabilities.severity_systems import EPSS
5555
from vulnerabilities.severity_systems import SCORING_SYSTEMS
56-
from vulnerabilities.utils import normalize_purl
56+
from vulnerabilities.utils import compute_content_id, normalize_purl
5757
from vulnerabilities.utils import purl_to_dict
5858
from vulnerablecode import __version__ as VULNERABLECODE_VERSION
5959

@@ -1368,6 +1368,11 @@ def save(self, *args, **kwargs):
13681368
checksum.update(value)
13691369
self.unique_content_id = checksum.hexdigest()
13701370
super().save(*args, **kwargs)
1371+
1372+
def save(self, *args, **kwargs):
    """
    Recompute ``unique_content_id`` from this object's advisory data, then save.

    The content ID is derived with ``compute_content_id`` from the result of
    ``to_advisory_data()``, with metadata excluded. All positional and keyword
    arguments are forwarded unchanged to the parent ``save``.
    """
    # NOTE(review): another ``save`` appears to be defined just above this one
    # in the same class; if so, this later definition replaces it and the
    # earlier checksum logic never runs -- confirm and drop the stale one.
    self.unique_content_id = compute_content_id(
        self.to_advisory_data(),
        include_metadata=False,
    )
    super().save(*args, **kwargs)
13711376

13721377
def to_advisory_data(self) -> "AdvisoryData":
13731378
from vulnerabilities.importer import AdvisoryData

vulnerabilities/utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import bisect
1111
import csv
1212
import dataclasses
13+
import hashlib
1314
import json
1415
import logging
1516
import os
@@ -546,3 +547,42 @@ def get_purl_version_class(purl):
546547
if check_version_class:
547548
purl_version_class = check_version_class.version_class
548549
return purl_version_class
550+
551+
552+
def compute_content_id(advisory_data, include_metadata=False):
    """
    Compute a content_id for an advisory by normalizing its data and hashing it.

    The summary is lowercased with its spaces removed. The ``affected_packages``,
    ``references`` and ``weaknesses`` lists are converted to plain data and
    sorted, so the same entries listed in a different order give the same ID.

    :param advisory_data: An AdvisoryData object
    :param include_metadata: Boolean indicating whether to include `created_by` and `url`
    :return: SHA-512 hex digest of the normalized data, used as the content_id
    :raises TypeError: if a list entry cannot be converted to JSON-serializable data
    """

    def normalize_text(text):
        """Normalize text by removing spaces and converting to lowercase."""
        return text.replace(" ", "").lower() if text else ""

    def to_plain(item):
        """Return a JSON-friendly form of ``item``; plain values pass through."""
        to_dict = getattr(item, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        # ``is_dataclass`` is also true for dataclass *types*; only convert instances.
        if dataclasses.is_dataclass(item) and not isinstance(item, type):
            return dataclasses.asdict(item)
        return item

    def canonical(item):
        """Return a deterministic string used only as a sort key."""
        return json.dumps(item, sort_keys=True)

    def normalize_list(lst):
        """Convert entries to plain data and sort them for consistent ordering."""
        if not lst:
            return []
        items = [to_plain(item) for item in lst]
        try:
            # Natural ordering keeps IDs unchanged for plain strings and numbers.
            return sorted(items)
        except TypeError:
            # Dicts (and other unorderable values) are ordered by canonical JSON.
            return sorted(items, key=canonical)

    # Normalize fields
    normalized_data = {
        "summary": normalize_text(advisory_data.summary),
        "affected_packages": normalize_list(advisory_data.affected_packages),
        "references": normalize_list(advisory_data.references),
        "weaknesses": normalize_list(advisory_data.weaknesses),
    }

    if include_metadata:
        normalized_data["created_by"] = advisory_data.created_by
        normalized_data["url"] = advisory_data.url

    # Compact separators and sorted keys make the serialized form deterministic.
    normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
    content_id = hashlib.sha512(normalized_json.encode("utf-8")).hexdigest()

    return content_id

0 commit comments

Comments
 (0)