|
10 | 10 | import bisect |
11 | 11 | import csv |
12 | 12 | import dataclasses |
| 13 | +import hashlib |
13 | 14 | import json |
14 | 15 | import logging |
15 | 16 | import os |
@@ -546,3 +547,42 @@ def get_purl_version_class(purl): |
546 | 547 | if check_version_class: |
547 | 548 | purl_version_class = check_version_class.version_class |
548 | 549 | return purl_version_class |
| 550 | + |
| 551 | + |
def compute_content_id(advisory_data, include_metadata=False):
    """
    Compute a content_id for an advisory by normalizing its data and hashing it.

    The summary has its spaces removed and is lowercased, the list fields are
    sorted, and the result is serialized to compact JSON with sorted keys
    before hashing, so list ordering and summary spacing/case do not change
    the id.

    :param advisory_data: An AdvisoryData-like object exposing ``summary``,
        ``affected_packages``, ``references`` and ``weaknesses`` (plus
        ``created_by`` and ``url`` when ``include_metadata`` is true)
    :param include_metadata: Boolean indicating whether to include `created_by` and `url`
    :return: SHA-512 hex digest (128 hexadecimal characters) as content_id
    :raises TypeError: if a value can not be converted to JSON
    """

    def to_jsonable(obj):
        """Convert an object that ``json`` can not serialize natively."""
        to_dict = getattr(obj, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
            # Shallow conversion: json calls this hook again for nested values.
            return {f.name: getattr(obj, f.name) for f in dataclasses.fields(obj)}
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    def canonical_json(value):
        """Serialize ``value`` to compact JSON with sorted keys."""
        return json.dumps(
            value,
            separators=(",", ":"),
            sort_keys=True,
            default=to_jsonable,
        )

    def normalize_text(text):
        """Normalize text by removing spaces and converting to lowercase."""
        return text.replace(" ", "").lower() if text else ""

    def normalize_list(items):
        """Sort a list to ensure consistent ordering."""
        if not items:
            return []
        try:
            return sorted(items)
        except TypeError:
            # Items without a natural ordering (dicts, plain objects, ...)
            # are ordered by their canonical JSON form instead.
            return sorted(items, key=canonical_json)

    # Normalize fields
    normalized_data = {
        "summary": normalize_text(advisory_data.summary),
        "affected_packages": normalize_list(advisory_data.affected_packages),
        "references": normalize_list(advisory_data.references),
        "weaknesses": normalize_list(advisory_data.weaknesses),
    }

    if include_metadata:
        normalized_data["created_by"] = advisory_data.created_by
        normalized_data["url"] = advisory_data.url

    # NOTE: the digest algorithm is part of the id; changing it would change
    # every content_id computed so far.
    normalized_json = canonical_json(normalized_data)
    return hashlib.sha512(normalized_json.encode("utf-8")).hexdigest()
0 commit comments