Skip to content

Commit 281ae60

Browse files
authored
Merge pull request #2207 from aboutcode-org/store_advisory_content_hash
Store advisory content hash
2 parents 2dbbd38 + 3b780d5 commit 281ae60

File tree

13 files changed

+238
-44
lines changed

13 files changed

+238
-44
lines changed

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
2222
from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees
23+
from vulnerabilities.pipelines.v2_improvers import compute_advisory_content_hash
2324
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2425
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2526
from vulnerabilities.pipelines.v2_improvers import (
@@ -74,5 +75,6 @@
7475
compute_advisory_todo.ComputeToDo,
7576
collect_ssvc_trees.CollectSSVCPipeline,
7677
relate_severities.RelateSeveritiesPipeline,
78+
compute_advisory_content_hash.ComputeAdvisoryContentHash,
7779
]
7880
)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.2.11 on 2026-03-11 08:46
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the optional ``advisory_content_hash`` column to the ``advisoryv2`` model."""

    # This migration is applied on top of migration 0115 of the same app.
    dependencies = [("vulnerabilities", "0115_impactedpackageaffecting_and_more")]

    operations = [
        migrations.AddField(
            model_name="advisoryv2",
            name="advisory_content_hash",
            # Nullable and blank-able: existing rows start without a hash.
            field=models.CharField(
                max_length=64,
                null=True,
                blank=True,
                help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
            ),
        ),
    ]

vulnerabilities/models.py

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3010,6 +3010,13 @@ class AdvisoryV2(models.Model):
30103010
help_text="Related advisories that are used to calculate the severity of this advisory.",
30113011
)
30123012

3013+
advisory_content_hash = models.CharField(
3014+
max_length=64,
3015+
blank=True,
3016+
null=True,
3017+
help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
3018+
)
3019+
30133020
@property
30143021
def risk_score(self):
30153022
"""
@@ -3078,35 +3085,6 @@ def get_aliases(self):
30783085
"""
30793086
return self.aliases.all()
30803087

3081-
def compute_advisory_content(self):
    """
    Return the SHA-256 hex digest of this advisory's normalized content.

    The digest is computed over a compact JSON document with sorted keys,
    built from the normalized summary, the impacted packages, the patches,
    the severities and the weakness CWE ids of this advisory.
    """

    def as_sorted_json(item):
        # Serialized form of a dict, used as a sort key.
        return json.dumps(item, sort_keys=True)

    summary = normalize_text(self.summary)

    impacted_packages = [impact.to_dict() for impact in self.impacted_packages.all()]
    impacted_packages.sort(key=as_sorted_json)

    patches = [patch.to_patch_data().to_dict() for patch in self.patches.all()]
    patches.sort(key=as_sorted_json)

    severities = [
        severity.to_vulnerability_severity_data().to_dict()
        for severity in self.severities.all()
    ]
    # Severities are ordered by their scoring system and value only.
    severities.sort(key=lambda data: (data.get("system"), data.get("value")))

    weaknesses = normalize_list([weakness.cwe_id for weakness in self.weaknesses.all()])

    payload = json.dumps(
        {
            "summary": summary,
            "impacted_packages": impacted_packages,
            "patches": patches,
            "severities": severities,
            "weaknesses": weaknesses,
        },
        separators=(",", ":"),
        sort_keys=True,
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
3109-
31103088
alias = get_aliases
31113089

31123090

vulnerabilities/pipelines/v2_importers/alpine_linux_importer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,8 @@ def load_advisories(
193193

194194
fixed_version_range = None
195195
try:
196-
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
196+
if version:
197+
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
197198
except InvalidVersion as e:
198199
logger(
199200
f"{version!r} is not a valid AlpineVersion {e!r}",

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,19 +330,20 @@ def to_version_ranges(self, versions_data, fixed_versions):
330330
"=": "=",
331331
}
332332
comparator = comparator_by_range_expression.get(range_expression)
333-
if comparator:
333+
if comparator and version_value and version_value not in self.ignorable_versions:
334334
constraints.append(
335335
VersionConstraint(comparator=comparator, version=SemverVersion(version_value))
336336
)
337337

338338
for fixed_version in fixed_versions:
339339
# The VersionConstraint method `invert()` inverts the fixed_version's comparator,
340340
# enabling inclusion of multiple fixed versions with the `affected_version_range` values.
341-
constraints.append(
342-
VersionConstraint(
343-
comparator="=",
344-
version=SemverVersion(fixed_version),
345-
).invert()
346-
)
341+
if fixed_version and fixed_version not in self.ignorable_versions:
342+
constraints.append(
343+
VersionConstraint(
344+
comparator="=",
345+
version=SemverVersion(fixed_version),
346+
).invert()
347+
)
347348

348349
return ApacheVersionRange(constraints=constraints)

vulnerabilities/pipelines/v2_importers/elixir_security_importer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
3535
spdx_license_expression = "CC0-1.0"
3636
license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
3737
repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
38+
run_once = True
3839

3940
precedence = 200
4041

vulnerabilities/pipelines/v2_importers/gitlab_importer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def parse_gitlab_advisory(
252252
original_advisory_text=json.dumps(gitlab_advisory, indent=2, ensure_ascii=False),
253253
)
254254
affected_version_range = None
255+
fixed_version_range = None
255256
fixed_versions = gitlab_advisory.get("fixed_versions") or []
256257
affected_range = gitlab_advisory.get("affected_range")
257258
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
@@ -285,7 +286,8 @@ def parse_gitlab_advisory(
285286
if affected_version_range:
286287
vrc = affected_version_range.__class__
287288

288-
fixed_version_range = vrc.from_versions(parsed_fixed_versions)
289+
if parsed_fixed_versions:
290+
fixed_version_range = vrc.from_versions(parsed_fixed_versions)
289291
if not fixed_version_range and not affected_version_range:
290292
return
291293

vulnerabilities/pipelines/v2_importers/ruby_importer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,9 @@ def get_affected_packages(record, purl):
162162
affected_packages = []
163163
for unaffected_version in record.get("unaffected_versions", []):
164164
try:
165-
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
165+
if unaffected_version:
166+
unaffected_version = unaffected_version.strip()
167+
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
166168
validate_comparators(affected_version_range.constraints)
167169
affected_packages.append(
168170
AffectedPackageV2(
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
11+
from aboutcode.pipeline import LoopProgress
12+
13+
from vulnerabilities.models import AdvisoryV2
14+
from vulnerabilities.pipelines import VulnerableCodePipeline
15+
from vulnerabilities.utils import compute_advisory_content
16+
17+
18+
class ComputeAdvisoryContentHash(VulnerableCodePipeline):
    """Pipeline that fills in ``advisory_content_hash`` for AdvisoryV2 rows that have none."""

    pipeline_id = "compute_advisory_content_hash_v2"

    @classmethod
    def steps(cls):
        """Return the single step run by this pipeline."""
        return (cls.compute_advisory_content_hash,)

    def compute_advisory_content_hash(self):
        """
        Compute and save the content hash of every AdvisoryV2 whose
        ``advisory_content_hash`` is NULL, writing the results in batches.
        """
        chunk = 5000
        updated_fields = ["advisory_content_hash"]

        missing_hash = AdvisoryV2.objects.filter(advisory_content_hash__isnull=True)
        tracker = LoopProgress(
            total_iterations=missing_hash.count(),
            logger=self.log,
            progress_step=1,
        )

        pending = []
        for record in tracker.iter(missing_hash.iterator(chunk_size=chunk)):
            record.advisory_content_hash = compute_advisory_content(record)
            pending.append(record)

            if len(pending) < chunk:
                continue

            # A full batch has accumulated: write it out and start over.
            AdvisoryV2.objects.bulk_update(pending, updated_fields, batch_size=chunk)
            pending.clear()

        # Write whatever is left over from the last, partial batch.
        if pending:
            AdvisoryV2.objects.bulk_update(pending, updated_fields, batch_size=chunk)

        self.log("Finished computing advisory_content_hash")

vulnerabilities/pipes/advisory.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from vulnerabilities.models import VulnerabilitySeverity
4949
from vulnerabilities.models import Weakness
5050
from vulnerabilities.pipes.univers_utils import get_exact_purls_v2
51+
from vulnerabilities.utils import compute_advisory_content
5152

5253

5354
def get_or_create_aliases(aliases: List) -> QuerySet:
@@ -301,6 +302,7 @@ def insert_advisory_v2(
301302
advisory_obj = None
302303
created = False
303304
content_id = compute_content_id_v2(advisory_data=advisory)
305+
advisory_content_hash = compute_advisory_content(advisory_data=advisory)
304306
try:
305307
default_data = {
306308
"datasource_id": pipeline_id,
@@ -311,6 +313,7 @@ def insert_advisory_v2(
311313
"original_advisory_text": advisory.original_advisory_text,
312314
"url": advisory.url,
313315
"precedence": precedence,
316+
"advisory_content_hash": advisory_content_hash,
314317
}
315318

316319
advisory_obj, created = AdvisoryV2.objects.get_or_create(

0 commit comments

Comments
 (0)