Skip to content

Commit 277117d

Browse files
committed
Make improver queries correct and faster
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 928d3fb commit 277117d

File tree

10 files changed

+131
-65
lines changed

10 files changed

+131
-65
lines changed
Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
# Generated by Django 5.2.11 on 2026-03-17 09:07
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("vulnerabilities", "0116_advisoryv2_advisory_content_hash"),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name="advisoryv2",
15+
name="risk_score",
16+
field=models.DecimalField(
17+
blank=True,
18+
decimal_places=1,
19+
help_text="Risk expressed as a number ranging from 0 to 10. Risk is calculated from weighted severity and exploitability values. It is the maximum value of (the weighted severity multiplied by its exploitability) or 10. Risk = min(weighted severity * exploitability, 10)",
20+
max_digits=3,
21+
null=True,
22+
),
23+
),
24+
]

vulnerabilities/models.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3047,17 +3047,13 @@ class AdvisoryV2(models.Model):
30473047
help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
30483048
)
30493049

3050-
@property
3051-
def risk_score(self):
3052-
"""
3053-
Risk expressed as a number ranging from 0 to 10.
3054-
Risk is calculated from weighted severity and exploitability values.
3055-
It is the maximum value of (the weighted severity multiplied by its exploitability) or 10
3056-
Risk = min(weighted severity * exploitability, 10)
3057-
"""
3058-
if self.exploitability and self.weighted_severity:
3059-
risk_score = min(float(self.exploitability * self.weighted_severity), 10.0)
3060-
return round(risk_score, 1)
3050+
risk_score = models.DecimalField(
3051+
null=True,
3052+
blank=True,
3053+
max_digits=3,
3054+
decimal_places=1,
3055+
help_text="Risk expressed as a number ranging from 0 to 10. Risk is calculated from weighted severity and exploitability values. It is the maximum value of (the weighted severity multiplied by its exploitability) or 10. Risk = min(weighted severity * exploitability, 10)",
3056+
)
30613057

30623058
objects = AdvisoryV2QuerySet.as_manager()
30633059

vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@ def steps(cls):
3636

3737
def collect_ssvc_data(self):
3838
vulnrichment_advisories = (
39-
AdvisoryV2.objects.filter(
39+
AdvisoryV2.objects.latest_per_avid()
40+
.filter(
4041
severities__scoring_system=SCORING_SYSTEMS["ssvc"],
4142
)
4243
.distinct()

vulnerabilities/pipelines/v2_improvers/compute_advisory_content_hash.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ def steps(cls):
2727
def compute_advisory_content_hash(self):
2828
"""Compute Advisory Content Hash for Advisory."""
2929

30-
advisories = AdvisoryV2.objects.filter(advisory_content_hash__isnull=True)
30+
advisories = AdvisoryV2.objects.latest_per_avid().filter(advisory_content_hash__isnull=True)
3131

3232
advisories_count = advisories.count()
3333

vulnerabilities/pipelines/v2_improvers/compute_package_risk.py

Lines changed: 68 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,9 @@
88
#
99
from aboutcode.pipeline import LoopProgress
1010
from django.db.models import Prefetch
11-
from django.db.models import Q
1211

12+
from vulnerabilities.models import AdvisoryExploit
13+
from vulnerabilities.models import AdvisoryReference
1314
from vulnerabilities.models import AdvisorySeverity
1415
from vulnerabilities.models import AdvisoryV2
1516
from vulnerabilities.models import PackageV2
@@ -36,61 +37,92 @@ def steps(cls):
3637
)
3738

3839
def compute_and_store_vulnerability_risk_score(self):
40+
3941
affected_advisories = (
40-
AdvisoryV2.objects.filter(impacted_packages__affecting_packages__isnull=False)
42+
AdvisoryV2.objects.latest_per_avid()
43+
.filter(impacted_packages__affecting_packages__isnull=False)
44+
.only("id")
4145
.prefetch_related(
42-
"references",
43-
"severities",
44-
"exploits",
46+
Prefetch(
47+
"references", queryset=AdvisoryReference.objects.only("id", "reference_type")
48+
),
49+
Prefetch(
50+
"severities",
51+
queryset=AdvisorySeverity.objects.only("id", "value", "url", "scoring_system"),
52+
),
53+
Prefetch("exploits", queryset=AdvisoryExploit.objects.only("id")),
4554
Prefetch(
4655
"related_advisory_severities",
47-
queryset=AdvisoryV2.objects.prefetch_related("severities"),
56+
queryset=AdvisoryV2.objects.only("id").prefetch_related(
57+
Prefetch(
58+
"severities",
59+
queryset=AdvisorySeverity.objects.only(
60+
"id", "value", "url", "scoring_system"
61+
),
62+
)
63+
),
4864
),
4965
)
5066
.distinct()
5167
)
5268

69+
estimated_vulnerability_count = affected_advisories.count()
70+
5371
self.log(
54-
f"Calculating risk for {affected_advisories.count():,d} advisory with a affected packages records"
72+
f"Calculating risk for {estimated_vulnerability_count:,d} advisory with a affected packages records"
5573
)
5674

57-
progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log)
75+
progress = LoopProgress(
76+
logger=self.log, total_iterations=estimated_vulnerability_count, progress_step=5
77+
)
5878

5979
updatables = []
6080
updated_vulnerability_count = 0
6181
batch_size = 5000
6282

6383
for advisory in progress.iter(affected_advisories.iterator(chunk_size=batch_size)):
84+
6485
references = advisory.references.all()
6586
exploits = advisory.exploits.all()
6687

67-
severities = AdvisorySeverity.objects.filter(
68-
Q(advisories=advisory) | Q(advisories__related_to_advisory_severities=advisory)
69-
).distinct()
88+
severities = list(advisory.severities.all())
89+
90+
for rel in advisory.related_advisory_severities.all():
91+
severities.extend(rel.severities.all())
7092

71-
weighted_severity, exploitability = compute_vulnerability_risk_factors(
72-
references=references,
73-
severities=severities,
74-
exploits=exploits,
75-
)
76-
advisory.weighted_severity = weighted_severity
77-
advisory.exploitability = exploitability
78-
updatables.append(advisory)
93+
94+
try:
95+
weighted_severity, exploitability = compute_vulnerability_risk_factors(
96+
references=references,
97+
severities=severities,
98+
exploits=exploits,
99+
)
100+
101+
advisory.weighted_severity = weighted_severity
102+
advisory.exploitability = exploitability
103+
if advisory.exploitability and advisory.weighted_severity:
104+
risk_score = min(float(advisory.exploitability * advisory.weighted_severity), 10.0)
105+
advisory.risk_score = round(risk_score, 1)
106+
updatables.append(advisory)
107+
except Exception as e:
108+
self.log(f"Error computing risk score for advisory {advisory.advisory_id}: {e}")
79109

80110
if len(updatables) >= batch_size:
81111
updated_vulnerability_count += bulk_update(
82112
model=AdvisoryV2,
83113
items=updatables,
84-
fields=["weighted_severity", "exploitability"],
114+
fields=["weighted_severity", "exploitability", "risk_score"],
85115
logger=self.log,
86116
)
87-
88-
updated_vulnerability_count += bulk_update(
89-
model=AdvisoryV2,
90-
items=updatables,
91-
fields=["weighted_severity", "exploitability"],
92-
logger=self.log,
93-
)
117+
updatables.clear()
118+
119+
if updatables:
120+
updated_vulnerability_count += bulk_update(
121+
model=AdvisoryV2,
122+
items=updatables,
123+
fields=["weighted_severity", "exploitability", "risk_score"],
124+
logger=self.log,
125+
)
94126

95127
self.log(
96128
f"Successfully added risk score for {updated_vulnerability_count:,d} vulnerability"
@@ -109,17 +141,19 @@ def compute_and_store_package_risk_score(self):
109141

110142
updatables = []
111143
updated_package_count = 0
112-
batch_size = 10000
144+
batch_size = 1000
113145

114146
for package in progress.iter(affected_packages.iterator(chunk_size=batch_size)):
115-
risk_score = compute_package_risk_v2(package)
116-
117-
if not risk_score:
147+
try:
148+
risk_score = compute_package_risk_v2(package)
149+
if not risk_score:
150+
continue
151+
package.risk_score = risk_score
152+
updatables.append(package)
153+
except Exception as e:
154+
self.log(f"Error computing risk score for package {package.purl}: {e}")
118155
continue
119156

120-
package.risk_score = risk_score
121-
updatables.append(package)
122-
123157
if len(updatables) >= batch_size:
124158
updated_package_count += bulk_update(
125159
model=PackageV2,

vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ def add_vulnerability_exploit(row, logger):
8989
for adv in alias.advisories.all():
9090
advisories.add(adv)
9191
else:
92-
advs = AdvisoryV2.objects.filter(advisory_id=raw_alias)
92+
advs = AdvisoryV2.objects.filter(advisory_id=raw_alias).latest_per_avid()
9393
for adv in advs:
9494
advisories.add(adv)
9595
except AdvisoryAlias.DoesNotExist:

vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ def add_vulnerability_exploit(kev_vul, logger):
7878
for adv in alias.advisories.all():
7979
advisories.add(adv)
8080
else:
81-
advs = AdvisoryV2.objects.filter(advisory_id=cve_id)
81+
advs = AdvisoryV2.objects.filter(advisory_id=cve_id).latest_per_avid()
8282
for adv in advs:
8383
advisories.add(adv)
8484
except AdvisoryAlias.DoesNotExist:

vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def add_advisory_exploit(record, logger):
8383
for adv in alias.advisories.all():
8484
advisories.add(adv)
8585
else:
86-
advs = AdvisoryV2.objects.filter(advisory_id=ref)
86+
advs = AdvisoryV2.objects.filter(advisory_id=ref).latest_per_avid()
8787
for adv in advs:
8888
advisories.add(adv)
8989
except AdvisoryAlias.DoesNotExist:

vulnerabilities/pipelines/v2_improvers/relate_severities.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -61,23 +61,30 @@ def relate_severities(self):
6161
severity_score_advisories = (
6262
AdvisoryV2.objects.filter(datasource_id__in=self.pipelines)
6363
.filter(severities__scoring_system__in=self.SUPPORTED_SYSTEMS)
64-
.distinct()
6564
.latest_per_avid()
65+
.distinct()
6666
)
6767

6868
total = severity_score_advisories.count()
6969
self.log(f"Processing {total:,d} advisories records")
7070

7171
advisory_id_map = {}
7272

73-
qs = AdvisoryV2.objects.filter(
74-
advisory_id__in=severity_score_advisories.values("advisory_id")
75-
).values("id", "advisory_id")
76-
77-
alias_qs = AdvisoryV2.objects.filter(
78-
aliases__alias__in=severity_score_advisories.values("advisory_id")
79-
).values("id", "aliases__alias")
73+
qs = (
74+
AdvisoryV2.objects.filter(
75+
advisory_id__in=severity_score_advisories.values("advisory_id")
76+
)
77+
.latest_per_avid()
78+
.values("id", "advisory_id")
79+
)
8080

81+
alias_qs = (
82+
AdvisoryV2.objects.filter(
83+
aliases__alias__in=severity_score_advisories.values("advisory_id")
84+
)
85+
.latest_per_avid()
86+
.values("id", "aliases__alias")
87+
)
8188
for row in qs:
8289
advisory_id_map.setdefault(row["advisory_id"], set()).add(row["id"])
8390

vulnerabilities/risk.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,11 @@
88
#
99
from urllib.parse import urlparse
1010

11-
from vulnerabilities.models import VulnerabilityReference
11+
from vulnerabilities.models import AdvisoryV2, VulnerabilityReference
1212
from vulnerabilities.severity_systems import EPSS
1313
from vulnerabilities.weight_config import WEIGHT_CONFIG
14+
from django.db.models import Max
15+
1416

1517
DEFAULT_WEIGHT = 5
1618

@@ -123,12 +125,14 @@ def compute_package_risk_v2(package):
123125
Calculate the risk for a package by iterating over all vulnerabilities that affects this package
124126
and determining the associated risk.
125127
"""
126-
result = []
127-
for impact in package.affected_in_impacts.all():
128-
if risk := impact.advisory.risk_score:
129-
result.append(float(risk))
130128

131-
if not result:
129+
max_risk = (
130+
AdvisoryV2.objects
131+
.latest_affecting_advisories_for_purl(package.purl)
132+
.aggregate(max_risk=Max("risk_score"))
133+
)["max_risk"]
134+
135+
if max_risk is None:
132136
return
133137

134-
return round(max(result), 1)
138+
return round(float(max_risk), 1)

0 commit comments

Comments
 (0)