Skip to content

Commit a67ffd2

Browse files
committed
Move export serializers to the pipes module
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 5fc2008 commit a67ffd2

File tree

2 files changed

+156
-145
lines changed

2 files changed

+156
-145
lines changed

vulnerabilities/pipelines/exporters/federate_vulnerabilities.py

Lines changed: 7 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,13 @@
1414
from operator import attrgetter
1515
from pathlib import Path
1616

17-
import saneyaml
1817
from aboutcode.pipeline import LoopProgress
1918
from django.conf import settings
20-
from django.db.models import Prefetch
2119
from django.utils import timezone
2220

2321
from aboutcode.federated import DataFederation
24-
from vulnerabilities.models import AdvisoryV2
25-
from vulnerabilities.models import ImpactedPackage
26-
from vulnerabilities.models import ImpactedPackageAffecting
27-
from vulnerabilities.models import ImpactedPackageFixedBy
28-
from vulnerabilities.models import PackageV2
2922
from vulnerabilities.pipelines import VulnerableCodePipeline
23+
from vulnerabilities.pipes import export
3024
from vulnerabilities.pipes import federatedcode
3125
from vulnerabilities.utils import load_json
3226

@@ -99,7 +93,7 @@ def publish_package_related_advisories(self):
9993
chunk_size = 500
10094
files_to_commit = set()
10195

102-
packages_count, package_qs = package_prefetched_qs(self.checkpoint)
96+
packages_count, package_qs = export.package_prefetched_qs(self.checkpoint)
10397
grouped_packages = itertools.groupby(
10498
package_qs.iterator(chunk_size=chunk_size),
10599
key=attrgetter("type", "namespace", "name", "version"),
@@ -112,11 +106,11 @@ def publish_package_related_advisories(self):
112106
logger=self.log,
113107
)
114108
for _, packages in progress.iter(grouped_packages):
115-
purl, package_vulnerabilities = get_package_related_advisory(packages)
109+
purl, package_vulnerabilities = export.get_package_related_advisory(packages)
116110
package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl)
117111
package_vulnerability_path = f"packages/{package_repo}/{datafile_path}"
118112

119-
write_file(
113+
export.write_file(
120114
repo_path=self.repo_path,
121115
file_path=package_vulnerability_path,
122116
data=package_vulnerabilities,
@@ -156,7 +150,7 @@ def publish_advisories(self):
156150
batch_size = 4000
157151
chunk_size = 1000
158152
files_to_commit = set()
159-
advisory_qs = advisory_prefetched_qs(self.checkpoint)
153+
advisory_qs = export.advisory_prefetched_qs(self.checkpoint)
160154
advisory_count = advisory_qs.count()
161155

162156
self.log(f"Exporting {advisory_count} advisory.")
@@ -166,9 +160,9 @@ def publish_advisories(self):
166160
logger=self.log,
167161
)
168162
for advisory in progress.iter(advisory_qs.iterator(chunk_size=chunk_size)):
169-
advisory_data = serialize_advisory(advisory)
163+
advisory_data = export.serialize_advisory(advisory)
170164
adv_file = f"advisories/{advisory.avid}.yml"
171-
write_file(
165+
export.write_file(
172166
repo_path=self.repo_path,
173167
file_path=adv_file,
174168
data=advisory_data,
@@ -232,135 +226,3 @@ def commit_message(
232226
commit_count=commit_count,
233227
total_commit_count=total_commit_count,
234228
)
235-
236-
237-
def package_prefetched_qs(checkpoint):
238-
count = None
239-
qs = (
240-
PackageV2.objects.order_by("type", "namespace", "name", "version")
241-
.only("package_url", "type", "namespace", "name", "version")
242-
.prefetch_related(
243-
Prefetch(
244-
"affected_in_impacts",
245-
queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
246-
Prefetch(
247-
"advisory",
248-
queryset=AdvisoryV2.objects.only("avid"),
249-
)
250-
),
251-
),
252-
Prefetch(
253-
"fixed_in_impacts",
254-
queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
255-
Prefetch(
256-
"advisory",
257-
queryset=AdvisoryV2.objects.only("avid"),
258-
)
259-
),
260-
),
261-
)
262-
)
263-
264-
if checkpoint:
265-
affected_package_ids_qs = (
266-
ImpactedPackageAffecting.objects.filter(created_at__gte=checkpoint)
267-
.values_list("package_id", flat=True)
268-
.distinct()
269-
)
270-
fixing_package_ids_qs = (
271-
ImpactedPackageFixedBy.objects.filter(created_at__gte=checkpoint)
272-
.values_list("package_id", flat=True)
273-
.distinct()
274-
)
275-
276-
updated_packages = affected_package_ids_qs.union(fixing_package_ids_qs)
277-
count = updated_packages.count()
278-
qs = qs.filter(id__in=updated_packages)
279-
280-
count = qs.count() if not count else count
281-
282-
return count, qs
283-
284-
285-
def get_package_related_advisory(packages):
286-
package_vulnerabilities = []
287-
for package in packages:
288-
affected_by_vulnerabilities = [
289-
impact.advisory.avid for impact in package.affected_in_impacts.all()
290-
]
291-
fixing_vulnerabilities = [impact.advisory.avid for impact in package.fixed_in_impacts.all()]
292-
293-
package_vulnerability = {
294-
"purl": package.package_url,
295-
"affected_by_advisories": sorted(affected_by_vulnerabilities),
296-
"fixing_advisories": sorted(fixing_vulnerabilities),
297-
}
298-
package_vulnerabilities.append(package_vulnerability)
299-
300-
return package.package_url, package_vulnerabilities
301-
302-
303-
def advisory_prefetched_qs(checkpoint):
304-
qs = AdvisoryV2.objects.order_by("date_collected").prefetch_related(
305-
"impacted_packages",
306-
"aliases",
307-
"references",
308-
"severities",
309-
"weaknesses",
310-
)
311-
312-
return qs.filter(date_collected__gte=checkpoint) if checkpoint else qs
313-
314-
315-
def serialize_severity(sev):
316-
return {
317-
"score": sev.value,
318-
"scoring_system": sev.scoring_system,
319-
"scoring_elements": sev.scoring_elements,
320-
"published_at": str(sev.published_at),
321-
"url": sev.url,
322-
}
323-
324-
325-
def serialize_references(reference):
326-
return {
327-
"url": reference.url,
328-
"reference_type": reference.reference_type,
329-
"reference_id": reference.reference_id,
330-
}
331-
332-
333-
def serialize_advisory(advisory):
334-
"""Return a plain data mapping serialized from advisory object."""
335-
aliases = sorted([a.alias for a in advisory.aliases.all()])
336-
severities = [serialize_severity(sev) for sev in advisory.severities.all()]
337-
weaknesses = [wkns.cwe for wkns in advisory.weaknesses.all()]
338-
references = [serialize_references(ref) for ref in advisory.references.all()]
339-
impacts = [
340-
{
341-
"purl": impact.base_purl,
342-
"affected_versions": impact.affecting_vers,
343-
"fixed_versions": impact.fixed_vers,
344-
}
345-
for impact in advisory.impacted_packages.all()
346-
]
347-
348-
return {
349-
"advisory_id": advisory.advisory_id,
350-
"datasource_id": advisory.avid,
351-
"datasource_url": advisory.url,
352-
"aliases": aliases,
353-
"summary": advisory.summary,
354-
"impacted_packages": impacts,
355-
"severities": severities,
356-
"weaknesses": weaknesses,
357-
"references": references,
358-
}
359-
360-
361-
def write_file(repo_path, file_path, data):
362-
"""Write ``data`` as YAML to ``repo_path``."""
363-
write_to = repo_path / file_path
364-
write_to.parent.mkdir(parents=True, exist_ok=True)
365-
with open(write_to, encoding="utf-8", mode="w") as f:
366-
f.write(saneyaml.dump(data))

vulnerabilities/pipes/export.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Copyright (c) nexB Inc. and others. All rights reserved.
2+
# VulnerableCode is a trademark of nexB Inc.
3+
# SPDX-License-Identifier: Apache-2.0
4+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
5+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
6+
# See https://aboutcode.org for more information about nexB OSS projects.
7+
#
8+
9+
import saneyaml
10+
from django.db.models import Prefetch
11+
12+
from vulnerabilities.models import AdvisoryV2
13+
from vulnerabilities.models import ImpactedPackage
14+
from vulnerabilities.models import ImpactedPackageAffecting
15+
from vulnerabilities.models import ImpactedPackageFixedBy
16+
from vulnerabilities.models import PackageCommitPatch
17+
from vulnerabilities.models import PackageV2
18+
19+
20+
def _impacts_prefetch(lookup):
    """Return a Prefetch for ``lookup`` that loads only each impact's advisory avid."""
    return Prefetch(
        lookup,
        queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
            Prefetch(
                "advisory",
                queryset=AdvisoryV2.objects.only("avid"),
            )
        ),
    )


def package_prefetched_qs(checkpoint):
    """
    Return a 2-tuple of (count, queryset) of PackageV2 objects to export,
    with their affecting and fixing impacts (and each impact's advisory avid)
    prefetched.

    If ``checkpoint`` is truthy, restrict the queryset to packages whose
    affecting or fixing relations were created at or after the checkpoint;
    otherwise include all packages.
    """
    count = None
    qs = (
        PackageV2.objects.order_by("type", "namespace", "name", "version")
        .only("package_url", "type", "namespace", "name", "version")
        .prefetch_related(
            _impacts_prefetch("affected_in_impacts"),
            _impacts_prefetch("fixed_in_impacts"),
        )
    )

    if checkpoint:
        # Incremental export: only packages touched since the checkpoint.
        affected_package_ids_qs = (
            ImpactedPackageAffecting.objects.filter(created_at__gte=checkpoint)
            .values_list("package_id", flat=True)
            .distinct()
        )
        fixing_package_ids_qs = (
            ImpactedPackageFixedBy.objects.filter(created_at__gte=checkpoint)
            .values_list("package_id", flat=True)
            .distinct()
        )

        updated_packages = affected_package_ids_qs.union(fixing_package_ids_qs)
        count = updated_packages.count()
        qs = qs.filter(id__in=updated_packages)

    # Use ``is None`` rather than truthiness: an incremental count of 0 is a
    # valid answer and must not trigger a redundant COUNT query on ``qs``.
    if count is None:
        count = qs.count()

    return count, qs
66+
67+
68+
def get_package_related_advisory(packages):
    """
    Return a 2-tuple of (purl, package_vulnerabilities) for ``packages``, an
    iterable of package objects expected to share one purl (e.g. one group
    produced by grouping on type/namespace/name/version).

    ``package_vulnerabilities`` is a list of mappings, one per package, each
    with the package purl and its sorted affecting/fixing advisory ids.

    Return (None, []) when ``packages`` is empty — the previous version
    referenced the loop variable after the loop and raised UnboundLocalError
    in that case.
    """
    purl = None
    package_vulnerabilities = []
    for package in packages:
        # All packages in a group share the purl; the last one seen wins.
        purl = package.package_url
        affected_by_advisories = sorted(
            impact.advisory.avid for impact in package.affected_in_impacts.all()
        )
        fixing_advisories = sorted(
            impact.advisory.avid for impact in package.fixed_in_impacts.all()
        )
        package_vulnerabilities.append(
            {
                "purl": purl,
                "affected_by_advisories": affected_by_advisories,
                "fixing_advisories": fixing_advisories,
            }
        )

    return purl, package_vulnerabilities
84+
85+
86+
def advisory_prefetched_qs(checkpoint):
    """
    Return an AdvisoryV2 queryset ordered by collection date, with the
    related records needed for serialization prefetched.

    If ``checkpoint`` is truthy, limit the queryset to advisories collected
    at or after the checkpoint.
    """
    related = (
        "impacted_packages",
        "aliases",
        "references",
        "severities",
        "weaknesses",
    )
    qs = AdvisoryV2.objects.order_by("date_collected").prefetch_related(*related)
    if checkpoint:
        qs = qs.filter(date_collected__gte=checkpoint)
    return qs
96+
97+
98+
def serialize_severity(sev):
    """Return a plain data mapping for one severity record."""
    published = str(sev.published_at)
    return {
        "score": sev.value,
        "scoring_system": sev.scoring_system,
        "scoring_elements": sev.scoring_elements,
        "published_at": published,
        "url": sev.url,
    }
106+
107+
108+
def serialize_references(reference):
    """Return a plain data mapping for one reference record."""
    # Key order matters for stable YAML output; keep url first.
    fields = ("url", "reference_type", "reference_id")
    return {name: getattr(reference, name) for name in fields}
114+
115+
116+
def serialize_advisory(advisory):
    """Return a plain data mapping serialized from advisory object."""
    impacts = []
    for impact in advisory.impacted_packages.all():
        impacts.append(
            {
                "purl": impact.base_purl,
                "affected_versions": impact.affecting_vers,
                "fixed_versions": impact.fixed_vers,
            }
        )

    return {
        "advisory_id": advisory.advisory_id,
        # NOTE(review): avid feeds "datasource_id" and advisory.url feeds
        # "datasource_url" — confirm this field naming is intentional.
        "datasource_id": advisory.avid,
        "datasource_url": advisory.url,
        "aliases": sorted(alias.alias for alias in advisory.aliases.all()),
        "summary": advisory.summary,
        "impacted_packages": impacts,
        "severities": [serialize_severity(sev) for sev in advisory.severities.all()],
        "weaknesses": [weakness.cwe for weakness in advisory.weaknesses.all()],
        "references": [serialize_references(ref) for ref in advisory.references.all()],
    }
142+
143+
144+
def write_file(repo_path, file_path, data):
    """Write ``data`` as YAML to ``file_path`` under ``repo_path``."""
    target = repo_path / file_path
    # Create intermediate directories on demand.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(saneyaml.dump(data), encoding="utf-8")

0 commit comments

Comments
 (0)