Skip to content

Commit 2288fce

Browse files
committed
Export package and advisory in two separate steps
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 7116cb0 commit 2288fce

File tree

2 files changed

+121
-56
lines changed

2 files changed

+121
-56
lines changed

vulnerabilities/pipelines/exporters/federate_vulnerabilities.py

Lines changed: 119 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
import saneyaml
1616
from aboutcode.pipeline import LoopProgress
1717
from django.conf import settings
18+
from django.db.models import Prefetch
1819

1920
from aboutcode.federated import DataFederation
21+
from vulnerabilities.models import AdvisoryV2
22+
from vulnerabilities.models import ImpactedPackage
2023
from vulnerabilities.models import PackageV2
2124
from vulnerabilities.pipelines import VulnerableCodePipeline
2225
from vulnerabilities.pipes import federatedcode
@@ -25,7 +28,7 @@
2528
class FederatePackageVulnerabilities(VulnerableCodePipeline):
2629
"""Export package vulnerabilities and advisory to FederatedCode."""
2730

28-
pipeline_id = "federate_package_vulnerabilities_v2"
31+
pipeline_id = "federate_vulnerabilities_v2"
2932

3033
@classmethod
3134
def steps(cls):
@@ -34,7 +37,8 @@ def steps(cls):
3437
cls.create_federatedcode_working_dir,
3538
cls.fetch_federation_config,
3639
cls.clone_vulnerabilities_repo,
37-
cls.publish_vulnerabilities,
40+
cls.publish_package_vulnerabilities,
41+
cls.publish_advisories,
3842
cls.delete_working_dir,
3943
)
4044

@@ -61,13 +65,13 @@ def clone_vulnerabilities_repo(self):
6165
logger=self.log,
6266
)
6367

64-
def publish_vulnerabilities(self):
65-
"""Publish package vulnerabilities and advisory to FederatedCode"""
68+
def publish_package_vulnerabilities(self):
69+
"""Publish package vulnerabilities to FederatedCode"""
6670
repo_path = Path(self.repo.working_dir)
6771
commit_count = 1
6872
batch_size = 2000
73+
chunk_size = 1000
6974
files_to_commit = set()
70-
exported_avids = set()
7175

7276
distinct_packages_count = (
7377
PackageV2.objects.values("type", "namespace", "name")
@@ -76,44 +80,19 @@ def publish_vulnerabilities(self):
7680
)
7781
package_qs = package_prefetched_qs()
7882
grouped_packages = itertools.groupby(
79-
package_qs.iterator(chunk_size=2000),
83+
package_qs.iterator(chunk_size=chunk_size),
8084
key=attrgetter("type", "namespace", "name"),
8185
)
8286

8387
self.log(f"Exporting vulnerabilities for {distinct_packages_count} packages.")
8488
progress = LoopProgress(
8589
total_iterations=distinct_packages_count,
86-
progress_step=1,
90+
progress_step=5,
8791
logger=self.log,
8892
)
8993
for _, packages in progress.iter(grouped_packages):
90-
package_urls = []
91-
package_vulnerabilities = []
92-
for package in packages:
93-
purl = package.package_url
94-
package_urls.append(purl)
95-
package_vulnerabilities.append(serialize_package_vulnerability(package))
96-
97-
impacts = itertools.chain(
98-
package.affected_in_impacts.all(),
99-
package.fixed_in_impacts.all(),
100-
)
101-
for impact in impacts:
102-
adv = impact.advisory
103-
avid = adv.avid
104-
if avid in exported_avids:
105-
continue
106-
107-
exported_avids.add(avid)
108-
advisory = serialize_advisory(adv)
109-
adv_file = f"vulnerabilities/{avid}.yml"
110-
write_file(
111-
repo_path=repo_path,
112-
file_path=adv_file,
113-
data=advisory,
114-
)
115-
files_to_commit.add(adv_file)
116-
94+
package_urls, package_vulnerabilities = get_package_vulnerabilities(packages)
95+
purl = package_urls[0]
11796
package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl=purl)
11897
package_vulnerability_path = datafile_path.replace("/purls.yml", "/vulnerabilities.yml")
11998
package_vulnerability_path = f"packages/{package_repo}/{package_vulnerability_path}"
@@ -135,7 +114,7 @@ def publish_vulnerabilities(self):
135114

136115
if len(files_to_commit) > batch_size:
137116
if federatedcode.commit_and_push_changes(
138-
commit_message=self.commit_message(commit_count),
117+
commit_message=self.commit_message("package vulnerabilities", commit_count),
139118
repo=self.repo,
140119
files_to_commit=files_to_commit,
141120
logger=self.log,
@@ -145,15 +124,67 @@ def publish_vulnerabilities(self):
145124

146125
if files_to_commit:
147126
federatedcode.commit_and_push_changes(
148-
commit_message=self.commit_message(commit_count, commit_count),
127+
commit_message=self.commit_message(
128+
"package vulnerabilities",
129+
commit_count,
130+
commit_count,
131+
),
149132
repo=self.repo,
150133
files_to_commit=files_to_commit,
151134
logger=self.log,
152135
)
153136

154-
self.log(
155-
f"Federated {distinct_packages_count} package and {len(exported_avids)} vulnerabilities."
137+
self.log(f"Federated {distinct_packages_count} package vulnerabilities.")
138+
139+
def publish_advisories(self):
140+
"""
Publish every advisory to FederatedCode.

Each advisory returned by ``advisory_prefetched_qs()`` is serialized and
written to ``vulnerabilities/<avid>.yml`` inside the working directory of
``self.repo``; the written paths are committed and pushed in batches.
"""
141+
repo_path = Path(self.repo.working_dir)
142+
commit_count = 1
143+
# Number of pending files that must be exceeded before a commit is pushed.
batch_size = 2000
144+
# Chunk size handed to ``QuerySet.iterator()`` below.
chunk_size = 1000
145+
# A set, so a path written twice is only tracked once.
files_to_commit = set()
146+
advisory_qs = advisory_prefetched_qs()
147+
advisory_count = advisory_qs.count()
148+
149+
self.log(f"Exporting vulnerabilities for {advisory_count} advisory.")
150+
progress = LoopProgress(
151+
total_iterations=advisory_count,
152+
progress_step=5,
153+
logger=self.log,
156154
)
155+
# Iterate the queryset in chunks instead of evaluating it in one go.
for advisory in progress.iter(advisory_qs.iterator(chunk_size=chunk_size)):
156+
advisory_data = serialize_advisory(advisory)
157+
adv_file = f"vulnerabilities/{advisory.avid}.yml"
158+
write_file(
159+
repo_path=repo_path,
160+
file_path=adv_file,
161+
data=advisory_data,
162+
)
163+
files_to_commit.add(adv_file)
164+
165+
# Push an intermediate commit once more than ``batch_size`` files are pending.
if len(files_to_commit) > batch_size:
166+
# ``commit_count`` only advances when the push call returns a truthy value.
if federatedcode.commit_and_push_changes(
167+
commit_message=self.commit_message("advisories", commit_count),
168+
repo=self.repo,
169+
files_to_commit=files_to_commit,
170+
logger=self.log,
171+
):
172+
commit_count += 1
173+
# NOTE(review): indentation is not visible in this view -- confirm whether the
# pending set is cleared after every push attempt or only after a successful one.
files_to_commit.clear()
174+
175+
# Push whatever is still pending; the current count is passed as both the commit
# number and the total, replacing the "many" placeholder default.
if files_to_commit:
176+
federatedcode.commit_and_push_changes(
177+
commit_message=self.commit_message(
178+
"advisories",
179+
commit_count,
180+
commit_count,
181+
),
182+
repo=self.repo,
183+
files_to_commit=files_to_commit,
184+
logger=self.log,
185+
)
186+
187+
self.log(f"Successfully federated {advisory_count} vulnerabilities.")
157188

158189
def delete_working_dir(self):
159190
"""Remove temporary working dir."""
@@ -163,33 +194,67 @@ def delete_working_dir(self):
163194
def on_failure(self):
    """Clean up by delegating to ``delete_working_dir``."""
    # NOTE(review): presumably invoked by the pipeline runner when a step fails -- confirm.
    self.delete_working_dir()
165196

166-
def commit_message(self, item_type, commit_count, total_commit_count="many"):
    """
    Return the commit message for pushing one batch of ``item_type`` data.

    ``commit_count`` is the number of the current commit and
    ``total_commit_count`` the total number of commits; the latter defaults
    to the placeholder "many". All three values are forwarded unchanged to
    ``federatedcode.commit_message``.
    """
    message_fields = {
        "item_type": item_type,
        "commit_count": commit_count,
        "total_commit_count": total_commit_count,
    }
    return federatedcode.commit_message(**message_fields)
172209

173210

174211
def package_prefetched_qs():
    """
    Return a PackageV2 queryset ordered by type, namespace, name and version.

    Only the id, package_url, type, namespace, name and version columns are
    loaded. The ``affected_in_impacts`` and ``fixed_in_impacts`` relations are
    prefetched with impacts limited to ``id`` and ``advisory_id``, each
    carrying an advisory limited to ``id`` and ``avid``.
    """

    def impacts_prefetch(relation):
        # Build fresh querysets per relation so the two prefetches share no state.
        advisory = Prefetch("advisory", queryset=AdvisoryV2.objects.only("id", "avid"))
        impacts = ImpactedPackage.objects.only("id", "advisory_id").prefetch_related(advisory)
        return Prefetch(relation, queryset=impacts)

    packages = PackageV2.objects.order_by("type", "namespace", "name", "version")
    packages = packages.only("id", "package_url", "type", "namespace", "name", "version")
    return packages.prefetch_related(
        impacts_prefetch("affected_in_impacts"),
        impacts_prefetch("fixed_in_impacts"),
    )
191236

192237

238+
def advisory_prefetched_qs():
    """Return an AdvisoryV2 queryset with its related collections prefetched."""
    related_lookups = (
        "impacted_packages",
        "aliases",
        "references",
        "severities",
        "weaknesses",
    )
    return AdvisoryV2.objects.prefetch_related(*related_lookups)
246+
247+
248+
def get_package_vulnerabilities(packages):
    """
    Return a ``(package_urls, package_vulnerabilities)`` pair of lists.

    ``packages`` is consumed in a single pass, so a one-shot iterator is
    fine. For each package its ``package_url`` and the result of
    ``serialize_package_vulnerability`` land at the same index of the two lists.
    """
    # Collect both values per package in one pass, then split into parallel lists.
    pairs = [
        (package.package_url, serialize_package_vulnerability(package))
        for package in packages
    ]
    package_urls = [purl for purl, _ in pairs]
    package_vulnerabilities = [vulnerability for _, vulnerability in pairs]
    return package_urls, package_vulnerabilities
256+
257+
193258
def serialize_package_vulnerability(package):
194259
affected_by_vulnerabilities = [
195260
impact.advisory.avid for impact in package.affected_in_impacts.all()

vulnerabilities/pipes/federatedcode.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def commit_changes(repo, files_to_commit, commit_message):
157157
)
158158

159159

160-
def commit_message(commit_count, total_commit_count):
160+
def commit_message(item_type, commit_count, total_commit_count):
161161
"""Commit message for pushing Package vulnerability."""
162162
from vulnerablecode import __version__ as VERSION
163163

@@ -167,7 +167,7 @@ def commit_message(commit_count, total_commit_count):
167167
tool_name = "pkg:github/aboutcode-org/vulnerablecode"
168168

169169
return f"""\
170-
Add new Package vulnerability ({commit_count}/{total_commit_count})
170+
Add new {item_type} ({commit_count}/{total_commit_count})
171171
172172
Tool: {tool_name}@v{VERSION}
173173

0 commit comments

Comments
 (0)