1515import saneyaml
1616from aboutcode .pipeline import LoopProgress
1717from django .conf import settings
18+ from django .db .models import Prefetch
1819
1920from aboutcode .federated import DataFederation
21+ from vulnerabilities .models import AdvisoryV2
22+ from vulnerabilities .models import ImpactedPackage
2023from vulnerabilities .models import PackageV2
2124from vulnerabilities .pipelines import VulnerableCodePipeline
2225from vulnerabilities .pipes import federatedcode
2528class FederatePackageVulnerabilities (VulnerableCodePipeline ):
2629 """Export package vulnerabilities and advisory to FederatedCode."""
2730
28- pipeline_id = "federate_package_vulnerabilities_v2 "
31+ pipeline_id = "federate_vulnerabilities_v2 "
2932
3033 @classmethod
3134 def steps (cls ):
@@ -34,7 +37,8 @@ def steps(cls):
3437 cls .create_federatedcode_working_dir ,
3538 cls .fetch_federation_config ,
3639 cls .clone_vulnerabilities_repo ,
37- cls .publish_vulnerabilities ,
40+ cls .publish_package_vulnerabilities ,
41+ cls .publish_advisories ,
3842 cls .delete_working_dir ,
3943 )
4044
@@ -61,13 +65,13 @@ def clone_vulnerabilities_repo(self):
6165 logger = self .log ,
6266 )
6367
64- def publish_vulnerabilities (self ):
65- """Publish package vulnerabilities and advisory to FederatedCode"""
68+ def publish_package_vulnerabilities (self ):
69+ """Publish package vulnerabilities to FederatedCode"""
6670 repo_path = Path (self .repo .working_dir )
6771 commit_count = 1
6872 batch_size = 2000
73+ chunk_size = 1000
6974 files_to_commit = set ()
70- exported_avids = set ()
7175
7276 distinct_packages_count = (
7377 PackageV2 .objects .values ("type" , "namespace" , "name" )
@@ -76,44 +80,19 @@ def publish_vulnerabilities(self):
7680 )
7781 package_qs = package_prefetched_qs ()
7882 grouped_packages = itertools .groupby (
79- package_qs .iterator (chunk_size = 2000 ),
83+ package_qs .iterator (chunk_size = chunk_size ),
8084 key = attrgetter ("type" , "namespace" , "name" ),
8185 )
8286
8387 self .log (f"Exporting vulnerabilities for { distinct_packages_count } packages." )
8488 progress = LoopProgress (
8589 total_iterations = distinct_packages_count ,
86- progress_step = 1 ,
90+ progress_step = 5 ,
8791 logger = self .log ,
8892 )
8993 for _ , packages in progress .iter (grouped_packages ):
90- package_urls = []
91- package_vulnerabilities = []
92- for package in packages :
93- purl = package .package_url
94- package_urls .append (purl )
95- package_vulnerabilities .append (serialize_package_vulnerability (package ))
96-
97- impacts = itertools .chain (
98- package .affected_in_impacts .all (),
99- package .fixed_in_impacts .all (),
100- )
101- for impact in impacts :
102- adv = impact .advisory
103- avid = adv .avid
104- if avid in exported_avids :
105- continue
106-
107- exported_avids .add (avid )
108- advisory = serialize_advisory (adv )
109- adv_file = f"vulnerabilities/{ avid } .yml"
110- write_file (
111- repo_path = repo_path ,
112- file_path = adv_file ,
113- data = advisory ,
114- )
115- files_to_commit .add (adv_file )
116-
94+ package_urls , package_vulnerabilities = get_package_vulnerabilities (packages )
95+ purl = package_urls [0 ]
11796 package_repo , datafile_path = self .data_cluster .get_datafile_repo_and_path (purl = purl )
11897 package_vulnerability_path = datafile_path .replace ("/purls.yml" , "/vulnerabilities.yml" )
11998 package_vulnerability_path = f"packages/{ package_repo } /{ package_vulnerability_path } "
@@ -135,7 +114,7 @@ def publish_vulnerabilities(self):
135114
136115 if len (files_to_commit ) > batch_size :
137116 if federatedcode .commit_and_push_changes (
138- commit_message = self .commit_message (commit_count ),
117+ commit_message = self .commit_message ("package vulnerabilities" , commit_count ),
139118 repo = self .repo ,
140119 files_to_commit = files_to_commit ,
141120 logger = self .log ,
@@ -145,15 +124,67 @@ def publish_vulnerabilities(self):
145124
146125 if files_to_commit :
147126 federatedcode .commit_and_push_changes (
148- commit_message = self .commit_message (commit_count , commit_count ),
127+ commit_message = self .commit_message (
128+ "package vulnerabilities" ,
129+ commit_count ,
130+ commit_count ,
131+ ),
149132 repo = self .repo ,
150133 files_to_commit = files_to_commit ,
151134 logger = self .log ,
152135 )
153136
154- self .log (
155- f"Federated { distinct_packages_count } package and { len (exported_avids )} vulnerabilities."
137+ self .log (f"Federated { distinct_packages_count } package vulnerabilities." )
138+
def publish_advisories(self):
    """
    Write one ``vulnerabilities/<avid>.yml`` file per advisory into the cloned
    repository and push the files to FederatedCode in batches.

    A push is attempted whenever more than ``batch_size`` files are pending,
    and one last time after the loop for whatever is left over.
    """
    batch_size = 2000
    chunk_size = 1000

    working_dir = Path(self.repo.working_dir)
    commit_number = 1
    pending_files = set()
    advisories = advisory_prefetched_qs()
    total_advisories = advisories.count()

    self.log(f"Exporting vulnerabilities for {total_advisories} advisory.")
    tracker = LoopProgress(
        total_iterations=total_advisories,
        progress_step=5,
        logger=self.log,
    )
    for advisory in tracker.iter(advisories.iterator(chunk_size=chunk_size)):
        serialized = serialize_advisory(advisory)
        target_file = f"vulnerabilities/{advisory.avid}.yml"
        write_file(
            repo_path=working_dir,
            file_path=target_file,
            data=serialized,
        )
        pending_files.add(target_file)

        # Keep accumulating until the batch threshold is exceeded.
        if len(pending_files) <= batch_size:
            continue

        pushed = federatedcode.commit_and_push_changes(
            commit_message=self.commit_message("advisories", commit_number),
            repo=self.repo,
            files_to_commit=pending_files,
            logger=self.log,
        )
        # The commit counter only advances when the push reports success.
        if pushed:
            commit_number += 1
        # NOTE(review): the pending set is reset after every push attempt,
        # successful or not - confirm this matches the intended nesting.
        pending_files.clear()

    # Flush the final, partially filled batch; it is labelled as the last
    # commit by passing the current count as the total.
    if pending_files:
        federatedcode.commit_and_push_changes(
            commit_message=self.commit_message(
                "advisories",
                commit_number,
                commit_number,
            ),
            repo=self.repo,
            files_to_commit=pending_files,
            logger=self.log,
        )

    self.log(f"Successfully federated {total_advisories} vulnerabilities.")
157188
158189 def delete_working_dir (self ):
159190 """Remove temporary working dir."""
@@ -163,33 +194,67 @@ def delete_working_dir(self):
def on_failure(self):
    """Failure hook: remove the temporary working dir via ``delete_working_dir``."""
    cleanup = self.delete_working_dir
    cleanup()
165196
def commit_message(self, item_type, commit_count, total_commit_count="many"):
    """
    Return the commit message produced by ``federatedcode.commit_message``
    for a batch of ``item_type`` items.

    ``item_type`` labels what is being pushed (callers in this pipeline pass
    "package vulnerabilities" or "advisories"), ``commit_count`` is the
    number of the current commit and ``total_commit_count`` is the total
    number of commits, defaulting to "many".
    """
    message_arguments = {
        "item_type": item_type,
        "commit_count": commit_count,
        "total_commit_count": total_commit_count,
    }
    return federatedcode.commit_message(**message_arguments)
172209
173210
def package_prefetched_qs():
    """
    Return a PackageV2 queryset ordered by type, namespace, name and version,
    restricted to the identity and PURL columns, with the affected-in and
    fixed-in impacts prefetched together with the ``avid`` of their advisory.

    The ordering matters to callers that group consecutive rows by
    (type, namespace, name).
    """

    def impacts_with_avid(relation):
        # Build fresh querysets for each relation; only the columns needed to
        # reach ``impact.advisory.avid`` are loaded.
        advisory_lookup = Prefetch(
            "advisory",
            queryset=AdvisoryV2.objects.only("id", "avid"),
        )
        impacts = ImpactedPackage.objects.only("id", "advisory_id")
        return Prefetch(
            relation,
            queryset=impacts.prefetch_related(advisory_lookup),
        )

    ordered_packages = PackageV2.objects.order_by("type", "namespace", "name", "version")
    slim_packages = ordered_packages.only(
        "id", "package_url", "type", "namespace", "name", "version"
    )
    return slim_packages.prefetch_related(
        impacts_with_avid("affected_in_impacts"),
        impacts_with_avid("fixed_in_impacts"),
    )
191236
192237
def advisory_prefetched_qs():
    """
    Return an AdvisoryV2 queryset with its impacted packages, aliases,
    references, severities and weaknesses prefetched.
    """
    related_names = (
        "impacted_packages",
        "aliases",
        "references",
        "severities",
        "weaknesses",
    )
    return AdvisoryV2.objects.prefetch_related(*related_names)
246+
247+
def get_package_vulnerabilities(packages):
    """
    Return a ``(package_urls, package_vulnerabilities)`` tuple of two parallel
    lists built from ``packages``: the PURL of each package and its serialized
    package vulnerability, in iteration order.
    """
    # ``packages`` may be a one-shot iterator (e.g. an itertools.groupby
    # group), so it is walked exactly once and split afterwards.
    purl_and_vulnerability = [
        (pkg.package_url, serialize_package_vulnerability(pkg)) for pkg in packages
    ]
    purls = [purl for purl, _ in purl_and_vulnerability]
    vulnerabilities = [vulnerability for _, vulnerability in purl_and_vulnerability]
    return purls, vulnerabilities
256+
257+
193258def serialize_package_vulnerability (package ):
194259 affected_by_vulnerabilities = [
195260 impact .advisory .avid for impact in package .affected_in_impacts .all ()
0 commit comments