1414from operator import attrgetter
1515from pathlib import Path
1616
17- import saneyaml
1817from aboutcode .pipeline import LoopProgress
1918from django .conf import settings
20- from django .db .models import Prefetch
2119from django .utils import timezone
2220
2321from aboutcode .federated import DataFederation
24- from vulnerabilities .models import AdvisoryV2
25- from vulnerabilities .models import ImpactedPackage
26- from vulnerabilities .models import ImpactedPackageAffecting
27- from vulnerabilities .models import ImpactedPackageFixedBy
28- from vulnerabilities .models import PackageV2
2922from vulnerabilities .pipelines import VulnerableCodePipeline
23+ from vulnerabilities .pipes import export
3024from vulnerabilities .pipes import federatedcode
3125from vulnerabilities .utils import load_json
3226
@@ -99,7 +93,7 @@ def publish_package_related_advisories(self):
9993 chunk_size = 500
10094 files_to_commit = set ()
10195
102- packages_count , package_qs = package_prefetched_qs (self .checkpoint )
96+ packages_count , package_qs = export . package_prefetched_qs (self .checkpoint )
10397 grouped_packages = itertools .groupby (
10498 package_qs .iterator (chunk_size = chunk_size ),
10599 key = attrgetter ("type" , "namespace" , "name" , "version" ),
@@ -112,11 +106,11 @@ def publish_package_related_advisories(self):
112106 logger = self .log ,
113107 )
114108 for _ , packages in progress .iter (grouped_packages ):
115- purl , package_vulnerabilities = get_package_related_advisory (packages )
109+ purl , package_vulnerabilities = export . get_package_related_advisory (packages )
116110 package_repo , datafile_path = self .data_cluster .get_datafile_repo_and_path (purl )
117111 package_vulnerability_path = f"packages/{ package_repo } /{ datafile_path } "
118112
119- write_file (
113+ export . write_file (
120114 repo_path = self .repo_path ,
121115 file_path = package_vulnerability_path ,
122116 data = package_vulnerabilities ,
@@ -156,7 +150,7 @@ def publish_advisories(self):
156150 batch_size = 4000
157151 chunk_size = 1000
158152 files_to_commit = set ()
159- advisory_qs = advisory_prefetched_qs (self .checkpoint )
153+ advisory_qs = export . advisory_prefetched_qs (self .checkpoint )
160154 advisory_count = advisory_qs .count ()
161155
162156 self .log (f"Exporting { advisory_count } advisory." )
@@ -166,9 +160,9 @@ def publish_advisories(self):
166160 logger = self .log ,
167161 )
168162 for advisory in progress .iter (advisory_qs .iterator (chunk_size = chunk_size )):
169- advisory_data = serialize_advisory (advisory )
163+ advisory_data = export . serialize_advisory (advisory )
170164 adv_file = f"advisories/{ advisory .avid } .yml"
171- write_file (
165+ export . write_file (
172166 repo_path = self .repo_path ,
173167 file_path = adv_file ,
174168 data = advisory_data ,
@@ -232,135 +226,3 @@ def commit_message(
232226 commit_count = commit_count ,
233227 total_commit_count = total_commit_count ,
234228 )
235-
236-
def package_prefetched_qs(checkpoint):
    """
    Return a two-tuple of ``(count, queryset)`` of PackageV2 objects.

    The queryset is ordered by the purl component fields (type, namespace,
    name, version) so callers can group consecutive rows by purl, and has the
    affecting/fixing impact relations prefetched down to each advisory's
    ``avid`` to avoid per-package queries.

    When ``checkpoint`` is truthy, restrict the queryset to packages whose
    affecting or fixing impact link was created at or after ``checkpoint``,
    and report the count of those updated packages.
    """
    count = None
    qs = (
        PackageV2.objects.order_by("type", "namespace", "name", "version")
        .only("package_url", "type", "namespace", "name", "version")
        .prefetch_related(
            Prefetch(
                "affected_in_impacts",
                queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
                    Prefetch(
                        "advisory",
                        queryset=AdvisoryV2.objects.only("avid"),
                    )
                ),
            ),
            Prefetch(
                "fixed_in_impacts",
                queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
                    Prefetch(
                        "advisory",
                        queryset=AdvisoryV2.objects.only("avid"),
                    )
                ),
            ),
        )
    )

    if checkpoint:
        affected_package_ids_qs = (
            ImpactedPackageAffecting.objects.filter(created_at__gte=checkpoint)
            .values_list("package_id", flat=True)
            .distinct()
        )
        fixing_package_ids_qs = (
            ImpactedPackageFixedBy.objects.filter(created_at__gte=checkpoint)
            .values_list("package_id", flat=True)
            .distinct()
        )

        updated_packages = affected_package_ids_qs.union(fixing_package_ids_qs)
        count = updated_packages.count()
        qs = qs.filter(id__in=updated_packages)

    # Explicit ``is None`` check: a legitimate zero count from the checkpoint
    # branch is falsy and would otherwise trigger a redundant (and potentially
    # expensive) COUNT query against the filtered queryset.
    if count is None:
        count = qs.count()

    return count, qs
283-
284-
def get_package_related_advisory(packages):
    """
    Return a two-tuple of ``(purl, entries)`` for an iterable of package
    objects, where ``entries`` is a list of mappings, one per package, each
    listing the sorted advisory ids affecting and fixing that package.
    """
    entries = []
    for pkg in packages:
        affecting = sorted(impact.advisory.avid for impact in pkg.affected_in_impacts.all())
        fixing = sorted(impact.advisory.avid for impact in pkg.fixed_in_impacts.all())
        entries.append(
            {
                "purl": pkg.package_url,
                "affected_by_advisories": affecting,
                "fixing_advisories": fixing,
            }
        )

    # ``pkg`` deliberately leaks from the loop: presumably every package in
    # the group shares the same purl fields (the caller groups by
    # type/namespace/name/version), so the last one identifies the group.
    return pkg.package_url, entries
301-
302-
def advisory_prefetched_qs(checkpoint):
    """
    Return an AdvisoryV2 queryset ordered by collection date with the related
    impact, alias, reference, severity and weakness records prefetched.

    When ``checkpoint`` is truthy, limit the queryset to advisories collected
    at or after ``checkpoint``.
    """
    related = ("impacted_packages", "aliases", "references", "severities", "weaknesses")
    qs = AdvisoryV2.objects.order_by("date_collected").prefetch_related(*related)
    if checkpoint:
        qs = qs.filter(date_collected__gte=checkpoint)
    return qs
313-
314-
def serialize_severity(severity):
    """Return a plain data mapping serialized from a severity object."""
    return dict(
        score=severity.value,
        scoring_system=severity.scoring_system,
        scoring_elements=severity.scoring_elements,
        # published_at may be a datetime; stringify for YAML-friendly output.
        published_at=str(severity.published_at),
        url=severity.url,
    )
323-
324-
def serialize_references(reference):
    """Return a plain data mapping serialized from a reference object."""
    # Mapping keys mirror the attribute names, in this order.
    fields = ("url", "reference_type", "reference_id")
    return {field: getattr(reference, field) for field in fields}
331-
332-
def serialize_advisory(advisory):
    """Return a plain data mapping serialized from advisory object."""
    impacts = []
    for impact in advisory.impacted_packages.all():
        impacts.append(
            {
                "purl": impact.base_purl,
                "affected_versions": impact.affecting_vers,
                "fixed_versions": impact.fixed_vers,
            }
        )

    return {
        "advisory_id": advisory.advisory_id,
        "datasource_id": advisory.avid,
        "datasource_url": advisory.url,
        "aliases": sorted(alias.alias for alias in advisory.aliases.all()),
        "summary": advisory.summary,
        "impacted_packages": impacts,
        "severities": [serialize_severity(sev) for sev in advisory.severities.all()],
        "weaknesses": [weakness.cwe for weakness in advisory.weaknesses.all()],
        "references": [serialize_references(ref) for ref in advisory.references.all()],
    }
359-
360-
def write_file(repo_path, file_path, data):
    """
    Serialize ``data`` to YAML and write it to ``file_path`` under
    ``repo_path``, creating any missing parent directories.
    """
    target = repo_path / file_path
    target.parent.mkdir(parents=True, exist_ok=True)
    # Path.write_text opens in text mode "w", matching the original open().
    target.write_text(saneyaml.dump(data), encoding="utf-8")
0 commit comments