88#
99
1010import logging
11+ from datetime import timedelta
1112from traceback import format_exc as traceback_format_exc
1213
1314from aboutcode .pipeline import LoopProgress
15+ from django .db .models import F
16+ from django .db .models import Q
17+ from django .utils import timezone
1418from fetchcode .package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
1519from packageurl import PackageURL
1620from univers .version_range import RANGE_CLASS_BY_SCHEMES
1923from vulnerabilities .models import ImpactedPackage
2024from vulnerabilities .models import ImpactedPackageAffecting
2125from vulnerabilities .models import PackageV2
26+ from vulnerabilities .models import PipelineSchedule
2227from vulnerabilities .pipelines import VulnerableCodePipeline
2328from vulnerabilities .pipes .fetchcode_utils import get_versions
2429from vulnerabilities .utils import update_purl_version
2530
2631
2732class UnfurlVersionRangePipeline (VulnerableCodePipeline ):
33+ """
34+ Unfurl affected version ranges by first processing those that have
35+ never been unfurled and then handling ranges that were last unfurled
36+ two or more days ago.
37+ """
2838
2939 pipeline_id = "unfurl_version_range_v2"
3040
41+ run_interval = 2
42+ run_priority = PipelineSchedule .ExecutionPriority .HIGH
43+
44+ # Days elapsed before version range is re-unfurled
45+ reunfurl_after_days = 2
46+
3147 @classmethod
3248 def steps (cls ):
3349 return (cls .unfurl_version_range ,)
3450
3551 def unfurl_version_range (self ):
36- impacted_packages = ImpactedPackage .objects .all ().order_by ("-created_at" )
37- impacted_packages_count = impacted_packages .count ()
38-
3952 processed_impacted_packages_count = 0
4053 processed_affected_packages_count = 0
4154 cached_versions = {}
55+ update_unfurl_date = []
56+ update_successful_unfurl_date = []
57+ update_batch_size = 5000
58+ chunk_size = 5000
59+
60+ impacted_packages = impacted_package_qs (cutoff_day = self .reunfurl_after_days )
61+ impacted_packages_count = impacted_packages .count ()
4262 self .log (f"Unfurl affected vers range for { impacted_packages_count :,d} ImpactedPackage." )
43- progress = LoopProgress (total_iterations = impacted_packages_count , logger = self .log )
44- for impact in progress .iter (impacted_packages ):
63+
64+ progress = LoopProgress (
65+ total_iterations = impacted_packages_count , progress_step = 5 , logger = self .log
66+ )
67+ for impact in progress .iter (impacted_packages .iterator (chunk_size = chunk_size )):
68+ update_unfurl_date .append (impact .pk )
4569 purl = PackageURL .from_string (impact .base_purl )
4670 if not impact .affecting_vers or not any (
4771 c in impact .affecting_vers for c in ("<" , ">" , "!" )
4872 ):
73+ update_successful_unfurl_date .append (impact .pk )
4974 continue
5075 if purl .type not in FETCHCODE_SUPPORTED_ECOSYSTEMS :
5176 continue
5277 if purl .type not in RANGE_CLASS_BY_SCHEMES :
5378 continue
5479
55- versions = get_purl_versions (purl , cached_versions ) or []
80+ versions = get_purl_versions (purl , cached_versions , self . log ) or []
5681 affected_purls = get_affected_purls (
5782 versions = versions ,
58- affecting_vers = impact .affecting_vers ,
59- base_purl = purl ,
83+ impact = impact ,
6084 logger = self .log ,
6185 )
6286 if not affected_purls :
@@ -68,14 +92,31 @@ def unfurl_version_range(self):
6892 relation = ImpactedPackageAffecting ,
6993 logger = self .log ,
7094 )
95+ update_successful_unfurl_date .append (impact .pk )
7196 processed_impacted_packages_count += 1
7297
98+ if len (update_unfurl_date ) > update_batch_size :
99+ ImpactedPackage .objects .filter (pk__in = update_unfurl_date ).update (
100+ last_range_unfurl_at = timezone .now ()
101+ )
102+ ImpactedPackage .objects .filter (pk__in = update_successful_unfurl_date ).update (
103+ last_successful_range_unfurl_at = timezone .now ()
104+ )
105+ update_unfurl_date .clear ()
106+ update_successful_unfurl_date .clear ()
107+
108+ ImpactedPackage .objects .filter (pk__in = update_unfurl_date ).update (
109+ last_range_unfurl_at = timezone .now ()
110+ )
111+ ImpactedPackage .objects .filter (pk__in = update_successful_unfurl_date ).update (
112+ last_successful_range_unfurl_at = timezone .now ()
113+ )
73114 self .log (f"Successfully processed { processed_impacted_packages_count :,d} ImpactedPackage." )
74115 self .log (f"{ processed_affected_packages_count :,d} new Impact-Package relation created." )
75116
76117
77- def get_affected_purls (versions , affecting_vers , base_purl , logger ):
78- affecting_version_range = VersionRange .from_string (affecting_vers )
118+ def get_affected_purls (versions , impact , logger ):
119+ affecting_version_range = VersionRange .from_string (impact . affecting_vers )
79120 version_class = affecting_version_range .version_class
80121
81122 try :
@@ -84,7 +125,7 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger):
84125 versions = [version_class (v ) for v in versions ]
85126 except Exception as e :
86127 logger (
87- f"Error while parsing versions for { base_purl !s} : { e !r} \n { traceback_format_exc ()} " ,
128+ f"Error while parsing versions for { impact . base_purl !s} : { e !r} \n { traceback_format_exc ()} " ,
88129 level = logging .ERROR ,
89130 )
90131 return
@@ -95,21 +136,24 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger):
95136 if version in affecting_version_range :
96137 affected_purls .append (
97138 update_purl_version (
98- purl = base_purl ,
139+ purl = impact . base_purl ,
99140 version = str (version ),
100141 )
101142 )
102143 except Exception as e :
103144 logger (
104- f"Error while checking { version !s} in { affecting_version_range !s} : { e !r} \n { traceback_format_exc ()} " ,
145+ (
146+ f"Error while checking { version !s} in { affecting_version_range !s} for "
147+ f"advisory { impact .advisory .avid } : { e !r} \n { traceback_format_exc ()} "
148+ ),
105149 level = logging .ERROR ,
106150 )
107151 return affected_purls
108152
109153
def get_purl_versions(purl, cached_versions, logger):
    """
    Return the known versions for ``purl``, using ``cached_versions`` as a
    per-run cache keyed by ``purl``.

    On a cache miss the versions are fetched with ``get_versions(purl, logger)``
    and stored in ``cached_versions`` only when the result is not ``None``, so a
    ``None`` result is looked up again on the next call for the same ``purl``.

    Always return a list: an empty list is returned when nothing is cached or
    the cached value is falsy.
    """
    if purl not in cached_versions:
        fetched_versions = get_versions(purl, logger)
        # Only remember real answers (including an empty list); ``None`` is
        # deliberately left uncached so a later call can try again.
        if fetched_versions is not None:
            cached_versions[purl] = fetched_versions
    return cached_versions.get(purl) or []
@@ -135,3 +179,16 @@ def bulk_create_with_m2m(purls, impact, relation, logger):
135179 return 0
136180
137181 return len (relations )
182+
183+
def impacted_package_qs(cutoff_day=2):
    """
    Return a queryset of ImpactedPackage rows that are due for range unfurling.

    A row is selected when ``last_range_unfurl_at`` is NULL or is at least
    ``cutoff_day`` days in the past, and its ``affecting_vers`` is neither NULL
    nor an empty string. Rows with a NULL ``last_range_unfurl_at`` come first,
    followed by the remaining rows in ascending ``last_range_unfurl_at`` order.
    Only the ``pk``, ``affecting_vers``, ``advisory`` and ``base_purl`` fields
    are loaded eagerly.
    """
    threshold = timezone.now() - timedelta(days=cutoff_day)

    never_unfurled = Q(last_range_unfurl_at__isnull=True)
    unfurled_before_threshold = Q(last_range_unfurl_at__lte=threshold)
    vers_is_set = Q(affecting_vers__isnull=False)
    vers_is_blank = Q(affecting_vers="")

    due_for_unfurl = (never_unfurled | unfurled_before_threshold) & vers_is_set & ~vers_is_blank
    oldest_first = F("last_range_unfurl_at").asc(nulls_first=True)

    queryset = ImpactedPackage.objects.filter(due_for_unfurl)
    queryset = queryset.order_by(oldest_first)
    return queryset.only("pk", "affecting_vers", "advisory", "base_purl")
0 commit comments