1212from collections import namedtuple
1313from pathlib import Path
1414
15- # Importing these types using in the string type hints is helpful for some
16- # editors to actually support hinting for these types.
17- # pylint: disable=unused-import
18- from typing import Any , Dict , List , Set , Tuple , Type
15+ from typing import Any , Dict , List , Set , Tuple , Self
1916
2017import aiohttp
2118import jinja2
@@ -38,7 +35,12 @@ class CacheItem:
3835 An item stored in the URL/ETag cache.
3936 """
4037
41- def __init__ (self , url , etag , last_modified , file_hash ):
38+ url : str
39+ etag : str
40+ last_modified : str
41+ hash : str
42+
43+ def __init__ (self , url : str , etag : str , last_modified : str , file_hash : str ):
4244 self .url = url
4345 self .etag = etag
4446 self .last_modified = last_modified
@@ -63,7 +65,7 @@ def to_json(self) -> Dict[str, Dict[str, str]]:
6365 }
6466
6567 @classmethod
66- def from_json (cls , data ) -> " List[Type[CacheItem]]" :
68+ def from_json (cls , data ) -> List [Self ] :
6769 """
6870 Load an item from the cache.
6971 """
@@ -76,13 +78,13 @@ def from_json(cls, data) -> "List[Type[CacheItem]]":
7678
7779 @classmethod
7880 async def from_http_response (
79- cls , response : aiohttp .ClientResponse
80- ) -> "Type[CacheItem]" :
81+ cls , response : aiohttp .ClientResponse , request_url : str
82+ ) -> Self :
8183 """
8284 Parse a cache item from an HTTP response
8385 """
8486 headers = response .headers
85- url = response . url
87+ url = request_url
8688 data = await response .read ()
8789 file_hash = hashlib .sha1 (data ).hexdigest ()
8890
@@ -94,6 +96,8 @@ class Cache:
9496 A cache of all downloaded items.
9597 """
9698
99+ _items : List [CacheItem ]
100+
97101 def __init__ (self ):
98102 self ._items = []
99103
@@ -106,6 +110,11 @@ def __setitem__(self, url, item):
106110 self ._items .remove (match )
107111 self ._items .append (item )
108112
113+ def __delitem__ (self , url ):
114+ if matches := [cached for cached in self ._items if cached .url == url ]:
115+ for match in matches :
116+ self ._items .remove (match )
117+
109118 def __bool__ (self ):
110119 return bool (self ._items )
111120
@@ -119,7 +128,7 @@ def __repr__(self):
119128 return f"<Cache items={ self ._items !r} >"
120129
121130 @classmethod
122- def from_json (cls , data : Dict [str , Dict [str , str ]]) -> 'Type[Cache]' :
131+ def from_json (cls , data : Dict [str , Dict [str , str ]]) -> Self :
123132 """
124133 Load a cache from a dictionary.
125134 """
@@ -160,6 +169,7 @@ async def check_software_hash(
160169 """
161170
162171 headers = {}
172+ cache_item = None
163173 if check_data .url in cache :
164174 cache_item = cache [check_data .url ]
165175 if cache_item .etag :
@@ -169,14 +179,27 @@ async def check_software_hash(
169179
170180 try :
171181 async with session .get (
172- check_data .url , headers = headers , timeout = 600
182+ check_data .url ,
183+ headers = headers ,
184+ timeout = aiohttp .ClientTimeout (total = 600 ),
173185 ) as response :
174186 if response .status == 200 :
175- cache_item = await CacheItem .from_http_response (response )
187+ cache_item = await CacheItem .from_http_response (
188+ response , check_data .url
189+ )
190+ cache [check_data .url ] = cache_item
191+ elif response .status == 304 :
192+ # The cached data matched (Not Modified) so there's nothing to do
176193 cache [check_data .url ] = cache_item
177- except aiohttp .ClientError :
194+ else :
195+ print (
196+ f"{ check_data .source_file } : Fetch failed { check_data .url } ({ response .status } )" ,
197+ file = sys .stderr ,
198+ )
199+ return False
200+ except aiohttp .ClientError as client_error :
178201 print (
179- f"{ check_data .source_file } : Unable to download { check_data .url } " ,
202+ f"{ check_data .source_file } : Unable to download { check_data .url } ( { client_error } ) " ,
180203 file = sys .stderr ,
181204 )
182205 return False
@@ -205,7 +228,9 @@ def process_variable(source: str, variable: str, value: str) -> str:
205228 return template .render (** {variable : value })
206229
207230
208- def urls_for_file (file : str , ansible_data : Dict [str , Any ], lookup_data : Dict [str , Any ]):
231+ def urls_for_file (
232+ file : str , ansible_data : Dict [str , Any ], lookup_data : Dict [str , Any ]
233+ ) -> set [CheckData ]:
209234 """
210235 Return a set of all URLs in the given file key in the URLs mapping.
211236 """
@@ -240,6 +265,15 @@ def load_cache() -> Cache:
240265 return Cache ()
241266
242267
def trim_cache(cache, urls):
    """
    Trim the cache to only keep current URLs.

    ``cache`` must support iteration and ``del cache[url]``. ``urls`` is an
    iterable of the URL strings that are still in use; every cache entry
    whose URL is not among them is deleted from ``cache`` in place.
    """
    # Build the lookup set once so each membership test is O(1).
    current = set(urls)

    # Iterate over a snapshot: deleting from the cache while iterating it
    # directly would skip entries (list-backed) or raise RuntimeError
    # (dict-backed).
    for item in list(cache):
        # NOTE(review): whether iterating the cache yields URL strings or
        # item objects is not visible here. Accept both by using the item's
        # ``url`` attribute when it has one -- confirm against Cache.__iter__.
        url = getattr(item, "url", item)
        if url not in current:
            del cache[url]
275+
276+
243277def write_cache (cache : Cache ):
244278 """
245279 Save the cache to disk.
@@ -249,7 +283,7 @@ def write_cache(cache: Cache):
249283 json .dump (cache .to_json (), cache_file , indent = 4 )
250284
251285
252- def get_urls () -> Tuple [Set [str ], int ]:
286+ def get_urls () -> Tuple [Set [CheckData ], int ]:
253287 """
254288 Load the list of URLs to validate hashes for as well as the number of errors
255289 encountered parsing the list.
@@ -292,9 +326,10 @@ async def main():
292326 for result in await asyncio .gather (* tasks ):
293327 # This relies on the fact that True gets coerced to 1 and that False gets
294328 # coerced to 0 when converted to an integer. The check method returns True
295- # on success and we need to count failures.
329+ # on success, and we need to count failures.
296330 errors += not result
297331
332+ trim_cache (cache , [check_data .url for check_data in to_check ])
298333 write_cache (cache )
299334 print (f"Wrote cache: { cache } " )
300335 return errors
0 commit comments